4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
17 FS::UID->install_callback( sub {
23 @EXPORT_OK = qw( get_district );
27 FS::Misc::Geo - routines to fetch geographic information
33 =item get_censustract_ffiec LOCATION YEAR
35 Given a location hash (see L<FS::location_Mixin>) and a census map year,
36 returns a census tract code (consisting of state, county, and tract
37 codes) or an error message.
41 sub get_censustract_ffiec {
47 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
51 warn Dumper($location, $year) if $DEBUG;
53 # the old FFIEC geocoding service was shut down December 1, 2014.
54 # welcome to the future.
55 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
56 # build the single-line query
57 my $single_line = join(', ', $location->{address1},
61 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
62 my $request = POST( $url,
63 'Content-Type' => 'application/json; charset=utf-8',
64 'Accept' => 'application/json',
65 'Content' => encode_json($hashref)
68 my $ua = new LWP::UserAgent;
69 my $res = $ua->request( $request );
74 if (!$res->is_success) {
76 die "Census tract lookup error: ".$res->message;
81 my $content = eval { decode_json($res->content) };
82 die "Census tract JSON error: $@\n" if $@;
84 if ( !exists $content->{d}->{sStatus} ) {
85 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
87 if ( $content->{d}->{sStatus} eq 'Y' ) {
89 # this also contains the (partial) standardized address, correct zip
90 # code, coordinates, etc., and we could get all of them, but right now
91 # we only want the census tract
92 my $tract = join('', $content->{d}->{sStateCode},
93 $content->{d}->{sCountyCode},
94 $content->{d}->{sTractCode});
99 my $error = $content->{d}->{sMsg}
100 || 'FFIEC lookup failed, but with no status message.';
106 #sub get_district_methods {
108 # 'wa_sales' => 'Washington sales tax',
111 =item get_district LOCATION METHOD
113 For the location hash in LOCATION, using lookup method METHOD, fetch
114 tax district information. Currently the only available method is
115 'wa_sales' (the Washington Department of Revenue sales tax lookup).
117 Returns a hash reference containing the following fields:
122 - exempt_amount (currently zero)
123 - city, county, state, country (from
125 The intent is that you can assign this to an L<FS::cust_main_county>
126 object and insert it if there's not yet a tax rate defined for that
129 get_district will die on error.
137 my $location = shift;
138 my $method = shift or return '';
139 warn Dumper($location, $method) if $DEBUG;
144 my $location = shift;
146 return '' if $location->{state} ne 'WA';
148 my $return = { %$location };
149 $return->{'exempt_amount'} = 0.00;
151 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
152 my $ua = new LWP::UserAgent;
154 my $delim = '<|>'; # yes, <|>
155 my $year = (localtime)[5] + 1900;
156 my $month = (localtime)[4] + 1;
157 my @zip = split('-', $location->{zip});
160 'TaxType=S', #sales; 'P' = property
161 'Src=0', #does something complicated
163 'Addr='.uri_escape($location->{address1}),
164 'City='.uri_escape($location->{city}),
166 'Zip1='.($zip[1] || ''), #optional
173 my $query_string = join($delim, @args );
174 $url .= "?$query_string";
175 warn "\nrequest: $url\n\n" if $DEBUG > 1;
177 my $res = $ua->request( GET( "$url?$query_string" ) );
182 if ($res->code ne '200') {
183 $error = $res->message;
186 my $content = $res->content;
187 my $p = new HTML::TokeParser \$content;
189 while ( my $t = $p->get_tag('script') ) {
190 my $u = $p->get_token; #either enclosed text or the </script> tag
191 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
196 if ( $js ) { #found it
197 # strip down to the quoted string, which contains escaped single quotes.
198 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
199 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
200 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
202 $p = new HTML::TokeParser \$js;
203 TD: while ( my $td = $p->get_tag('td') ) {
204 while ( my $u = $p->get_token ) {
205 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
206 next if $u->[0] ne 'T'; # skip non-text
209 if ( lc($text) eq 'location code' ) {
210 $p->get_tag('td'); # skip to the next column
212 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
213 $return->{'district'} = $u->[1];
215 elsif ( lc($text) eq 'total tax rate' ) {
218 $u = $p->get_token until $u->[0] eq 'T';
219 $return->{'tax'} = $u->[1];
225 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
226 $return->{'tax'} *= 100; #percentage
227 warn Dumper($return) if $DEBUG > 1;
231 $error = 'district code/tax rate not found';
235 $error = "failed to parse document";
238 die "WA tax district lookup error: $error";
241 ###### USPS Standardization ######
243 sub standardize_usps {
246 eval "use Business::US::USPS::WebTools::AddressStandardization";
249 my $location = shift;
250 if ( $location->{country} ne 'US' ) {
252 warn "standardize_usps not for use in country ".$location->{country}."\n";
253 $location->{addr_clean} = '';
256 my $userid = $conf->config('usps_webtools-userid');
257 my $password = $conf->config('usps_webtools-password');
258 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
260 Password => $password,
262 } ) or die "error starting USPS WebTools\n";
264 my($zip5, $zip4) = split('-',$location->{'zip'});
267 FirmName => $location->{company},
268 Address2 => $location->{address1},
269 Address1 => $location->{address2},
270 City => $location->{city},
271 State => $location->{state},
275 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
278 my $hash = $verifier->verify_address( %usps_args );
280 warn $verifier->response
283 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
284 if $verifier->is_error;
286 my $zip = $hash->{Zip5};
287 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
289 { company => $hash->{FirmName},
290 address1 => $hash->{Address2},
291 address2 => $hash->{Address1},
292 city => $hash->{City},
293 state => $hash->{State},
299 ###### U.S. Census Bureau ######
301 sub standardize_uscensus {
303 my $location = shift;
304 my $log = FS::Log->new('FS::Misc::Geo::standardize_uscensus');
305 $log->debug(join("\n", @{$location}{'address1', 'city', 'state', 'zip'}));
307 eval "use Geo::USCensus::Geocoding";
310 if ( $location->{country} ne 'US' ) {
312 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
313 $location->{addr_clean} = '';
318 street => $location->{address1},
319 city => $location->{city},
320 state => $location->{state},
321 zip => $location->{zip},
322 debug => ($DEBUG || 0),
325 my $result = Geo::USCensus::Geocoding->query($request);
326 if ( $result->is_match ) {
327 # unfortunately we get the address back as a single line
328 $log->debug($result->address);
329 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
335 address2 => uc($location->{address2}),
336 latitude => $result->latitude,
337 longitude => $result->longitude,
338 censustract => $result->censustract,
341 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
343 } elsif ( $result->match_level eq 'Tie' ) {
344 die "Geocoding was not able to identify a unique matching address.\n";
345 } elsif ( $result->match_level ) {
346 die "Geocoding did not find a matching address.\n";
348 $log->error($result->error_message);
349 return; # for internal errors, don't return anything
353 ####### EZLOCATE (obsolete) #######
355 sub _tomtom_query { # helper method for the below
357 my $result = Geo::TomTom::Geocoding->query(%args);
358 die "TomTom geocoding error: ".$result->message."\n"
359 unless ( $result->is_success );
360 my ($match) = $result->locations;
361 my $type = $match->{type};
362 # match levels below "intersection" should not be considered clean
363 my $clean = ($type eq 'addresspoint' ||
366 $type eq 'intersection'
368 warn "tomtom returned $type match\n" if $DEBUG;
369 warn Dumper($match) if $DEBUG > 1;
373 sub standardize_tomtom {
374 # post-2013 TomTom API
375 # much better, but incompatible with ezlocate
377 my $location = shift;
378 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
381 my $key = $conf->config('tomtom-userid')
382 or die "no tomtom-userid configured\n";
384 my $country = code2country($location->{country});
385 my ($address1, $address2) = ($location->{address1}, $location->{address2});
389 $address1 =~ s/^\s+//;
390 $address1 =~ s/\s+$//;
391 $address2 =~ s/^\s+//;
392 $address2 =~ s/\s+$//;
394 # try to fix some cases of the address fields being switched
395 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
396 $address2 = $address1;
397 $address1 = $location->{address2};
399 # parse sublocation part (unit/suite/apartment...) and clean up
400 # non-sublocation address2
401 ($subloc, $address2) =
402 subloc_address2($address1, $address2, $location->{country});
403 # ask TomTom to standardize address1:
407 L => $location->{city},
408 AA => $location->{state},
409 PC => $location->{zip},
410 CC => country2code($country, LOCALE_CODE_ALPHA_3),
413 my ($match, $clean) = _tomtom_query(%args);
415 if (!$match or !$clean) {
416 # Then try cleaning up the input; TomTom is picky about junk in the
417 # address. Any of these can still be a clean match.
418 my $h = Geo::StreetAddress::US->parse_location($address1);
419 # First conservatively:
420 if ( $h->{sec_unit_type} ) {
421 my $strip = '\s+' . $h->{sec_unit_type};
422 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
424 $args{T} =~ s/$strip//;
425 ($match, $clean) = _tomtom_query(%args);
427 if ( !$match or !$clean ) {
428 # Then more aggressively:
429 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
430 ($match, $clean) = _tomtom_query(%args);
434 if ( !$match or !$clean ) { # partial matches are not useful
435 die "Address not found\n";
438 if ( defined $match->{censusTract} ) {
439 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
440 join('.', $match->{censusTract} =~ /(....)(..)/);
443 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
444 $address1 .= $match->{street} if $match->{street};
445 $address1 .= ' '.$subloc if $subloc;
446 $address1 = uc($address1); # USPS standards
449 address1 => $address1,
450 address2 => $address2,
451 city => uc($match->{city}),
452 state => uc($location->{state}),
453 country => uc($location->{country}),
454 zip => ($match->{standardPostalCode} || $match->{postcode}),
455 latitude => $match->{latitude},
456 longitude => $match->{longitude},
457 censustract => $tract,
458 addr_clean => $clean,
462 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
464 Given 'address1' and 'address2' strings, extract the sublocation part
465 (from either one) and return it. If the sublocation was found in ADDRESS1,
466 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
467 contain something relevant.
472 # Postal Addressing Standards, Appendix C
473 # (plus correction of "hanger" to "hangar")
501 # Canada Post Addressing Guidelines 4.3
512 sub subloc_address2 {
513 # Some things seen in the address2 field:
515 # The complete address (with address1 containing part of the company name,
516 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
519 # try to parse sublocation parts from address1; if they are present we'll
520 # append them back to address1 after standardizing
522 my ($addr1, $addr2, $country) = map uc, @_;
523 my $dict = $subloc_forms{$country} or return('', $addr2);
525 my $found_in = 0; # which address is the sublocation
528 # patterns to try to parse
530 "$addr1 Nullcity, CA"
532 $h = Geo::StreetAddress::US->parse_location($addr1);
533 last if exists($h->{sec_unit_type});
535 if (exists($h->{sec_unit_type})) {
542 "$addr1, $addr2 Nullcity, CA"
544 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
545 last if exists($h->{sec_unit_type});
547 if (exists($h->{sec_unit_type})) {
552 $subloc = $h->{sec_unit_type};
553 # special case: do not combine P.O. box sublocs with address1
554 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
555 if ( $found_in == 2 ) {
556 $addr2 = "PO BOX ".$h->{sec_unit_num};
557 } # else it's in addr1, and leave it alone
559 } elsif ( exists($dict->{$subloc}) ) {
560 # substitute the official abbreviation
561 $subloc = $dict->{$subloc};
563 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
564 } # otherwise $subloc = ''
566 if ( $found_in == 2 ) {
567 # address2 should be fully combined into address1
568 return ($subloc, '');
570 # else address2 is not the canonical sublocation, but do our best to
574 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
576 # remove all punctuation and spaces
577 foreach my $w (split(/\W+/, $addr2)) {
578 if ( exists($dict->{$w}) ) {
579 push @words, $dict->{$w};
583 my $result = join(' ', @words);
584 # correct spacing of pound sign + number
585 $result =~ s/NUMBER(\d)/# $1/;
586 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
589 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
593 sub standardize_melissa {
595 my $location = shift;
598 eval "use Geo::Melissa::WebSmart";
601 my $id = $conf->config('melissa-userid')
602 or die "no melissa-userid configured\n";
603 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
607 a1 => $location->{address1},
608 a2 => $location->{address2},
609 city => $location->{city},
610 state => $location->{state},
611 ctry => $location->{country},
612 zip => $location->{zip},
615 my $result = Geo::Melissa::WebSmart->query($request);
616 if ( $result->code =~ /AS01/ ) { # always present on success
617 my $addr = $result->address;
618 warn Dumper $addr if $DEBUG > 1;
620 address1 => $addr->{Address1},
621 address2 => $addr->{Address2},
622 city => $addr->{City}->{Name},
623 state => $addr->{State}->{Abbreviation},
624 country => $addr->{Country}->{Abbreviation},
626 latitude => $addr->{Latitude},
627 longitude => $addr->{Longitude},
630 if ( $addr->{Census}->{Tract} ) {
631 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
632 # insert decimal point two digits from the end
633 $censustract =~ s/(\d\d)$/\.$1/;
634 $out->{censustract} = $censustract;
635 $out->{censusyear} = $conf->config('census_year');
637 # we could do a lot more nuanced reporting of the warning/status codes,
638 # but the UI doesn't support that yet.
641 die $result->status_message;