4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
18 FS::UID->install_callback( sub {
24 @EXPORT_OK = qw( get_district );
28 FS::Misc::Geo - routines to fetch geographic information
34 =item get_censustract_ffiec LOCATION YEAR
36 Given a location hash (see L<FS::location_Mixin>) and a census map year,
37 returns a census tract code (consisting of state, county, and tract
38 codes) or an error message.
40 Data source: Federal Financial Institutions Examination Council
42 Note: This is the old method for pre-2022 (census year 2020) reporting.
46 sub get_censustract_ffiec {
52 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
56 warn Dumper($location, $year) if $DEBUG;
58 # the old FFIEC geocoding service was shut down December 1, 2014.
59 # welcome to the future.
60 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
61 # build the single-line query
62 my $single_line = join(', ', $location->{address1},
66 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
67 my $request = POST( $url,
68 'Content-Type' => 'application/json; charset=utf-8',
69 'Accept' => 'application/json',
70 'Content' => encode_json($hashref)
73 my $ua = new LWP::UserAgent;
74 my $res = $ua->request( $request );
79 if (!$res->is_success) {
81 die "Census tract lookup error: ".$res->message;
86 my $content = eval { decode_json($res->content) };
87 die "Census tract JSON error: $@\n" if $@;
89 if ( !exists $content->{d}->{sStatus} ) {
90 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
92 if ( $content->{d}->{sStatus} eq 'Y' ) {
94 # this also contains the (partial) standardized address, correct zip
95 # code, coordinates, etc., and we could get all of them, but right now
96 # we only want the census tract
97 my $tract = join('', $content->{d}->{sStateCode},
98 $content->{d}->{sCountyCode},
99 $content->{d}->{sTractCode});
104 my $error = $content->{d}->{sMsg}
105 || 'FFIEC lookup failed, but with no status message.';
111 =item get_censustract_uscensus LOCATION YEAR
113 Given a location hash (see L<FS::location_Mixin>) and a census map year,
114 returns a census tract code (consisting of state, county, tract, and block
115 codes) or an error message.
117 Data source: US Census Bureau
119 This is the new method for 2022+ (census year 2020) reporting.
123 sub get_censustract_uscensus {
125 my $location = shift;
126 my $year = shift || 2020;
128 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
132 warn Dumper($location, $year) if $DEBUG;
134 my $url = 'https://geocoding.geo.census.gov/geocoder/geographies/address?';
136 my $address1 = $location->{address1};
137 $address1 =~ s/(apt|ste|suite|unit)[\s\d]\w*\s*$//i;
141 city => $location->{city},
142 state => $location->{state},
143 benchmark => 'Public_AR_Current',
144 vintage => 'Census'.$year.'_Current',
148 my $full_url = URI->new($url);
149 $full_url->query_form($query_hash);
151 warn "Full Request URL: \n".$full_url if $DEBUG;
153 my $ua = new LWP::UserAgent;
154 my $res = $ua->get( $full_url );
156 warn $res->as_string if $DEBUG > 2;
158 if (!$res->is_success) {
159 die 'Census tract lookup error: '.$res->message;
163 my $content = eval { decode_json($res->content) };
164 die "Census tract JSON error: $@\n" if $@;
166 warn Dumper($content) if $DEBUG;
168 if ( $content->{result}->{addressMatches} ) {
170 my $tract = $content->{result}->{addressMatches}[0]->{geographies}->{'Census Blocks'}[0]->{GEOID};
175 my $error = 'Lookup failed, but with no status message.';
177 if ( $content->{errors} ) {
178 $error = join("\n", $content->{errors});
187 #sub get_district_methods {
189 # 'wa_sales' => 'Washington sales tax',
192 =item get_district LOCATION METHOD
194 For the location hash in LOCATION, using lookup method METHOD, fetch
195 tax district information. Currently the only available method is
196 'wa_sales' (the Washington Department of Revenue sales tax lookup).
198 Returns a hash reference containing the following fields:
203 - exempt_amount (currently zero)
204 - city, county, state, country (from
206 The intent is that you can assign this to an L<FS::cust_main_county>
207 object and insert it if there's not yet a tax rate defined for that
210 get_district will die on error.
218 my $location = shift;
219 my $method = shift or return '';
220 warn Dumper($location, $method) if $DEBUG;
225 my $location = shift;
227 return '' if $location->{state} ne 'WA';
229 my $return = { %$location };
230 $return->{'exempt_amount'} = 0.00;
232 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
233 my $ua = new LWP::UserAgent;
235 my $delim = '<|>'; # yes, <|>
236 my $year = (localtime)[5] + 1900;
237 my $month = (localtime)[4] + 1;
238 my @zip = split('-', $location->{zip});
241 'TaxType=S', #sales; 'P' = property
242 'Src=0', #does something complicated
244 'Addr='.uri_escape($location->{address1}),
245 'City='.uri_escape($location->{city}),
247 'Zip1='.($zip[1] || ''), #optional
254 my $query_string = join($delim, @args );
255 $url .= "?$query_string";
256 warn "\nrequest: $url\n\n" if $DEBUG > 1;
258 my $res = $ua->request( GET( "$url?$query_string" ) );
263 if ($res->code ne '200') {
264 $error = $res->message;
267 my $content = $res->content;
268 my $p = new HTML::TokeParser \$content;
270 while ( my $t = $p->get_tag('script') ) {
271 my $u = $p->get_token; #either enclosed text or the </script> tag
272 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
277 if ( $js ) { #found it
278 # strip down to the quoted string, which contains escaped single quotes.
279 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
280 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
281 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
283 $p = new HTML::TokeParser \$js;
284 TD: while ( my $td = $p->get_tag('td') ) {
285 while ( my $u = $p->get_token ) {
286 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
287 next if $u->[0] ne 'T'; # skip non-text
290 if ( lc($text) eq 'location code' ) {
291 $p->get_tag('td'); # skip to the next column
293 $u = $p->get_token until ($u->[0] || '') eq 'T'; # and then skip non-text
294 $return->{'district'} = $u->[1];
296 elsif ( lc($text) eq 'total tax rate' ) {
299 $u = $p->get_token until ($u->[0] || '') eq 'T';
300 $return->{'tax'} = $u->[1];
306 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
307 $return->{'tax'} *= 100; #percentage
308 warn Dumper($return) if $DEBUG > 1;
312 $error = 'district code/tax rate not found';
316 $error = "failed to parse document";
319 die "WA tax district lookup error: $error";
322 ###### USPS Standardization ######
324 sub standardize_usps {
327 eval "use Business::US::USPS::WebTools::AddressStandardization";
330 my $location = shift;
331 if ( $location->{country} ne 'US' ) {
333 warn "standardize_usps not for use in country ".$location->{country}."\n";
334 $location->{addr_clean} = '';
337 my $userid = $conf->config('usps_webtools-userid');
338 my $password = $conf->config('usps_webtools-password');
339 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
341 Password => $password,
343 } ) or die "error starting USPS WebTools\n";
345 my($zip5, $zip4) = split('-',$location->{'zip'});
348 FirmName => $location->{company},
349 Address2 => $location->{address1},
350 Address1 => $location->{address2},
351 City => $location->{city},
352 State => $location->{state},
356 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
359 my $hash = $verifier->verify_address( %usps_args );
361 warn $verifier->response
364 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
365 if $verifier->is_error;
367 my $zip = $hash->{Zip5};
368 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
370 { company => $hash->{FirmName},
371 address1 => $hash->{Address2},
372 address2 => $hash->{Address1},
373 city => $hash->{City},
374 state => $hash->{State},
380 ###### U.S. Census Bureau ######
382 sub standardize_uscensus {
384 my $location = shift;
385 my $log = FS::Log->new('FS::Misc::Geo::standardize_uscensus');
386 $log->debug(join("\n", @{$location}{'address1', 'city', 'state', 'zip'}));
388 eval "use Geo::USCensus::Geocoding";
391 if ( $location->{country} ne 'US' ) {
393 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
394 $location->{addr_clean} = '';
399 street => $location->{address1},
400 city => $location->{city},
401 state => $location->{state},
402 zip => $location->{zip},
403 debug => ($DEBUG || 0),
406 my $result = Geo::USCensus::Geocoding->query($request);
407 if ( $result->is_match ) {
408 # unfortunately we get the address back as a single line
409 $log->debug($result->address);
410 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
416 address2 => uc($location->{address2}),
417 latitude => $result->latitude,
418 longitude => $result->longitude,
419 censustract => $result->censustract,
422 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
424 } elsif ( $result->match_level eq 'Tie' ) {
425 die "Geocoding was not able to identify a unique matching address.\n";
426 } elsif ( $result->match_level ) {
427 die "Geocoding did not find a matching address.\n";
429 $log->error($result->error_message);
430 return; # for internal errors, don't return anything
434 ####### EZLOCATE (obsolete) #######
436 my %ezlocate_error = ( # USA_Geo_002 documentation
437 10 => 'State not found',
438 11 => 'City not found',
439 12 => 'Invalid street address',
440 14 => 'Street name not found',
441 15 => 'Address range does not exist',
442 16 => 'Ambiguous address',
443 17 => 'Intersection not found', #unused?
446 sub standardize_ezlocate {
448 my $location = shift;
450 #if ( $location->{country} eq 'US' ) {
451 # $class = 'USA_Geo_004Tool';
453 #elsif ( $location->{country} eq 'CA' ) {
454 # $class = 'CAN_Geo_001Tool';
456 #else { # shouldn't be a fatal error, just pass through unverified address
457 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
458 # "' not available\n";
461 #my $path = $conf->config('teleatlas-path') || '';
462 #local @INC = (@INC, $path);
465 # die "Loading $class failed:\n$@".
466 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
469 $class = 'Geo::EZLocate'; # use our own library
470 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
473 my $userid = $conf->config('ezlocate-userid')
474 or die "no ezlocate-userid configured\n";
475 my $password = $conf->config('ezlocate-password')
476 or die "no ezlocate-password configured\n";
478 my $tool = $class->new($userid, $password);
479 my $match = $tool->findAddress(
480 $location->{address1},
483 $location->{zip}, #12345-6789 format is allowed
485 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
486 # error handling - B codes indicate success
487 die $ezlocate_error{$match->{MAT_STAT}}."\n"
488 unless $match->{MAT_STAT} =~ /^B\d$/;
491 address1 => $match->{MAT_ADDR},
492 address2 => $location->{address2},
493 city => $match->{MAT_CITY},
494 state => $match->{MAT_ST},
495 country => $location->{country},
496 zip => $match->{MAT_ZIP},
497 latitude => $match->{MAT_LAT},
498 longitude => $match->{MAT_LON},
499 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
500 sprintf('%07.2f',$match->{CEN_TRCT}),
503 if ( $match->{STD_ADDR} ) {
504 # then they have a postal standardized address for us
506 address1 => $match->{STD_ADDR},
507 address2 => $location->{address2},
508 city => $match->{STD_CITY},
509 state => $match->{STD_ST},
510 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
517 sub _tomtom_query { # helper method for the below
519 my $result = Geo::TomTom::Geocoding->query(%args);
520 die "TomTom geocoding error: ".$result->message."\n"
521 unless ( $result->is_success );
522 my ($match) = $result->locations;
523 my $type = $match->{type};
524 # match levels below "intersection" should not be considered clean
525 my $clean = ($type eq 'addresspoint' ||
528 $type eq 'intersection'
530 warn "tomtom returned $type match\n" if $DEBUG;
531 warn Dumper($match) if $DEBUG > 1;
535 sub standardize_tomtom {
536 # post-2013 TomTom API
537 # much better, but incompatible with ezlocate
539 my $location = shift;
540 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
543 my $key = $conf->config('tomtom-userid')
544 or die "no tomtom-userid configured\n";
546 my $country = code2country($location->{country});
547 my ($address1, $address2) = ($location->{address1}, $location->{address2});
551 $address1 =~ s/^\s+//;
552 $address1 =~ s/\s+$//;
553 $address2 =~ s/^\s+//;
554 $address2 =~ s/\s+$//;
556 # try to fix some cases of the address fields being switched
557 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
558 $address2 = $address1;
559 $address1 = $location->{address2};
561 # parse sublocation part (unit/suite/apartment...) and clean up
562 # non-sublocation address2
563 ($subloc, $address2) =
564 subloc_address2($address1, $address2, $location->{country});
565 # ask TomTom to standardize address1:
569 L => $location->{city},
570 AA => $location->{state},
571 PC => $location->{zip},
572 CC => country2code($country, LOCALE_CODE_ALPHA_3),
575 my ($match, $clean) = _tomtom_query(%args);
577 if (!$match or !$clean) {
578 # Then try cleaning up the input; TomTom is picky about junk in the
579 # address. Any of these can still be a clean match.
580 my $h = Geo::StreetAddress::US->parse_location($address1);
581 # First conservatively:
582 if ( $h->{sec_unit_type} ) {
583 my $strip = '\s+' . $h->{sec_unit_type};
584 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
586 $args{T} =~ s/$strip//;
587 ($match, $clean) = _tomtom_query(%args);
589 if ( !$match or !$clean ) {
590 # Then more aggressively:
591 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
592 ($match, $clean) = _tomtom_query(%args);
596 if ( !$match or !$clean ) { # partial matches are not useful
597 die "Address not found\n";
600 if ( defined $match->{censusTract} ) {
601 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
602 join('.', $match->{censusTract} =~ /(....)(..)/);
605 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
606 $address1 .= $match->{street} if $match->{street};
607 $address1 .= ' '.$subloc if $subloc;
608 $address1 = uc($address1); # USPS standards
611 address1 => $address1,
612 address2 => $address2,
613 city => uc($match->{city}),
614 state => uc($location->{state}),
615 country => uc($location->{country}),
616 zip => ($match->{standardPostalCode} || $match->{postcode}),
617 latitude => $match->{latitude},
618 longitude => $match->{longitude},
619 censustract => $tract,
620 addr_clean => $clean,
624 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
626 Given 'address1' and 'address2' strings, extract the sublocation part
627 (from either one) and return it. If the sublocation was found in ADDRESS1,
628 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
629 contain something relevant.
634 # Postal Addressing Standards, Appendix C
635 # (plus correction of "hanger" to "hangar")
663 # Canada Post Addressing Guidelines 4.3
674 sub subloc_address2 {
675 # Some things seen in the address2 field:
677 # The complete address (with address1 containing part of the company name,
678 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
681 # try to parse sublocation parts from address1; if they are present we'll
682 # append them back to address1 after standardizing
684 my ($addr1, $addr2, $country) = map uc, @_;
685 my $dict = $subloc_forms{$country} or return('', $addr2);
687 my $found_in = 0; # which address is the sublocation
690 # patterns to try to parse
692 "$addr1 Nullcity, CA"
694 $h = Geo::StreetAddress::US->parse_location($addr1);
695 last if exists($h->{sec_unit_type});
697 if (exists($h->{sec_unit_type})) {
704 "$addr1, $addr2 Nullcity, CA"
706 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
707 last if exists($h->{sec_unit_type});
709 if (exists($h->{sec_unit_type})) {
714 $subloc = $h->{sec_unit_type};
715 # special case: do not combine P.O. box sublocs with address1
716 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
717 if ( $found_in == 2 ) {
718 $addr2 = "PO BOX ".$h->{sec_unit_num};
719 } # else it's in addr1, and leave it alone
721 } elsif ( exists($dict->{$subloc}) ) {
722 # substitute the official abbreviation
723 $subloc = $dict->{$subloc};
725 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
726 } # otherwise $subloc = ''
728 if ( $found_in == 2 ) {
729 # address2 should be fully combined into address1
730 return ($subloc, '');
732 # else address2 is not the canonical sublocation, but do our best to
736 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
738 # remove all punctuation and spaces
739 foreach my $w (split(/\W+/, $addr2)) {
740 if ( exists($dict->{$w}) ) {
741 push @words, $dict->{$w};
745 my $result = join(' ', @words);
746 # correct spacing of pound sign + number
747 $result =~ s/NUMBER(\d)/# $1/;
748 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
751 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
755 #is anyone still using this?
756 sub standardize_melissa {
758 my $location = shift;
761 eval "use Geo::Melissa::WebSmart";
764 my $id = $conf->config('melissa-userid')
765 or die "no melissa-userid configured\n";
766 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
770 a1 => $location->{address1},
771 a2 => $location->{address2},
772 city => $location->{city},
773 state => $location->{state},
774 ctry => $location->{country},
775 zip => $location->{zip},
778 my $result = Geo::Melissa::WebSmart->query($request);
779 if ( $result->code =~ /AS01/ ) { # always present on success
780 my $addr = $result->address;
781 warn Dumper $addr if $DEBUG > 1;
783 address1 => $addr->{Address1},
784 address2 => $addr->{Address2},
785 city => $addr->{City}->{Name},
786 state => $addr->{State}->{Abbreviation},
787 country => $addr->{Country}->{Abbreviation},
789 latitude => $addr->{Latitude},
790 longitude => $addr->{Longitude},
793 if ( $addr->{Census}->{Tract} ) {
794 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
795 # insert decimal point two digits from the end
796 $censustract =~ s/(\d\d)$/\.$1/;
797 $out->{censustract} = $censustract;
798 $out->{censusyear} = $conf->config('census_year');
800 # we could do a lot more nuanced reporting of the warning/status codes,
801 # but the UI doesn't support that yet.
804 die $result->status_message;
808 sub standardize_freeside {
810 my $location = shift;
812 my $url = 'https://ws.freeside.biz/normalize';
814 #free freeside.biz normalization only for US
815 if ( $location->{country} ne 'US' ) {
817 #why? something else could have cleaned it $location->{addr_clean} = '';
821 my $ua = LWP::UserAgent->new(
823 verify_hostname => 0,
824 SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
827 my $response = $ua->request( POST $url, [
828 'support-key' => scalar($conf->config('support-key')),
832 die "Address normalization error: ". $response->message
833 unless $response->is_success;
836 my $content = eval { decode_json($response->content) };
838 warn $response->content;
839 die "Address normalization JSON error : $@\n";
842 die $content->{error}."\n"
843 if $content->{error};
845 { 'addr_clean' => 'Y',
846 map { $_ => $content->{$_} }
847 qw( address1 address2 city state zip country )