4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
18 FS::UID->install_callback( sub {
24 @EXPORT_OK = qw( get_district );
28 FS::Misc::Geo - routines to fetch geographic information
34 =item get_censustract_ffiec LOCATION YEAR
36 Given a location hash (see L<FS::location_Mixin>) and a census map year,
37 returns a census tract code (consisting of state, county, and tract
38 codes) or an error message.
42 sub get_censustract_ffiec {
48 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
52 warn Dumper($location, $year) if $DEBUG;
54 # the old FFIEC geocoding service was shut down December 1, 2014.
55 # welcome to the future.
56 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
57 # build the single-line query
58 my $single_line = join(', ', $location->{address1},
62 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
63 my $request = POST( $url,
64 'Content-Type' => 'application/json; charset=utf-8',
65 'Accept' => 'application/json',
66 'Content' => encode_json($hashref)
69 my $ua = new LWP::UserAgent;
70 my $res = $ua->request( $request );
75 if (!$res->is_success) {
77 die "Census tract lookup error: ".$res->message;
82 my $content = eval { decode_json($res->content) };
83 die "Census tract JSON error: $@\n" if $@;
85 if ( !exists $content->{d}->{sStatus} ) {
86 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
88 if ( $content->{d}->{sStatus} eq 'Y' ) {
90 # this also contains the (partial) standardized address, correct zip
91 # code, coordinates, etc., and we could get all of them, but right now
92 # we only want the census tract
93 my $tract = join('', $content->{d}->{sStateCode},
94 $content->{d}->{sCountyCode},
95 $content->{d}->{sTractCode});
100 my $error = $content->{d}->{sMsg}
101 || 'FFIEC lookup failed, but with no status message.';
107 #sub get_district_methods {
109 # 'wa_sales' => 'Washington sales tax',
112 =item get_district LOCATION METHOD
114 For the location hash in LOCATION, using lookup method METHOD, fetch
115 tax district information. Currently the only available method is
116 'wa_sales' (the Washington Department of Revenue sales tax lookup).
118 Returns a hash reference containing the following fields:
123 - exempt_amount (currently zero)
124 - city, county, state, country (from
126 The intent is that you can assign this to an L<FS::cust_main_county>
127 object and insert it if there's not yet a tax rate defined for that
130 get_district will die on error.
138 my $location = shift;
139 my $method = shift or return '';
140 warn Dumper($location, $method) if $DEBUG;
145 my $location = shift;
147 return '' if $location->{state} ne 'WA';
149 my $return = { %$location };
150 $return->{'exempt_amount'} = 0.00;
152 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
153 my $ua = new LWP::UserAgent;
155 my $delim = '<|>'; # yes, <|>
156 my $year = (localtime)[5] + 1900;
157 my $month = (localtime)[4] + 1;
158 my @zip = split('-', $location->{zip});
161 'TaxType=S', #sales; 'P' = property
162 'Src=0', #does something complicated
164 'Addr='.uri_escape($location->{address1}),
165 'City='.uri_escape($location->{city}),
167 'Zip1='.($zip[1] || ''), #optional
174 my $query_string = join($delim, @args );
175 $url .= "?$query_string";
176 warn "\nrequest: $url\n\n" if $DEBUG > 1;
178 my $res = $ua->request( GET( "$url?$query_string" ) );
183 if ($res->code ne '200') {
184 $error = $res->message;
187 my $content = $res->content;
188 my $p = new HTML::TokeParser \$content;
190 while ( my $t = $p->get_tag('script') ) {
191 my $u = $p->get_token; #either enclosed text or the </script> tag
192 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
197 if ( $js ) { #found it
198 # strip down to the quoted string, which contains escaped single quotes.
199 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
200 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
201 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
203 $p = new HTML::TokeParser \$js;
204 TD: while ( my $td = $p->get_tag('td') ) {
205 while ( my $u = $p->get_token ) {
206 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
207 next if $u->[0] ne 'T'; # skip non-text
210 if ( lc($text) eq 'location code' ) {
211 $p->get_tag('td'); # skip to the next column
213 $u = $p->get_token until ($u->[0] || '') eq 'T'; # and then skip non-text
214 $return->{'district'} = $u->[1];
216 elsif ( lc($text) eq 'total tax rate' ) {
219 $u = $p->get_token until ($u->[0] || '') eq 'T';
220 $return->{'tax'} = $u->[1];
226 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
227 $return->{'tax'} *= 100; #percentage
228 warn Dumper($return) if $DEBUG > 1;
232 $error = 'district code/tax rate not found';
236 $error = "failed to parse document";
239 die "WA tax district lookup error: $error";
242 ###### USPS Standardization ######
244 sub standardize_usps {
247 eval "use Business::US::USPS::WebTools::AddressStandardization";
250 my $location = shift;
251 if ( $location->{country} ne 'US' ) {
253 warn "standardize_usps not for use in country ".$location->{country}."\n";
254 $location->{addr_clean} = '';
257 my $userid = $conf->config('usps_webtools-userid');
258 my $password = $conf->config('usps_webtools-password');
259 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
261 Password => $password,
263 } ) or die "error starting USPS WebTools\n";
265 my($zip5, $zip4) = split('-',$location->{'zip'});
268 FirmName => $location->{company},
269 Address2 => $location->{address1},
270 Address1 => $location->{address2},
271 City => $location->{city},
272 State => $location->{state},
276 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
279 my $hash = $verifier->verify_address( %usps_args );
281 warn $verifier->response
284 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
285 if $verifier->is_error;
287 my $zip = $hash->{Zip5};
288 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
290 { company => $hash->{FirmName},
291 address1 => $hash->{Address2},
292 address2 => $hash->{Address1},
293 city => $hash->{City},
294 state => $hash->{State},
300 ###### U.S. Census Bureau ######
302 sub standardize_uscensus {
304 my $location = shift;
305 my $log = FS::Log->new('FS::Misc::Geo::standardize_uscensus');
306 $log->debug(join("\n", @{$location}{'address1', 'city', 'state', 'zip'}));
308 eval "use Geo::USCensus::Geocoding";
311 if ( $location->{country} ne 'US' ) {
313 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
314 $location->{addr_clean} = '';
319 street => $location->{address1},
320 city => $location->{city},
321 state => $location->{state},
322 zip => $location->{zip},
323 debug => ($DEBUG || 0),
326 my $result = Geo::USCensus::Geocoding->query($request);
327 if ( $result->is_match ) {
328 # unfortunately we get the address back as a single line
329 $log->debug($result->address);
330 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
336 address2 => uc($location->{address2}),
337 latitude => $result->latitude,
338 longitude => $result->longitude,
339 censustract => $result->censustract,
342 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
344 } elsif ( $result->match_level eq 'Tie' ) {
345 die "Geocoding was not able to identify a unique matching address.\n";
346 } elsif ( $result->match_level ) {
347 die "Geocoding did not find a matching address.\n";
349 $log->error($result->error_message);
350 return; # for internal errors, don't return anything
354 ####### EZLOCATE (obsolete) #######
356 my %ezlocate_error = ( # USA_Geo_002 documentation
357 10 => 'State not found',
358 11 => 'City not found',
359 12 => 'Invalid street address',
360 14 => 'Street name not found',
361 15 => 'Address range does not exist',
362 16 => 'Ambiguous address',
363 17 => 'Intersection not found', #unused?
366 sub standardize_ezlocate {
368 my $location = shift;
370 #if ( $location->{country} eq 'US' ) {
371 # $class = 'USA_Geo_004Tool';
373 #elsif ( $location->{country} eq 'CA' ) {
374 # $class = 'CAN_Geo_001Tool';
376 #else { # shouldn't be a fatal error, just pass through unverified address
377 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
378 # "' not available\n";
381 #my $path = $conf->config('teleatlas-path') || '';
382 #local @INC = (@INC, $path);
385 # die "Loading $class failed:\n$@".
386 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
389 $class = 'Geo::EZLocate'; # use our own library
390 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
393 my $userid = $conf->config('ezlocate-userid')
394 or die "no ezlocate-userid configured\n";
395 my $password = $conf->config('ezlocate-password')
396 or die "no ezlocate-password configured\n";
398 my $tool = $class->new($userid, $password);
399 my $match = $tool->findAddress(
400 $location->{address1},
403 $location->{zip}, #12345-6789 format is allowed
405 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
406 # error handling - B codes indicate success
407 die $ezlocate_error{$match->{MAT_STAT}}."\n"
408 unless $match->{MAT_STAT} =~ /^B\d$/;
411 address1 => $match->{MAT_ADDR},
412 address2 => $location->{address2},
413 city => $match->{MAT_CITY},
414 state => $match->{MAT_ST},
415 country => $location->{country},
416 zip => $match->{MAT_ZIP},
417 latitude => $match->{MAT_LAT},
418 longitude => $match->{MAT_LON},
419 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
420 sprintf('%07.2f',$match->{CEN_TRCT}),
423 if ( $match->{STD_ADDR} ) {
424 # then they have a postal standardized address for us
426 address1 => $match->{STD_ADDR},
427 address2 => $location->{address2},
428 city => $match->{STD_CITY},
429 state => $match->{STD_ST},
430 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
437 sub _tomtom_query { # helper method for the below
439 my $result = Geo::TomTom::Geocoding->query(%args);
440 die "TomTom geocoding error: ".$result->message."\n"
441 unless ( $result->is_success );
442 my ($match) = $result->locations;
443 my $type = $match->{type};
444 # match levels below "intersection" should not be considered clean
445 my $clean = ($type eq 'addresspoint' ||
448 $type eq 'intersection'
450 warn "tomtom returned $type match\n" if $DEBUG;
451 warn Dumper($match) if $DEBUG > 1;
455 sub standardize_tomtom {
456 # post-2013 TomTom API
457 # much better, but incompatible with ezlocate
459 my $location = shift;
460 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
463 my $key = $conf->config('tomtom-userid')
464 or die "no tomtom-userid configured\n";
466 my $country = code2country($location->{country});
467 my ($address1, $address2) = ($location->{address1}, $location->{address2});
471 $address1 =~ s/^\s+//;
472 $address1 =~ s/\s+$//;
473 $address2 =~ s/^\s+//;
474 $address2 =~ s/\s+$//;
476 # try to fix some cases of the address fields being switched
477 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
478 $address2 = $address1;
479 $address1 = $location->{address2};
481 # parse sublocation part (unit/suite/apartment...) and clean up
482 # non-sublocation address2
483 ($subloc, $address2) =
484 subloc_address2($address1, $address2, $location->{country});
485 # ask TomTom to standardize address1:
489 L => $location->{city},
490 AA => $location->{state},
491 PC => $location->{zip},
492 CC => country2code($country, LOCALE_CODE_ALPHA_3),
495 my ($match, $clean) = _tomtom_query(%args);
497 if (!$match or !$clean) {
498 # Then try cleaning up the input; TomTom is picky about junk in the
499 # address. Any of these can still be a clean match.
500 my $h = Geo::StreetAddress::US->parse_location($address1);
501 # First conservatively:
502 if ( $h->{sec_unit_type} ) {
503 my $strip = '\s+' . $h->{sec_unit_type};
504 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
506 $args{T} =~ s/$strip//;
507 ($match, $clean) = _tomtom_query(%args);
509 if ( !$match or !$clean ) {
510 # Then more aggressively:
511 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
512 ($match, $clean) = _tomtom_query(%args);
516 if ( !$match or !$clean ) { # partial matches are not useful
517 die "Address not found\n";
520 if ( defined $match->{censusTract} ) {
521 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
522 join('.', $match->{censusTract} =~ /(....)(..)/);
525 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
526 $address1 .= $match->{street} if $match->{street};
527 $address1 .= ' '.$subloc if $subloc;
528 $address1 = uc($address1); # USPS standards
531 address1 => $address1,
532 address2 => $address2,
533 city => uc($match->{city}),
534 state => uc($location->{state}),
535 country => uc($location->{country}),
536 zip => ($match->{standardPostalCode} || $match->{postcode}),
537 latitude => $match->{latitude},
538 longitude => $match->{longitude},
539 censustract => $tract,
540 addr_clean => $clean,
544 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
546 Given 'address1' and 'address2' strings, extract the sublocation part
547 (from either one) and return it. If the sublocation was found in ADDRESS1,
548 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
549 contain something relevant.
554 # Postal Addressing Standards, Appendix C
555 # (plus correction of "hanger" to "hangar")
583 # Canada Post Addressing Guidelines 4.3
594 sub subloc_address2 {
595 # Some things seen in the address2 field:
597 # The complete address (with address1 containing part of the company name,
598 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
601 # try to parse sublocation parts from address1; if they are present we'll
602 # append them back to address1 after standardizing
604 my ($addr1, $addr2, $country) = map uc, @_;
605 my $dict = $subloc_forms{$country} or return('', $addr2);
607 my $found_in = 0; # which address is the sublocation
610 # patterns to try to parse
612 "$addr1 Nullcity, CA"
614 $h = Geo::StreetAddress::US->parse_location($addr1);
615 last if exists($h->{sec_unit_type});
617 if (exists($h->{sec_unit_type})) {
624 "$addr1, $addr2 Nullcity, CA"
626 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
627 last if exists($h->{sec_unit_type});
629 if (exists($h->{sec_unit_type})) {
634 $subloc = $h->{sec_unit_type};
635 # special case: do not combine P.O. box sublocs with address1
636 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
637 if ( $found_in == 2 ) {
638 $addr2 = "PO BOX ".$h->{sec_unit_num};
639 } # else it's in addr1, and leave it alone
641 } elsif ( exists($dict->{$subloc}) ) {
642 # substitute the official abbreviation
643 $subloc = $dict->{$subloc};
645 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
646 } # otherwise $subloc = ''
648 if ( $found_in == 2 ) {
649 # address2 should be fully combined into address1
650 return ($subloc, '');
652 # else address2 is not the canonical sublocation, but do our best to
656 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
658 # remove all punctuation and spaces
659 foreach my $w (split(/\W+/, $addr2)) {
660 if ( exists($dict->{$w}) ) {
661 push @words, $dict->{$w};
665 my $result = join(' ', @words);
666 # correct spacing of pound sign + number
667 $result =~ s/NUMBER(\d)/# $1/;
668 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
671 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
675 sub standardize_melissa {
677 my $location = shift;
680 eval "use Geo::Melissa::WebSmart";
683 my $id = $conf->config('melissa-userid')
684 or die "no melissa-userid configured\n";
685 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
689 a1 => $location->{address1},
690 a2 => $location->{address2},
691 city => $location->{city},
692 state => $location->{state},
693 ctry => $location->{country},
694 zip => $location->{zip},
697 my $result = Geo::Melissa::WebSmart->query($request);
698 if ( $result->code =~ /AS01/ ) { # always present on success
699 my $addr = $result->address;
700 warn Dumper $addr if $DEBUG > 1;
702 address1 => $addr->{Address1},
703 address2 => $addr->{Address2},
704 city => $addr->{City}->{Name},
705 state => $addr->{State}->{Abbreviation},
706 country => $addr->{Country}->{Abbreviation},
708 latitude => $addr->{Latitude},
709 longitude => $addr->{Longitude},
712 if ( $addr->{Census}->{Tract} ) {
713 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
714 # insert decimal point two digits from the end
715 $censustract =~ s/(\d\d)$/\.$1/;
716 $out->{censustract} = $censustract;
717 $out->{censusyear} = $conf->config('census_year');
719 # we could do a lot more nuanced reporting of the warning/status codes,
720 # but the UI doesn't support that yet.
723 die $result->status_message;
727 sub standardize_freeside {
729 my $location = shift;
731 my $url = 'https://ws.freeside.biz/normalize';
733 #free freeside.biz normalization only for US
734 if ( $location->{country} ne 'US' ) {
736 #why? something else could have cleaned it $location->{addr_clean} = '';
740 my $ua = LWP::UserAgent->new(
742 verify_hostname => 0,
743 SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
746 my $response = $ua->request( POST $url, [
747 'support-key' => scalar($conf->config('support-key')),
751 die "Address normalization error: ". $response->message
752 unless $response->is_success;
755 my $content = eval { decode_json($response->content) };
757 warn $response->content;
758 die "Address normalization JSON error : $@\n";
761 die $content->{error}."\n"
762 if $content->{error};
764 { 'addr_clean' => 'Y',
765 map { $_ => $content->{$_} }
766 qw( address1 address2 city state zip country )