4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract_ffiec LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
46 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
50 warn Dumper($location, $year) if $DEBUG;
52 # the old FFIEC geocoding service was shut down December 1, 2014.
53 # welcome to the future.
54 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
55 # build the single-line query
56 my $single_line = join(', ', $location->{address1},
60 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
61 my $request = POST( $url,
62 'Content-Type' => 'application/json; charset=utf-8',
63 'Accept' => 'application/json',
64 'Content' => encode_json($hashref)
67 my $ua = new LWP::UserAgent;
68 my $res = $ua->request( $request );
73 if (!$res->is_success) {
75 die "Census tract lookup error: ".$res->message;
80 my $content = eval { decode_json($res->content) };
81 die "Census tract JSON error: $@\n" if $@;
83 if ( !exists $content->{d}->{sStatus} ) {
84 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
86 if ( $content->{d}->{sStatus} eq 'Y' ) {
88 # this also contains the (partial) standardized address, correct zip
89 # code, coordinates, etc., and we could get all of them, but right now
90 # we only want the census tract
91 my $tract = join('', $content->{d}->{sStateCode},
92 $content->{d}->{sCountyCode},
93 $content->{d}->{sTractCode});
98 my $error = $content->{d}->{sMsg}
99 || 'FFIEC lookup failed, but with no status message.';
105 #sub get_district_methods {
107 # 'wa_sales' => 'Washington sales tax',
110 =item get_district LOCATION METHOD
112 For the location hash in LOCATION, using lookup method METHOD, fetch
113 tax district information. Currently the only available method is
114 'wa_sales' (the Washington Department of Revenue sales tax lookup).
116 Returns a hash reference containing the following fields:
121 - exempt_amount (currently zero)
122 - city, county, state, country (from
124 The intent is that you can assign this to an L<FS::cust_main_county>
125 object and insert it if there's not yet a tax rate defined for that
128 get_district will die on error.
136 my $location = shift;
137 my $method = shift or return '';
138 warn Dumper($location, $method) if $DEBUG;
143 my $location = shift;
145 return '' if $location->{state} ne 'WA';
147 my $return = { %$location };
148 $return->{'exempt_amount'} = 0.00;
150 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
151 my $ua = new LWP::UserAgent;
153 my $delim = '<|>'; # yes, <|>
154 my $year = (localtime)[5] + 1900;
155 my $month = (localtime)[4] + 1;
156 my @zip = split('-', $location->{zip});
159 'TaxType=S', #sales; 'P' = property
160 'Src=0', #does something complicated
162 'Addr='.uri_escape($location->{address1}),
163 'City='.uri_escape($location->{city}),
165 'Zip1='.($zip[1] || ''), #optional
172 my $query_string = join($delim, @args );
173 $url .= "?$query_string";
174 warn "\nrequest: $url\n\n" if $DEBUG > 1;
176 my $res = $ua->request( GET( "$url?$query_string" ) );
181 if ($res->code ne '200') {
182 $error = $res->message;
185 my $content = $res->content;
186 my $p = new HTML::TokeParser \$content;
188 while ( my $t = $p->get_tag('script') ) {
189 my $u = $p->get_token; #either enclosed text or the </script> tag
190 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
195 if ( $js ) { #found it
196 # strip down to the quoted string, which contains escaped single quotes.
197 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
198 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
199 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
201 $p = new HTML::TokeParser \$js;
202 TD: while ( my $td = $p->get_tag('td') ) {
203 while ( my $u = $p->get_token ) {
204 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
205 next if $u->[0] ne 'T'; # skip non-text
208 if ( lc($text) eq 'location code' ) {
209 $p->get_tag('td'); # skip to the next column
211 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
212 $return->{'district'} = $u->[1];
214 elsif ( lc($text) eq 'total tax rate' ) {
217 $u = $p->get_token until $u->[0] eq 'T';
218 $return->{'tax'} = $u->[1];
224 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
225 $return->{'tax'} *= 100; #percentage
226 warn Dumper($return) if $DEBUG > 1;
230 $error = 'district code/tax rate not found';
234 $error = "failed to parse document";
237 die "WA tax district lookup error: $error";
240 ###### USPS Standardization ######
242 sub standardize_usps {
245 eval "use Business::US::USPS::WebTools::AddressStandardization";
248 my $location = shift;
249 if ( $location->{country} ne 'US' ) {
251 warn "standardize_usps not for use in country ".$location->{country}."\n";
252 $location->{addr_clean} = '';
255 my $userid = $conf->config('usps_webtools-userid');
256 my $password = $conf->config('usps_webtools-password');
257 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
259 Password => $password,
261 } ) or die "error starting USPS WebTools\n";
263 my($zip5, $zip4) = split('-',$location->{'zip'});
266 FirmName => $location->{company},
267 Address2 => $location->{address1},
268 Address1 => $location->{address2},
269 City => $location->{city},
270 State => $location->{state},
274 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
277 my $hash = $verifier->verify_address( %usps_args );
279 warn $verifier->response
282 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
283 if $verifier->is_error;
285 my $zip = $hash->{Zip5};
286 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
288 { company => $hash->{FirmName},
289 address1 => $hash->{Address2},
290 address2 => $hash->{Address1},
291 city => $hash->{City},
292 state => $hash->{State},
298 ###### U.S. Census Bureau ######
300 sub standardize_uscensus {
302 my $location = shift;
304 eval "use Geo::USCensus::Geocoding";
307 if ( $location->{country} ne 'US' ) {
309 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
310 $location->{addr_clean} = '';
315 street => $location->{address1},
316 city => $location->{city},
317 state => $location->{state},
318 zip => $location->{zip},
319 debug => ($DEBUG || 0),
322 my $result = Geo::USCensus::Geocoding->query($request);
323 if ( $result->is_match ) {
324 # unfortunately we get the address back as a single line
325 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
331 address2 => uc($location->{address2}),
332 latitude => $result->latitude,
333 longitude => $result->longitude,
334 censustract => $result->censustract,
337 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
339 } elsif ( $result->match_level eq 'Tie' ) {
340 die "Geocoding was not able to identify a unique matching address.\n";
341 } elsif ( $result->match_level ) {
342 die "Geocoding did not find a matching address.\n";
344 warn Dumper($result) if $DEBUG;
345 die $result->error_message;
349 ####### EZLOCATE (obsolete) #######
351 my %ezlocate_error = ( # USA_Geo_002 documentation
352 10 => 'State not found',
353 11 => 'City not found',
354 12 => 'Invalid street address',
355 14 => 'Street name not found',
356 15 => 'Address range does not exist',
357 16 => 'Ambiguous address',
358 17 => 'Intersection not found', #unused?
361 sub standardize_ezlocate {
363 my $location = shift;
365 #if ( $location->{country} eq 'US' ) {
366 # $class = 'USA_Geo_004Tool';
368 #elsif ( $location->{country} eq 'CA' ) {
369 # $class = 'CAN_Geo_001Tool';
371 #else { # shouldn't be a fatal error, just pass through unverified address
372 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
373 # "' not available\n";
376 #my $path = $conf->config('teleatlas-path') || '';
377 #local @INC = (@INC, $path);
380 # die "Loading $class failed:\n$@".
381 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
384 $class = 'Geo::EZLocate'; # use our own library
385 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
388 my $userid = $conf->config('ezlocate-userid')
389 or die "no ezlocate-userid configured\n";
390 my $password = $conf->config('ezlocate-password')
391 or die "no ezlocate-password configured\n";
393 my $tool = $class->new($userid, $password);
394 my $match = $tool->findAddress(
395 $location->{address1},
398 $location->{zip}, #12345-6789 format is allowed
400 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
401 # error handling - B codes indicate success
402 die $ezlocate_error{$match->{MAT_STAT}}."\n"
403 unless $match->{MAT_STAT} =~ /^B\d$/;
406 address1 => $match->{MAT_ADDR},
407 address2 => $location->{address2},
408 city => $match->{MAT_CITY},
409 state => $match->{MAT_ST},
410 country => $location->{country},
411 zip => $match->{MAT_ZIP},
412 latitude => $match->{MAT_LAT},
413 longitude => $match->{MAT_LON},
414 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
415 sprintf('%07.2f',$match->{CEN_TRCT}),
418 if ( $match->{STD_ADDR} ) {
419 # then they have a postal standardized address for us
421 address1 => $match->{STD_ADDR},
422 address2 => $location->{address2},
423 city => $match->{STD_CITY},
424 state => $match->{STD_ST},
425 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
432 sub _tomtom_query { # helper method for the below
434 my $result = Geo::TomTom::Geocoding->query(%args);
435 die "TomTom geocoding error: ".$result->message."\n"
436 unless ( $result->is_success );
437 my ($match) = $result->locations;
438 my $type = $match->{type};
439 # match levels below "intersection" should not be considered clean
440 my $clean = ($type eq 'addresspoint' ||
443 $type eq 'intersection'
445 warn "tomtom returned $type match\n" if $DEBUG;
446 warn Dumper($match) if $DEBUG > 1;
450 sub standardize_tomtom {
451 # post-2013 TomTom API
452 # much better, but incompatible with ezlocate
454 my $location = shift;
455 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
458 my $key = $conf->config('tomtom-userid')
459 or die "no tomtom-userid configured\n";
461 my $country = code2country($location->{country});
462 my ($address1, $address2) = ($location->{address1}, $location->{address2});
466 $address1 =~ s/^\s+//;
467 $address1 =~ s/\s+$//;
468 $address2 =~ s/^\s+//;
469 $address2 =~ s/\s+$//;
471 # try to fix some cases of the address fields being switched
472 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
473 $address2 = $address1;
474 $address1 = $location->{address2};
476 # parse sublocation part (unit/suite/apartment...) and clean up
477 # non-sublocation address2
478 ($subloc, $address2) =
479 subloc_address2($address1, $address2, $location->{country});
480 # ask TomTom to standardize address1:
484 L => $location->{city},
485 AA => $location->{state},
486 PC => $location->{zip},
487 CC => country2code($country, LOCALE_CODE_ALPHA_3),
490 my ($match, $clean) = _tomtom_query(%args);
492 if (!$match or !$clean) {
493 # Then try cleaning up the input; TomTom is picky about junk in the
494 # address. Any of these can still be a clean match.
495 my $h = Geo::StreetAddress::US->parse_location($address1);
496 # First conservatively:
497 if ( $h->{sec_unit_type} ) {
498 my $strip = '\s+' . $h->{sec_unit_type};
499 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
501 $args{T} =~ s/$strip//;
502 ($match, $clean) = _tomtom_query(%args);
504 if ( !$match or !$clean ) {
505 # Then more aggressively:
506 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
507 ($match, $clean) = _tomtom_query(%args);
511 if ( !$match or !$clean ) { # partial matches are not useful
512 die "Address not found\n";
515 if ( defined $match->{censusTract} ) {
516 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
517 join('.', $match->{censusTract} =~ /(....)(..)/);
520 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
521 $address1 .= $match->{street} if $match->{street};
522 $address1 .= ' '.$subloc if $subloc;
523 $address1 = uc($address1); # USPS standards
526 address1 => $address1,
527 address2 => $address2,
528 city => uc($match->{city}),
529 state => uc($location->{state}),
530 country => uc($location->{country}),
531 zip => ($match->{standardPostalCode} || $match->{postcode}),
532 latitude => $match->{latitude},
533 longitude => $match->{longitude},
534 censustract => $tract,
535 addr_clean => $clean,
539 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
541 Given 'address1' and 'address2' strings, extract the sublocation part
542 (from either one) and return it. If the sublocation was found in ADDRESS1,
543 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
544 contain something relevant.
549 # Postal Addressing Standards, Appendix C
550 # (plus correction of "hanger" to "hangar")
578 # Canada Post Addressing Guidelines 4.3
589 sub subloc_address2 {
590 # Some things seen in the address2 field:
592 # The complete address (with address1 containing part of the company name,
593 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
596 # try to parse sublocation parts from address1; if they are present we'll
597 # append them back to address1 after standardizing
599 my ($addr1, $addr2, $country) = map uc, @_;
600 my $dict = $subloc_forms{$country} or return('', $addr2);
602 my $found_in = 0; # which address is the sublocation
605 # patterns to try to parse
607 "$addr1 Nullcity, CA"
609 $h = Geo::StreetAddress::US->parse_location($addr1);
610 last if exists($h->{sec_unit_type});
612 if (exists($h->{sec_unit_type})) {
619 "$addr1, $addr2 Nullcity, CA"
621 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
622 last if exists($h->{sec_unit_type});
624 if (exists($h->{sec_unit_type})) {
629 $subloc = $h->{sec_unit_type};
630 # special case: do not combine P.O. box sublocs with address1
631 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
632 if ( $found_in == 2 ) {
633 $addr2 = "PO BOX ".$h->{sec_unit_num};
634 } # else it's in addr1, and leave it alone
636 } elsif ( exists($dict->{$subloc}) ) {
637 # substitute the official abbreviation
638 $subloc = $dict->{$subloc};
640 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
641 } # otherwise $subloc = ''
643 if ( $found_in == 2 ) {
644 # address2 should be fully combined into address1
645 return ($subloc, '');
647 # else address2 is not the canonical sublocation, but do our best to
651 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
653 # remove all punctuation and spaces
654 foreach my $w (split(/\W+/, $addr2)) {
655 if ( exists($dict->{$w}) ) {
656 push @words, $dict->{$w};
660 my $result = join(' ', @words);
661 # correct spacing of pound sign + number
662 $result =~ s/NUMBER(\d)/# $1/;
663 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
666 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
670 sub standardize_melissa {
672 my $location = shift;
675 eval "use Geo::Melissa::WebSmart";
678 my $id = $conf->config('melissa-userid')
679 or die "no melissa-userid configured\n";
680 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
684 a1 => $location->{address1},
685 a2 => $location->{address2},
686 city => $location->{city},
687 state => $location->{state},
688 ctry => $location->{country},
689 zip => $location->{zip},
692 my $result = Geo::Melissa::WebSmart->query($request);
693 if ( $result->code =~ /AS01/ ) { # always present on success
694 my $addr = $result->address;
695 warn Dumper $addr if $DEBUG > 1;
697 address1 => $addr->{Address1},
698 address2 => $addr->{Address2},
699 city => $addr->{City}->{Name},
700 state => $addr->{State}->{Abbreviation},
701 country => $addr->{Country}->{Abbreviation},
703 latitude => $addr->{Latitude},
704 longitude => $addr->{Longitude},
707 if ( $addr->{Census}->{Tract} ) {
708 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
709 # insert decimal point two digits from the end
710 $censustract =~ s/(\d\d)$/\.$1/;
711 $out->{censustract} = $censustract;
712 $out->{censusyear} = $conf->config('census_year');
714 # we could do a lot more nuanced reporting of the warning/status codes,
715 # but the UI doesn't support that yet.
718 die $result->status_message;