4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
15 FS::UID->install_callback( sub {
21 @EXPORT_OK = qw( get_district );
25 FS::Misc::Geo - routines to fetch geographic information
31 =item get_censustract_ffiec LOCATION YEAR
33 Given a location hash (see L<FS::location_Mixin>) and a census map year,
34 returns a census tract code (consisting of state, county, and tract
35 codes) or an error message.
39 sub get_censustract_ffiec {
45 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
49 warn Dumper($location, $year) if $DEBUG;
51 # the old FFIEC geocoding service was shut down December 1, 2014.
52 # welcome to the future.
53 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
54 # build the single-line query
55 my $single_line = join(', ', $location->{address1},
59 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
60 my $request = POST( $url,
61 'Content-Type' => 'application/json; charset=utf-8',
62 'Accept' => 'application/json',
63 'Content' => encode_json($hashref)
66 my $ua = new LWP::UserAgent;
67 my $res = $ua->request( $request );
72 if (!$res->is_success) {
74 die "Census tract lookup error: ".$res->message;
79 my $content = eval { decode_json($res->content) };
80 die "Census tract JSON error: $@\n" if $@;
82 if ( !exists $content->{d}->{sStatus} ) {
83 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
85 if ( $content->{d}->{sStatus} eq 'Y' ) {
87 # this also contains the (partial) standardized address, correct zip
88 # code, coordinates, etc., and we could get all of them, but right now
89 # we only want the census tract
90 my $tract = join('', $content->{d}->{sStateCode},
91 $content->{d}->{sCountyCode},
92 $content->{d}->{sTractCode});
97 my $error = $content->{d}->{sMsg}
98 || 'FFIEC lookup failed, but with no status message.';
104 #sub get_district_methods {
106 # 'wa_sales' => 'Washington sales tax',
109 =item get_district LOCATION METHOD
111 For the location hash in LOCATION, using lookup method METHOD, fetch
112 tax district information. Currently the only available method is
113 'wa_sales' (the Washington Department of Revenue sales tax lookup).
115 Returns a hash reference containing the following fields:
120 - exempt_amount (currently zero)
121 - city, county, state, country (from
123 The intent is that you can assign this to an L<FS::cust_main_county>
124 object and insert it if there's not yet a tax rate defined for that
127 get_district will die on error.
135 my $location = shift;
136 my $method = shift or return '';
137 warn Dumper($location, $method) if $DEBUG;
142 my $location = shift;
144 return '' if $location->{state} ne 'WA';
146 my $return = { %$location };
147 $return->{'exempt_amount'} = 0.00;
149 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
150 my $ua = new LWP::UserAgent;
152 my $delim = '<|>'; # yes, <|>
153 my $year = (localtime)[5] + 1900;
154 my $month = (localtime)[4] + 1;
155 my @zip = split('-', $location->{zip});
158 'TaxType=S', #sales; 'P' = property
159 'Src=0', #does something complicated
161 'Addr='.uri_escape($location->{address1}),
162 'City='.uri_escape($location->{city}),
164 'Zip1='.($zip[1] || ''), #optional
171 my $query_string = join($delim, @args );
172 $url .= "?$query_string";
173 warn "\nrequest: $url\n\n" if $DEBUG > 1;
175 my $res = $ua->request( GET( "$url?$query_string" ) );
180 if ($res->code ne '200') {
181 $error = $res->message;
184 my $content = $res->content;
185 my $p = new HTML::TokeParser \$content;
187 while ( my $t = $p->get_tag('script') ) {
188 my $u = $p->get_token; #either enclosed text or the </script> tag
189 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
194 if ( $js ) { #found it
195 # strip down to the quoted string, which contains escaped single quotes.
196 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
197 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
198 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
200 $p = new HTML::TokeParser \$js;
201 TD: while ( my $td = $p->get_tag('td') ) {
202 while ( my $u = $p->get_token ) {
203 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
204 next if $u->[0] ne 'T'; # skip non-text
207 if ( lc($text) eq 'location code' ) {
208 $p->get_tag('td'); # skip to the next column
210 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
211 $return->{'district'} = $u->[1];
213 elsif ( lc($text) eq 'total tax rate' ) {
216 $u = $p->get_token until $u->[0] eq 'T';
217 $return->{'tax'} = $u->[1];
223 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
224 $return->{'tax'} *= 100; #percentage
225 warn Dumper($return) if $DEBUG > 1;
229 $error = 'district code/tax rate not found';
233 $error = "failed to parse document";
236 die "WA tax district lookup error: $error";
239 sub standardize_usps {
242 eval "use Business::US::USPS::WebTools::AddressStandardization";
245 my $location = shift;
246 if ( $location->{country} ne 'US' ) {
248 warn "standardize_usps not for use in country ".$location->{country}."\n";
249 $location->{addr_clean} = '';
252 my $userid = $conf->config('usps_webtools-userid');
253 my $password = $conf->config('usps_webtools-password');
254 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
256 Password => $password,
258 } ) or die "error starting USPS WebTools\n";
260 my($zip5, $zip4) = split('-',$location->{'zip'});
263 FirmName => $location->{company},
264 Address2 => $location->{address1},
265 Address1 => $location->{address2},
266 City => $location->{city},
267 State => $location->{state},
271 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
274 my $hash = $verifier->verify_address( %usps_args );
276 warn $verifier->response
279 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
280 if $verifier->is_error;
282 my $zip = $hash->{Zip5};
283 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
285 { company => $hash->{FirmName},
286 address1 => $hash->{Address2},
287 address2 => $hash->{Address1},
288 city => $hash->{City},
289 state => $hash->{State},
295 my %ezlocate_error = ( # USA_Geo_002 documentation
296 10 => 'State not found',
297 11 => 'City not found',
298 12 => 'Invalid street address',
299 14 => 'Street name not found',
300 15 => 'Address range does not exist',
301 16 => 'Ambiguous address',
302 17 => 'Intersection not found', #unused?
305 sub standardize_ezlocate {
307 my $location = shift;
309 #if ( $location->{country} eq 'US' ) {
310 # $class = 'USA_Geo_004Tool';
312 #elsif ( $location->{country} eq 'CA' ) {
313 # $class = 'CAN_Geo_001Tool';
315 #else { # shouldn't be a fatal error, just pass through unverified address
316 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
317 # "' not available\n";
320 #my $path = $conf->config('teleatlas-path') || '';
321 #local @INC = (@INC, $path);
324 # die "Loading $class failed:\n$@".
325 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
328 $class = 'Geo::EZLocate'; # use our own library
329 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
332 my $userid = $conf->config('ezlocate-userid')
333 or die "no ezlocate-userid configured\n";
334 my $password = $conf->config('ezlocate-password')
335 or die "no ezlocate-password configured\n";
337 my $tool = $class->new($userid, $password);
338 my $match = $tool->findAddress(
339 $location->{address1},
342 $location->{zip}, #12345-6789 format is allowed
344 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
345 # error handling - B codes indicate success
346 die $ezlocate_error{$match->{MAT_STAT}}."\n"
347 unless $match->{MAT_STAT} =~ /^B\d$/;
350 address1 => $match->{MAT_ADDR},
351 address2 => $location->{address2},
352 city => $match->{MAT_CITY},
353 state => $match->{MAT_ST},
354 country => $location->{country},
355 zip => $match->{MAT_ZIP},
356 latitude => $match->{MAT_LAT},
357 longitude => $match->{MAT_LON},
358 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
359 sprintf('%07.2f',$match->{CEN_TRCT}),
362 if ( $match->{STD_ADDR} ) {
363 # then they have a postal standardized address for us
365 address1 => $match->{STD_ADDR},
366 address2 => $location->{address2},
367 city => $match->{STD_CITY},
368 state => $match->{STD_ST},
369 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
376 sub _tomtom_query { # helper method for the below
378 my $result = Geo::TomTom::Geocoding->query(%args);
379 die "TomTom geocoding error: ".$result->message."\n"
380 unless ( $result->is_success );
381 my ($match) = $result->locations;
382 my $type = $match->{type};
383 # match levels below "intersection" should not be considered clean
384 my $clean = ($type eq 'addresspoint' ||
387 $type eq 'intersection'
389 warn "tomtom returned $type match\n" if $DEBUG;
390 warn Dumper($match) if $DEBUG > 1;
394 sub standardize_tomtom {
395 # post-2013 TomTom API
396 # much better, but incompatible with ezlocate
398 my $location = shift;
399 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
402 my $key = $conf->config('tomtom-userid')
403 or die "no tomtom-userid configured\n";
405 my $country = code2country($location->{country});
406 my ($address1, $address2) = ($location->{address1}, $location->{address2});
410 $address1 =~ s/^\s+//;
411 $address1 =~ s/\s+$//;
412 $address2 =~ s/^\s+//;
413 $address2 =~ s/\s+$//;
415 # try to fix some cases of the address fields being switched
416 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
417 $address2 = $address1;
418 $address1 = $location->{address2};
420 # parse sublocation part (unit/suite/apartment...) and clean up
421 # non-sublocation address2
422 ($subloc, $address2) =
423 subloc_address2($address1, $address2, $location->{country});
424 # ask TomTom to standardize address1:
428 L => $location->{city},
429 AA => $location->{state},
430 PC => $location->{zip},
431 CC => country2code($country, LOCALE_CODE_ALPHA_3),
434 my ($match, $clean) = _tomtom_query(%args);
436 if (!$match or !$clean) {
437 # Then try cleaning up the input; TomTom is picky about junk in the
438 # address. Any of these can still be a clean match.
439 my $h = Geo::StreetAddress::US->parse_location($address1);
440 # First conservatively:
441 if ( $h->{sec_unit_type} ) {
442 my $strip = '\s+' . $h->{sec_unit_type};
443 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
445 $args{T} =~ s/$strip//;
446 ($match, $clean) = _tomtom_query(%args);
448 if ( !$match or !$clean ) {
449 # Then more aggressively:
450 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
451 ($match, $clean) = _tomtom_query(%args);
455 if ( !$match or !$clean ) { # partial matches are not useful
456 die "Address not found\n";
459 if ( defined $match->{censusTract} ) {
460 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
461 join('.', $match->{censusTract} =~ /(....)(..)/);
464 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
465 $address1 .= $match->{street} if $match->{street};
466 $address1 .= ' '.$subloc if $subloc;
467 $address1 = uc($address1); # USPS standards
470 address1 => $address1,
471 address2 => $address2,
472 city => uc($match->{city}),
473 state => uc($location->{state}),
474 country => uc($location->{country}),
475 zip => ($match->{standardPostalCode} || $match->{postcode}),
476 latitude => $match->{latitude},
477 longitude => $match->{longitude},
478 censustract => $tract,
479 addr_clean => $clean,
483 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
485 Given 'address1' and 'address2' strings, extract the sublocation part
486 (from either one) and return it. If the sublocation was found in ADDRESS1,
487 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
488 contain something relevant.
493 # Postal Addressing Standards, Appendix C
494 # (plus correction of "hanger" to "hangar")
522 # Canada Post Addressing Guidelines 4.3
533 sub subloc_address2 {
534 # Some things seen in the address2 field:
536 # The complete address (with address1 containing part of the company name,
537 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
540 # try to parse sublocation parts from address1; if they are present we'll
541 # append them back to address1 after standardizing
543 my ($addr1, $addr2, $country) = map uc, @_;
544 my $dict = $subloc_forms{$country} or return('', $addr2);
546 my $found_in = 0; # which address is the sublocation
549 # patterns to try to parse
551 "$addr1 Nullcity, CA"
553 $h = Geo::StreetAddress::US->parse_location($addr1);
554 last if exists($h->{sec_unit_type});
556 if (exists($h->{sec_unit_type})) {
563 "$addr1, $addr2 Nullcity, CA"
565 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
566 last if exists($h->{sec_unit_type});
568 if (exists($h->{sec_unit_type})) {
573 $subloc = $h->{sec_unit_type};
574 # special case: do not combine P.O. box sublocs with address1
575 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
576 if ( $found_in == 2 ) {
577 $addr2 = "PO BOX ".$h->{sec_unit_num};
578 } # else it's in addr1, and leave it alone
580 } elsif ( exists($dict->{$subloc}) ) {
581 # substitute the official abbreviation
582 $subloc = $dict->{$subloc};
584 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
585 } # otherwise $subloc = ''
587 if ( $found_in == 2 ) {
588 # address2 should be fully combined into address1
589 return ($subloc, '');
591 # else address2 is not the canonical sublocation, but do our best to
595 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
597 # remove all punctuation and spaces
598 foreach my $w (split(/\W+/, $addr2)) {
599 if ( exists($dict->{$w}) ) {
600 push @words, $dict->{$w};
604 my $result = join(' ', @words);
605 # correct spacing of pound sign + number
606 $result =~ s/NUMBER(\d)/# $1/;
607 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
610 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
614 sub standardize_melissa {
616 my $location = shift;
619 eval "use Geo::Melissa::WebSmart";
622 my $id = $conf->config('melissa-userid')
623 or die "no melissa-userid configured\n";
624 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
628 a1 => $location->{address1},
629 a2 => $location->{address2},
630 city => $location->{city},
631 state => $location->{state},
632 ctry => $location->{country},
633 zip => $location->{zip},
636 my $result = Geo::Melissa::WebSmart->query($request);
637 if ( $result->code =~ /AS01/ ) { # always present on success
638 my $addr = $result->address;
639 warn Dumper $addr if $DEBUG > 1;
641 address1 => $addr->{Address1},
642 address2 => $addr->{Address2},
643 city => $addr->{City}->{Name},
644 state => $addr->{State}->{Abbreviation},
645 country => $addr->{Country}->{Abbreviation},
647 latitude => $addr->{Latitude},
648 longitude => $addr->{Longitude},
651 if ( $addr->{Census}->{Tract} ) {
652 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
653 # insert decimal point two digits from the end
654 $censustract =~ s/(\d\d)$/\.$1/;
655 $out->{censustract} = $censustract;
656 $out->{censusyear} = $conf->config('census_year');
658 # we could do a lot more nuanced reporting of the warning/status codes,
659 # but the UI doesn't support that yet.
662 die $result->status_message;