4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
45 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
49 warn Dumper($location, $year) if $DEBUG;
51 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
56 my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
57 my $res = $ua->request( GET( $url ) );
62 if (!$res->is_success) {
64 $error = $res->message;
68 my $content = $res->content;
70 my $p = new HTML::TokeParser \$content;
73 while (my $token = $p->get_tag('input') ) {
74 if ($token->[1]->{name} eq '__VIEWSTATE') {
75 $viewstate = $token->[1]->{value};
77 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
78 $eventvalidation = $token->[1]->{value};
80 last if $viewstate && $eventvalidation;
83 if (!$viewstate or !$eventvalidation ) {
85 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
89 my($zip5, $zip4) = split('-',$location->{zip});
93 __VIEWSTATE => $viewstate,
94 __EVENTVALIDATION => $eventvalidation,
95 __VIEWSTATEENCRYPTED => '',
97 txtAddress => $location->{address1},
98 txtCity => $location->{city},
99 ddlbState => $location->{state},
101 btnSearch => 'Search',
103 warn join("\n", @ffiec_args )
106 push @{ $ua->requests_redirectable }, 'POST';
107 $res = $ua->request( POST( $url, \@ffiec_args ) );
111 unless ($res->code eq '200') {
113 $error = $res->message;
117 my @id = qw( MSACode StateCode CountyCode TractCode );
118 $content = $res->content;
119 warn $res->content if $DEBUG > 2;
120 $p = new HTML::TokeParser \$content;
121 my $prefix = 'UcGeoResult11_lb';
123 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
125 while (my $token = $p->get_tag('span') ) {
126 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
127 $token->[1]->{id} =~ /^$prefix(\w+)$/;
128 $return->{lc($1)} = $p->get_trimmed_text("/span");
131 unless ( $return->{tractcode} ) {
132 warn "$error: $content ". Dumper($return) if $DEBUG;
133 $error = "No census tract found";
135 $return->{tractcode} .= ' '
136 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
138 } #unless ($res->code eq '200')
140 } #unless ($viewstate)
142 } #unless ($res->code eq '200')
144 die "FFIEC Geocoding error: $error\n" if $error;
146 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
149 #sub get_district_methods {
151 # 'wa_sales' => 'Washington sales tax',
154 =item get_district LOCATION METHOD
156 For the location hash in LOCATION, using lookup method METHOD, fetch
157 tax district information. Currently the only available method is
158 'wa_sales' (the Washington Department of Revenue sales tax lookup).
160 Returns a hash reference containing the following fields:
165 - exempt_amount (currently zero)
166 - city, county, state, country (from
168 The intent is that you can assign this to an L<FS::cust_main_county>
169 object and insert it if there's not yet a tax rate defined for that
172 get_district will die on error.
180 my $location = shift;
181 my $method = shift or return '';
182 warn Dumper($location, $method) if $DEBUG;
187 my $location = shift;
189 return '' if $location->{state} ne 'WA';
191 my $return = { %$location };
192 $return->{'exempt_amount'} = 0.00;
194 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
195 my $ua = new LWP::UserAgent;
197 my $delim = '<|>'; # yes, <|>
198 my $year = (localtime)[5] + 1900;
199 my $month = (localtime)[4] + 1;
200 my @zip = split('-', $location->{zip});
203 'TaxType=S', #sales; 'P' = property
204 'Src=0', #does something complicated
206 'Addr='.uri_escape($location->{address1}),
207 'City='.uri_escape($location->{city}),
209 'Zip1='.($zip[1] || ''), #optional
216 my $query_string = join($delim, @args );
217 $url .= "?$query_string";
218 warn "\nrequest: $url\n\n" if $DEBUG > 1;
220 my $res = $ua->request( GET( "$url?$query_string" ) );
225 if ($res->code ne '200') {
226 $error = $res->message;
229 my $content = $res->content;
230 my $p = new HTML::TokeParser \$content;
232 while ( my $t = $p->get_tag('script') ) {
233 my $u = $p->get_token; #either enclosed text or the </script> tag
234 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
239 if ( $js ) { #found it
240 # strip down to the quoted string, which contains escaped single quotes.
241 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
242 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
243 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
245 $p = new HTML::TokeParser \$js;
246 TD: while ( my $td = $p->get_tag('td') ) {
247 while ( my $u = $p->get_token ) {
248 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
249 next if $u->[0] ne 'T'; # skip non-text
252 if ( lc($text) eq 'location code' ) {
253 $p->get_tag('td'); # skip to the next column
255 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
256 $return->{'district'} = $u->[1];
258 elsif ( lc($text) eq 'total tax rate' ) {
261 $u = $p->get_token until $u->[0] eq 'T';
262 $return->{'tax'} = $u->[1];
268 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
269 $return->{'tax'} *= 100; #percentage
270 warn Dumper($return) if $DEBUG > 1;
274 $error = 'district code/tax rate not found';
278 $error = "failed to parse document";
281 die "WA tax district lookup error: $error";
284 sub standardize_usps {
287 eval "use Business::US::USPS::WebTools::AddressStandardization";
290 my $location = shift;
291 if ( $location->{country} ne 'US' ) {
293 warn "standardize_usps not for use in country ".$location->{country}."\n";
294 $location->{addr_clean} = '';
297 my $userid = $conf->config('usps_webtools-userid');
298 my $password = $conf->config('usps_webtools-password');
299 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
301 Password => $password,
303 } ) or die "error starting USPS WebTools\n";
305 my($zip5, $zip4) = split('-',$location->{'zip'});
308 FirmName => $location->{company},
309 Address2 => $location->{address1},
310 Address1 => $location->{address2},
311 City => $location->{city},
312 State => $location->{state},
316 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
319 my $hash = $verifier->verify_address( %usps_args );
321 warn $verifier->response
324 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
325 if $verifier->is_error;
327 my $zip = $hash->{Zip5};
328 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
330 { company => $hash->{FirmName},
331 address1 => $hash->{Address2},
332 address2 => $hash->{Address1},
333 city => $hash->{City},
334 state => $hash->{State},
340 my %ezlocate_error = ( # USA_Geo_002 documentation
341 10 => 'State not found',
342 11 => 'City not found',
343 12 => 'Invalid street address',
344 14 => 'Street name not found',
345 15 => 'Address range does not exist',
346 16 => 'Ambiguous address',
347 17 => 'Intersection not found', #unused?
350 sub standardize_ezlocate {
352 my $location = shift;
354 #if ( $location->{country} eq 'US' ) {
355 # $class = 'USA_Geo_004Tool';
357 #elsif ( $location->{country} eq 'CA' ) {
358 # $class = 'CAN_Geo_001Tool';
360 #else { # shouldn't be a fatal error, just pass through unverified address
361 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
362 # "' not available\n";
365 #my $path = $conf->config('teleatlas-path') || '';
366 #local @INC = (@INC, $path);
369 # die "Loading $class failed:\n$@".
370 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
373 $class = 'Geo::EZLocate'; # use our own library
374 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
377 my $userid = $conf->config('ezlocate-userid')
378 or die "no ezlocate-userid configured\n";
379 my $password = $conf->config('ezlocate-password')
380 or die "no ezlocate-password configured\n";
382 my $tool = $class->new($userid, $password);
383 my $match = $tool->findAddress(
384 $location->{address1},
387 $location->{zip}, #12345-6789 format is allowed
389 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
390 # error handling - B codes indicate success
391 die $ezlocate_error{$match->{MAT_STAT}}."\n"
392 unless $match->{MAT_STAT} =~ /^B\d$/;
395 address1 => $match->{MAT_ADDR},
396 address2 => $location->{address2},
397 city => $match->{MAT_CITY},
398 state => $match->{MAT_ST},
399 country => $location->{country},
400 zip => $match->{MAT_ZIP},
401 latitude => $match->{MAT_LAT},
402 longitude => $match->{MAT_LON},
403 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
404 sprintf('%07.2f',$match->{CEN_TRCT}),
407 if ( $match->{STD_ADDR} ) {
408 # then they have a postal standardized address for us
410 address1 => $match->{STD_ADDR},
411 address2 => $location->{address2},
412 city => $match->{STD_CITY},
413 state => $match->{STD_ST},
414 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
421 sub _tomtom_query { # helper method for the below
423 my $result = Geo::TomTom::Geocoding->query(%args);
424 die "TomTom geocoding error: ".$result->message."\n"
425 unless ( $result->is_success );
426 my ($match) = $result->locations;
427 my $type = $match->{type};
428 # match levels below "intersection" should not be considered clean
429 my $clean = ($type eq 'addresspoint' ||
432 $type eq 'intersection'
434 warn "tomtom returned $type match\n" if $DEBUG;
435 warn Dumper($match) if $DEBUG > 1;
439 sub standardize_tomtom {
440 # post-2013 TomTom API
441 # much better, but incompatible with ezlocate
443 my $location = shift;
444 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
447 my $key = $conf->config('tomtom-userid')
448 or die "no tomtom-userid configured\n";
450 my $country = code2country($location->{country});
451 my ($address1, $address2) = ($location->{address1}, $location->{address2});
455 $address1 =~ s/^\s+//;
456 $address1 =~ s/\s+$//;
457 $address2 =~ s/^\s+//;
458 $address2 =~ s/\s+$//;
460 # try to fix some cases of the address fields being switched
461 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
462 $address2 = $address1;
463 $address1 = $location->{address2};
465 # parse sublocation part (unit/suite/apartment...) and clean up
466 # non-sublocation address2
467 ($subloc, $address2) =
468 subloc_address2($address1, $address2, $location->{country});
469 # ask TomTom to standardize address1:
473 L => $location->{city},
474 AA => $location->{state},
475 PC => $location->{zip},
476 CC => country2code($country, LOCALE_CODE_ALPHA_3),
479 my ($match, $clean) = _tomtom_query(%args);
481 if (!$match or !$clean) {
482 # Then try cleaning up the input; TomTom is picky about junk in the
483 # address. Any of these can still be a clean match.
484 my $h = Geo::StreetAddress::US->parse_location($address1);
485 # First conservatively:
486 if ( $h->{sec_unit_type} ) {
487 my $strip = '\s+' . $h->{sec_unit_type};
488 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
490 $args{T} =~ s/$strip//;
491 ($match, $clean) = _tomtom_query(%args);
493 if ( !$match or !$clean ) {
494 # Then more aggressively:
495 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
496 ($match, $clean) = _tomtom_query(%args);
500 if ( !$match or !$clean ) { # partial matches are not useful
501 die "Address not found\n";
504 if ( defined $match->{censusTract} ) {
505 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
506 join('.', $match->{censusTract} =~ /(....)(..)/);
509 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
510 $address1 .= $match->{street} if $match->{street};
511 $address1 .= ' '.$subloc if $subloc;
512 $address1 = uc($address1); # USPS standards
515 address1 => $address1,
516 address2 => $address2,
517 city => uc($match->{city}),
518 state => uc($location->{state}),
519 country => uc($location->{country}),
520 zip => ($match->{standardPostalCode} || $match->{postcode}),
521 latitude => $match->{latitude},
522 longitude => $match->{longitude},
523 censustract => $tract,
524 addr_clean => $clean,
528 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
530 Given 'address1' and 'address2' strings, extract the sublocation part
531 (from either one) and return it. If the sublocation was found in ADDRESS1,
532 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
533 contain something relevant.
538 # Postal Addressing Standards, Appendix C
539 # (plus correction of "hanger" to "hangar")
567 # Canada Post Addressing Guidelines 4.3
578 sub subloc_address2 {
579 # Some things seen in the address2 field:
581 # The complete address (with address1 containing part of the company name,
582 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
585 # try to parse sublocation parts from address1; if they are present we'll
586 # append them back to address1 after standardizing
588 my ($addr1, $addr2, $country) = map uc, @_;
589 my $dict = $subloc_forms{$country} or return('', $addr2);
591 my $found_in = 0; # which address is the sublocation
594 # patterns to try to parse
596 "$addr1 Nullcity, CA"
598 $h = Geo::StreetAddress::US->parse_location($addr1);
599 last if exists($h->{sec_unit_type});
601 if (exists($h->{sec_unit_type})) {
608 "$addr1, $addr2 Nullcity, CA"
610 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
611 last if exists($h->{sec_unit_type});
613 if (exists($h->{sec_unit_type})) {
618 $subloc = $h->{sec_unit_type};
619 # special case: do not combine P.O. box sublocs with address1
620 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
621 if ( $found_in == 2 ) {
622 $addr2 = "PO BOX ".$h->{sec_unit_num};
623 } # else it's in addr1, and leave it alone
625 } elsif ( exists($dict->{$subloc}) ) {
626 # substitute the official abbreviation
627 $subloc = $dict->{$subloc};
629 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
630 } # otherwise $subloc = ''
632 if ( $found_in == 2 ) {
633 # address2 should be fully combined into address1
634 return ($subloc, '');
636 # else address2 is not the canonical sublocation, but do our best to
640 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
642 # remove all punctuation and spaces
643 foreach my $w (split(/\W+/, $addr2)) {
644 if ( exists($dict->{$w}) ) {
645 push @words, $dict->{$w};
649 my $result = join(' ', @words);
650 # correct spacing of pound sign + number
651 $result =~ s/NUMBER(\d)/# $1/;
652 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
655 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
659 sub standardize_melissa {
661 my $location = shift;
664 eval "use Geo::Melissa::WebSmart";
667 my $id = $conf->config('melissa-userid')
668 or die "no melissa-userid configured\n";
669 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
673 a1 => $location->{address1},
674 a2 => $location->{address2},
675 city => $location->{city},
676 state => $location->{state},
677 ctry => $location->{country},
678 zip => $location->{zip},
681 my $result = Geo::Melissa::WebSmart->query($request);
682 if ( $result->code =~ /AS01/ ) { # always present on success
683 my $addr = $result->address;
684 warn Dumper $addr if $DEBUG > 1;
686 address1 => $addr->{Address1},
687 address2 => $addr->{Address2},
688 city => $addr->{City}->{Name},
689 state => $addr->{State}->{Abbreviation},
690 country => $addr->{Country}->{Abbreviation},
692 latitude => $addr->{Latitude},
693 longitude => $addr->{Longitude},
696 if ( $addr->{Census}->{Tract} ) {
697 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
698 # insert decimal point two digits from the end
699 $censustract =~ s/(\d\d)$/\.$1/;
700 $out->{censustract} = $censustract;
701 $out->{censusyear} = $conf->config('census_year');
703 # we could do a lot more nuanced reporting of the warning/status codes,
704 # but the UI doesn't support that yet.
707 die $result->status_message;