4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
45 warn Dumper($location, $year) if $DEBUG;
47 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
52 my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
53 my $res = $ua->request( GET( $url ) );
58 if (!$res->is_success) {
60 $error = $res->message;
64 my $content = $res->content;
66 my $p = new HTML::TokeParser \$content;
69 while (my $token = $p->get_tag('input') ) {
70 if ($token->[1]->{name} eq '__VIEWSTATE') {
71 $viewstate = $token->[1]->{value};
73 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
74 $eventvalidation = $token->[1]->{value};
76 last if $viewstate && $eventvalidation;
79 if (!$viewstate or !$eventvalidation ) {
81 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
85 my($zip5, $zip4) = split('-',$location->{zip});
89 __VIEWSTATE => $viewstate,
90 __EVENTVALIDATION => $eventvalidation,
91 __VIEWSTATEENCRYPTED => '',
93 txtAddress => $location->{address1},
94 txtCity => $location->{city},
95 ddlbState => $location->{state},
97 btnSearch => 'Search',
99 warn join("\n", @ffiec_args )
102 push @{ $ua->requests_redirectable }, 'POST';
103 $res = $ua->request( POST( $url, \@ffiec_args ) );
107 unless ($res->code eq '200') {
109 $error = $res->message;
113 my @id = qw( MSACode StateCode CountyCode TractCode );
114 $content = $res->content;
115 warn $res->content if $DEBUG > 2;
116 $p = new HTML::TokeParser \$content;
117 my $prefix = 'UcGeoResult11_lb';
119 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
121 while (my $token = $p->get_tag('span') ) {
122 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
123 $token->[1]->{id} =~ /^$prefix(\w+)$/;
124 $return->{lc($1)} = $p->get_trimmed_text("/span");
127 unless ( $return->{tractcode} ) {
128 warn "$error: $content ". Dumper($return) if $DEBUG;
129 $error = "No census tract found";
131 $return->{tractcode} .= ' '
132 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
134 } #unless ($res->code eq '200')
136 } #unless ($viewstate)
138 } #unless ($res->code eq '200')
140 die "FFIEC Geocoding error: $error\n" if $error;
142 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
145 #sub get_district_methods {
147 # 'wa_sales' => 'Washington sales tax',
150 =item get_district LOCATION METHOD
152 For the location hash in LOCATION, using lookup method METHOD, fetch
153 tax district information. Currently the only available method is
154 'wa_sales' (the Washington Department of Revenue sales tax lookup).
156 Returns a hash reference containing the following fields:
161 - exempt_amount (currently zero)
162 - city, county, state, country (from
164 The intent is that you can assign this to an L<FS::cust_main_county>
165 object and insert it if there's not yet a tax rate defined for that
168 get_district will die on error.
176 my $location = shift;
177 my $method = shift or return '';
178 warn Dumper($location, $method) if $DEBUG;
183 my $location = shift;
185 return '' if $location->{state} ne 'WA';
187 my $return = { %$location };
188 $return->{'exempt_amount'} = 0.00;
190 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
191 my $ua = new LWP::UserAgent;
193 my $delim = '<|>'; # yes, <|>
194 my $year = (localtime)[5] + 1900;
195 my $month = (localtime)[4] + 1;
196 my @zip = split('-', $location->{zip});
199 'TaxType=S', #sales; 'P' = property
200 'Src=0', #does something complicated
202 'Addr='.uri_escape($location->{address1}),
203 'City='.uri_escape($location->{city}),
205 'Zip1='.($zip[1] || ''), #optional
212 my $query_string = join($delim, @args );
213 $url .= "?$query_string";
214 warn "\nrequest: $url\n\n" if $DEBUG > 1;
216 my $res = $ua->request( GET( "$url?$query_string" ) );
221 if ($res->code ne '200') {
222 $error = $res->message;
225 my $content = $res->content;
226 my $p = new HTML::TokeParser \$content;
228 while ( my $t = $p->get_tag('script') ) {
229 my $u = $p->get_token; #either enclosed text or the </script> tag
230 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
235 if ( $js ) { #found it
236 # strip down to the quoted string, which contains escaped single quotes.
237 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
238 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
239 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
241 $p = new HTML::TokeParser \$js;
242 TD: while ( my $td = $p->get_tag('td') ) {
243 while ( my $u = $p->get_token ) {
244 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
245 next if $u->[0] ne 'T'; # skip non-text
248 if ( lc($text) eq 'location code' ) {
249 $p->get_tag('td'); # skip to the next column
251 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
252 $return->{'district'} = $u->[1];
254 elsif ( lc($text) eq 'total tax rate' ) {
257 $u = $p->get_token until $u->[0] eq 'T';
258 $return->{'tax'} = $u->[1];
264 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
265 $return->{'tax'} *= 100; #percentage
266 warn Dumper($return) if $DEBUG > 1;
270 $error = 'district code/tax rate not found';
274 $error = "failed to parse document";
277 die "WA tax district lookup error: $error";
280 sub standardize_usps {
283 eval "use Business::US::USPS::WebTools::AddressStandardization";
286 my $location = shift;
287 if ( $location->{country} ne 'US' ) {
289 warn "standardize_usps not for use in country ".$location->{country}."\n";
290 $location->{addr_clean} = '';
293 my $userid = $conf->config('usps_webtools-userid');
294 my $password = $conf->config('usps_webtools-password');
295 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
297 Password => $password,
299 } ) or die "error starting USPS WebTools\n";
301 my($zip5, $zip4) = split('-',$location->{'zip'});
304 FirmName => $location->{company},
305 Address2 => $location->{address1},
306 Address1 => $location->{address2},
307 City => $location->{city},
308 State => $location->{state},
312 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
315 my $hash = $verifier->verify_address( %usps_args );
317 warn $verifier->response
320 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
321 if $verifier->is_error;
323 my $zip = $hash->{Zip5};
324 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
326 { company => $hash->{FirmName},
327 address1 => $hash->{Address2},
328 address2 => $hash->{Address1},
329 city => $hash->{City},
330 state => $hash->{State},
336 my %ezlocate_error = ( # USA_Geo_002 documentation
337 10 => 'State not found',
338 11 => 'City not found',
339 12 => 'Invalid street address',
340 14 => 'Street name not found',
341 15 => 'Address range does not exist',
342 16 => 'Ambiguous address',
343 17 => 'Intersection not found', #unused?
346 sub standardize_ezlocate {
348 my $location = shift;
350 #if ( $location->{country} eq 'US' ) {
351 # $class = 'USA_Geo_004Tool';
353 #elsif ( $location->{country} eq 'CA' ) {
354 # $class = 'CAN_Geo_001Tool';
356 #else { # shouldn't be a fatal error, just pass through unverified address
357 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
358 # "' not available\n";
361 #my $path = $conf->config('teleatlas-path') || '';
362 #local @INC = (@INC, $path);
365 # die "Loading $class failed:\n$@".
366 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
369 $class = 'Geo::EZLocate'; # use our own library
370 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
373 my $userid = $conf->config('ezlocate-userid')
374 or die "no ezlocate-userid configured\n";
375 my $password = $conf->config('ezlocate-password')
376 or die "no ezlocate-password configured\n";
378 my $tool = $class->new($userid, $password);
379 my $match = $tool->findAddress(
380 $location->{address1},
383 $location->{zip}, #12345-6789 format is allowed
385 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
386 # error handling - B codes indicate success
387 die $ezlocate_error{$match->{MAT_STAT}}."\n"
388 unless $match->{MAT_STAT} =~ /^B\d$/;
391 address1 => $match->{MAT_ADDR},
392 address2 => $location->{address2},
393 city => $match->{MAT_CITY},
394 state => $match->{MAT_ST},
395 country => $location->{country},
396 zip => $match->{MAT_ZIP},
397 latitude => $match->{MAT_LAT},
398 longitude => $match->{MAT_LON},
399 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
400 sprintf('%07.2f',$match->{CEN_TRCT}),
403 if ( $match->{STD_ADDR} ) {
404 # then they have a postal standardized address for us
406 address1 => $match->{STD_ADDR},
407 address2 => $location->{address2},
408 city => $match->{STD_CITY},
409 state => $match->{STD_ST},
410 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
417 sub _tomtom_query { # helper method for the below
419 my $result = Geo::TomTom::Geocoding->query(%args);
420 die "TomTom geocoding error: ".$result->message."\n"
421 unless ( $result->is_success );
422 my ($match) = $result->locations;
423 my $type = $match->{type};
424 # match levels below "intersection" should not be considered clean
425 my $clean = ($type eq 'addresspoint' ||
428 $type eq 'intersection'
430 warn "tomtom returned $type match\n" if $DEBUG;
431 warn Dumper($match) if $DEBUG > 1;
435 sub standardize_tomtom {
436 # post-2013 TomTom API
437 # much better, but incompatible with ezlocate
439 my $location = shift;
440 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
443 my $key = $conf->config('tomtom-userid')
444 or die "no tomtom-userid configured\n";
446 my $country = code2country($location->{country});
447 my ($address1, $address2) = ($location->{address1}, $location->{address2});
451 $address1 =~ s/^\s+//;
452 $address1 =~ s/\s+$//;
453 $address2 =~ s/^\s+//;
454 $address2 =~ s/\s+$//;
456 # try to fix some cases of the address fields being switched
457 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
458 $address2 = $address1;
459 $address1 = $location->{address2};
461 # parse sublocation part (unit/suite/apartment...) and clean up
462 # non-sublocation address2
463 ($subloc, $address2) =
464 subloc_address2($address1, $address2, $location->{country});
465 # ask TomTom to standardize address1:
469 L => $location->{city},
470 AA => $location->{state},
471 PC => $location->{zip},
472 CC => country2code($country, LOCALE_CODE_ALPHA_3),
475 my ($match, $clean) = _tomtom_query(%args);
477 if (!$match or !$clean) {
478 # Then try cleaning up the input; TomTom is picky about junk in the
479 # address. Any of these can still be a clean match.
480 my $h = Geo::StreetAddress::US->parse_location($address1);
481 # First conservatively:
482 if ( $h->{sec_unit_type} ) {
483 my $strip = '\s+' . $h->{sec_unit_type};
484 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
486 $args{T} =~ s/$strip//;
487 ($match, $clean) = _tomtom_query(%args);
489 if ( !$match or !$clean ) {
490 # Then more aggressively:
491 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
492 ($match, $clean) = _tomtom_query(%args);
496 if ( !$match or !$clean ) { # partial matches are not useful
497 die "Address not found\n";
500 if ( defined $match->{censusTract} ) {
501 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
502 join('.', $match->{censusTract} =~ /(....)(..)/);
505 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
506 $address1 .= $match->{street} if $match->{street};
507 $address1 .= ' '.$subloc if $subloc;
508 $address1 = uc($address1); # USPS standards
511 address1 => $address1,
512 address2 => $address2,
513 city => uc($match->{city}),
514 state => uc($location->{state}),
515 country => uc($location->{country}),
516 zip => ($match->{standardPostalCode} || $match->{postcode}),
517 latitude => $match->{latitude},
518 longitude => $match->{longitude},
519 censustract => $tract,
520 addr_clean => $clean,
524 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
526 Given 'address1' and 'address2' strings, extract the sublocation part
527 (from either one) and return it. If the sublocation was found in ADDRESS1,
528 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
529 contain something relevant.
534 # Postal Addressing Standards, Appendix C
535 # (plus correction of "hanger" to "hangar")
563 # Canada Post Addressing Guidelines 4.3
574 sub subloc_address2 {
575 # Some things seen in the address2 field:
577 # The complete address (with address1 containing part of the company name,
578 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
581 # try to parse sublocation parts from address1; if they are present we'll
582 # append them back to address1 after standardizing
584 my ($addr1, $addr2, $country) = map uc, @_;
585 my $dict = $subloc_forms{$country} or return('', $addr2);
587 my $found_in = 0; # which address is the sublocation
590 # patterns to try to parse
592 "$addr1 Nullcity, CA"
594 $h = Geo::StreetAddress::US->parse_location($addr1);
595 last if exists($h->{sec_unit_type});
597 if (exists($h->{sec_unit_type})) {
604 "$addr1, $addr2 Nullcity, CA"
606 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
607 last if exists($h->{sec_unit_type});
609 if (exists($h->{sec_unit_type})) {
614 $subloc = $h->{sec_unit_type};
615 # special case: do not combine P.O. box sublocs with address1
616 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
617 if ( $found_in == 2 ) {
618 $addr2 = "PO BOX ".$h->{sec_unit_num};
619 } # else it's in addr1, and leave it alone
621 } elsif ( exists($dict->{$subloc}) ) {
622 # substitute the official abbreviation
623 $subloc = $dict->{$subloc};
625 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
626 } # otherwise $subloc = ''
628 if ( $found_in == 2 ) {
629 # address2 should be fully combined into address1
630 return ($subloc, '');
632 # else address2 is not the canonical sublocation, but do our best to
636 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
638 # remove all punctuation and spaces
639 foreach my $w (split(/\W+/, $addr2)) {
640 if ( exists($dict->{$w}) ) {
641 push @words, $dict->{$w};
645 my $result = join(' ', @words);
646 # correct spacing of pound sign + number
647 $result =~ s/NUMBER(\d)/# $1/;
648 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
651 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
655 sub standardize_melissa {
657 my $location = shift;
660 eval "use Geo::Melissa::WebSmart";
663 my $id = $conf->config('melissa-userid')
664 or die "no melissa-userid configured\n";
665 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
669 a1 => $location->{address1},
670 a2 => $location->{address2},
671 city => $location->{city},
672 state => $location->{state},
673 ctry => $location->{country},
674 zip => $location->{zip},
677 my $result = Geo::Melissa::WebSmart->query($request);
678 if ( $result->code =~ /AS01/ ) { # always present on success
679 my $addr = $result->address;
680 warn Dumper $addr if $DEBUG > 1;
682 address1 => $addr->{Address1},
683 address2 => $addr->{Address2},
684 city => $addr->{City}->{Name},
685 state => $addr->{State}->{Abbreviation},
686 country => $addr->{Country}->{Abbreviation},
688 latitude => $addr->{Latitude},
689 longitude => $addr->{Longitude},
692 if ( $addr->{Census}->{Tract} ) {
693 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
694 # insert decimal point two digits from the end
695 $censustract =~ s/(\d\d)$/\.$1/;
696 $out->{censustract} = $censustract;
697 $out->{censusyear} = $conf->config('census_year');
699 # we could do a lot more nuanced reporting of the warning/status codes,
700 # but the UI doesn't support that yet.
703 die $result->status_message;