4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
45 warn Dumper($location, $year) if $DEBUG;
47 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
52 my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
53 my $res = $ua->request( GET( $url ) );
58 if (!$res->is_success) {
60 $error = $res->message;
64 my $content = $res->content;
66 my $p = new HTML::TokeParser \$content;
69 while (my $token = $p->get_tag('input') ) {
70 if ($token->[1]->{name} eq '__VIEWSTATE') {
71 $viewstate = $token->[1]->{value};
73 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
74 $eventvalidation = $token->[1]->{value};
76 last if $viewstate && $eventvalidation;
79 if (!$viewstate or !$eventvalidation ) {
81 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
85 my($zip5, $zip4) = split('-',$location->{zip});
89 __VIEWSTATE => $viewstate,
90 __EVENTVALIDATION => $eventvalidation,
91 __VIEWSTATEENCRYPTED => '',
93 txtAddress => $location->{address1},
94 txtCity => $location->{city},
95 ddlbState => $location->{state},
97 btnSearch => 'Search',
99 warn join("\n", @ffiec_args )
102 push @{ $ua->requests_redirectable }, 'POST';
103 $res = $ua->request( POST( $url, \@ffiec_args ) );
107 unless ($res->code eq '200') {
109 $error = $res->message;
113 my @id = qw( MSACode StateCode CountyCode TractCode );
114 $content = $res->content;
115 warn $res->content if $DEBUG > 2;
116 $p = new HTML::TokeParser \$content;
117 my $prefix = 'UcGeoResult11_lb';
119 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
121 while (my $token = $p->get_tag('span') ) {
122 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
123 $token->[1]->{id} =~ /^$prefix(\w+)$/;
124 $return->{lc($1)} = $p->get_trimmed_text("/span");
127 unless ( $return->{tractcode} ) {
128 warn "$error: $content ". Dumper($return) if $DEBUG;
129 $error = "No census tract found";
131 $return->{tractcode} .= ' '
132 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
134 } #unless ($res->code eq '200')
136 } #unless ($viewstate)
138 } #unless ($res->code eq '200')
140 die "FFIEC Geocoding error: $error\n" if $error;
142 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
145 #sub get_district_methods {
147 # 'wa_sales' => 'Washington sales tax',
150 =item get_district LOCATION METHOD
152 For the location hash in LOCATION, using lookup method METHOD, fetch
153 tax district information. Currently the only available method is
154 'wa_sales' (the Washington Department of Revenue sales tax lookup).
156 Returns a hash reference containing the following fields:
161 - exempt_amount (currently zero)
162 - city, county, state, country (from
164 The intent is that you can assign this to an L<FS::cust_main_county>
165 object and insert it if there's not yet a tax rate defined for that
168 get_district will die on error.
176 my $location = shift;
177 my $method = shift or return '';
178 warn Dumper($location, $method) if $DEBUG;
183 my $location = shift;
185 return '' if $location->{state} ne 'WA';
187 my $return = { %$location };
188 $return->{'exempt_amount'} = 0.00;
190 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
191 my $ua = new LWP::UserAgent;
193 my $delim = '<|>'; # yes, <|>
194 my $year = (localtime)[5] + 1900;
195 my $month = (localtime)[4] + 1;
196 my @zip = split('-', $location->{zip});
199 'TaxType=S', #sales; 'P' = property
200 'Src=0', #does something complicated
202 'Addr='.uri_escape($location->{address1}),
203 'City='.uri_escape($location->{city}),
205 'Zip1='.($zip[1] || ''), #optional
212 my $query_string = join($delim, @args );
213 $url .= "?$query_string";
214 warn "\nrequest: $url\n\n" if $DEBUG > 1;
216 my $res = $ua->request( GET( "$url?$query_string" ) );
221 if ($res->code ne '200') {
222 $error = $res->message;
225 my $content = $res->content;
226 my $p = new HTML::TokeParser \$content;
228 while ( my $t = $p->get_tag('script') ) {
229 my $u = $p->get_token; #either enclosed text or the </script> tag
230 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
235 if ( $js ) { #found it
236 # strip down to the quoted string, which contains escaped single quotes.
237 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
238 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
239 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
241 $p = new HTML::TokeParser \$js;
242 TD: while ( my $td = $p->get_tag('td') ) {
243 while ( my $u = $p->get_token ) {
244 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
245 next if $u->[0] ne 'T'; # skip non-text
248 if ( lc($text) eq 'location code' ) {
249 $p->get_tag('td'); # skip to the next column
251 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
252 $return->{'district'} = $u->[1];
254 elsif ( lc($text) eq 'total tax rate' ) {
257 $u = $p->get_token until $u->[0] eq 'T';
258 $return->{'tax'} = $u->[1];
264 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
265 $return->{'tax'} *= 100; #percentage
266 warn Dumper($return) if $DEBUG > 1;
270 $error = 'district code/tax rate not found';
274 $error = "failed to parse document";
277 die "WA tax district lookup error: $error";
280 sub standardize_usps {
283 eval "use Business::US::USPS::WebTools::AddressStandardization";
286 my $location = shift;
287 if ( $location->{country} ne 'US' ) {
289 warn "standardize_usps not for use in country ".$location->{country}."\n";
290 $location->{addr_clean} = '';
293 my $userid = $conf->config('usps_webtools-userid');
294 my $password = $conf->config('usps_webtools-password');
295 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
297 Password => $password,
299 } ) or die "error starting USPS WebTools\n";
301 my($zip5, $zip4) = split('-',$location->{'zip'});
304 FirmName => $location->{company},
305 Address2 => $location->{address1},
306 Address1 => $location->{address2},
307 City => $location->{city},
308 State => $location->{state},
312 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
315 my $hash = $verifier->verify_address( %usps_args );
317 warn $verifier->response
320 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
321 if $verifier->is_error;
323 my $zip = $hash->{Zip5};
324 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
326 { company => $hash->{FirmName},
327 address1 => $hash->{Address2},
328 address2 => $hash->{Address1},
329 city => $hash->{City},
330 state => $hash->{State},
336 my %ezlocate_error = ( # USA_Geo_002 documentation
337 10 => 'State not found',
338 11 => 'City not found',
339 12 => 'Invalid street address',
340 14 => 'Street name not found',
341 15 => 'Address range does not exist',
342 16 => 'Ambiguous address',
343 17 => 'Intersection not found', #unused?
346 sub standardize_ezlocate {
348 my $location = shift;
350 #if ( $location->{country} eq 'US' ) {
351 # $class = 'USA_Geo_004Tool';
353 #elsif ( $location->{country} eq 'CA' ) {
354 # $class = 'CAN_Geo_001Tool';
356 #else { # shouldn't be a fatal error, just pass through unverified address
357 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
358 # "' not available\n";
361 #my $path = $conf->config('teleatlas-path') || '';
362 #local @INC = (@INC, $path);
365 # die "Loading $class failed:\n$@".
366 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
369 $class = 'Geo::EZLocate'; # use our own library
370 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
373 my $userid = $conf->config('ezlocate-userid')
374 or die "no ezlocate-userid configured\n";
375 my $password = $conf->config('ezlocate-password')
376 or die "no ezlocate-password configured\n";
378 my $tool = $class->new($userid, $password);
379 my $match = $tool->findAddress(
380 $location->{address1},
383 $location->{zip}, #12345-6789 format is allowed
385 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
386 # error handling - B codes indicate success
387 die $ezlocate_error{$match->{MAT_STAT}}."\n"
388 unless $match->{MAT_STAT} =~ /^B\d$/;
391 address1 => $match->{MAT_ADDR},
392 address2 => $location->{address2},
393 city => $match->{MAT_CITY},
394 state => $match->{MAT_ST},
395 country => $location->{country},
396 zip => $match->{MAT_ZIP},
397 latitude => $match->{MAT_LAT},
398 longitude => $match->{MAT_LON},
399 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
400 sprintf('%07.2f',$match->{CEN_TRCT}),
403 if ( $match->{STD_ADDR} ) {
404 # then they have a postal standardized address for us
406 address1 => $match->{STD_ADDR},
407 address2 => $location->{address2},
408 city => $match->{STD_CITY},
409 state => $match->{STD_ST},
410 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
417 sub _tomtom_query { # helper method for the below
419 my $result = Geo::TomTom::Geocoding->query(%args);
420 die "TomTom geocoding error: ".$result->message."\n"
421 unless ( $result->is_success );
422 my ($match) = $result->locations;
423 my $type = $match->{type};
424 # match levels below "intersection" should not be considered clean
425 my $clean = ($type eq 'addresspoint' ||
428 $type eq 'intersection'
430 warn "tomtom returned $type match\n" if $DEBUG;
431 warn Dumper($match) if $DEBUG > 1;
435 sub standardize_tomtom {
436 # post-2013 TomTom API
437 # much better, but incompatible with ezlocate
439 my $location = shift;
440 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
443 my $key = $conf->config('tomtom-userid')
444 or die "no tomtom-userid configured\n";
446 my $country = code2country($location->{country});
447 my ($address1, $address2) = ($location->{address1}, $location->{address2});
450 # try to fix some cases of the address fields being switched
451 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
452 $address2 = $address1;
453 $address1 = $location->{address2};
455 # parse sublocation part (unit/suite/apartment...) and clean up
456 # non-sublocation address2
457 ($subloc, $address2) =
458 subloc_address2($address1, $address2, $location->{country});
459 # ask TomTom to standardize address1:
463 L => $location->{city},
464 AA => $location->{state},
465 PC => $location->{zip},
466 CC => country2code($country, LOCALE_CODE_ALPHA_3),
469 my ($match, $clean) = _tomtom_query(%args);
471 if (!$match or !$clean) {
472 # Then try cleaning up the input; TomTom is picky about junk in the
473 # address. Any of these can still be a clean match.
474 my $h = Geo::StreetAddress::US->parse_location($address1);
475 # First conservatively:
476 if ( $h->{sec_unit_type} ) {
477 my $strip = '\s+' . $h->{sec_unit_type};
478 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
480 $args{T} =~ s/$strip//;
481 ($match, $clean) = _tomtom_query(%args);
483 if ( !$match or !$clean ) {
484 # Then more aggressively:
485 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
486 ($match, $clean) = _tomtom_query(%args);
491 die "Location not found.\n";
494 if ( defined $match->{censusTract} ) {
495 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
496 join('.', $match->{censusTract} =~ /(....)(..)/);
499 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
500 $address1 .= $match->{street} if $match->{street};
501 $address1 .= ' '.$subloc if $subloc;
502 $address1 = uc($address1); # USPS standards
505 address1 => $address1,
506 address2 => $address2,
507 city => uc($match->{city}),
508 state => uc($location->{state}),
509 country => uc($location->{country}),
510 zip => ($match->{standardPostalCode} || $match->{postcode}),
511 latitude => $match->{latitude},
512 longitude => $match->{longitude},
513 censustract => $tract,
514 addr_clean => $clean,
518 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
520 Given 'address1' and 'address2' strings, extract the sublocation part
521 (from either one) and return it. If the sublocation was found in ADDRESS1,
522 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
523 contain something relevant.
528 # Postal Addressing Standards, Appendix C
529 # (plus correction of "hanger" to "hangar")
557 # Canada Post Addressing Guidelines 4.3
568 sub subloc_address2 {
569 # Some things seen in the address2 field:
571 # The complete address (with address1 containing part of the company name,
572 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
575 # try to parse sublocation parts from address1; if they are present we'll
576 # append them back to address1 after standardizing
578 my ($addr1, $addr2, $country) = map uc, @_;
579 my $dict = $subloc_forms{$country} or return('', $addr2);
581 my $found_in = 0; # which address is the sublocation
584 # patterns to try to parse
586 "$addr1 Nullcity, CA"
588 $h = Geo::StreetAddress::US->parse_location($addr1);
589 last if exists($h->{sec_unit_type});
591 if (exists($h->{sec_unit_type})) {
598 "$addr1, $addr2 Nullcity, CA"
600 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
601 last if exists($h->{sec_unit_type});
603 if (exists($h->{sec_unit_type})) {
608 $subloc = $h->{sec_unit_type};
609 # special case: do not combine P.O. box sublocs with address1
610 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
611 if ( $found_in == 2 ) {
612 $addr2 = "PO BOX ".$h->{sec_unit_num};
613 } # else it's in addr1, and leave it alone
615 } elsif ( exists($dict->{$subloc}) ) {
616 # substitute the official abbreviation
617 $subloc = $dict->{$subloc};
619 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
620 } # otherwise $subloc = ''
622 if ( $found_in == 2 ) {
623 # address2 should be fully combined into address1
624 return ($subloc, '');
626 # else address2 is not the canonical sublocation, but do our best to
630 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
632 # remove all punctuation and spaces
633 foreach my $w (split(/\W+/, $addr2)) {
634 if ( exists($dict->{$w}) ) {
635 push @words, $dict->{$w};
639 my $result = join(' ', @words);
640 # correct spacing of pound sign + number
641 $result =~ s/NUMBER(\d)/# $1/;
642 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;