4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
45 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
49 warn Dumper($location, $year) if $DEBUG;
51 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
56 my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
57 my $res = $ua->request( GET( $url ) );
62 if (!$res->is_success) {
64 $error = $res->message;
68 my $content = $res->content;
70 my $p = new HTML::TokeParser \$content;
73 while (my $token = $p->get_tag('input') ) {
74 if ($token->[1]->{name} eq '__VIEWSTATE') {
75 $viewstate = $token->[1]->{value};
77 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
78 $eventvalidation = $token->[1]->{value};
80 last if $viewstate && $eventvalidation;
83 if (!$viewstate or !$eventvalidation ) {
85 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
89 my($zip5, $zip4) = split('-',$location->{zip});
93 __VIEWSTATE => $viewstate,
94 __EVENTVALIDATION => $eventvalidation,
95 __VIEWSTATEENCRYPTED => '',
97 txtAddress => $location->{address1},
98 txtCity => $location->{city},
99 ddlbState => $location->{state},
101 btnSearch => 'Search',
103 warn join("\n", @ffiec_args )
106 push @{ $ua->requests_redirectable }, 'POST';
107 $res = $ua->request( POST( $url, \@ffiec_args ) );
111 unless ($res->code eq '200') {
113 $error = $res->message;
117 my @id = qw( MSACode StateCode CountyCode TractCode );
118 $content = $res->content;
119 warn $res->content if $DEBUG > 2;
120 $p = new HTML::TokeParser \$content;
121 my $prefix = 'UcGeoResult11_lb';
123 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
125 while (my $token = $p->get_tag('span') ) {
126 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
127 $token->[1]->{id} =~ /^$prefix(\w+)$/;
128 $return->{lc($1)} = $p->get_trimmed_text("/span");
131 unless ( $return->{tractcode} ) {
132 warn "$error: $content ". Dumper($return) if $DEBUG;
133 $error = "No census tract found";
135 $return->{tractcode} .= ' '
136 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
138 } #unless ($res->code eq '200')
140 } #unless ($viewstate)
142 } #unless ($res->code eq '200')
144 die "FFIEC Geocoding error: $error\n" if $error;
146 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
149 #sub get_district_methods {
151 # 'wa_sales' => 'Washington sales tax',
154 =item get_district LOCATION METHOD
156 For the location hash in LOCATION, using lookup method METHOD, fetch
157 tax district information. Currently the only available method is
158 'wa_sales' (the Washington Department of Revenue sales tax lookup).
160 Returns a hash reference containing the following fields:
165 - exempt_amount (currently zero)
166 - city, county, state, country (from
168 The intent is that you can assign this to an L<FS::cust_main_county>
169 object and insert it if there's not yet a tax rate defined for that
172 get_district will die on error.
180 my $location = shift;
181 my $method = shift or return '';
182 warn Dumper($location, $method) if $DEBUG;
187 my $location = shift;
189 return '' if $location->{state} ne 'WA';
191 my $return = { %$location };
192 $return->{'exempt_amount'} = 0.00;
194 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
195 my $ua = new LWP::UserAgent;
197 my $delim = '<|>'; # yes, <|>
198 my $year = (localtime)[5] + 1900;
199 my $month = (localtime)[4] + 1;
200 my @zip = split('-', $location->{zip});
203 'TaxType=S', #sales; 'P' = property
204 'Src=0', #does something complicated
206 'Addr='.uri_escape($location->{address1}),
207 'City='.uri_escape($location->{city}),
209 'Zip1='.($zip[1] || ''), #optional
216 my $query_string = join($delim, @args );
217 $url .= "?$query_string";
218 warn "\nrequest: $url\n\n" if $DEBUG > 1;
220 my $res = $ua->request( GET( "$url?$query_string" ) );
225 if ($res->code ne '200') {
226 $error = $res->message;
229 my $content = $res->content;
230 my $p = new HTML::TokeParser \$content;
232 while ( my $t = $p->get_tag('script') ) {
233 my $u = $p->get_token; #either enclosed text or the </script> tag
234 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
239 if ( $js ) { #found it
240 # strip down to the quoted string, which contains escaped single quotes.
241 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
242 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
243 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
245 $p = new HTML::TokeParser \$js;
246 TD: while ( my $td = $p->get_tag('td') ) {
247 while ( my $u = $p->get_token ) {
248 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
249 next if $u->[0] ne 'T'; # skip non-text
252 if ( lc($text) eq 'location code' ) {
253 $p->get_tag('td'); # skip to the next column
255 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
256 $return->{'district'} = $u->[1];
258 elsif ( lc($text) eq 'total tax rate' ) {
261 $u = $p->get_token until $u->[0] eq 'T';
262 $return->{'tax'} = $u->[1];
268 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
269 $return->{'tax'} *= 100; #percentage
270 warn Dumper($return) if $DEBUG > 1;
274 $error = 'district code/tax rate not found';
278 $error = "failed to parse document";
281 die "WA tax district lookup error: $error";
284 sub standardize_usps {
287 eval "use Business::US::USPS::WebTools::AddressStandardization";
290 my $location = shift;
291 if ( $location->{country} ne 'US' ) {
293 warn "standardize_usps not for use in country ".$location->{country}."\n";
294 $location->{addr_clean} = '';
297 my $userid = $conf->config('usps_webtools-userid');
298 my $password = $conf->config('usps_webtools-password');
299 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
301 Password => $password,
303 } ) or die "error starting USPS WebTools\n";
305 my($zip5, $zip4) = split('-',$location->{'zip'});
308 FirmName => $location->{company},
309 Address2 => $location->{address1},
310 Address1 => $location->{address2},
311 City => $location->{city},
312 State => $location->{state},
316 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
319 my $hash = $verifier->verify_address( %usps_args );
321 warn $verifier->response
324 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
325 if $verifier->is_error;
327 my $zip = $hash->{Zip5};
328 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
330 { company => $hash->{FirmName},
331 address1 => $hash->{Address2},
332 address2 => $hash->{Address1},
333 city => $hash->{City},
334 state => $hash->{State},
340 sub _tomtom_query { # helper method for the below
342 my $result = Geo::TomTom::Geocoding->query(%args);
343 die "TomTom geocoding error: ".$result->message."\n"
344 unless ( $result->is_success );
345 my ($match) = $result->locations;
346 my $type = $match->{type};
347 # match levels below "intersection" should not be considered clean
348 my $clean = ($type eq 'addresspoint' ||
351 $type eq 'intersection'
353 warn "tomtom returned $type match\n" if $DEBUG;
354 warn Dumper($match) if $DEBUG > 1;
358 sub standardize_tomtom {
359 # post-2013 TomTom API
360 # much better, but incompatible with ezlocate
362 my $location = shift;
363 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
366 my $key = $conf->config('tomtom-userid')
367 or die "no tomtom-userid configured\n";
369 my $country = code2country($location->{country});
370 my ($address1, $address2) = ($location->{address1}, $location->{address2});
374 $address1 =~ s/^\s+//;
375 $address1 =~ s/\s+$//;
376 $address2 =~ s/^\s+//;
377 $address2 =~ s/\s+$//;
379 # try to fix some cases of the address fields being switched
380 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
381 $address2 = $address1;
382 $address1 = $location->{address2};
384 # parse sublocation part (unit/suite/apartment...) and clean up
385 # non-sublocation address2
386 ($subloc, $address2) =
387 subloc_address2($address1, $address2, $location->{country});
388 # ask TomTom to standardize address1:
392 L => $location->{city},
393 AA => $location->{state},
394 PC => $location->{zip},
395 CC => country2code($country, LOCALE_CODE_ALPHA_3),
398 my ($match, $clean) = _tomtom_query(%args);
400 if (!$match or !$clean) {
401 # Then try cleaning up the input; TomTom is picky about junk in the
402 # address. Any of these can still be a clean match.
403 my $h = Geo::StreetAddress::US->parse_location($address1);
404 # First conservatively:
405 if ( $h->{sec_unit_type} ) {
406 my $strip = '\s+' . $h->{sec_unit_type};
407 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
409 $args{T} =~ s/$strip//;
410 ($match, $clean) = _tomtom_query(%args);
412 if ( !$match or !$clean ) {
413 # Then more aggressively:
414 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
415 ($match, $clean) = _tomtom_query(%args);
419 if ( !$match or !$clean ) { # partial matches are not useful
420 die "Address not found\n";
423 if ( defined $match->{censusTract} ) {
424 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
425 join('.', $match->{censusTract} =~ /(....)(..)/);
428 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
429 $address1 .= $match->{street} if $match->{street};
430 $address1 .= ' '.$subloc if $subloc;
431 $address1 = uc($address1); # USPS standards
434 address1 => $address1,
435 address2 => $address2,
436 city => uc($match->{city}),
437 state => uc($location->{state}),
438 country => uc($location->{country}),
439 zip => ($match->{standardPostalCode} || $match->{postcode}),
440 latitude => $match->{latitude},
441 longitude => $match->{longitude},
442 censustract => $tract,
443 addr_clean => $clean,
447 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
449 Given 'address1' and 'address2' strings, extract the sublocation part
450 (from either one) and return it. If the sublocation was found in ADDRESS1,
451 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
452 contain something relevant.
457 # Postal Addressing Standards, Appendix C
458 # (plus correction of "hanger" to "hangar")
486 # Canada Post Addressing Guidelines 4.3
497 sub subloc_address2 {
498 # Some things seen in the address2 field:
500 # The complete address (with address1 containing part of the company name,
501 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
504 # try to parse sublocation parts from address1; if they are present we'll
505 # append them back to address1 after standardizing
507 my ($addr1, $addr2, $country) = map uc, @_;
508 my $dict = $subloc_forms{$country} or return('', $addr2);
510 my $found_in = 0; # which address is the sublocation
513 # patterns to try to parse
515 "$addr1 Nullcity, CA"
517 $h = Geo::StreetAddress::US->parse_location($addr1);
518 last if exists($h->{sec_unit_type});
520 if (exists($h->{sec_unit_type})) {
527 "$addr1, $addr2 Nullcity, CA"
529 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
530 last if exists($h->{sec_unit_type});
532 if (exists($h->{sec_unit_type})) {
537 $subloc = $h->{sec_unit_type};
538 # special case: do not combine P.O. box sublocs with address1
539 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
540 if ( $found_in == 2 ) {
541 $addr2 = "PO BOX ".$h->{sec_unit_num};
542 } # else it's in addr1, and leave it alone
544 } elsif ( exists($dict->{$subloc}) ) {
545 # substitute the official abbreviation
546 $subloc = $dict->{$subloc};
548 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
549 } # otherwise $subloc = ''
551 if ( $found_in == 2 ) {
552 # address2 should be fully combined into address1
553 return ($subloc, '');
555 # else address2 is not the canonical sublocation, but do our best to
559 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
561 # remove all punctuation and spaces
562 foreach my $w (split(/\W+/, $addr2)) {
563 if ( exists($dict->{$w}) ) {
564 push @words, $dict->{$w};
568 my $result = join(' ', @words);
569 # correct spacing of pound sign + number
570 $result =~ s/NUMBER(\d)/# $1/;
571 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
574 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
578 sub standardize_melissa {
580 my $location = shift;
583 eval "use Geo::Melissa::WebSmart";
586 my $id = $conf->config('melissa-userid')
587 or die "no melissa-userid configured\n";
588 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
592 a1 => $location->{address1},
593 a2 => $location->{address2},
594 city => $location->{city},
595 state => $location->{state},
596 ctry => $location->{country},
597 zip => $location->{zip},
600 my $result = Geo::Melissa::WebSmart->query($request);
601 if ( $result->code =~ /AS01/ ) { # always present on success
602 my $addr = $result->address;
603 warn Dumper $addr if $DEBUG > 1;
605 address1 => $addr->{Address1},
606 address2 => $addr->{Address2},
607 city => $addr->{City}->{Name},
608 state => $addr->{State}->{Abbreviation},
609 country => $addr->{Country}->{Abbreviation},
611 latitude => $addr->{Latitude},
612 longitude => $addr->{Longitude},
615 if ( $addr->{Census}->{Tract} ) {
616 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
617 # insert decimal point two digits from the end
618 $censustract =~ s/(\d\d)$/\.$1/;
619 $out->{censustract} = $censustract;
620 $out->{censusyear} = $conf->config('census_year');
622 # we could do a lot more nuanced reporting of the warning/status codes,
623 # but the UI doesn't support that yet.
626 die $result->status_message;