4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
15 FS::UID->install_callback( sub {
21 @EXPORT_OK = qw( get_district );
25 FS::Misc::Geo - routines to fetch geographic information
31 =item get_censustract_ffiec LOCATION YEAR
33 Given a location hash (see L<FS::location_Mixin>) and a census map year,
34 returns a census tract code (consisting of state, county, and tract
35 codes) or an error message.
39 sub get_censustract_ffiec {
45 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
49 warn Dumper($location, $year) if $DEBUG;
51 # the old FFIEC geocoding service was shut down December 1, 2014.
52 # welcome to the future.
53 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
54 # build the single-line query
55 my $single_line = join(', ', $location->{address1},
59 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
60 my $request = POST( $url,
61 'Content-Type' => 'application/json; charset=utf-8',
62 'Accept' => 'application/json',
63 'Content' => encode_json($hashref)
66 my $ua = new LWP::UserAgent;
67 my $res = $ua->request( $request );
72 if (!$res->is_success) {
74 die "Census tract lookup error: ".$res->message;
79 my $content = eval { decode_json($res->content) };
80 die "Census tract JSON error: $@\n" if $@;
82 if ( !exists $content->{d}->{sStatus} ) {
83 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
85 if ( $content->{d}->{sStatus} eq 'Y' ) {
87 # this also contains the (partial) standardized address, correct zip
88 # code, coordinates, etc., and we could get all of them, but right now
89 # we only want the census tract
90 my $tract = join('', $content->{d}->{sStateCode},
91 $content->{d}->{sCountyCode},
92 $content->{d}->{sTractCode});
97 my $error = $content->{d}->{sMsg}
98 || 'FFIEC lookup failed, but with no status message.';
104 #sub get_district_methods {
106 # 'wa_sales' => 'Washington sales tax',
109 =item get_district LOCATION METHOD
111 For the location hash in LOCATION, using lookup method METHOD, fetch
112 tax district information. Currently the only available method is
113 'wa_sales' (the Washington Department of Revenue sales tax lookup).
115 Returns a hash reference containing the following fields:
120 - exempt_amount (currently zero)
121 - city, county, state, country (from
123 The intent is that you can assign this to an L<FS::cust_main_county>
124 object and insert it if there's not yet a tax rate defined for that
127 get_district will die on error.
135 my $location = shift;
136 my $method = shift or return '';
137 warn Dumper($location, $method) if $DEBUG;
142 my $location = shift;
144 return '' if $location->{state} ne 'WA';
146 my $return = { %$location };
147 $return->{'exempt_amount'} = 0.00;
149 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
150 my $ua = new LWP::UserAgent;
152 my $delim = '<|>'; # yes, <|>
153 my $year = (localtime)[5] + 1900;
154 my $month = (localtime)[4] + 1;
155 my @zip = split('-', $location->{zip});
158 'TaxType=S', #sales; 'P' = property
159 'Src=0', #does something complicated
161 'Addr='.uri_escape($location->{address1}),
162 'City='.uri_escape($location->{city}),
164 'Zip1='.($zip[1] || ''), #optional
171 my $query_string = join($delim, @args );
172 $url .= "?$query_string";
173 warn "\nrequest: $url\n\n" if $DEBUG > 1;
175 my $res = $ua->request( GET( "$url?$query_string" ) );
180 if ($res->code ne '200') {
181 $error = $res->message;
184 my $content = $res->content;
185 my $p = new HTML::TokeParser \$content;
187 while ( my $t = $p->get_tag('script') ) {
188 my $u = $p->get_token; #either enclosed text or the </script> tag
189 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
194 if ( $js ) { #found it
195 # strip down to the quoted string, which contains escaped single quotes.
196 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
197 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
198 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
200 $p = new HTML::TokeParser \$js;
201 TD: while ( my $td = $p->get_tag('td') ) {
202 while ( my $u = $p->get_token ) {
203 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
204 next if $u->[0] ne 'T'; # skip non-text
207 if ( lc($text) eq 'location code' ) {
208 $p->get_tag('td'); # skip to the next column
210 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
211 $return->{'district'} = $u->[1];
213 elsif ( lc($text) eq 'total tax rate' ) {
216 $u = $p->get_token until $u->[0] eq 'T';
217 $return->{'tax'} = $u->[1];
223 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
224 $return->{'tax'} *= 100; #percentage
225 warn Dumper($return) if $DEBUG > 1;
229 $error = 'district code/tax rate not found';
233 $error = "failed to parse document";
236 die "WA tax district lookup error: $error";
239 ###### USPS Standardization ######
241 sub standardize_usps {
244 eval "use Business::US::USPS::WebTools::AddressStandardization";
247 my $location = shift;
248 if ( $location->{country} ne 'US' ) {
250 warn "standardize_usps not for use in country ".$location->{country}."\n";
251 $location->{addr_clean} = '';
254 my $userid = $conf->config('usps_webtools-userid');
255 my $password = $conf->config('usps_webtools-password');
256 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
258 Password => $password,
260 } ) or die "error starting USPS WebTools\n";
262 my($zip5, $zip4) = split('-',$location->{'zip'});
265 FirmName => $location->{company},
266 Address2 => $location->{address1},
267 Address1 => $location->{address2},
268 City => $location->{city},
269 State => $location->{state},
273 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
276 my $hash = $verifier->verify_address( %usps_args );
278 warn $verifier->response
281 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
282 if $verifier->is_error;
284 my $zip = $hash->{Zip5};
285 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
287 { company => $hash->{FirmName},
288 address1 => $hash->{Address2},
289 address2 => $hash->{Address1},
290 city => $hash->{City},
291 state => $hash->{State},
297 ###### U.S. Census Bureau ######
299 sub standardize_uscensus {
301 my $location = shift;
303 eval "use Geo::USCensus::Geocoding";
306 if ( $location->{country} ne 'US' ) {
308 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
309 $location->{addr_clean} = '';
314 street => $location->{address1},
315 city => $location->{city},
316 state => $location->{state},
317 zip => $location->{zip},
318 debug => ($DEBUG || 0),
321 my $result = Geo::USCensus::Geocoding->query($request);
322 if ( $result->is_match ) {
323 # unfortunately we get the address back as a single line
324 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
330 address2 => uc($location->{address2}),
331 latitude => $result->latitude,
332 longitude => $result->longitude,
333 censustract => $result->censustract,
336 die "can't parse address '".$result->address."'";
339 warn Dumper($result) if $DEBUG;
340 die $result->error_message;
344 ####### EZLOCATE (obsolete) #######
346 sub _tomtom_query { # helper method for the below
348 my $result = Geo::TomTom::Geocoding->query(%args);
349 die "TomTom geocoding error: ".$result->message."\n"
350 unless ( $result->is_success );
351 my ($match) = $result->locations;
352 my $type = $match->{type};
353 # match levels below "intersection" should not be considered clean
354 my $clean = ($type eq 'addresspoint' ||
357 $type eq 'intersection'
359 warn "tomtom returned $type match\n" if $DEBUG;
360 warn Dumper($match) if $DEBUG > 1;
364 sub standardize_tomtom {
365 # post-2013 TomTom API
366 # much better, but incompatible with ezlocate
368 my $location = shift;
369 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
372 my $key = $conf->config('tomtom-userid')
373 or die "no tomtom-userid configured\n";
375 my $country = code2country($location->{country});
376 my ($address1, $address2) = ($location->{address1}, $location->{address2});
380 $address1 =~ s/^\s+//;
381 $address1 =~ s/\s+$//;
382 $address2 =~ s/^\s+//;
383 $address2 =~ s/\s+$//;
385 # try to fix some cases of the address fields being switched
386 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
387 $address2 = $address1;
388 $address1 = $location->{address2};
390 # parse sublocation part (unit/suite/apartment...) and clean up
391 # non-sublocation address2
392 ($subloc, $address2) =
393 subloc_address2($address1, $address2, $location->{country});
394 # ask TomTom to standardize address1:
398 L => $location->{city},
399 AA => $location->{state},
400 PC => $location->{zip},
401 CC => country2code($country, LOCALE_CODE_ALPHA_3),
404 my ($match, $clean) = _tomtom_query(%args);
406 if (!$match or !$clean) {
407 # Then try cleaning up the input; TomTom is picky about junk in the
408 # address. Any of these can still be a clean match.
409 my $h = Geo::StreetAddress::US->parse_location($address1);
410 # First conservatively:
411 if ( $h->{sec_unit_type} ) {
412 my $strip = '\s+' . $h->{sec_unit_type};
413 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
415 $args{T} =~ s/$strip//;
416 ($match, $clean) = _tomtom_query(%args);
418 if ( !$match or !$clean ) {
419 # Then more aggressively:
420 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
421 ($match, $clean) = _tomtom_query(%args);
425 if ( !$match or !$clean ) { # partial matches are not useful
426 die "Address not found\n";
429 if ( defined $match->{censusTract} ) {
430 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
431 join('.', $match->{censusTract} =~ /(....)(..)/);
434 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
435 $address1 .= $match->{street} if $match->{street};
436 $address1 .= ' '.$subloc if $subloc;
437 $address1 = uc($address1); # USPS standards
440 address1 => $address1,
441 address2 => $address2,
442 city => uc($match->{city}),
443 state => uc($location->{state}),
444 country => uc($location->{country}),
445 zip => ($match->{standardPostalCode} || $match->{postcode}),
446 latitude => $match->{latitude},
447 longitude => $match->{longitude},
448 censustract => $tract,
449 addr_clean => $clean,
453 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
455 Given 'address1' and 'address2' strings, extract the sublocation part
456 (from either one) and return it. If the sublocation was found in ADDRESS1,
457 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
458 contain something relevant.
463 # Postal Addressing Standards, Appendix C
464 # (plus correction of "hanger" to "hangar")
492 # Canada Post Addressing Guidelines 4.3
503 sub subloc_address2 {
504 # Some things seen in the address2 field:
506 # The complete address (with address1 containing part of the company name,
507 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
510 # try to parse sublocation parts from address1; if they are present we'll
511 # append them back to address1 after standardizing
513 my ($addr1, $addr2, $country) = map uc, @_;
514 my $dict = $subloc_forms{$country} or return('', $addr2);
516 my $found_in = 0; # which address is the sublocation
519 # patterns to try to parse
521 "$addr1 Nullcity, CA"
523 $h = Geo::StreetAddress::US->parse_location($addr1);
524 last if exists($h->{sec_unit_type});
526 if (exists($h->{sec_unit_type})) {
533 "$addr1, $addr2 Nullcity, CA"
535 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
536 last if exists($h->{sec_unit_type});
538 if (exists($h->{sec_unit_type})) {
543 $subloc = $h->{sec_unit_type};
544 # special case: do not combine P.O. box sublocs with address1
545 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
546 if ( $found_in == 2 ) {
547 $addr2 = "PO BOX ".$h->{sec_unit_num};
548 } # else it's in addr1, and leave it alone
550 } elsif ( exists($dict->{$subloc}) ) {
551 # substitute the official abbreviation
552 $subloc = $dict->{$subloc};
554 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
555 } # otherwise $subloc = ''
557 if ( $found_in == 2 ) {
558 # address2 should be fully combined into address1
559 return ($subloc, '');
561 # else address2 is not the canonical sublocation, but do our best to
565 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
567 # remove all punctuation and spaces
568 foreach my $w (split(/\W+/, $addr2)) {
569 if ( exists($dict->{$w}) ) {
570 push @words, $dict->{$w};
574 my $result = join(' ', @words);
575 # correct spacing of pound sign + number
576 $result =~ s/NUMBER(\d)/# $1/;
577 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
580 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
584 sub standardize_melissa {
586 my $location = shift;
589 eval "use Geo::Melissa::WebSmart";
592 my $id = $conf->config('melissa-userid')
593 or die "no melissa-userid configured\n";
594 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
598 a1 => $location->{address1},
599 a2 => $location->{address2},
600 city => $location->{city},
601 state => $location->{state},
602 ctry => $location->{country},
603 zip => $location->{zip},
606 my $result = Geo::Melissa::WebSmart->query($request);
607 if ( $result->code =~ /AS01/ ) { # always present on success
608 my $addr = $result->address;
609 warn Dumper $addr if $DEBUG > 1;
611 address1 => $addr->{Address1},
612 address2 => $addr->{Address2},
613 city => $addr->{City}->{Name},
614 state => $addr->{State}->{Abbreviation},
615 country => $addr->{Country}->{Abbreviation},
617 latitude => $addr->{Latitude},
618 longitude => $addr->{Longitude},
621 if ( $addr->{Census}->{Tract} ) {
622 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
623 # insert decimal point two digits from the end
624 $censustract =~ s/(\d\d)$/\.$1/;
625 $out->{censustract} = $censustract;
626 $out->{censusyear} = $conf->config('census_year');
628 # we could do a lot more nuanced reporting of the warning/status codes,
629 # but the UI doesn't support that yet.
632 die $result->status_message;