4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract_ffiec LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
46 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
50 warn Dumper($location, $year) if $DEBUG;
52 # the old FFIEC geocoding service was shut down December 1, 2014.
53 # welcome to the future.
54 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
55 # build the single-line query
56 my $single_line = join(', ', $location->{address1},
60 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
61 my $request = POST( $url,
62 'Content-Type' => 'application/json; charset=utf-8',
63 'Accept' => 'application/json',
64 'Content' => encode_json($hashref)
67 my $ua = new LWP::UserAgent;
68 my $res = $ua->request( $request );
73 if (!$res->is_success) {
75 die "Census tract lookup error: ".$res->message;
80 my $content = eval { decode_json($res->content) };
81 die "Census tract JSON error: $@\n" if $@;
83 if ( !exists $content->{d}->{sStatus} ) {
84 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
86 if ( $content->{d}->{sStatus} eq 'Y' ) {
88 # this also contains the (partial) standardized address, correct zip
89 # code, coordinates, etc., and we could get all of them, but right now
90 # we only want the census tract
91 my $tract = join('', $content->{d}->{sStateCode},
92 $content->{d}->{sCountyCode},
93 $content->{d}->{sTractCode});
98 my $error = $content->{d}->{sMsg}
99 || 'FFIEC lookup failed, but with no status message.';
105 #sub get_district_methods {
107 # 'wa_sales' => 'Washington sales tax',
110 =item get_district LOCATION METHOD
112 For the location hash in LOCATION, using lookup method METHOD, fetch
113 tax district information. Currently the only available method is
114 'wa_sales' (the Washington Department of Revenue sales tax lookup).
116 Returns a hash reference containing the following fields:
121 - exempt_amount (currently zero)
122 - city, county, state, country (from
124 The intent is that you can assign this to an L<FS::cust_main_county>
125 object and insert it if there's not yet a tax rate defined for that
128 get_district will die on error.
136 my $location = shift;
137 my $method = shift or return '';
138 warn Dumper($location, $method) if $DEBUG;
143 my $location = shift;
145 return '' if $location->{state} ne 'WA';
147 my $return = { %$location };
148 $return->{'exempt_amount'} = 0.00;
150 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
151 my $ua = new LWP::UserAgent;
153 my $delim = '<|>'; # yes, <|>
154 my $year = (localtime)[5] + 1900;
155 my $month = (localtime)[4] + 1;
156 my @zip = split('-', $location->{zip});
159 'TaxType=S', #sales; 'P' = property
160 'Src=0', #does something complicated
162 'Addr='.uri_escape($location->{address1}),
163 'City='.uri_escape($location->{city}),
165 'Zip1='.($zip[1] || ''), #optional
172 my $query_string = join($delim, @args );
173 $url .= "?$query_string";
174 warn "\nrequest: $url\n\n" if $DEBUG > 1;
176 my $res = $ua->request( GET( "$url?$query_string" ) );
181 if ($res->code ne '200') {
182 $error = $res->message;
185 my $content = $res->content;
186 my $p = new HTML::TokeParser \$content;
188 while ( my $t = $p->get_tag('script') ) {
189 my $u = $p->get_token; #either enclosed text or the </script> tag
190 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
195 if ( $js ) { #found it
196 # strip down to the quoted string, which contains escaped single quotes.
197 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
198 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
199 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
201 $p = new HTML::TokeParser \$js;
202 TD: while ( my $td = $p->get_tag('td') ) {
203 while ( my $u = $p->get_token ) {
204 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
205 next if $u->[0] ne 'T'; # skip non-text
208 if ( lc($text) eq 'location code' ) {
209 $p->get_tag('td'); # skip to the next column
211 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
212 $return->{'district'} = $u->[1];
214 elsif ( lc($text) eq 'total tax rate' ) {
217 $u = $p->get_token until $u->[0] eq 'T';
218 $return->{'tax'} = $u->[1];
224 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
225 $return->{'tax'} *= 100; #percentage
226 warn Dumper($return) if $DEBUG > 1;
230 $error = 'district code/tax rate not found';
234 $error = "failed to parse document";
237 die "WA tax district lookup error: $error";
240 ###### USPS Standardization ######
242 sub standardize_usps {
245 eval "use Business::US::USPS::WebTools::AddressStandardization";
248 my $location = shift;
249 if ( $location->{country} ne 'US' ) {
251 warn "standardize_usps not for use in country ".$location->{country}."\n";
252 $location->{addr_clean} = '';
255 my $userid = $conf->config('usps_webtools-userid');
256 my $password = $conf->config('usps_webtools-password');
257 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
259 Password => $password,
261 } ) or die "error starting USPS WebTools\n";
263 my($zip5, $zip4) = split('-',$location->{'zip'});
266 FirmName => $location->{company},
267 Address2 => $location->{address1},
268 Address1 => $location->{address2},
269 City => $location->{city},
270 State => $location->{state},
274 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
277 my $hash = $verifier->verify_address( %usps_args );
279 warn $verifier->response
282 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
283 if $verifier->is_error;
285 my $zip = $hash->{Zip5};
286 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
288 { company => $hash->{FirmName},
289 address1 => $hash->{Address2},
290 address2 => $hash->{Address1},
291 city => $hash->{City},
292 state => $hash->{State},
298 ###### U.S. Census Bureau ######
300 sub standardize_uscensus {
302 my $location = shift;
304 eval "use Geo::USCensus::Geocoding";
307 if ( $location->{country} ne 'US' ) {
309 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
310 $location->{addr_clean} = '';
315 street => $location->{address1},
316 city => $location->{city},
317 state => $location->{state},
318 zip => $location->{zip},
319 debug => ($DEBUG || 0),
322 my $result = Geo::USCensus::Geocoding->query($request);
323 if ( $result->is_match ) {
324 # unfortunately we get the address back as a single line
325 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
331 address2 => uc($location->{address2}),
332 latitude => $result->latitude,
333 longitude => $result->longitude,
334 censustract => $result->censustract,
337 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
339 } elsif ( $result->match_level eq 'Tie' ) {
340 die "Geocoding was not able to identify a unique matching address.\n";
341 } elsif ( $result->match_level ) {
342 die "Geocoding did not find a matching address.\n";
344 warn Dumper($result) if $DEBUG;
345 die $result->error_message;
349 ####### EZLOCATE (obsolete) #######
351 sub _tomtom_query { # helper method for the below
353 my $result = Geo::TomTom::Geocoding->query(%args);
354 die "TomTom geocoding error: ".$result->message."\n"
355 unless ( $result->is_success );
356 my ($match) = $result->locations;
357 my $type = $match->{type};
358 # match levels below "intersection" should not be considered clean
359 my $clean = ($type eq 'addresspoint' ||
362 $type eq 'intersection'
364 warn "tomtom returned $type match\n" if $DEBUG;
365 warn Dumper($match) if $DEBUG > 1;
369 sub standardize_tomtom {
370 # post-2013 TomTom API
371 # much better, but incompatible with ezlocate
373 my $location = shift;
374 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
377 my $key = $conf->config('tomtom-userid')
378 or die "no tomtom-userid configured\n";
380 my $country = code2country($location->{country});
381 my ($address1, $address2) = ($location->{address1}, $location->{address2});
385 $address1 =~ s/^\s+//;
386 $address1 =~ s/\s+$//;
387 $address2 =~ s/^\s+//;
388 $address2 =~ s/\s+$//;
390 # try to fix some cases of the address fields being switched
391 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
392 $address2 = $address1;
393 $address1 = $location->{address2};
395 # parse sublocation part (unit/suite/apartment...) and clean up
396 # non-sublocation address2
397 ($subloc, $address2) =
398 subloc_address2($address1, $address2, $location->{country});
399 # ask TomTom to standardize address1:
403 L => $location->{city},
404 AA => $location->{state},
405 PC => $location->{zip},
406 CC => country2code($country, LOCALE_CODE_ALPHA_3),
409 my ($match, $clean) = _tomtom_query(%args);
411 if (!$match or !$clean) {
412 # Then try cleaning up the input; TomTom is picky about junk in the
413 # address. Any of these can still be a clean match.
414 my $h = Geo::StreetAddress::US->parse_location($address1);
415 # First conservatively:
416 if ( $h->{sec_unit_type} ) {
417 my $strip = '\s+' . $h->{sec_unit_type};
418 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
420 $args{T} =~ s/$strip//;
421 ($match, $clean) = _tomtom_query(%args);
423 if ( !$match or !$clean ) {
424 # Then more aggressively:
425 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
426 ($match, $clean) = _tomtom_query(%args);
430 if ( !$match or !$clean ) { # partial matches are not useful
431 die "Address not found\n";
434 if ( defined $match->{censusTract} ) {
435 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
436 join('.', $match->{censusTract} =~ /(....)(..)/);
439 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
440 $address1 .= $match->{street} if $match->{street};
441 $address1 .= ' '.$subloc if $subloc;
442 $address1 = uc($address1); # USPS standards
445 address1 => $address1,
446 address2 => $address2,
447 city => uc($match->{city}),
448 state => uc($location->{state}),
449 country => uc($location->{country}),
450 zip => ($match->{standardPostalCode} || $match->{postcode}),
451 latitude => $match->{latitude},
452 longitude => $match->{longitude},
453 censustract => $tract,
454 addr_clean => $clean,
458 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
460 Given 'address1' and 'address2' strings, extract the sublocation part
461 (from either one) and return it. If the sublocation was found in ADDRESS1,
462 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
463 contain something relevant.
468 # Postal Addressing Standards, Appendix C
469 # (plus correction of "hanger" to "hangar")
497 # Canada Post Addressing Guidelines 4.3
508 sub subloc_address2 {
509 # Some things seen in the address2 field:
511 # The complete address (with address1 containing part of the company name,
512 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
515 # try to parse sublocation parts from address1; if they are present we'll
516 # append them back to address1 after standardizing
518 my ($addr1, $addr2, $country) = map uc, @_;
519 my $dict = $subloc_forms{$country} or return('', $addr2);
521 my $found_in = 0; # which address is the sublocation
524 # patterns to try to parse
526 "$addr1 Nullcity, CA"
528 $h = Geo::StreetAddress::US->parse_location($addr1);
529 last if exists($h->{sec_unit_type});
531 if (exists($h->{sec_unit_type})) {
538 "$addr1, $addr2 Nullcity, CA"
540 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
541 last if exists($h->{sec_unit_type});
543 if (exists($h->{sec_unit_type})) {
548 $subloc = $h->{sec_unit_type};
549 # special case: do not combine P.O. box sublocs with address1
550 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
551 if ( $found_in == 2 ) {
552 $addr2 = "PO BOX ".$h->{sec_unit_num};
553 } # else it's in addr1, and leave it alone
555 } elsif ( exists($dict->{$subloc}) ) {
556 # substitute the official abbreviation
557 $subloc = $dict->{$subloc};
559 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
560 } # otherwise $subloc = ''
562 if ( $found_in == 2 ) {
563 # address2 should be fully combined into address1
564 return ($subloc, '');
566 # else address2 is not the canonical sublocation, but do our best to
570 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
572 # remove all punctuation and spaces
573 foreach my $w (split(/\W+/, $addr2)) {
574 if ( exists($dict->{$w}) ) {
575 push @words, $dict->{$w};
579 my $result = join(' ', @words);
580 # correct spacing of pound sign + number
581 $result =~ s/NUMBER(\d)/# $1/;
582 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
585 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
589 sub standardize_melissa {
591 my $location = shift;
594 eval "use Geo::Melissa::WebSmart";
597 my $id = $conf->config('melissa-userid')
598 or die "no melissa-userid configured\n";
599 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
603 a1 => $location->{address1},
604 a2 => $location->{address2},
605 city => $location->{city},
606 state => $location->{state},
607 ctry => $location->{country},
608 zip => $location->{zip},
611 my $result = Geo::Melissa::WebSmart->query($request);
612 if ( $result->code =~ /AS01/ ) { # always present on success
613 my $addr = $result->address;
614 warn Dumper $addr if $DEBUG > 1;
616 address1 => $addr->{Address1},
617 address2 => $addr->{Address2},
618 city => $addr->{City}->{Name},
619 state => $addr->{State}->{Abbreviation},
620 country => $addr->{Country}->{Abbreviation},
622 latitude => $addr->{Latitude},
623 longitude => $addr->{Longitude},
626 if ( $addr->{Census}->{Tract} ) {
627 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
628 # insert decimal point two digits from the end
629 $censustract =~ s/(\d\d)$/\.$1/;
630 $out->{censustract} = $censustract;
631 $out->{censusyear} = $conf->config('census_year');
633 # we could do a lot more nuanced reporting of the warning/status codes,
634 # but the UI doesn't support that yet.
637 die $result->status_message;