4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract_ffiec LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
46 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
50 warn Dumper($location, $year) if $DEBUG;
52 # the old FFIEC geocoding service was shut down December 1, 2014.
53 # welcome to the future.
54 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
55 # build the single-line query
56 my $single_line = join(', ', $location->{address1},
60 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
61 my $request = POST( $url,
62 'Content-Type' => 'application/json; charset=utf-8',
63 'Accept' => 'application/json',
64 'Content' => encode_json($hashref)
67 my $ua = new LWP::UserAgent;
68 my $res = $ua->request( $request );
73 if (!$res->is_success) {
75 die "Census tract lookup error: ".$res->message;
80 my $content = eval { decode_json($res->content) };
81 die "Census tract JSON error: $@\n" if $@;
83 if ( !exists $content->{d}->{sStatus} ) {
84 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
86 if ( $content->{d}->{sStatus} eq 'Y' ) {
88 # this also contains the (partial) standardized address, correct zip
89 # code, coordinates, etc., and we could get all of them, but right now
90 # we only want the census tract
91 my $tract = join('', $content->{d}->{sStateCode},
92 $content->{d}->{sCountyCode},
93 $content->{d}->{sTractCode});
98 my $error = $content->{d}->{sMsg}
99 || 'FFIEC lookup failed, but with no status message.';
105 #sub get_district_methods {
107 # 'wa_sales' => 'Washington sales tax',
110 =item get_district LOCATION METHOD
112 For the location hash in LOCATION, using lookup method METHOD, fetch
113 tax district information. Currently the only available method is
114 'wa_sales' (the Washington Department of Revenue sales tax lookup).
116 Returns a hash reference containing the following fields:
121 - exempt_amount (currently zero)
122 - city, county, state, country (from
124 The intent is that you can assign this to an L<FS::cust_main_county>
125 object and insert it if there's not yet a tax rate defined for that
128 get_district will die on error.
136 my $location = shift;
137 my $method = shift or return '';
138 warn Dumper($location, $method) if $DEBUG;
143 my $location = shift;
145 return '' if $location->{state} ne 'WA';
147 my $return = { %$location };
148 $return->{'exempt_amount'} = 0.00;
150 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
151 my $ua = new LWP::UserAgent;
153 my $delim = '<|>'; # yes, <|>
154 my $year = (localtime)[5] + 1900;
155 my $month = (localtime)[4] + 1;
156 my @zip = split('-', $location->{zip});
159 'TaxType=S', #sales; 'P' = property
160 'Src=0', #does something complicated
162 'Addr='.uri_escape($location->{address1}),
163 'City='.uri_escape($location->{city}),
165 'Zip1='.($zip[1] || ''), #optional
172 my $query_string = join($delim, @args );
173 $url .= "?$query_string";
174 warn "\nrequest: $url\n\n" if $DEBUG > 1;
176 my $res = $ua->request( GET( "$url?$query_string" ) );
181 if ($res->code ne '200') {
182 $error = $res->message;
185 my $content = $res->content;
186 my $p = new HTML::TokeParser \$content;
188 while ( my $t = $p->get_tag('script') ) {
189 my $u = $p->get_token; #either enclosed text or the </script> tag
190 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
195 if ( $js ) { #found it
196 # strip down to the quoted string, which contains escaped single quotes.
197 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
198 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
199 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
201 $p = new HTML::TokeParser \$js;
202 TD: while ( my $td = $p->get_tag('td') ) {
203 while ( my $u = $p->get_token ) {
204 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
205 next if $u->[0] ne 'T'; # skip non-text
208 if ( lc($text) eq 'location code' ) {
209 $p->get_tag('td'); # skip to the next column
211 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
212 $return->{'district'} = $u->[1];
214 elsif ( lc($text) eq 'total tax rate' ) {
217 $u = $p->get_token until $u->[0] eq 'T';
218 $return->{'tax'} = $u->[1];
224 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
225 $return->{'tax'} *= 100; #percentage
226 warn Dumper($return) if $DEBUG > 1;
230 $error = 'district code/tax rate not found';
234 $error = "failed to parse document";
237 die "WA tax district lookup error: $error";
240 ###### USPS Standardization ######
242 sub standardize_usps {
245 eval "use Business::US::USPS::WebTools::AddressStandardization";
248 my $location = shift;
249 if ( $location->{country} ne 'US' ) {
251 warn "standardize_usps not for use in country ".$location->{country}."\n";
252 $location->{addr_clean} = '';
255 my $userid = $conf->config('usps_webtools-userid');
256 my $password = $conf->config('usps_webtools-password');
257 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
259 Password => $password,
261 } ) or die "error starting USPS WebTools\n";
263 my($zip5, $zip4) = split('-',$location->{'zip'});
266 FirmName => $location->{company},
267 Address2 => $location->{address1},
268 Address1 => $location->{address2},
269 City => $location->{city},
270 State => $location->{state},
274 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
277 my $hash = $verifier->verify_address( %usps_args );
279 warn $verifier->response
282 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
283 if $verifier->is_error;
285 my $zip = $hash->{Zip5};
286 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
288 { company => $hash->{FirmName},
289 address1 => $hash->{Address2},
290 address2 => $hash->{Address1},
291 city => $hash->{City},
292 state => $hash->{State},
298 ###### U.S. Census Bureau ######
300 sub standardize_uscensus {
302 my $location = shift;
304 eval "use Geo::USCensus::Geocoding";
307 if ( $location->{country} ne 'US' ) {
309 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
310 $location->{addr_clean} = '';
315 street => $location->{address1},
316 city => $location->{city},
317 state => $location->{state},
318 zip => $location->{zip},
319 debug => ($DEBUG || 0),
322 my $result = Geo::USCensus::Geocoding->query($request);
323 if ( $result->is_match ) {
324 # unfortunately we get the address back as a single line
325 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
331 address2 => uc($location->{address2}),
332 latitude => $result->latitude,
333 longitude => $result->longitude,
334 censustract => $result->censustract,
337 die "can't parse address '".$result->address."'";
340 warn Dumper($result) if $DEBUG;
341 die $result->error_message;
345 ####### EZLOCATE (obsolete) #######
347 sub _tomtom_query { # helper method for the below
349 my $result = Geo::TomTom::Geocoding->query(%args);
350 die "TomTom geocoding error: ".$result->message."\n"
351 unless ( $result->is_success );
352 my ($match) = $result->locations;
353 my $type = $match->{type};
354 # match levels below "intersection" should not be considered clean
355 my $clean = ($type eq 'addresspoint' ||
358 $type eq 'intersection'
360 warn "tomtom returned $type match\n" if $DEBUG;
361 warn Dumper($match) if $DEBUG > 1;
365 sub standardize_tomtom {
366 # post-2013 TomTom API
367 # much better, but incompatible with ezlocate
369 my $location = shift;
370 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
373 my $key = $conf->config('tomtom-userid')
374 or die "no tomtom-userid configured\n";
376 my $country = code2country($location->{country});
377 my ($address1, $address2) = ($location->{address1}, $location->{address2});
381 $address1 =~ s/^\s+//;
382 $address1 =~ s/\s+$//;
383 $address2 =~ s/^\s+//;
384 $address2 =~ s/\s+$//;
386 # try to fix some cases of the address fields being switched
387 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
388 $address2 = $address1;
389 $address1 = $location->{address2};
391 # parse sublocation part (unit/suite/apartment...) and clean up
392 # non-sublocation address2
393 ($subloc, $address2) =
394 subloc_address2($address1, $address2, $location->{country});
395 # ask TomTom to standardize address1:
399 L => $location->{city},
400 AA => $location->{state},
401 PC => $location->{zip},
402 CC => country2code($country, LOCALE_CODE_ALPHA_3),
405 my ($match, $clean) = _tomtom_query(%args);
407 if (!$match or !$clean) {
408 # Then try cleaning up the input; TomTom is picky about junk in the
409 # address. Any of these can still be a clean match.
410 my $h = Geo::StreetAddress::US->parse_location($address1);
411 # First conservatively:
412 if ( $h->{sec_unit_type} ) {
413 my $strip = '\s+' . $h->{sec_unit_type};
414 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
416 $args{T} =~ s/$strip//;
417 ($match, $clean) = _tomtom_query(%args);
419 if ( !$match or !$clean ) {
420 # Then more aggressively:
421 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
422 ($match, $clean) = _tomtom_query(%args);
426 if ( !$match or !$clean ) { # partial matches are not useful
427 die "Address not found\n";
430 if ( defined $match->{censusTract} ) {
431 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
432 join('.', $match->{censusTract} =~ /(....)(..)/);
435 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
436 $address1 .= $match->{street} if $match->{street};
437 $address1 .= ' '.$subloc if $subloc;
438 $address1 = uc($address1); # USPS standards
441 address1 => $address1,
442 address2 => $address2,
443 city => uc($match->{city}),
444 state => uc($location->{state}),
445 country => uc($location->{country}),
446 zip => ($match->{standardPostalCode} || $match->{postcode}),
447 latitude => $match->{latitude},
448 longitude => $match->{longitude},
449 censustract => $tract,
450 addr_clean => $clean,
454 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
456 Given 'address1' and 'address2' strings, extract the sublocation part
457 (from either one) and return it. If the sublocation was found in ADDRESS1,
458 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
459 contain something relevant.
464 # Postal Addressing Standards, Appendix C
465 # (plus correction of "hanger" to "hangar")
493 # Canada Post Addressing Guidelines 4.3
504 sub subloc_address2 {
505 # Some things seen in the address2 field:
507 # The complete address (with address1 containing part of the company name,
508 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
511 # try to parse sublocation parts from address1; if they are present we'll
512 # append them back to address1 after standardizing
514 my ($addr1, $addr2, $country) = map uc, @_;
515 my $dict = $subloc_forms{$country} or return('', $addr2);
517 my $found_in = 0; # which address is the sublocation
520 # patterns to try to parse
522 "$addr1 Nullcity, CA"
524 $h = Geo::StreetAddress::US->parse_location($addr1);
525 last if exists($h->{sec_unit_type});
527 if (exists($h->{sec_unit_type})) {
534 "$addr1, $addr2 Nullcity, CA"
536 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
537 last if exists($h->{sec_unit_type});
539 if (exists($h->{sec_unit_type})) {
544 $subloc = $h->{sec_unit_type};
545 # special case: do not combine P.O. box sublocs with address1
546 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
547 if ( $found_in == 2 ) {
548 $addr2 = "PO BOX ".$h->{sec_unit_num};
549 } # else it's in addr1, and leave it alone
551 } elsif ( exists($dict->{$subloc}) ) {
552 # substitute the official abbreviation
553 $subloc = $dict->{$subloc};
555 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
556 } # otherwise $subloc = ''
558 if ( $found_in == 2 ) {
559 # address2 should be fully combined into address1
560 return ($subloc, '');
562 # else address2 is not the canonical sublocation, but do our best to
566 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
568 # remove all punctuation and spaces
569 foreach my $w (split(/\W+/, $addr2)) {
570 if ( exists($dict->{$w}) ) {
571 push @words, $dict->{$w};
575 my $result = join(' ', @words);
576 # correct spacing of pound sign + number
577 $result =~ s/NUMBER(\d)/# $1/;
578 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
581 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
585 sub standardize_melissa {
587 my $location = shift;
590 eval "use Geo::Melissa::WebSmart";
593 my $id = $conf->config('melissa-userid')
594 or die "no melissa-userid configured\n";
595 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
599 a1 => $location->{address1},
600 a2 => $location->{address2},
601 city => $location->{city},
602 state => $location->{state},
603 ctry => $location->{country},
604 zip => $location->{zip},
607 my $result = Geo::Melissa::WebSmart->query($request);
608 if ( $result->code =~ /AS01/ ) { # always present on success
609 my $addr = $result->address;
610 warn Dumper $addr if $DEBUG > 1;
612 address1 => $addr->{Address1},
613 address2 => $addr->{Address2},
614 city => $addr->{City}->{Name},
615 state => $addr->{State}->{Abbreviation},
616 country => $addr->{Country}->{Abbreviation},
618 latitude => $addr->{Latitude},
619 longitude => $addr->{Longitude},
622 if ( $addr->{Census}->{Tract} ) {
623 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
624 # insert decimal point two digits from the end
625 $censustract =~ s/(\d\d)$/\.$1/;
626 $out->{censustract} = $censustract;
627 $out->{censusyear} = $conf->config('census_year');
629 # we could do a lot more nuanced reporting of the warning/status codes,
630 # but the UI doesn't support that yet.
633 die $result->status_message;