4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
15 FS::UID->install_callback( sub {
21 @EXPORT_OK = qw( get_district );
25 FS::Misc::Geo - routines to fetch geographic information
31 =item get_censustract_ffiec LOCATION YEAR
33 Given a location hash (see L<FS::location_Mixin>) and a census map year,
34 returns a census tract code (consisting of state, county, and tract
35 codes) or an error message.
39 sub get_censustract_ffiec {
45 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
49 warn Dumper($location, $year) if $DEBUG;
51 # the old FFIEC geocoding service was shut down December 1, 2014.
52 # welcome to the future.
53 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
54 # build the single-line query
55 my $single_line = join(', ', $location->{address1},
59 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
60 my $request = POST( $url,
61 'Content-Type' => 'application/json; charset=utf-8',
62 'Accept' => 'application/json',
63 'Content' => encode_json($hashref)
66 my $ua = new LWP::UserAgent;
67 my $res = $ua->request( $request );
72 if (!$res->is_success) {
74 die "Census tract lookup error: ".$res->message;
79 my $content = eval { decode_json($res->content) };
80 die "Census tract JSON error: $@\n" if $@;
82 if ( !exists $content->{d}->{sStatus} ) {
83 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
85 if ( $content->{d}->{sStatus} eq 'Y' ) {
87 # this also contains the (partial) standardized address, correct zip
88 # code, coordinates, etc., and we could get all of them, but right now
89 # we only want the census tract
90 my $tract = join('', $content->{d}->{sStateCode},
91 $content->{d}->{sCountyCode},
92 $content->{d}->{sTractCode});
97 my $error = $content->{d}->{sMsg}
98 || 'FFIEC lookup failed, but with no status message.';
104 #sub get_district_methods {
106 # 'wa_sales' => 'Washington sales tax',
109 =item get_district LOCATION METHOD
111 For the location hash in LOCATION, using lookup method METHOD, fetch
112 tax district information. Currently the only available method is
113 'wa_sales' (the Washington Department of Revenue sales tax lookup).
115 Returns a hash reference containing the following fields:
120 - exempt_amount (currently zero)
121 - city, county, state, country (from
123 The intent is that you can assign this to an L<FS::cust_main_county>
124 object and insert it if there's not yet a tax rate defined for that
127 get_district will die on error.
135 my $location = shift;
136 my $method = shift or return '';
137 warn Dumper($location, $method) if $DEBUG;
142 my $location = shift;
144 return '' if $location->{state} ne 'WA';
146 my $return = { %$location };
147 $return->{'exempt_amount'} = 0.00;
149 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
150 my $ua = new LWP::UserAgent;
152 my $delim = '<|>'; # yes, <|>
153 my $year = (localtime)[5] + 1900;
154 my $month = (localtime)[4] + 1;
155 my @zip = split('-', $location->{zip});
158 'TaxType=S', #sales; 'P' = property
159 'Src=0', #does something complicated
161 'Addr='.uri_escape($location->{address1}),
162 'City='.uri_escape($location->{city}),
164 'Zip1='.($zip[1] || ''), #optional
171 my $query_string = join($delim, @args );
172 $url .= "?$query_string";
173 warn "\nrequest: $url\n\n" if $DEBUG > 1;
175 my $res = $ua->request( GET( "$url?$query_string" ) );
180 if ($res->code ne '200') {
181 $error = $res->message;
184 my $content = $res->content;
185 my $p = new HTML::TokeParser \$content;
187 while ( my $t = $p->get_tag('script') ) {
188 my $u = $p->get_token; #either enclosed text or the </script> tag
189 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
194 if ( $js ) { #found it
195 # strip down to the quoted string, which contains escaped single quotes.
196 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
197 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
198 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
200 $p = new HTML::TokeParser \$js;
201 TD: while ( my $td = $p->get_tag('td') ) {
202 while ( my $u = $p->get_token ) {
203 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
204 next if $u->[0] ne 'T'; # skip non-text
207 if ( lc($text) eq 'location code' ) {
208 $p->get_tag('td'); # skip to the next column
210 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
211 $return->{'district'} = $u->[1];
213 elsif ( lc($text) eq 'total tax rate' ) {
216 $u = $p->get_token until $u->[0] eq 'T';
217 $return->{'tax'} = $u->[1];
223 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
224 $return->{'tax'} *= 100; #percentage
225 warn Dumper($return) if $DEBUG > 1;
229 $error = 'district code/tax rate not found';
233 $error = "failed to parse document";
236 die "WA tax district lookup error: $error";
239 sub standardize_usps {
242 eval "use Business::US::USPS::WebTools::AddressStandardization";
245 my $location = shift;
246 if ( $location->{country} ne 'US' ) {
248 warn "standardize_usps not for use in country ".$location->{country}."\n";
249 $location->{addr_clean} = '';
252 my $userid = $conf->config('usps_webtools-userid');
253 my $password = $conf->config('usps_webtools-password');
254 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
256 Password => $password,
258 } ) or die "error starting USPS WebTools\n";
260 my($zip5, $zip4) = split('-',$location->{'zip'});
263 FirmName => $location->{company},
264 Address2 => $location->{address1},
265 Address1 => $location->{address2},
266 City => $location->{city},
267 State => $location->{state},
271 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
274 my $hash = $verifier->verify_address( %usps_args );
276 warn $verifier->response
279 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
280 if $verifier->is_error;
282 my $zip = $hash->{Zip5};
283 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
285 { company => $hash->{FirmName},
286 address1 => $hash->{Address2},
287 address2 => $hash->{Address1},
288 city => $hash->{City},
289 state => $hash->{State},
295 sub _tomtom_query { # helper method for the below
297 my $result = Geo::TomTom::Geocoding->query(%args);
298 die "TomTom geocoding error: ".$result->message."\n"
299 unless ( $result->is_success );
300 my ($match) = $result->locations;
301 my $type = $match->{type};
302 # match levels below "intersection" should not be considered clean
303 my $clean = ($type eq 'addresspoint' ||
306 $type eq 'intersection'
308 warn "tomtom returned $type match\n" if $DEBUG;
309 warn Dumper($match) if $DEBUG > 1;
313 sub standardize_tomtom {
314 # post-2013 TomTom API
315 # much better, but incompatible with ezlocate
317 my $location = shift;
318 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
321 my $key = $conf->config('tomtom-userid')
322 or die "no tomtom-userid configured\n";
324 my $country = code2country($location->{country});
325 my ($address1, $address2) = ($location->{address1}, $location->{address2});
329 $address1 =~ s/^\s+//;
330 $address1 =~ s/\s+$//;
331 $address2 =~ s/^\s+//;
332 $address2 =~ s/\s+$//;
334 # try to fix some cases of the address fields being switched
335 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
336 $address2 = $address1;
337 $address1 = $location->{address2};
339 # parse sublocation part (unit/suite/apartment...) and clean up
340 # non-sublocation address2
341 ($subloc, $address2) =
342 subloc_address2($address1, $address2, $location->{country});
343 # ask TomTom to standardize address1:
347 L => $location->{city},
348 AA => $location->{state},
349 PC => $location->{zip},
350 CC => country2code($country, LOCALE_CODE_ALPHA_3),
353 my ($match, $clean) = _tomtom_query(%args);
355 if (!$match or !$clean) {
356 # Then try cleaning up the input; TomTom is picky about junk in the
357 # address. Any of these can still be a clean match.
358 my $h = Geo::StreetAddress::US->parse_location($address1);
359 # First conservatively:
360 if ( $h->{sec_unit_type} ) {
361 my $strip = '\s+' . $h->{sec_unit_type};
362 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
364 $args{T} =~ s/$strip//;
365 ($match, $clean) = _tomtom_query(%args);
367 if ( !$match or !$clean ) {
368 # Then more aggressively:
369 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
370 ($match, $clean) = _tomtom_query(%args);
374 if ( !$match or !$clean ) { # partial matches are not useful
375 die "Address not found\n";
378 if ( defined $match->{censusTract} ) {
379 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
380 join('.', $match->{censusTract} =~ /(....)(..)/);
383 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
384 $address1 .= $match->{street} if $match->{street};
385 $address1 .= ' '.$subloc if $subloc;
386 $address1 = uc($address1); # USPS standards
389 address1 => $address1,
390 address2 => $address2,
391 city => uc($match->{city}),
392 state => uc($location->{state}),
393 country => uc($location->{country}),
394 zip => ($match->{standardPostalCode} || $match->{postcode}),
395 latitude => $match->{latitude},
396 longitude => $match->{longitude},
397 censustract => $tract,
398 addr_clean => $clean,
402 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
404 Given 'address1' and 'address2' strings, extract the sublocation part
405 (from either one) and return it. If the sublocation was found in ADDRESS1,
406 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
407 contain something relevant.
412 # Postal Addressing Standards, Appendix C
413 # (plus correction of "hanger" to "hangar")
441 # Canada Post Addressing Guidelines 4.3
452 sub subloc_address2 {
453 # Some things seen in the address2 field:
455 # The complete address (with address1 containing part of the company name,
456 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
459 # try to parse sublocation parts from address1; if they are present we'll
460 # append them back to address1 after standardizing
462 my ($addr1, $addr2, $country) = map uc, @_;
463 my $dict = $subloc_forms{$country} or return('', $addr2);
465 my $found_in = 0; # which address is the sublocation
468 # patterns to try to parse
470 "$addr1 Nullcity, CA"
472 $h = Geo::StreetAddress::US->parse_location($addr1);
473 last if exists($h->{sec_unit_type});
475 if (exists($h->{sec_unit_type})) {
482 "$addr1, $addr2 Nullcity, CA"
484 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
485 last if exists($h->{sec_unit_type});
487 if (exists($h->{sec_unit_type})) {
492 $subloc = $h->{sec_unit_type};
493 # special case: do not combine P.O. box sublocs with address1
494 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
495 if ( $found_in == 2 ) {
496 $addr2 = "PO BOX ".$h->{sec_unit_num};
497 } # else it's in addr1, and leave it alone
499 } elsif ( exists($dict->{$subloc}) ) {
500 # substitute the official abbreviation
501 $subloc = $dict->{$subloc};
503 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
504 } # otherwise $subloc = ''
506 if ( $found_in == 2 ) {
507 # address2 should be fully combined into address1
508 return ($subloc, '');
510 # else address2 is not the canonical sublocation, but do our best to
514 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
516 # remove all punctuation and spaces
517 foreach my $w (split(/\W+/, $addr2)) {
518 if ( exists($dict->{$w}) ) {
519 push @words, $dict->{$w};
523 my $result = join(' ', @words);
524 # correct spacing of pound sign + number
525 $result =~ s/NUMBER(\d)/# $1/;
526 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
529 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
533 sub standardize_melissa {
535 my $location = shift;
538 eval "use Geo::Melissa::WebSmart";
541 my $id = $conf->config('melissa-userid')
542 or die "no melissa-userid configured\n";
543 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
547 a1 => $location->{address1},
548 a2 => $location->{address2},
549 city => $location->{city},
550 state => $location->{state},
551 ctry => $location->{country},
552 zip => $location->{zip},
555 my $result = Geo::Melissa::WebSmart->query($request);
556 if ( $result->code =~ /AS01/ ) { # always present on success
557 my $addr = $result->address;
558 warn Dumper $addr if $DEBUG > 1;
560 address1 => $addr->{Address1},
561 address2 => $addr->{Address2},
562 city => $addr->{City}->{Name},
563 state => $addr->{State}->{Abbreviation},
564 country => $addr->{Country}->{Abbreviation},
566 latitude => $addr->{Latitude},
567 longitude => $addr->{Longitude},
570 if ( $addr->{Census}->{Tract} ) {
571 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
572 # insert decimal point two digits from the end
573 $censustract =~ s/(\d\d)$/\.$1/;
574 $out->{censustract} = $censustract;
575 $out->{censusyear} = $conf->config('census_year');
577 # we could do a lot more nuanced reporting of the warning/status codes,
578 # but the UI doesn't support that yet.
581 die $result->status_message;