4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
18 FS::UID->install_callback( sub {
24 @EXPORT_OK = qw( get_district );
28 FS::Misc::Geo - routines to fetch geographic information
34 =item get_censustract_ffiec LOCATION YEAR
36 Given a location hash (see L<FS::location_Mixin>) and a census map year,
37 returns a census tract code (consisting of state, county, and tract
38 codes) or an error message.
42 sub get_censustract_ffiec {
48 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
52 warn Dumper($location, $year) if $DEBUG;
54 # the old FFIEC geocoding service was shut down December 1, 2014.
55 # welcome to the future.
56 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
57 # build the single-line query
58 my $single_line = join(', ', $location->{address1},
62 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
63 my $request = POST( $url,
64 'Content-Type' => 'application/json; charset=utf-8',
65 'Accept' => 'application/json',
66 'Content' => encode_json($hashref)
69 my $ua = new LWP::UserAgent;
70 my $res = $ua->request( $request );
75 if (!$res->is_success) {
77 die "Census tract lookup error: ".$res->message;
82 my $content = eval { decode_json($res->content) };
83 die "Census tract JSON error: $@\n" if $@;
85 if ( !exists $content->{d}->{sStatus} ) {
86 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
88 if ( $content->{d}->{sStatus} eq 'Y' ) {
90 # this also contains the (partial) standardized address, correct zip
91 # code, coordinates, etc., and we could get all of them, but right now
92 # we only want the census tract
93 my $tract = join('', $content->{d}->{sStateCode},
94 $content->{d}->{sCountyCode},
95 $content->{d}->{sTractCode});
100 my $error = $content->{d}->{sMsg}
101 || 'FFIEC lookup failed, but with no status message.';
107 #sub get_district_methods {
109 # 'wa_sales' => 'Washington sales tax',
112 =item get_district LOCATION METHOD
114 For the location hash in LOCATION, using lookup method METHOD, fetch
115 tax district information. Currently the only available method is
116 'wa_sales' (the Washington Department of Revenue sales tax lookup).
118 Returns a hash reference containing the following fields:
123 - exempt_amount (currently zero)
124 - city, county, state, country (from
126 The intent is that you can assign this to an L<FS::cust_main_county>
127 object and insert it if there's not yet a tax rate defined for that
130 get_district will die on error.
138 my $location = shift;
139 my $method = shift or return '';
140 warn Dumper($location, $method) if $DEBUG;
145 my $location = shift;
147 return '' if $location->{state} ne 'WA';
149 my $return = { %$location };
150 $return->{'exempt_amount'} = 0.00;
152 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
153 my $ua = new LWP::UserAgent;
155 my $delim = '<|>'; # yes, <|>
156 my $year = (localtime)[5] + 1900;
157 my $month = (localtime)[4] + 1;
158 my @zip = split('-', $location->{zip});
161 'TaxType=S', #sales; 'P' = property
162 'Src=0', #does something complicated
164 'Addr='.uri_escape($location->{address1}),
165 'City='.uri_escape($location->{city}),
167 'Zip1='.($zip[1] || ''), #optional
174 my $query_string = join($delim, @args );
175 $url .= "?$query_string";
176 warn "\nrequest: $url\n\n" if $DEBUG > 1;
178 my $res = $ua->request( GET( "$url?$query_string" ) );
183 if ($res->code ne '200') {
184 $error = $res->message;
187 my $content = $res->content;
188 my $p = new HTML::TokeParser \$content;
190 while ( my $t = $p->get_tag('script') ) {
191 my $u = $p->get_token; #either enclosed text or the </script> tag
192 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
197 if ( $js ) { #found it
198 # strip down to the quoted string, which contains escaped single quotes.
199 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
200 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
201 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
203 $p = new HTML::TokeParser \$js;
204 TD: while ( my $td = $p->get_tag('td') ) {
205 while ( my $u = $p->get_token ) {
206 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
207 next if $u->[0] ne 'T'; # skip non-text
210 if ( lc($text) eq 'location code' ) {
211 $p->get_tag('td'); # skip to the next column
213 $u = $p->get_token until ($u->[0] || '') eq 'T'; # and then skip non-text
214 $return->{'district'} = $u->[1];
216 elsif ( lc($text) eq 'total tax rate' ) {
219 $u = $p->get_token until ($u->[0] || '') eq 'T';
220 $return->{'tax'} = $u->[1];
226 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
227 $return->{'tax'} *= 100; #percentage
228 warn Dumper($return) if $DEBUG > 1;
232 $error = 'district code/tax rate not found';
236 $error = "failed to parse document";
239 die "WA tax district lookup error: $error";
242 ###### USPS Standardization ######
244 sub standardize_usps {
247 eval "use Business::US::USPS::WebTools::AddressStandardization";
250 my $location = shift;
251 if ( $location->{country} ne 'US' ) {
253 warn "standardize_usps not for use in country ".$location->{country}."\n";
254 $location->{addr_clean} = '';
257 my $userid = $conf->config('usps_webtools-userid');
258 my $password = $conf->config('usps_webtools-password');
259 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
261 Password => $password,
263 } ) or die "error starting USPS WebTools\n";
265 my($zip5, $zip4) = split('-',$location->{'zip'});
268 FirmName => $location->{company},
269 Address2 => $location->{address1},
270 Address1 => $location->{address2},
271 City => $location->{city},
272 State => $location->{state},
276 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
279 my $hash = $verifier->verify_address( %usps_args );
281 warn $verifier->response
284 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
285 if $verifier->is_error;
287 my $zip = $hash->{Zip5};
288 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
290 { company => $hash->{FirmName},
291 address1 => $hash->{Address2},
292 address2 => $hash->{Address1},
293 city => $hash->{City},
294 state => $hash->{State},
300 ###### U.S. Census Bureau ######
302 sub standardize_uscensus {
304 my $location = shift;
305 my $log = FS::Log->new('FS::Misc::Geo::standardize_uscensus');
306 $log->debug(join("\n", @{$location}{'address1', 'city', 'state', 'zip'}));
308 eval "use Geo::USCensus::Geocoding";
311 if ( $location->{country} ne 'US' ) {
313 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
314 $location->{addr_clean} = '';
319 street => $location->{address1},
320 city => $location->{city},
321 state => $location->{state},
322 zip => $location->{zip},
323 debug => ($DEBUG || 0),
326 my $result = Geo::USCensus::Geocoding->query($request);
327 if ( $result->is_match ) {
328 # unfortunately we get the address back as a single line
329 $log->debug($result->address);
330 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
336 address2 => uc($location->{address2}),
337 latitude => $result->latitude,
338 longitude => $result->longitude,
339 censustract => $result->censustract,
342 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
344 } elsif ( $result->match_level eq 'Tie' ) {
345 die "Geocoding was not able to identify a unique matching address.\n";
346 } elsif ( $result->match_level ) {
347 die "Geocoding did not find a matching address.\n";
349 $log->error($result->error_message);
350 return; # for internal errors, don't return anything
354 ####### EZLOCATE (obsolete) #######
356 sub _tomtom_query { # helper method for the below
358 my $result = Geo::TomTom::Geocoding->query(%args);
359 die "TomTom geocoding error: ".$result->message."\n"
360 unless ( $result->is_success );
361 my ($match) = $result->locations;
362 my $type = $match->{type};
363 # match levels below "intersection" should not be considered clean
364 my $clean = ($type eq 'addresspoint' ||
367 $type eq 'intersection'
369 warn "tomtom returned $type match\n" if $DEBUG;
370 warn Dumper($match) if $DEBUG > 1;
374 sub standardize_tomtom {
375 # post-2013 TomTom API
376 # much better, but incompatible with ezlocate
378 my $location = shift;
379 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
382 my $key = $conf->config('tomtom-userid')
383 or die "no tomtom-userid configured\n";
385 my $country = code2country($location->{country});
386 my ($address1, $address2) = ($location->{address1}, $location->{address2});
390 $address1 =~ s/^\s+//;
391 $address1 =~ s/\s+$//;
392 $address2 =~ s/^\s+//;
393 $address2 =~ s/\s+$//;
395 # try to fix some cases of the address fields being switched
396 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
397 $address2 = $address1;
398 $address1 = $location->{address2};
400 # parse sublocation part (unit/suite/apartment...) and clean up
401 # non-sublocation address2
402 ($subloc, $address2) =
403 subloc_address2($address1, $address2, $location->{country});
404 # ask TomTom to standardize address1:
408 L => $location->{city},
409 AA => $location->{state},
410 PC => $location->{zip},
411 CC => country2code($country, LOCALE_CODE_ALPHA_3),
414 my ($match, $clean) = _tomtom_query(%args);
416 if (!$match or !$clean) {
417 # Then try cleaning up the input; TomTom is picky about junk in the
418 # address. Any of these can still be a clean match.
419 my $h = Geo::StreetAddress::US->parse_location($address1);
420 # First conservatively:
421 if ( $h->{sec_unit_type} ) {
422 my $strip = '\s+' . $h->{sec_unit_type};
423 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
425 $args{T} =~ s/$strip//;
426 ($match, $clean) = _tomtom_query(%args);
428 if ( !$match or !$clean ) {
429 # Then more aggressively:
430 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
431 ($match, $clean) = _tomtom_query(%args);
435 if ( !$match or !$clean ) { # partial matches are not useful
436 die "Address not found\n";
439 if ( defined $match->{censusTract} ) {
440 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
441 join('.', $match->{censusTract} =~ /(....)(..)/);
444 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
445 $address1 .= $match->{street} if $match->{street};
446 $address1 .= ' '.$subloc if $subloc;
447 $address1 = uc($address1); # USPS standards
450 address1 => $address1,
451 address2 => $address2,
452 city => uc($match->{city}),
453 state => uc($location->{state}),
454 country => uc($location->{country}),
455 zip => ($match->{standardPostalCode} || $match->{postcode}),
456 latitude => $match->{latitude},
457 longitude => $match->{longitude},
458 censustract => $tract,
459 addr_clean => $clean,
463 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
465 Given 'address1' and 'address2' strings, extract the sublocation part
466 (from either one) and return it. If the sublocation was found in ADDRESS1,
467 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
468 contain something relevant.
473 # Postal Addressing Standards, Appendix C
474 # (plus correction of "hanger" to "hangar")
502 # Canada Post Addressing Guidelines 4.3
513 sub subloc_address2 {
514 # Some things seen in the address2 field:
516 # The complete address (with address1 containing part of the company name,
517 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
520 # try to parse sublocation parts from address1; if they are present we'll
521 # append them back to address1 after standardizing
523 my ($addr1, $addr2, $country) = map uc, @_;
524 my $dict = $subloc_forms{$country} or return('', $addr2);
526 my $found_in = 0; # which address is the sublocation
529 # patterns to try to parse
531 "$addr1 Nullcity, CA"
533 $h = Geo::StreetAddress::US->parse_location($addr1);
534 last if exists($h->{sec_unit_type});
536 if (exists($h->{sec_unit_type})) {
543 "$addr1, $addr2 Nullcity, CA"
545 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
546 last if exists($h->{sec_unit_type});
548 if (exists($h->{sec_unit_type})) {
553 $subloc = $h->{sec_unit_type};
554 # special case: do not combine P.O. box sublocs with address1
555 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
556 if ( $found_in == 2 ) {
557 $addr2 = "PO BOX ".$h->{sec_unit_num};
558 } # else it's in addr1, and leave it alone
560 } elsif ( exists($dict->{$subloc}) ) {
561 # substitute the official abbreviation
562 $subloc = $dict->{$subloc};
564 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
565 } # otherwise $subloc = ''
567 if ( $found_in == 2 ) {
568 # address2 should be fully combined into address1
569 return ($subloc, '');
571 # else address2 is not the canonical sublocation, but do our best to
575 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
577 # remove all punctuation and spaces
578 foreach my $w (split(/\W+/, $addr2)) {
579 if ( exists($dict->{$w}) ) {
580 push @words, $dict->{$w};
584 my $result = join(' ', @words);
585 # correct spacing of pound sign + number
586 $result =~ s/NUMBER(\d)/# $1/;
587 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
590 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
594 sub standardize_melissa {
596 my $location = shift;
599 eval "use Geo::Melissa::WebSmart";
602 my $id = $conf->config('melissa-userid')
603 or die "no melissa-userid configured\n";
604 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
608 a1 => $location->{address1},
609 a2 => $location->{address2},
610 city => $location->{city},
611 state => $location->{state},
612 ctry => $location->{country},
613 zip => $location->{zip},
616 my $result = Geo::Melissa::WebSmart->query($request);
617 if ( $result->code =~ /AS01/ ) { # always present on success
618 my $addr = $result->address;
619 warn Dumper $addr if $DEBUG > 1;
621 address1 => $addr->{Address1},
622 address2 => $addr->{Address2},
623 city => $addr->{City}->{Name},
624 state => $addr->{State}->{Abbreviation},
625 country => $addr->{Country}->{Abbreviation},
627 latitude => $addr->{Latitude},
628 longitude => $addr->{Longitude},
631 if ( $addr->{Census}->{Tract} ) {
632 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
633 # insert decimal point two digits from the end
634 $censustract =~ s/(\d\d)$/\.$1/;
635 $out->{censustract} = $censustract;
636 $out->{censusyear} = $conf->config('census_year');
638 # we could do a lot more nuanced reporting of the warning/status codes,
639 # but the UI doesn't support that yet.
642 die $result->status_message;
646 sub standardize_freeside {
648 my $location = shift;
650 my $url = 'https://ws.freeside.biz/normalize';
652 #free freeside.biz normalization only for US
653 if ( $location->{country} ne 'US' ) {
655 #why? something else could have cleaned it $location->{addr_clean} = '';
659 my $ua = LWP::UserAgent->new(
661 verify_hostname => 0,
662 SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
665 my $response = $ua->request( POST $url, [
666 'support-key' => scalar($conf->config('support-key')),
670 die "Address normalization error: ". $response->message
671 unless $response->is_success;
674 my $content = eval { decode_json($response->content) };
676 warn $response->content;
677 die "Address normalization JSON error : $@\n";
680 die $content->{error}."\n"
681 if $content->{error};
683 { 'addr_clean' => 'Y',
684 map { $_ => $content->{$_} }
685 qw( address1 address2 city state zip country )