4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
45 warn Dumper($location, $year) if $DEBUG;
47 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
52 my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
53 my $res = $ua->request( GET( $url ) );
58 if (!$res->is_success) {
60 $error = $res->message;
64 my $content = $res->content;
66 my $p = new HTML::TokeParser \$content;
69 while (my $token = $p->get_tag('input') ) {
70 if ($token->[1]->{name} eq '__VIEWSTATE') {
71 $viewstate = $token->[1]->{value};
73 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
74 $eventvalidation = $token->[1]->{value};
76 last if $viewstate && $eventvalidation;
79 if (!$viewstate or !$eventvalidation ) {
81 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
85 my($zip5, $zip4) = split('-',$location->{zip});
89 __VIEWSTATE => $viewstate,
90 __EVENTVALIDATION => $eventvalidation,
91 __VIEWSTATEENCRYPTED => '',
93 txtAddress => $location->{address1},
94 txtCity => $location->{city},
95 ddlbState => $location->{state},
97 btnSearch => 'Search',
99 warn join("\n", @ffiec_args )
102 push @{ $ua->requests_redirectable }, 'POST';
103 $res = $ua->request( POST( $url, \@ffiec_args ) );
107 unless ($res->code eq '200') {
109 $error = $res->message;
113 my @id = qw( MSACode StateCode CountyCode TractCode );
114 $content = $res->content;
115 warn $res->content if $DEBUG > 2;
116 $p = new HTML::TokeParser \$content;
117 my $prefix = 'UcGeoResult11_lb';
119 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
121 while (my $token = $p->get_tag('span') ) {
122 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
123 $token->[1]->{id} =~ /^$prefix(\w+)$/;
124 $return->{lc($1)} = $p->get_trimmed_text("/span");
127 unless ( $return->{tractcode} ) {
128 warn "$error: $content ". Dumper($return) if $DEBUG;
129 $error = "No census tract found";
131 $return->{tractcode} .= ' '
132 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
134 } #unless ($res->code eq '200')
136 } #unless ($viewstate)
138 } #unless ($res->code eq '200')
140 die "FFIEC Geocoding error: $error\n" if $error;
142 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
145 #sub get_district_methods {
147 # 'wa_sales' => 'Washington sales tax',
150 =item get_district LOCATION METHOD
152 For the location hash in LOCATION, using lookup method METHOD, fetch
153 tax district information. Currently the only available method is
154 'wa_sales' (the Washington Department of Revenue sales tax lookup).
156 Returns a hash reference containing the following fields:
161 - exempt_amount (currently zero)
162 - city, county, state, country (from
164 The intent is that you can assign this to an L<FS::cust_main_county>
165 object and insert it if there's not yet a tax rate defined for that
168 get_district will die on error.
176 my $location = shift;
177 my $method = shift or return '';
178 warn Dumper($location, $method) if $DEBUG;
183 my $location = shift;
185 return '' if $location->{state} ne 'WA';
187 my $return = { %$location };
188 $return->{'exempt_amount'} = 0.00;
190 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
191 my $ua = new LWP::UserAgent;
193 my $delim = '<|>'; # yes, <|>
194 my $year = (localtime)[5] + 1900;
195 my $month = (localtime)[4] + 1;
196 my @zip = split('-', $location->{zip});
199 'TaxType=S', #sales; 'P' = property
200 'Src=0', #does something complicated
202 'Addr='.uri_escape($location->{address1}),
203 'City='.uri_escape($location->{city}),
205 'Zip1='.($zip[1] || ''), #optional
212 my $query_string = join($delim, @args );
213 $url .= "?$query_string";
214 warn "\nrequest: $url\n\n" if $DEBUG > 1;
216 my $res = $ua->request( GET( "$url?$query_string" ) );
221 if ($res->code ne '200') {
222 $error = $res->message;
225 my $content = $res->content;
226 my $p = new HTML::TokeParser \$content;
228 while ( my $t = $p->get_tag('script') ) {
229 my $u = $p->get_token; #either enclosed text or the </script> tag
230 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
235 if ( $js ) { #found it
236 # strip down to the quoted string, which contains escaped single quotes.
237 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
238 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
239 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
241 $p = new HTML::TokeParser \$js;
242 TD: while ( my $td = $p->get_tag('td') ) {
243 while ( my $u = $p->get_token ) {
244 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
245 next if $u->[0] ne 'T'; # skip non-text
248 if ( lc($text) eq 'location code' ) {
249 $p->get_tag('td'); # skip to the next column
251 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
252 $return->{'district'} = $u->[1];
254 elsif ( lc($text) eq 'total tax rate' ) {
257 $u = $p->get_token until $u->[0] eq 'T';
258 $return->{'tax'} = $u->[1];
264 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
265 $return->{'tax'} *= 100; #percentage
266 warn Dumper($return) if $DEBUG > 1;
270 $error = 'district code/tax rate not found';
274 $error = "failed to parse document";
277 die "WA tax district lookup error: $error";
280 sub standardize_usps {
283 eval "use Business::US::USPS::WebTools::AddressStandardization";
286 my $location = shift;
287 if ( $location->{country} ne 'US' ) {
289 warn "standardize_usps not for use in country ".$location->{country}."\n";
290 $location->{addr_clean} = '';
293 my $userid = $conf->config('usps_webtools-userid');
294 my $password = $conf->config('usps_webtools-password');
295 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
297 Password => $password,
299 } ) or die "error starting USPS WebTools\n";
301 my($zip5, $zip4) = split('-',$location->{'zip'});
304 FirmName => $location->{company},
305 Address2 => $location->{address1},
306 Address1 => $location->{address2},
307 City => $location->{city},
308 State => $location->{state},
312 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
315 my $hash = $verifier->verify_address( %usps_args );
317 warn $verifier->response
320 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
321 if $verifier->is_error;
323 my $zip = $hash->{Zip5};
324 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
326 { company => $hash->{FirmName},
327 address1 => $hash->{Address2},
328 address2 => $hash->{Address1},
329 city => $hash->{City},
330 state => $hash->{State},
336 sub _tomtom_query { # helper method for the below
338 my $result = Geo::TomTom::Geocoding->query(%args);
339 die "TomTom geocoding error: ".$result->message."\n"
340 unless ( $result->is_success );
341 my ($match) = $result->locations;
342 my $type = $match->{type};
343 # match levels below "intersection" should not be considered clean
344 my $clean = ($type eq 'addresspoint' ||
347 $type eq 'intersection'
349 warn "tomtom returned $type match\n" if $DEBUG;
350 warn Dumper($match) if $DEBUG > 1;
354 sub standardize_tomtom {
355 # post-2013 TomTom API
356 # much better, but incompatible with ezlocate
358 my $location = shift;
359 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
362 my $key = $conf->config('tomtom-userid')
363 or die "no tomtom-userid configured\n";
365 my $country = code2country($location->{country});
366 my ($address1, $address2) = ($location->{address1}, $location->{address2});
370 $address1 =~ s/^\s+//;
371 $address1 =~ s/\s+$//;
372 $address2 =~ s/^\s+//;
373 $address2 =~ s/\s+$//;
375 # try to fix some cases of the address fields being switched
376 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
377 $address2 = $address1;
378 $address1 = $location->{address2};
380 # parse sublocation part (unit/suite/apartment...) and clean up
381 # non-sublocation address2
382 ($subloc, $address2) =
383 subloc_address2($address1, $address2, $location->{country});
384 # ask TomTom to standardize address1:
388 L => $location->{city},
389 AA => $location->{state},
390 PC => $location->{zip},
391 CC => country2code($country, LOCALE_CODE_ALPHA_3),
394 my ($match, $clean) = _tomtom_query(%args);
396 if (!$match or !$clean) {
397 # Then try cleaning up the input; TomTom is picky about junk in the
398 # address. Any of these can still be a clean match.
399 my $h = Geo::StreetAddress::US->parse_location($address1);
400 # First conservatively:
401 if ( $h->{sec_unit_type} ) {
402 my $strip = '\s+' . $h->{sec_unit_type};
403 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
405 $args{T} =~ s/$strip//;
406 ($match, $clean) = _tomtom_query(%args);
408 if ( !$match or !$clean ) {
409 # Then more aggressively:
410 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
411 ($match, $clean) = _tomtom_query(%args);
415 if ( !$match or !$clean ) { # partial matches are not useful
416 die "Address not found\n";
419 if ( defined $match->{censusTract} ) {
420 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
421 join('.', $match->{censusTract} =~ /(....)(..)/);
424 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
425 $address1 .= $match->{street} if $match->{street};
426 $address1 .= ' '.$subloc if $subloc;
427 $address1 = uc($address1); # USPS standards
430 address1 => $address1,
431 address2 => $address2,
432 city => uc($match->{city}),
433 state => uc($location->{state}),
434 country => uc($location->{country}),
435 zip => ($match->{standardPostalCode} || $match->{postcode}),
436 latitude => $match->{latitude},
437 longitude => $match->{longitude},
438 censustract => $tract,
439 addr_clean => $clean,
443 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
445 Given 'address1' and 'address2' strings, extract the sublocation part
446 (from either one) and return it. If the sublocation was found in ADDRESS1,
447 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
448 contain something relevant.
453 # Postal Addressing Standards, Appendix C
454 # (plus correction of "hanger" to "hangar")
482 # Canada Post Addressing Guidelines 4.3
493 sub subloc_address2 {
494 # Some things seen in the address2 field:
496 # The complete address (with address1 containing part of the company name,
497 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
500 # try to parse sublocation parts from address1; if they are present we'll
501 # append them back to address1 after standardizing
503 my ($addr1, $addr2, $country) = map uc, @_;
504 my $dict = $subloc_forms{$country} or return('', $addr2);
506 my $found_in = 0; # which address is the sublocation
509 # patterns to try to parse
511 "$addr1 Nullcity, CA"
513 $h = Geo::StreetAddress::US->parse_location($addr1);
514 last if exists($h->{sec_unit_type});
516 if (exists($h->{sec_unit_type})) {
523 "$addr1, $addr2 Nullcity, CA"
525 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
526 last if exists($h->{sec_unit_type});
528 if (exists($h->{sec_unit_type})) {
533 $subloc = $h->{sec_unit_type};
534 # special case: do not combine P.O. box sublocs with address1
535 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
536 if ( $found_in == 2 ) {
537 $addr2 = "PO BOX ".$h->{sec_unit_num};
538 } # else it's in addr1, and leave it alone
540 } elsif ( exists($dict->{$subloc}) ) {
541 # substitute the official abbreviation
542 $subloc = $dict->{$subloc};
544 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
545 } # otherwise $subloc = ''
547 if ( $found_in == 2 ) {
548 # address2 should be fully combined into address1
549 return ($subloc, '');
551 # else address2 is not the canonical sublocation, but do our best to
555 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
557 # remove all punctuation and spaces
558 foreach my $w (split(/\W+/, $addr2)) {
559 if ( exists($dict->{$w}) ) {
560 push @words, $dict->{$w};
564 my $result = join(' ', @words);
565 # correct spacing of pound sign + number
566 $result =~ s/NUMBER(\d)/# $1/;
567 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
570 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
574 sub standardize_melissa {
576 my $location = shift;
579 eval "use Geo::Melissa::WebSmart";
582 my $id = $conf->config('melissa-userid')
583 or die "no melissa-userid configured\n";
584 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
588 a1 => $location->{address1},
589 a2 => $location->{address2},
590 city => $location->{city},
591 state => $location->{state},
592 ctry => $location->{country},
593 zip => $location->{zip},
596 my $result = Geo::Melissa::WebSmart->query($request);
597 if ( $result->code =~ /AS01/ ) { # always present on success
598 my $addr = $result->address;
599 warn Dumper $addr if $DEBUG > 1;
601 address1 => $addr->{Address1},
602 address2 => $addr->{Address2},
603 city => $addr->{City}->{Name},
604 state => $addr->{State}->{Abbreviation},
605 country => $addr->{Country}->{Abbreviation},
607 latitude => $addr->{Latitude},
608 longitude => $addr->{Longitude},
611 if ( $addr->{Census}->{Tract} ) {
612 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
613 # insert decimal point two digits from the end
614 $censustract =~ s/(\d\d)$/\.$1/;
615 $out->{censustract} = $censustract;
616 $out->{censusyear} = $conf->config('census_year');
618 # we could do a lot more nuanced reporting of the warning/status codes,
619 # but the UI doesn't support that yet.
622 die $result->status_message;