4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
45 warn Dumper($location, $year) if $DEBUG;
47 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
52 my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
53 my $res = $ua->request( GET( $url ) );
58 if (!$res->is_success) {
60 $error = $res->message;
64 my $content = $res->content;
66 my $p = new HTML::TokeParser \$content;
69 while (my $token = $p->get_tag('input') ) {
70 if ($token->[1]->{name} eq '__VIEWSTATE') {
71 $viewstate = $token->[1]->{value};
73 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
74 $eventvalidation = $token->[1]->{value};
76 last if $viewstate && $eventvalidation;
79 if (!$viewstate or !$eventvalidation ) {
81 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
85 my($zip5, $zip4) = split('-',$location->{zip});
89 __VIEWSTATE => $viewstate,
90 __EVENTVALIDATION => $eventvalidation,
91 __VIEWSTATEENCRYPTED => '',
93 txtAddress => $location->{address1},
94 txtCity => $location->{city},
95 ddlbState => $location->{state},
97 btnSearch => 'Search',
99 warn join("\n", @ffiec_args )
102 push @{ $ua->requests_redirectable }, 'POST';
103 $res = $ua->request( POST( $url, \@ffiec_args ) );
107 unless ($res->code eq '200') {
109 $error = $res->message;
113 my @id = qw( MSACode StateCode CountyCode TractCode );
114 $content = $res->content;
115 warn $res->content if $DEBUG > 2;
116 $p = new HTML::TokeParser \$content;
117 my $prefix = 'UcGeoResult11_lb';
119 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
121 while (my $token = $p->get_tag('span') ) {
122 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
123 $token->[1]->{id} =~ /^$prefix(\w+)$/;
124 $return->{lc($1)} = $p->get_trimmed_text("/span");
127 unless ( $return->{tractcode} ) {
128 warn "$error: $content ". Dumper($return) if $DEBUG;
129 $error = "No census tract found";
131 $return->{tractcode} .= ' '
132 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
134 } #unless ($res->code eq '200')
136 } #unless ($viewstate)
138 } #unless ($res->code eq '200')
140 die "FFIEC Geocoding error: $error\n" if $error;
142 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
145 #sub get_district_methods {
147 # 'wa_sales' => 'Washington sales tax',
150 =item get_district LOCATION METHOD
152 For the location hash in LOCATION, using lookup method METHOD, fetch
153 tax district information. Currently the only available method is
154 'wa_sales' (the Washington Department of Revenue sales tax lookup).
156 Returns a hash reference containing the following fields:
161 - exempt_amount (currently zero)
162 - city, county, state, country (from
164 The intent is that you can assign this to an L<FS::cust_main_county>
165 object and insert it if there's not yet a tax rate defined for that
168 get_district will die on error.
176 my $location = shift;
177 my $method = shift or return '';
178 warn Dumper($location, $method) if $DEBUG;
183 my $location = shift;
185 return '' if $location->{state} ne 'WA';
187 my $return = { %$location };
188 $return->{'exempt_amount'} = 0.00;
190 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
191 my $ua = new LWP::UserAgent;
193 my $delim = '<|>'; # yes, <|>
194 my $year = (localtime)[5] + 1900;
195 my $month = (localtime)[4] + 1;
196 my @zip = split('-', $location->{zip});
199 'TaxType=S', #sales; 'P' = property
200 'Src=0', #does something complicated
202 'Addr='.uri_escape($location->{address1}),
203 'City='.uri_escape($location->{city}),
205 'Zip1='.($zip[1] || ''), #optional
212 my $query_string = join($delim, @args );
213 $url .= "?$query_string";
214 warn "\nrequest: $url\n\n" if $DEBUG > 1;
216 my $res = $ua->request( GET( "$url?$query_string" ) );
221 if ($res->code ne '200') {
222 $error = $res->message;
225 my $content = $res->content;
226 my $p = new HTML::TokeParser \$content;
228 while ( my $t = $p->get_tag('script') ) {
229 my $u = $p->get_token; #either enclosed text or the </script> tag
230 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
235 if ( $js ) { #found it
236 # strip down to the quoted string, which contains escaped single quotes.
237 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
238 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
239 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
241 $p = new HTML::TokeParser \$js;
242 TD: while ( my $td = $p->get_tag('td') ) {
243 while ( my $u = $p->get_token ) {
244 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
245 next if $u->[0] ne 'T'; # skip non-text
248 if ( lc($text) eq 'location code' ) {
249 $p->get_tag('td'); # skip to the next column
251 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
252 $return->{'district'} = $u->[1];
254 elsif ( lc($text) eq 'total tax rate' ) {
257 $u = $p->get_token until $u->[0] eq 'T';
258 $return->{'tax'} = $u->[1];
264 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
265 $return->{'tax'} *= 100; #percentage
266 warn Dumper($return) if $DEBUG > 1;
270 $error = 'district code/tax rate not found';
274 $error = "failed to parse document";
277 die "WA tax district lookup error: $error";
280 sub standardize_usps {
283 eval "use Business::US::USPS::WebTools::AddressStandardization";
286 my $location = shift;
287 if ( $location->{country} ne 'US' ) {
289 warn "standardize_usps not for use in country ".$location->{country}."\n";
290 $location->{addr_clean} = '';
293 my $userid = $conf->config('usps_webtools-userid');
294 my $password = $conf->config('usps_webtools-password');
295 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
297 Password => $password,
299 } ) or die "error starting USPS WebTools\n";
301 my($zip5, $zip4) = split('-',$location->{'zip'});
304 FirmName => $location->{company},
305 Address2 => $location->{address1},
306 Address1 => $location->{address2},
307 City => $location->{city},
308 State => $location->{state},
312 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
315 my $hash = $verifier->verify_address( %usps_args );
317 warn $verifier->response
320 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
321 if $verifier->is_error;
323 my $zip = $hash->{Zip5};
324 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
326 { company => $hash->{FirmName},
327 address1 => $hash->{Address2},
328 address2 => $hash->{Address1},
329 city => $hash->{City},
330 state => $hash->{State},
336 my %ezlocate_error = ( # USA_Geo_002 documentation
337 10 => 'State not found',
338 11 => 'City not found',
339 12 => 'Invalid street address',
340 14 => 'Street name not found',
341 15 => 'Address range does not exist',
342 16 => 'Ambiguous address',
343 17 => 'Intersection not found', #unused?
346 sub standardize_ezlocate {
348 my $location = shift;
350 #if ( $location->{country} eq 'US' ) {
351 # $class = 'USA_Geo_004Tool';
353 #elsif ( $location->{country} eq 'CA' ) {
354 # $class = 'CAN_Geo_001Tool';
356 #else { # shouldn't be a fatal error, just pass through unverified address
357 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
358 # "' not available\n";
361 #my $path = $conf->config('teleatlas-path') || '';
362 #local @INC = (@INC, $path);
365 # die "Loading $class failed:\n$@".
366 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
369 $class = 'Geo::EZLocate'; # use our own library
370 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
373 my $userid = $conf->config('ezlocate-userid')
374 or die "no ezlocate-userid configured\n";
375 my $password = $conf->config('ezlocate-password')
376 or die "no ezlocate-password configured\n";
378 my $tool = $class->new($userid, $password);
379 my $match = $tool->findAddress(
380 $location->{address1},
383 $location->{zip}, #12345-6789 format is allowed
385 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
386 # error handling - B codes indicate success
387 die $ezlocate_error{$match->{MAT_STAT}}."\n"
388 unless $match->{MAT_STAT} =~ /^B\d$/;
391 address1 => $match->{MAT_ADDR},
392 address2 => $location->{address2},
393 city => $match->{MAT_CITY},
394 state => $match->{MAT_ST},
395 country => $location->{country},
396 zip => $match->{MAT_ZIP},
397 latitude => $match->{MAT_LAT},
398 longitude => $match->{MAT_LON},
399 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
400 sprintf('%07.2f',$match->{CEN_TRCT}),
403 if ( $match->{STD_ADDR} ) {
404 # then they have a postal standardized address for us
406 address1 => $match->{STD_ADDR},
407 address2 => $location->{address2},
408 city => $match->{STD_CITY},
409 state => $match->{STD_ST},
410 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
417 sub standardize_tomtom {
418 # post-2013 TomTom API
419 # much better, but incompatible with ezlocate
421 my $location = shift;
422 my $class = 'Geo::TomTom::Geocoding';
426 my $key = $conf->config('tomtom-userid')
427 or die "no tomtom-userid configured\n";
429 my $country = code2country($location->{country});
430 my ($address1, $address2) = ($location->{address1}, $location->{address2});
431 # try to fix some cases of the address fields being switched
432 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
433 $address2 = $address1;
434 $address1 = $location->{address2};
436 my $result = $class->query(
439 L => $location->{city},
440 AA => $location->{state},
441 PC => $location->{zip},
442 CC => country2code($country, LOCALE_CODE_ALPHA_3),
444 unless ( $result->is_success ) {
445 die "TomTom geocoding error: ".$result->message."\n";
447 my ($match) = $result->locations;
449 die "Location not found.\n";
451 my $type = $match->{type};
452 warn "tomtom returned $type match\n" if $DEBUG;
453 warn Dumper($match) if $DEBUG > 1;
455 if ( defined $match->{censusTract} ) {
456 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
457 join('.', $match->{censusTract} =~ /(....)(..)/);
459 # match levels below "intersection" should not be considered clean
460 my $clean = ($type eq 'addresspoint' ||
463 $type eq 'intersection'
466 $address2 = normalize_address2($address2, $location->{country});
469 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
470 $address1 .= $match->{street} if $match->{street};
473 address1 => $address1,
474 address2 => $address2,
475 city => $match->{city},
476 state => $location->{state}, # this will never change
477 country => $location->{country}, # ditto
478 zip => ($match->{standardPostalCode} || $match->{postcode}),
479 latitude => $match->{latitude},
480 longitude => $match->{longitude},
481 censustract => $tract,
482 addr_clean => $clean,
486 =iten normalize_address2 STRING, COUNTRY
488 Given an 'address2' STRING, normalize it for COUNTRY postal standards.
489 Currently only works for US and CA.
493 # XXX really ought to be a separate module
494 my %address2_forms = (
495 # Postal Addressing Standards, Appendix C
496 # (plus correction of "hanger" to "hangar")
524 # Canada Post Addressing Guidelines 4.3
535 sub normalize_address2 {
536 # Some things seen in the address2 field:
538 # The complete address (with address1 containing part of the company name,
539 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
541 my ($addr2, $country) = @_;
543 if ( exists($address2_forms{$country}) ) {
544 my $dict = $address2_forms{$country};
546 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
548 # remove all punctuation and spaces
549 foreach my $w (split(/\W+/, $addr2)) {
550 if ( exists($dict->{$w}) ) {
551 push @words, $dict->{$w};
556 my $result = join(' ', @words);
557 # correct spacing of pound sign + number
558 $result =~ s/NUMBER(\d)/# $1/;
559 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;