X-Git-Url: http://git.freeside.biz/gitweb/?a=blobdiff_plain;f=FS%2FFS%2FMisc%2FGeo.pm;h=b5cc325d1713852b8d06555d5bd32c027022e466;hb=fe4515eb37d76849dd08c62782d86bc7ba311dcd;hp=acfeabf3b5da086eecf136c6bc87709b0f06cb3e;hpb=fb4ab1073f0d15d660c6cdc4e07afebf68ef3924;p=freeside.git diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm index acfeabf3b..b5cc325d1 100644 --- a/FS/FS/Misc/Geo.pm +++ b/FS/FS/Misc/Geo.pm @@ -2,23 +2,29 @@ package FS::Misc::Geo; use strict; use base qw( Exporter ); -use vars qw( $DEBUG @EXPORT_OK ); +use vars qw( $DEBUG @EXPORT_OK $conf ); use LWP::UserAgent; use HTTP::Request; use HTTP::Request::Common qw( GET POST ); use HTML::TokeParser; -use URI::Escape; +use URI::Escape 3.31; use Data::Dumper; +use FS::Conf; +use Locale::Country; + +FS::UID->install_callback( sub { + $conf = new FS::Conf; +} ); $DEBUG = 0; -@EXPORT_OK = qw( get_censustract get_district ); +@EXPORT_OK = qw( get_district ); =head1 NAME FS::Misc::Geo - routines to fetch geographic information -=head1 FUNCTIONS +=head1 CLASS METHODS =over 4 @@ -30,13 +36,14 @@ codes) or an error message. =cut -sub get_censustract { +sub get_censustract_ffiec { + my $class = shift; my $location = shift; my $year = shift; warn Dumper($location, $year) if $DEBUG; - my $url='http://www.ffiec.gov/Geocode/default.aspx'; + my $url = 'http://www.ffiec.gov/Geocode/default.aspx'; my $return = {}; my $error = ''; @@ -45,7 +52,7 @@ sub get_censustract { my $res = $ua->request( GET( $url ) ); warn $res->as_string - if $DEBUG > 1; + if $DEBUG > 2; unless ($res->code eq '200') { @@ -75,14 +82,11 @@ sub get_censustract { my($zip5, $zip4) = split('-',$location->{zip}); - $year ||= '2011'; - #ugh workaround a mess at ffiec - $year = " $year" if $year ne '2011'; + $year ||= '2012'; my @ffiec_args = ( __VIEWSTATE => $viewstate, __EVENTVALIDATION => $eventvalidation, ddlbYear => $year, - ddlbYear => '2011', #' 2009', txtAddress => $location->{address1}, txtCity => $location->{city}, ddlbState => $location->{state}, @@ -90,12 +94,12 @@ sub get_censustract { btnSearch => 'Search', ); warn join("\n", @ffiec_args ) - if $DEBUG; + if $DEBUG > 1; push @{ $ua->requests_redirectable }, 'POST'; $res = $ua->request( POST( $url, \@ffiec_args ) ); warn $res->as_string - if $DEBUG > 1; + if $DEBUG > 2; unless ($res->code eq '200') { @@ -105,7 +109,7 @@ sub get_censustract { my @id = qw( MSACode StateCode CountyCode TractCode ); $content = $res->content; - warn $res->content if $DEBUG > 1; + warn $res->content if $DEBUG > 2; $p = new HTML::TokeParser \$content; my $prefix = 'UcGeoResult11_lb'; my $compare = @@ -117,7 +121,10 @@ sub get_censustract { $return->{lc($1)} = $p->get_trimmed_text("/span"); } - $error = "No census tract found" unless $return->{tractcode}; + unless ( $return->{tractcode} ) { + warn "$error: $content ". Dumper($return) if $DEBUG; + $error = "No census tract found"; + } $return->{tractcode} .= ' ' unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround @@ -127,15 +134,15 @@ sub get_censustract { } #unless ($res->code eq '200') - return "FFIEC Geocoding error: $error" if $error; + die "FFIEC Geocoding error: $error\n" if $error; $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'}; } -sub get_district_methods { - '' => '', - 'wa_sales' => 'Washington sales tax', -}; +#sub get_district_methods { +# '' => '', +# 'wa_sales' => 'Washington sales tax', +#}; =item get_district LOCATION METHOD @@ -201,12 +208,12 @@ sub wa_sales { my $query_string = join($delim, @args ); $url .= "?$query_string"; - warn "\nrequest: $url\n\n" if $DEBUG; + warn "\nrequest: $url\n\n" if $DEBUG > 1; my $res = $ua->request( GET( "$url?$query_string" ) ); warn $res->as_string - if $DEBUG > 1; + if $DEBUG > 2; if ($res->code ne '200') { $error = $res->message; @@ -253,7 +260,7 @@ sub wa_sales { # just to make sure if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) { $return->{'tax'} *= 100; #percentage - warn Dumper($return) if $DEBUG; + warn Dumper($return) if $DEBUG > 1; return $return; } else { @@ -267,6 +274,292 @@ sub wa_sales { die "WA tax district lookup error: $error"; } +sub standardize_usps { + my $class = shift; + + eval "use Business::US::USPS::WebTools::AddressStandardization"; + die $@ if $@; + + my $location = shift; + if ( $location->{country} ne 'US' ) { + # soft failure + warn "standardize_usps not for use in country ".$location->{country}."\n"; + $location->{addr_clean} = ''; + return $location; + } + my $userid = $conf->config('usps_webtools-userid'); + my $password = $conf->config('usps_webtools-password'); + my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( { + UserID => $userid, + Password => $password, + Testing => 0, + } ) or die "error starting USPS WebTools\n"; + + my($zip5, $zip4) = split('-',$location->{'zip'}); + + my %usps_args = ( + FirmName => $location->{company}, + Address2 => $location->{address1}, + Address1 => $location->{address2}, + City => $location->{city}, + State => $location->{state}, + Zip5 => $zip5, + Zip4 => $zip4, + ); + warn join('', map "$_: $usps_args{$_}\n", keys %usps_args ) + if $DEBUG > 1; + + my $hash = $verifier->verify_address( %usps_args ); + + warn $verifier->response + if $DEBUG > 1; + + die "USPS WebTools error: ".$verifier->{error}{description} ."\n" + if $verifier->is_error; + + my $zip = $hash->{Zip5}; + $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/; + + { company => $hash->{FirmName}, + address1 => $hash->{Address2}, + address2 => $hash->{Address1}, + city => $hash->{City}, + state => $hash->{State}, + zip => $zip, + country => 'US', + addr_clean=> 'Y' } +} + +my %ezlocate_error = ( # USA_Geo_002 documentation + 10 => 'State not found', + 11 => 'City not found', + 12 => 'Invalid street address', + 14 => 'Street name not found', + 15 => 'Address range does not exist', + 16 => 'Ambiguous address', + 17 => 'Intersection not found', #unused? +); + +sub standardize_ezlocate { + my $self = shift; + my $location = shift; + my $class; + #if ( $location->{country} eq 'US' ) { + # $class = 'USA_Geo_004Tool'; + #} + #elsif ( $location->{country} eq 'CA' ) { + # $class = 'CAN_Geo_001Tool'; + #} + #else { # shouldn't be a fatal error, just pass through unverified address + # warn "standardize_teleatlas: address lookup in '".$location->{country}. + # "' not available\n"; + # return $location; + #} + #my $path = $conf->config('teleatlas-path') || ''; + #local @INC = (@INC, $path); + #eval "use $class;"; + #if ( $@ ) { + # die "Loading $class failed:\n$@". + # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n"; + #} + + $class = 'Geo::EZLocate'; # use our own library + eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling + die $@ if $@; + + my $userid = $conf->config('ezlocate-userid') + or die "no ezlocate-userid configured\n"; + my $password = $conf->config('ezlocate-password') + or die "no ezlocate-password configured\n"; + + my $tool = $class->new($userid, $password); + my $match = $tool->findAddress( + $location->{address1}, + $location->{city}, + $location->{state}, + $location->{zip}, #12345-6789 format is allowed + ); + warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1; + # error handling - B codes indicate success + die $ezlocate_error{$match->{MAT_STAT}}."\n" + unless $match->{MAT_STAT} =~ /^B\d$/; + + my %result = ( + address1 => $match->{MAT_ADDR}, + address2 => $location->{address2}, + city => $match->{MAT_CITY}, + state => $match->{MAT_ST}, + country => $location->{country}, + zip => $match->{MAT_ZIP}, + latitude => $match->{MAT_LAT}, + longitude => $match->{MAT_LON}, + censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}. + sprintf('%07.2f',$match->{CEN_TRCT}), + addr_clean => 'Y', + ); + if ( $match->{STD_ADDR} ) { + # then they have a postal standardized address for us + %result = ( %result, + address1 => $match->{STD_ADDR}, + address2 => $location->{address2}, + city => $match->{STD_CITY}, + state => $match->{STD_ST}, + zip => $match->{STD_ZIP}.'-'.$match->{STD_P4}, + ); + } + + \%result; +} + +sub standardize_tomtom { + # post-2013 TomTom API + # much better, but incompatible with ezlocate + my $self = shift; + my $location = shift; + my $class = 'Geo::TomTom::Geocoding'; + eval "use $class"; + die $@ if $@; + + my $key = $conf->config('tomtom-userid') + or die "no tomtom-userid configured\n"; + + my $country = code2country($location->{country}); + my ($address1, $address2) = ($location->{address1}, $location->{address2}); + # try to fix some cases of the address fields being switched + if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) { + $address2 = $address1; + $address1 = $location->{address2}; + } + my $result = $class->query( + key => $key, + T => $address1, + L => $location->{city}, + AA => $location->{state}, + PC => $location->{zip}, + CC => country2code($country, LOCALE_CODE_ALPHA_3), + ); + unless ( $result->is_success ) { + die "TomTom geocoding error: ".$result->message."\n"; + } + my ($match) = $result->locations; + if (!$match) { + die "Location not found.\n"; + } + my $type = $match->{type}; + warn "tomtom returned $type match\n" if $DEBUG; + warn Dumper($match) if $DEBUG > 1; + my $tract = ''; + if ( defined $match->{censusTract} ) { + $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}. + join('.', $match->{censusTract} =~ /(....)(..)/); + } + # match levels below "intersection" should not be considered clean + my $clean = ($type eq 'addresspoint' || + $type eq 'poi' || + $type eq 'house' || + $type eq 'intersection' + ) ? 'Y' : ''; + + $address2 = normalize_address2($address2, $location->{country}); + + $address1 = ''; + $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber}); + $address1 .= $match->{street} if $match->{street}; + + return +{ + address1 => $address1, + address2 => $address2, + city => $match->{city}, + state => $location->{state}, # this will never change + country => $location->{country}, # ditto + zip => ($match->{standardPostalCode} || $match->{postcode}), + latitude => $match->{latitude}, + longitude => $match->{longitude}, + censustract => $tract, + addr_clean => $clean, + }; +} + +=iten normalize_address2 STRING, COUNTRY + +Given an 'address2' STRING, normalize it for COUNTRY postal standards. +Currently only works for US and CA. + +=cut + +# XXX really ought to be a separate module +my %address2_forms = ( + # Postal Addressing Standards, Appendix C + # (plus correction of "hanger" to "hangar") + US => {qw( + APARTMENT APT + BASEMENT BSMT + BUILDING BLDG + DEPARTMENT DEPT + FLOOR FL + FRONT FRNT + HANGAR HNGR + HANGER HNGR + KEY KEY + LOBBY LBBY + LOT LOT + LOWER LOWR + OFFICE OFC + PENTHOUSE PH + PIER PIER + REAR REAR + ROOM RM + SIDE SIDE + SLIP SLIP + SPACE SPC + STOP STOP + SUITE STE + TRAILER TRLR + UNIT UNIT + UPPER UPPR + )}, + # Canada Post Addressing Guidelines 4.3 + CA => {qw( + APARTMENT APT + APPARTEMENT APP + BUREAU BUREAU + SUITE SUITE + UNIT UNIT + UNITÉ UNITÉ + )}, +); + +sub normalize_address2 { + # Some things seen in the address2 field: + # Whitespace + # The complete address (with address1 containing part of the company name, + # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite + # number, etc.) + my ($addr2, $country) = @_; + $addr2 = uc($addr2); + if ( exists($address2_forms{$country}) ) { + my $dict = $address2_forms{$country}; + # protect this + $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g? + my @words; + # remove all punctuation and spaces + foreach my $w (split(/\W+/, $addr2)) { + if ( exists($dict->{$w}) ) { + push @words, $dict->{$w}; + } else { + push @words, $w; + } + } + my $result = join(' ', @words); + # correct spacing of pound sign + number + $result =~ s/NUMBER(\d)/# $1/; + warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1; + $addr2 = $result; + } + $addr2; +} + + =back =cut