+# standardize_usps LOCATION
+#
+# Class method.  Sends the address in LOCATION (a hashref with company,
+# address1, address2, city, state, zip and country) to the USPS WebTools
+# address standardization service.
+#
+# For a country other than 'US' this is a soft failure: a warning is emitted,
+# addr_clean is set to '' on the hashref that was passed in, and that same
+# hashref is returned.
+#
+# On success, returns a new hashref holding the standardized company,
+# address1, address2, city, state and zip, with country 'US' and
+# addr_clean 'Y'.
+#
+# Dies if the WebTools module cannot be loaded, if the client object cannot
+# be created, or if the service reports an error.
+sub standardize_usps {
+  my $class = shift;
+
+  # The module is loaded before anything else, so a missing module is fatal
+  # even for addresses that would be rejected below.
+  eval "use Business::US::USPS::WebTools::AddressStandardization";
+  die $@ if $@;
+
+  my $location = shift;
+
+  unless ( $location->{country} eq 'US' ) {
+    # soft failure
+    warn "standardize_usps not for use in country $location->{country}\n";
+    $location->{addr_clean} = '';
+    return $location;
+  }
+
+  # Scalar assignments on purpose: config() must not be called in list
+  # context inside the constructor arguments.
+  my $webtools_user = $conf->config('usps_webtools-userid');
+  my $webtools_pass = $conf->config('usps_webtools-password');
+
+  my $usps = Business::US::USPS::WebTools::AddressStandardization->new( {
+    UserID   => $webtools_user,
+    Password => $webtools_pass,
+    Testing  => 0,
+  } ) or die "error starting USPS WebTools\n";
+
+  my ($zip5, $zip4) = split /-/, $location->{zip};
+
+  # Our address1/address2 are deliberately crossed with the service's
+  # Address2/Address1; the same crossing is undone when building the result.
+  my %request = (
+    FirmName => $location->{company},
+    Address2 => $location->{address1},
+    Address1 => $location->{address2},
+    City     => $location->{city},
+    State    => $location->{state},
+    Zip5     => $zip5,
+    Zip4     => $zip4,
+  );
+
+  if ( $DEBUG > 1 ) {
+    my $dump = '';
+    $dump .= "$_: $request{$_}\n" foreach keys %request;
+    warn $dump;
+  }
+
+  my $verified = $usps->verify_address( %request );
+
+  if ( $DEBUG > 1 ) {
+    warn $usps->response;
+  }
+
+  if ( $usps->is_error ) {
+    die "USPS WebTools error: " . $usps->{error}{description} . "\n";
+  }
+
+  # Append the +4 part only when the service actually returned digits for it.
+  my $zip = $verified->{Zip5};
+  if ( $verified->{Zip4} =~ /\d/ ) {
+    $zip .= '-' . $verified->{Zip4};
+  }
+
+  return {
+    company    => $verified->{FirmName},
+    address1   => $verified->{Address2},
+    address2   => $verified->{Address1},
+    city       => $verified->{City},
+    state      => $verified->{State},
+    zip        => $zip,
+    country    => 'US',
+    addr_clean => 'Y',
+  };
+}
+
+# Human-readable messages for the EZLocate match status codes that indicate
+# a failed lookup, keyed by status code (per the USA_Geo_002 documentation).
+my %ezlocate_error = (
+  '10' => 'State not found',
+  '11' => 'City not found',
+  '12' => 'Invalid street address',
+  '14' => 'Street name not found',
+  '15' => 'Address range does not exist',
+  '16' => 'Ambiguous address',
+  '17' => 'Intersection not found', #unused?
+);
+
+# standardize_ezlocate LOCATION
+#
+# Looks up the address in LOCATION (a hashref; address1, address2, city,
+# state, zip and country are read) through Geo::EZLocate, using the
+# 'ezlocate-userid' and 'ezlocate-password' configuration values.
+#
+# Returns a new hashref with address1, address2, city, state, country, zip,
+# latitude, longitude, censustract and addr_clean => 'Y'.  address2 and
+# country are passed through from LOCATION.  When the service also supplies
+# a postal-standardized address (STD_* fields), that address, city, state
+# and zip replace the geocoder's matched (MAT_*) values.
+#
+# Dies if Geo::EZLocate (0.02 or later) cannot be loaded, if either
+# credential is not configured, or if the match status is not a "B" code.
+# The message comes from %ezlocate_error where the status is known; any
+# other status produces a generic message that includes the status.
+sub standardize_ezlocate {
+  my $self = shift;
+  my $location = shift;
+
+  # Historical note: this used to pick a TeleAtlas SDK tool class by country
+  # (USA_Geo_004Tool / CAN_Geo_001Tool) and load it from 'teleatlas-path'.
+  my $class = 'Geo::EZLocate'; # use our own library
+  eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
+  die $@ if $@;
+
+  my $userid = $conf->config('ezlocate-userid')
+    or die "no ezlocate-userid configured\n";
+  my $password = $conf->config('ezlocate-password')
+    or die "no ezlocate-password configured\n";
+
+  my $tool = $class->new($userid, $password);
+  my $match = $tool->findAddress(
+    $location->{address1},
+    $location->{city},
+    $location->{state},
+    $location->{zip}, #12345-6789 format is allowed
+  );
+  warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
+
+  # error handling - B codes indicate success
+  my $status = $match && $match->{MAT_STAT};
+  $status = '' unless defined $status;
+  unless ( $status =~ /^B\d$/ ) {
+    # Never die with an empty message: statuses missing from the table (or
+    # no status at all) get a generic description instead.
+    my $reason = $ezlocate_error{$status}
+      || "EZLocate address lookup failed (status '$status')";
+    die "$reason\n";
+  }
+
+  my %result = (
+    address1    => $match->{MAT_ADDR},
+    address2    => $location->{address2},
+    city        => $match->{MAT_CITY},
+    state       => $match->{MAT_ST},
+    country     => $location->{country},
+    zip         => $match->{MAT_ZIP},
+    latitude    => $match->{MAT_LAT},
+    longitude   => $match->{MAT_LON},
+    censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
+                     sprintf('%07.2f',$match->{CEN_TRCT}),
+    addr_clean  => 'Y',
+  );
+
+  if ( $match->{STD_ADDR} ) {
+    # then they have a postal standardized address for us
+    my $zip = $match->{STD_ZIP};
+    # Add the +4 only when one was returned, so the zip never ends in a
+    # bare hyphen (same rule standardize_usps applies to Zip4).
+    $zip .= '-' . $match->{STD_P4}
+      if defined $match->{STD_P4} and $match->{STD_P4} =~ /\d/;
+
+    @result{qw( address1 city state zip )} =
+      ( @$match{qw( STD_ADDR STD_CITY STD_ST )}, $zip );
+  }
+
+  return \%result;
+}
+
+# _tomtom_query ARGS
+#
+# Helper function for standardize_tomtom (called as a plain function, not a
+# method).  Runs one Geo::TomTom::Geocoding query with the key/value pairs in
+# ARGS and considers only the first location in the result.
+#
+# Returns a two-element list (MATCH, CLEAN):
+#   MATCH - the first location returned, or undef if there was none
+#   CLEAN - 'Y' if the match type is addresspoint, poi, house or
+#           intersection; '' for any other type or when there is no match
+#
+# Dies with "TomTom geocoding error: ..." if the query was not successful.
+sub _tomtom_query { # helper method for the below
+  my %args = @_;
+  my $result = Geo::TomTom::Geocoding->query(%args);
+  die "TomTom geocoding error: ".$result->message."\n"
+    unless ( $result->is_success );
+
+  my ($match) = $result->locations;
+  unless ( $match ) {
+    # A successful query with nothing in it: report that explicitly rather
+    # than dereferencing an undefined match.
+    warn "tomtom returned no match\n" if $DEBUG;
+    return (undef, '');
+  }
+
+  my $type = defined $match->{type} ? $match->{type} : '';
+  # match levels below "intersection" should not be considered clean
+  my %clean_type = map { $_ => 1 } qw( addresspoint poi house intersection );
+  my $clean = $clean_type{$type} ? 'Y' : '';
+
+  warn "tomtom returned $type match\n" if $DEBUG;
+  warn Dumper($match) if $DEBUG > 1;
+  return ($match, $clean);
+}
+
+# standardize_tomtom LOCATION
+#
+# Standardizes the address in LOCATION (a hashref; address1, address2, city,
+# state, zip and country are read) with the post-2013 TomTom geocoding API,
+# using the 'tomtom-userid' configuration value as the API key.
+#
+# The street line is queried as given first.  If that does not produce a
+# clean match, it is retried with a trailing unit designator removed, and
+# then with only the parsed number, street and type.
+#
+# Returns a new hashref with address1, address2, city, state, country, zip,
+# latitude, longitude, censustract and addr_clean.  State and country are
+# the upper-cased input values; the sublocation found by subloc_address2 is
+# appended to the standardized street line.
+#
+# Dies if the required modules cannot be loaded, if no API key is
+# configured, if a query fails, or with "Address not found" when no clean
+# match is obtained.
+sub standardize_tomtom {
+  # post-2013 TomTom API
+  # much better, but incompatible with ezlocate
+  my $self = shift;
+  my $location = shift;
+  eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
+  die $@ if $@;
+
+  my $key = $conf->config('tomtom-userid')
+    or die "no tomtom-userid configured\n";
+
+  my $country = code2country($location->{country});
+  my ($address1, $address2) = ($location->{address1}, $location->{address2});
+  my $subloc = '';
+
+  # trim whitespace (an undefined field is left undefined)
+  for ( $address1, $address2 ) {
+    next unless defined;
+    s/^\s+//;
+    s/\s+$//;
+  }
+
+  # try to fix some cases of the address fields being switched; swap the
+  # trimmed values rather than re-reading the raw input
+  my $addr1_numbered = ( defined $address1 and $address1 =~ /^\d/ );
+  my $addr2_numbered = ( defined $address2 and $address2 =~ /^\d/ );
+  if ( $addr2_numbered and !$addr1_numbered ) {
+    ($address1, $address2) = ($address2, $address1);
+  }
+
+  # parse sublocation part (unit/suite/apartment...) and clean up
+  # non-sublocation address2
+  ($subloc, $address2) =
+    subloc_address2($address1, $address2, $location->{country});
+
+  # ask TomTom to standardize address1:
+  my %args = (
+    key => $key,
+    T   => $address1,
+    L   => $location->{city},
+    AA  => $location->{state},
+    PC  => $location->{zip},
+    CC  => country2code($country, LOCALE_CODE_ALPHA_3),
+  );
+
+  my ($match, $clean) = _tomtom_query(%args);
+
+  if ( !$match or !$clean ) {
+    # Then try cleaning up the input; TomTom is picky about junk in the
+    # address.  Any of these can still be a clean match.
+    # The parser may return nothing; treat that as "no parts found".
+    my $h = Geo::StreetAddress::US->parse_location($address1) || {};
+
+    # First conservatively: drop a trailing unit designator.
+    if ( $h->{sec_unit_type} ) {
+      # The parsed text is quoted so it is matched literally, and matched
+      # case-insensitively in case the parser changed its case.
+      my $strip = '\s+' . quotemeta( $h->{sec_unit_type} );
+      $strip .= '\s*' . quotemeta( $h->{sec_unit_num} )
+        if $h->{sec_unit_num};
+      $strip .= '$';
+      $args{T} =~ s/$strip//i;
+      ($match, $clean) = _tomtom_query(%args);
+    }
+
+    if ( !$match or !$clean ) {
+      # Then more aggressively: only the number, street and type that were
+      # actually parsed.  With none of them there is nothing to ask for.
+      my @parts = grep { defined and length } @$h{'number', 'street', 'type'};
+      if ( @parts ) {
+        $args{T} = uc( join(' ', @parts) );
+        ($match, $clean) = _tomtom_query(%args);
+      }
+    }
+  }
+
+  if ( !$match or !$clean ) { # partial matches are not useful
+    die "Address not found\n";
+  }
+
+  # state code + county code + tract written as NNNN.NN
+  my $tract = '';
+  if ( defined $match->{censusTract} ) {
+    $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
+               join('.', $match->{censusTract} =~ /(....)(..)/);
+  }
+
+  # rebuild the street line from the match, then re-attach the sublocation
+  $address1 = '';
+  $address1 = $match->{houseNumber} . ' '
+    if defined $match->{houseNumber} and length $match->{houseNumber};
+  $address1 .= $match->{street} if $match->{street};
+  $address1 .= ' '.$subloc if $subloc;
+  $address1 = uc($address1); # USPS standards
+
+  return +{
+    address1    => $address1,
+    address2    => $address2,
+    city        => uc($match->{city}),
+    state       => uc($location->{state}),
+    country     => uc($location->{country}),
+    zip         => ($match->{standardPostalCode} || $match->{postcode}),
+    latitude    => $match->{latitude},
+    longitude   => $match->{longitude},
+    censustract => $tract,
+    addr_clean  => $clean,
+  };
+}
+
+=item subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
+
+Given 'address1' and 'address2' strings, extract the sublocation part
+(from either one) and return it. If the sublocation was found in ADDRESS1,
+also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
+contain something relevant.