use LWP::UserAgent;
use HTTP::Request;
use HTTP::Request::Common qw( GET POST );
+use HTTP::Cookies;
use HTML::TokeParser;
use URI::Escape 3.31;
use Data::Dumper;
use FS::Conf;
+use Locale::Country;
FS::UID->install_callback( sub {
$conf = new FS::Conf;
my $return = {};
my $error = '';
- my $ua = new LWP::UserAgent;
+ my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new);
my $res = $ua->request( GET( $url ) );
warn $res->as_string
if $DEBUG > 2;
- unless ($res->code eq '200') {
+ if (!$res->is_success) {
$error = $res->message;
} else {
my $content = $res->content;
+
my $p = new HTML::TokeParser \$content;
my $viewstate;
my $eventvalidation;
last if $viewstate && $eventvalidation;
}
- unless ($viewstate && $eventvalidation ) {
+ if (!$viewstate or !$eventvalidation ) {
$error = "either no __VIEWSTATE or __EVENTVALIDATION found";
my @ffiec_args = (
__VIEWSTATE => $viewstate,
__EVENTVALIDATION => $eventvalidation,
+ __VIEWSTATEENCRYPTED => '',
ddlbYear => $year,
txtAddress => $location->{address1},
txtCity => $location->{city},
die $ezlocate_error{$match->{MAT_STAT}}."\n"
unless $match->{MAT_STAT} =~ /^B\d$/;
- {
- address1 => $match->{STD_ADDR},
+ my %result = (
+ address1 => $match->{MAT_ADDR},
address2 => $location->{address2},
- city => $match->{STD_CITY},
- state => $match->{STD_ST},
+ city => $match->{MAT_CITY},
+ state => $match->{MAT_ST},
country => $location->{country},
- zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
+ zip => $match->{MAT_ZIP},
latitude => $match->{MAT_LAT},
longitude => $match->{MAT_LON},
censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
sprintf('%07.2f',$match->{CEN_TRCT}),
addr_clean => 'Y',
+ );
+ if ( $match->{STD_ADDR} ) {
+ # then they have a postal standardized address for us
+ %result = ( %result,
+ address1 => $match->{STD_ADDR},
+ address2 => $location->{address2},
+ city => $match->{STD_CITY},
+ state => $match->{STD_ST},
+ zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
+ );
+ }
+
+ \%result;
+}
+
+sub _tomtom_query { # helper method for the below
+ my %args = @_;
+ my $result = Geo::TomTom::Geocoding->query(%args);
+ die "TomTom geocoding error: ".$result->message."\n"
+ unless ( $result->is_success );
+ my ($match) = $result->locations;
+ my $type = $match->{type};
+ # match levels below "intersection" should not be considered clean
+ my $clean = ($type eq 'addresspoint' ||
+ $type eq 'poi' ||
+ $type eq 'house' ||
+ $type eq 'intersection'
+ ) ? 'Y' : '';
+ warn "tomtom returned $type match\n" if $DEBUG;
+ warn Dumper($match) if $DEBUG > 1;
+ ($match, $clean);
+}
+
+sub standardize_tomtom {
+ # post-2013 TomTom API
+ # much better, but incompatible with ezlocate
+ my $self = shift;
+ my $location = shift;
+ eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
+ die $@ if $@;
+
+ my $key = $conf->config('tomtom-userid')
+ or die "no tomtom-userid configured\n";
+
+ my $country = code2country($location->{country});
+ my ($address1, $address2) = ($location->{address1}, $location->{address2});
+ my $subloc = '';
+
+ # try to fix some cases of the address fields being switched
+ if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
+ $address2 = $address1;
+ $address1 = $location->{address2};
+ }
+ # parse sublocation part (unit/suite/apartment...) and clean up
+ # non-sublocation address2
+ ($subloc, $address2) =
+ subloc_address2($address1, $address2, $location->{country});
+ # ask TomTom to standardize address1:
+ my %args = (
+ key => $key,
+ T => $address1,
+ L => $location->{city},
+ AA => $location->{state},
+ PC => $location->{zip},
+ CC => country2code($country, LOCALE_CODE_ALPHA_3),
+ );
+
+ my ($match, $clean) = _tomtom_query(%args);
+
+ if (!$match or !$clean) {
+ # Then try cleaning up the input; TomTom is picky about junk in the
+ # address. Any of these can still be a clean match.
+ my $h = Geo::StreetAddress::US->parse_location($address1);
+ # First conservatively:
+ if ( $h->{sec_unit_type} ) {
+ my $strip = '\s+' . $h->{sec_unit_type};
+ $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
+ $strip .= '$';
+ $args{T} =~ s/$strip//;
+ ($match, $clean) = _tomtom_query(%args);
+ }
+ if ( !$match or !$clean ) {
+ # Then more aggressively:
+ $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
+ ($match, $clean) = _tomtom_query(%args);
+ }
+ }
+
+ if (!$match) {
+ die "Location not found.\n";
+ }
+ my $tract = '';
+ if ( defined $match->{censusTract} ) {
+ $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
+ join('.', $match->{censusTract} =~ /(....)(..)/);
+ }
+ $address1 = '';
+ $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
+ $address1 .= $match->{street} if $match->{street};
+ $address1 .= ' '.$subloc if $subloc;
+ $address1 = uc($address1); # USPS standards
+
+ return +{
+ address1 => $address1,
+ address2 => $address2,
+ city => uc($match->{city}),
+ state => uc($location->{state}),
+ country => uc($location->{country}),
+ zip => ($match->{standardPostalCode} || $match->{postcode}),
+ latitude => $match->{latitude},
+ longitude => $match->{longitude},
+ censustract => $tract,
+ addr_clean => $clean,
};
}
-=back
+=iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
+
+Given 'address1' and 'address2' strings, extract the sublocation part
+(from either one) and return it. If the sublocation was found in ADDRESS1,
+also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
+contain something relevant.
=cut
+my %subloc_forms = (
+ # Postal Addressing Standards, Appendix C
+ # (plus correction of "hanger" to "hangar")
+ US => {qw(
+ APARTMENT APT
+ BASEMENT BSMT
+ BUILDING BLDG
+ DEPARTMENT DEPT
+ FLOOR FL
+ FRONT FRNT
+ HANGAR HNGR
+ HANGER HNGR
+ KEY KEY
+ LOBBY LBBY
+ LOT LOT
+ LOWER LOWR
+ OFFICE OFC
+ PENTHOUSE PH
+ PIER PIER
+ REAR REAR
+ ROOM RM
+ SIDE SIDE
+ SLIP SLIP
+ SPACE SPC
+ STOP STOP
+ SUITE STE
+ TRAILER TRLR
+ UNIT UNIT
+ UPPER UPPR
+ )},
+ # Canada Post Addressing Guidelines 4.3
+ CA => {qw(
+ APARTMENT APT
+ APPARTEMENT APP
+ BUREAU BUREAU
+ SUITE SUITE
+ UNIT UNIT
+ UNITÉ UNITÉ
+ )},
+);
+
+sub subloc_address2 {
+ # Some things seen in the address2 field:
+ # Whitespace
+ # The complete address (with address1 containing part of the company name,
+ # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
+ # number, etc.)
+
+ # try to parse sublocation parts from address1; if they are present we'll
+ # append them back to address1 after standardizing
+ my $subloc = '';
+ my ($addr1, $addr2, $country) = map uc, @_;
+ my $dict = $subloc_forms{$country} or return('', $addr2);
+
+ my $found_in = 0; # which address is the sublocation
+ my $h;
+ foreach my $string (
+ # patterns to try to parse
+ $addr1,
+ "$addr1 Nullcity, CA"
+ ) {
+ $h = Geo::StreetAddress::US->parse_location($addr1);
+ last if exists($h->{sec_unit_type});
+ }
+ if (exists($h->{sec_unit_type})) {
+ $found_in = 1
+ } else {
+ foreach my $string (
+ # more patterns
+ $addr2,
+ "$addr1, $addr2",
+ "$addr1, $addr2 Nullcity, CA"
+ ) {
+ $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
+ last if exists($h->{sec_unit_type});
+ }
+ if (exists($h->{sec_unit_type})) {
+ $found_in = 2;
+ }
+ }
+ if ( $found_in ) {
+ $subloc = $h->{sec_unit_type};
+ # special case: do not combine P.O. box sublocs with address1
+ if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
+ if ( $found_in == 2 ) {
+ $addr2 = "PO BOX ".$h->{sec_unit_num};
+ } # else it's in addr1, and leave it alone
+ return ('', $addr2);
+ } elsif ( exists($dict->{$subloc}) ) {
+ # substitute the official abbreviation
+ $subloc = $dict->{$subloc};
+ }
+ $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
+ } # otherwise $subloc = ''
+
+ if ( $found_in == 2 ) {
+ # address2 should be fully combined into address1
+ return ($subloc, '');
+ }
+ # else address2 is not the canonical sublocation, but do our best to
+ # clean it up
+ #
+ # protect this
+ $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
+ my @words;
+ # remove all punctuation and spaces
+ foreach my $w (split(/\W+/, $addr2)) {
+ if ( exists($dict->{$w}) ) {
+ push @words, $dict->{$w};
+ } else {
+ push @words, $w;
+ }
+ my $result = join(' ', @words);
+ # correct spacing of pound sign + number
+ $result =~ s/NUMBER(\d)/# $1/;
+ warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
+ $addr2 = $result;
+ }
+ ($subloc, $addr2);
+}
+
+
+=back
+
+=cut
1;