X-Git-Url: http://git.freeside.biz/gitweb/?p=freeside.git;a=blobdiff_plain;f=FS%2FFS%2FMisc%2FGeo.pm;h=bc020a22d403567ca1f329b856fa6e107ac3d22c;hp=b5cc325d1713852b8d06555d5bd32c027022e466;hb=690d877102bcbddb20806995c549ed35b7b36647;hpb=fe4515eb37d76849dd08c62782d86bc7ba311dcd diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm index b5cc325d1..bc020a22d 100644 --- a/FS/FS/Misc/Geo.pm +++ b/FS/FS/Misc/Geo.pm @@ -6,11 +6,15 @@ use vars qw( $DEBUG @EXPORT_OK $conf ); use LWP::UserAgent; use HTTP::Request; use HTTP::Request::Common qw( GET POST ); +use IO::Socket::SSL; use HTML::TokeParser; +use Cpanel::JSON::XS; use URI::Escape 3.31; use Data::Dumper; use FS::Conf; +use FS::Log; use Locale::Country; +use XML::LibXML; FS::UID->install_callback( sub { $conf = new FS::Conf; @@ -28,7 +32,7 @@ FS::Misc::Geo - routines to fetch geographic information =over 4 -=item get_censustract LOCATION YEAR +=item get_censustract_ffiec LOCATION YEAR Given a location hash (see L) and a census map year, returns a census tract code (consisting of state, county, and tract @@ -40,103 +44,65 @@ sub get_censustract_ffiec { my $class = shift; my $location = shift; my $year = shift; + $year ||= 2013; - warn Dumper($location, $year) if $DEBUG; + if ( length($location->{country}) and uc($location->{country}) ne 'US' ) { + return ''; + } - my $url = 'http://www.ffiec.gov/Geocode/default.aspx'; + warn Dumper($location, $year) if $DEBUG; - my $return = {}; - my $error = ''; + # the old FFIEC geocoding service was shut down December 1, 2014. + # welcome to the future. + my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData'; + # build the single-line query + my $single_line = join(', ', $location->{address1}, + $location->{city}, + $location->{state} + ); + my $hashref = { sSingleLine => $single_line, iCensusYear => $year }; + my $request = POST( $url, + 'Content-Type' => 'application/json; charset=utf-8', + 'Accept' => 'application/json', + 'Content' => encode_json($hashref) + ); my $ua = new LWP::UserAgent; - my $res = $ua->request( GET( $url ) ); + my $res = $ua->request( $request ); warn $res->as_string if $DEBUG > 2; - unless ($res->code eq '200') { - - $error = $res->message; - - } else { - - my $content = $res->content; - my $p = new HTML::TokeParser \$content; - my $viewstate; - my $eventvalidation; - while (my $token = $p->get_tag('input') ) { - if ($token->[1]->{name} eq '__VIEWSTATE') { - $viewstate = $token->[1]->{value}; - } - if ($token->[1]->{name} eq '__EVENTVALIDATION') { - $eventvalidation = $token->[1]->{value}; - } - last if $viewstate && $eventvalidation; - } - - unless ($viewstate && $eventvalidation ) { - - $error = "either no __VIEWSTATE or __EVENTVALIDATION found"; - - } else { - - my($zip5, $zip4) = split('-',$location->{zip}); - - $year ||= '2012'; - my @ffiec_args = ( - __VIEWSTATE => $viewstate, - __EVENTVALIDATION => $eventvalidation, - ddlbYear => $year, - txtAddress => $location->{address1}, - txtCity => $location->{city}, - ddlbState => $location->{state}, - txtZipCode => $zip5, - btnSearch => 'Search', - ); - warn join("\n", @ffiec_args ) - if $DEBUG > 1; - - push @{ $ua->requests_redirectable }, 'POST'; - $res = $ua->request( POST( $url, \@ffiec_args ) ); - warn $res->as_string - if $DEBUG > 2; - - unless ($res->code eq '200') { - - $error = $res->message; + if (!$res->is_success) { - } else { + die "Census tract lookup error: ".$res->message; - my @id = qw( MSACode StateCode CountyCode TractCode ); - $content = $res->content; - warn $res->content if $DEBUG > 2; - $p = new HTML::TokeParser \$content; - my $prefix = 'UcGeoResult11_lb'; - my $compare = - sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) }; - - while (my $token = $p->get_tag('span') ) { - next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) ); - $token->[1]->{id} =~ /^$prefix(\w+)$/; - $return->{lc($1)} = $p->get_trimmed_text("/span"); - } - - unless ( $return->{tractcode} ) { - warn "$error: $content ". Dumper($return) if $DEBUG; - $error = "No census tract found"; - } - $return->{tractcode} .= ' ' - unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround + } - } #unless ($res->code eq '200') + local $@; + my $content = eval { decode_json($res->content) }; + die "Census tract JSON error: $@\n" if $@; - } #unless ($viewstate) + if ( !exists $content->{d}->{sStatus} ) { + die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n"; + } + if ( $content->{d}->{sStatus} eq 'Y' ) { + # success + # this also contains the (partial) standardized address, correct zip + # code, coordinates, etc., and we could get all of them, but right now + # we only want the census tract + my $tract = join('', $content->{d}->{sStateCode}, + $content->{d}->{sCountyCode}, + $content->{d}->{sTractCode}); + return $tract; - } #unless ($res->code eq '200') + } else { - die "FFIEC Geocoding error: $error\n" if $error; + my $error = $content->{d}->{sMsg} + || 'FFIEC lookup failed, but with no status message.'; + die "$error\n"; - $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'}; + } } #sub get_district_methods { @@ -176,104 +142,174 @@ sub get_district { &$method($location); } -sub wa_sales { - my $location = shift; - my $error = ''; - return '' if $location->{state} ne 'WA'; - my $return = { %$location }; - $return->{'exempt_amount'} = 0.00; +=head2 wa_sales location_hash - my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx'; - my $ua = new LWP::UserAgent; +Expects output of location_hash() as parameter - my $delim = '<|>'; # yes, <|> - my $year = (localtime)[5] + 1900; - my $month = (localtime)[4] + 1; - my @zip = split('-', $location->{zip}); - - my @args = ( - 'TaxType=S', #sales; 'P' = property - 'Src=0', #does something complicated - 'TAXABLE=', - 'Addr='.uri_escape($location->{address1}), - 'City='.uri_escape($location->{city}), - 'Zip='.$zip[0], - 'Zip1='.($zip[1] || ''), #optional - 'Year='.$year, - 'SYear='.$year, - 'Month='.$month, - 'EMon='.$month, +Returns undef on error, or if tax rate cannot be found using given address + +Query the WA State Dept of Revenue API with an address, and return +tax district information for that address. + +Documentation for the API can be found here: + +L + +This API does not return consistent usable county names, as the county +name may include appreviations or labels referring to PTBA (public transport +benefit area) or CEZ (community empowerment zone). It's recommended to use +the tool freeside-wa-tax-table-update to fully populate the +city/county/districts for WA state every financial quarter. + +Returns a hashref with the following keys: + + - district the wa state tax district id + - tax the combined total tax rate, as a percentage + - city the API rate name + - county The API address PTBA + - state WA + - country US + - exempt_amount 0 + +If api returns no district for address, generates system log error +and returns undef + +=cut + +sub wa_sales { + + # + # no die(): + # freeside-queued will issue dbh->rollback on die() ... this will + # also roll back system log messages about errors :/ freeside-queued + # doesn't propgate die messages into the system log. + # + + my $location_hash = shift; + + # Return when called with pointless context + return + unless $location_hash + && ref $location_hash + && $location_hash->{state} eq 'WA' + && $location_hash->{address1} + && $location_hash->{zip} + && $location_hash->{city}; + + my $log = FS::Log->new('wa_sales'); + + warn "wa_sales() called with location_hash:\n".Dumper( $location_hash)."\n" + if $DEBUG; + + my $api_url = 'http://webgis.dor.wa.gov/webapi/AddressRates.aspx'; + my @api_response_codes = ( + 'The address was found', + 'The address was not found, but the ZIP+4 was located.', + 'The address was updated and found, the user should validate the address record', + 'The address was updated and Zip+4 located, the user should validate the address record', + 'The address was corrected and found, the user should validate the address record', + 'Neither the address or ZIP+4 was found, but the 5-digit ZIP was located.', + 'The address, ZIP+4, and ZIP could not be found.', + 'Invalid Latitude/Longitude', + 'Internal error' ); - - my $query_string = join($delim, @args ); - $url .= "?$query_string"; - warn "\nrequest: $url\n\n" if $DEBUG > 1; - my $res = $ua->request( GET( "$url?$query_string" ) ); + my %get_query = ( + output => 'xml', + addr => $location_hash->{address1}, + city => $location_hash->{city}, + zip => substr( $location_hash->{zip}, 0, 5 ), + ); + my $get_string = join '&' => ( + map{ sprintf "%s=%s", $_, uri_escape( $get_query{$_} ) } + keys %get_query + ); - warn $res->as_string - if $DEBUG > 2; + my $prepared_url = "${api_url}?$get_string"; - if ($res->code ne '200') { - $error = $res->message; - } + warn "API call to URL: $prepared_url\n" + if $DEBUG; - my $content = $res->content; - my $p = new HTML::TokeParser \$content; - my $js = ''; - while ( my $t = $p->get_tag('script') ) { - my $u = $p->get_token; #either enclosed text or the tag - if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) { - $js = $u->[1]; - last; - } + my $dom; + local $@; + eval { $dom = XML::LibXML->load_xml( location => $prepared_url ); }; + if ( $@ ) { + my $error = + sprintf "Problem parsing XML from API URL(%s): %s", + $prepared_url, $@; + + $log->error( $error ); + warn $error; + return; } - if ( $js ) { #found it - # strip down to the quoted string, which contains escaped single quotes. - $js =~ s/.*\('tblSales'\);c.innerHTML='//s; - $js =~ s/(? 2; - - $p = new HTML::TokeParser \$js; - TD: while ( my $td = $p->get_tag('td') ) { - while ( my $u = $p->get_token ) { - next TD if $u->[0] eq 'E' and $u->[1] eq 'td'; - next if $u->[0] ne 'T'; # skip non-text - my $text = $u->[1]; - - if ( lc($text) eq 'location code' ) { - $p->get_tag('td'); # skip to the next column - undef $u; - $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text - $return->{'district'} = $u->[1]; - } - elsif ( lc($text) eq 'total tax rate' ) { - $p->get_tag('td'); - undef $u; - $u = $p->get_token until $u->[0] eq 'T'; - $return->{'tax'} = $u->[1]; - } - } # get_token - } # TD - - # just to make sure - if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) { - $return->{'tax'} *= 100; #percentage - warn Dumper($return) if $DEBUG > 1; - return $return; - } - else { - $error = 'district code/tax rate not found'; - } + + my ($res_root) = $dom->findnodes('/response'); + my ($res_addressline) = $dom->findnodes('/response/addressline'); + my ($res_rate) = $dom->findnodes('/response/rate'); + + my $res_code = $res_root->getAttribute('code') + if $res_root; + + unless ( + ref $res_root + && ref $res_addressline + && ref $res_rate + && $res_code <= 5 + && $res_root->getAttribute('rate') > 0 + ) { + my $error = + sprintf + "Problem querying WA DOR tax district - " . + "code( %s %s ) " . + "address( %s ) " . + "url( %s )", + $res_code || 'n/a', + $res_code ? $api_response_codes[$res_code] : 'n/a', + $location_hash->{address1}, + $prepared_url; + + $log->error( $error ); + warn "$error\n"; + return; } - else { - $error = "failed to parse document"; + + my %response = ( + exempt_amount => 0, + state => 'WA', + country => 'US', + district => $res_root->getAttribute('loccode'), + tax => $res_root->getAttribute('rate') * 100, + county => uc $res_addressline->getAttribute('ptba'), + city => uc $res_rate->getAttribute('name') + ); + + $response{county} =~ s/ PTBA//i; + + if ( $DEBUG ) { + warn "XML document: $dom\n"; + warn "API parsed response: ".Dumper( \%response )."\n"; } - die "WA tax district lookup error: $error"; + my $info_message = + sprintf + "Tax district(%s) selected for address(%s %s %s %s)", + $response{district}, + $location_hash->{address1}, + $location_hash->{city}, + $location_hash->{state}, + $location_hash->{zip}; + + $log->info( $info_message ); + warn "$info_message\n" + if $DEBUG; + + \%response; + } +###### USPS Standardization ###### + sub standardize_usps { my $class = shift; @@ -330,85 +366,78 @@ sub standardize_usps { addr_clean=> 'Y' } } -my %ezlocate_error = ( # USA_Geo_002 documentation - 10 => 'State not found', - 11 => 'City not found', - 12 => 'Invalid street address', - 14 => 'Street name not found', - 15 => 'Address range does not exist', - 16 => 'Ambiguous address', - 17 => 'Intersection not found', #unused? -); +###### U.S. Census Bureau ###### -sub standardize_ezlocate { +sub standardize_uscensus { my $self = shift; my $location = shift; - my $class; - #if ( $location->{country} eq 'US' ) { - # $class = 'USA_Geo_004Tool'; - #} - #elsif ( $location->{country} eq 'CA' ) { - # $class = 'CAN_Geo_001Tool'; - #} - #else { # shouldn't be a fatal error, just pass through unverified address - # warn "standardize_teleatlas: address lookup in '".$location->{country}. - # "' not available\n"; - # return $location; - #} - #my $path = $conf->config('teleatlas-path') || ''; - #local @INC = (@INC, $path); - #eval "use $class;"; - #if ( $@ ) { - # die "Loading $class failed:\n$@". - # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n"; - #} - - $class = 'Geo::EZLocate'; # use our own library - eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling + my $log = FS::Log->new('FS::Misc::Geo::standardize_uscensus'); + $log->debug(join("\n", @{$location}{'address1', 'city', 'state', 'zip'})); + + eval "use Geo::USCensus::Geocoding"; die $@ if $@; - my $userid = $conf->config('ezlocate-userid') - or die "no ezlocate-userid configured\n"; - my $password = $conf->config('ezlocate-password') - or die "no ezlocate-password configured\n"; - - my $tool = $class->new($userid, $password); - my $match = $tool->findAddress( - $location->{address1}, - $location->{city}, - $location->{state}, - $location->{zip}, #12345-6789 format is allowed - ); - warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1; - # error handling - B codes indicate success - die $ezlocate_error{$match->{MAT_STAT}}."\n" - unless $match->{MAT_STAT} =~ /^B\d$/; - - my %result = ( - address1 => $match->{MAT_ADDR}, - address2 => $location->{address2}, - city => $match->{MAT_CITY}, - state => $match->{MAT_ST}, - country => $location->{country}, - zip => $match->{MAT_ZIP}, - latitude => $match->{MAT_LAT}, - longitude => $match->{MAT_LON}, - censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}. - sprintf('%07.2f',$match->{CEN_TRCT}), - addr_clean => 'Y', - ); - if ( $match->{STD_ADDR} ) { - # then they have a postal standardized address for us - %result = ( %result, - address1 => $match->{STD_ADDR}, - address2 => $location->{address2}, - city => $match->{STD_CITY}, - state => $match->{STD_ST}, - zip => $match->{STD_ZIP}.'-'.$match->{STD_P4}, - ); + if ( $location->{country} ne 'US' ) { + # soft failure + warn "standardize_uscensus not for use in country ".$location->{country}."\n"; + $location->{addr_clean} = ''; + return $location; } - \%result; + my $request = { + street => $location->{address1}, + city => $location->{city}, + state => $location->{state}, + zip => $location->{zip}, + debug => ($DEBUG || 0), + }; + + my $result = Geo::USCensus::Geocoding->query($request); + if ( $result->is_match ) { + # unfortunately we get the address back as a single line + $log->debug($result->address); + if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) { + return +{ + address1 => $1, + city => $2, + state => $3, + zip => $4, + address2 => uc($location->{address2}), + latitude => $result->latitude, + longitude => $result->longitude, + censustract => $result->censustract, + }; + } else { + die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n"; + } + } elsif ( $result->match_level eq 'Tie' ) { + die "Geocoding was not able to identify a unique matching address.\n"; + } elsif ( $result->match_level ) { + die "Geocoding did not find a matching address.\n"; + } else { + $log->error($result->error_message); + return; # for internal errors, don't return anything + } +} + +####### EZLOCATE (obsolete) ####### + +sub _tomtom_query { # helper method for the below + my %args = @_; + my $result = Geo::TomTom::Geocoding->query(%args); + die "TomTom geocoding error: ".$result->message."\n" + unless ( $result->is_success ); + my ($match) = $result->locations; + my $type = $match->{type}; + # match levels below "intersection" should not be considered clean + my $clean = ($type eq 'addresspoint' || + $type eq 'poi' || + $type eq 'house' || + $type eq 'intersection' + ) ? 'Y' : ''; + warn "tomtom returned $type match\n" if $DEBUG; + warn Dumper($match) if $DEBUG > 1; + ($match, $clean); } sub standardize_tomtom { @@ -416,8 +445,7 @@ sub standardize_tomtom { # much better, but incompatible with ezlocate my $self = shift; my $location = shift; - my $class = 'Geo::TomTom::Geocoding'; - eval "use $class"; + eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US"; die $@ if $@; my $key = $conf->config('tomtom-userid') @@ -425,12 +453,25 @@ sub standardize_tomtom { my $country = code2country($location->{country}); my ($address1, $address2) = ($location->{address1}, $location->{address2}); + my $subloc = ''; + + # trim whitespace + $address1 =~ s/^\s+//; + $address1 =~ s/\s+$//; + $address2 =~ s/^\s+//; + $address2 =~ s/\s+$//; + # try to fix some cases of the address fields being switched if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) { $address2 = $address1; $address1 = $location->{address2}; } - my $result = $class->query( + # parse sublocation part (unit/suite/apartment...) and clean up + # non-sublocation address2 + ($subloc, $address2) = + subloc_address2($address1, $address2, $location->{country}); + # ask TomTom to standardize address1: + my %args = ( key => $key, T => $address1, L => $location->{city}, @@ -438,40 +479,48 @@ sub standardize_tomtom { PC => $location->{zip}, CC => country2code($country, LOCALE_CODE_ALPHA_3), ); - unless ( $result->is_success ) { - die "TomTom geocoding error: ".$result->message."\n"; + + my ($match, $clean) = _tomtom_query(%args); + + if (!$match or !$clean) { + # Then try cleaning up the input; TomTom is picky about junk in the + # address. Any of these can still be a clean match. + my $h = Geo::StreetAddress::US->parse_location($address1); + # First conservatively: + if ( $h->{sec_unit_type} ) { + my $strip = '\s+' . $h->{sec_unit_type}; + $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num}; + $strip .= '$'; + $args{T} =~ s/$strip//; + ($match, $clean) = _tomtom_query(%args); + } + if ( !$match or !$clean ) { + # Then more aggressively: + $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) ); + ($match, $clean) = _tomtom_query(%args); + } } - my ($match) = $result->locations; - if (!$match) { - die "Location not found.\n"; + + if ( !$match or !$clean ) { # partial matches are not useful + die "Address not found\n"; } - my $type = $match->{type}; - warn "tomtom returned $type match\n" if $DEBUG; - warn Dumper($match) if $DEBUG > 1; my $tract = ''; if ( defined $match->{censusTract} ) { $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}. join('.', $match->{censusTract} =~ /(....)(..)/); } - # match levels below "intersection" should not be considered clean - my $clean = ($type eq 'addresspoint' || - $type eq 'poi' || - $type eq 'house' || - $type eq 'intersection' - ) ? 'Y' : ''; - - $address2 = normalize_address2($address2, $location->{country}); - $address1 = ''; $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber}); $address1 .= $match->{street} if $match->{street}; + $address1 .= ' '.$subloc if $subloc; + $address1 = uc($address1); # USPS standards return +{ address1 => $address1, address2 => $address2, - city => $match->{city}, - state => $location->{state}, # this will never change - country => $location->{country}, # ditto + city => uc($match->{city}), + state => uc($location->{state}), + country => uc($location->{country}), zip => ($match->{standardPostalCode} || $match->{postcode}), latitude => $match->{latitude}, longitude => $match->{longitude}, @@ -480,15 +529,16 @@ sub standardize_tomtom { }; } -=iten normalize_address2 STRING, COUNTRY +=iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY -Given an 'address2' STRING, normalize it for COUNTRY postal standards. -Currently only works for US and CA. +Given 'address1' and 'address2' strings, extract the sublocation part +(from either one) and return it. If the sublocation was found in ADDRESS1, +also return ADDRESS2 (cleaned up for postal standards) as it's assumed to +contain something relevant. =cut -# XXX really ought to be a separate module -my %address2_forms = ( +my %subloc_forms = ( # Postal Addressing Standards, Appendix C # (plus correction of "hanger" to "hangar") US => {qw( @@ -529,26 +579,76 @@ my %address2_forms = ( )}, ); -sub normalize_address2 { +sub subloc_address2 { # Some things seen in the address2 field: # Whitespace # The complete address (with address1 containing part of the company name, # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite # number, etc.) - my ($addr2, $country) = @_; - $addr2 = uc($addr2); - if ( exists($address2_forms{$country}) ) { - my $dict = $address2_forms{$country}; - # protect this - $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g? - my @words; - # remove all punctuation and spaces - foreach my $w (split(/\W+/, $addr2)) { - if ( exists($dict->{$w}) ) { - push @words, $dict->{$w}; - } else { - push @words, $w; - } + + # try to parse sublocation parts from address1; if they are present we'll + # append them back to address1 after standardizing + my $subloc = ''; + my ($addr1, $addr2, $country) = map uc, @_; + my $dict = $subloc_forms{$country} or return('', $addr2); + + my $found_in = 0; # which address is the sublocation + my $h; + foreach my $string ( + # patterns to try to parse + $addr1, + "$addr1 Nullcity, CA" + ) { + $h = Geo::StreetAddress::US->parse_location($addr1); + last if exists($h->{sec_unit_type}); + } + if (exists($h->{sec_unit_type})) { + $found_in = 1 + } else { + foreach my $string ( + # more patterns + $addr2, + "$addr1, $addr2", + "$addr1, $addr2 Nullcity, CA" + ) { + $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2"); + last if exists($h->{sec_unit_type}); + } + if (exists($h->{sec_unit_type})) { + $found_in = 2; + } + } + if ( $found_in ) { + $subloc = $h->{sec_unit_type}; + # special case: do not combine P.O. box sublocs with address1 + if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) { + if ( $found_in == 2 ) { + $addr2 = "PO BOX ".$h->{sec_unit_num}; + } # else it's in addr1, and leave it alone + return ('', $addr2); + } elsif ( exists($dict->{$subloc}) ) { + # substitute the official abbreviation + $subloc = $dict->{$subloc}; + } + $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num}); + } # otherwise $subloc = '' + + if ( $found_in == 2 ) { + # address2 should be fully combined into address1 + return ($subloc, ''); + } + # else address2 is not the canonical sublocation, but do our best to + # clean it up + # + # protect this + $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g? + my @words; + # remove all punctuation and spaces + foreach my $w (split(/\W+/, $addr2)) { + if ( exists($dict->{$w}) ) { + push @words, $dict->{$w}; + } else { + push @words, $w; } my $result = join(' ', @words); # correct spacing of pound sign + number @@ -556,13 +656,108 @@ sub normalize_address2 { warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1; $addr2 = $result; } - $addr2; + $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly + ($subloc, $addr2); } +sub standardize_melissa { + my $class = shift; + my $location = shift; + + local $@; + eval "use Geo::Melissa::WebSmart"; + die $@ if $@; + + my $id = $conf->config('melissa-userid') + or die "no melissa-userid configured\n"; + my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0; + + my $request = { + id => $id, + a1 => $location->{address1}, + a2 => $location->{address2}, + city => $location->{city}, + state => $location->{state}, + ctry => $location->{country}, + zip => $location->{zip}, + geocode => $geocode, + }; + my $result = Geo::Melissa::WebSmart->query($request); + if ( $result->code =~ /AS01/ ) { # always present on success + my $addr = $result->address; + warn Dumper $addr if $DEBUG > 1; + my $out = { + address1 => $addr->{Address1}, + address2 => $addr->{Address2}, + city => $addr->{City}->{Name}, + state => $addr->{State}->{Abbreviation}, + country => $addr->{Country}->{Abbreviation}, + zip => $addr->{Zip}, + latitude => $addr->{Latitude}, + longitude => $addr->{Longitude}, + addr_clean => 'Y', + }; + if ( $addr->{Census}->{Tract} ) { + my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract}; + # insert decimal point two digits from the end + $censustract =~ s/(\d\d)$/\.$1/; + $out->{censustract} = $censustract; + $out->{censusyear} = $conf->config('census_year'); + } + # we could do a lot more nuanced reporting of the warning/status codes, + # but the UI doesn't support that yet. + return $out; + } else { + die $result->status_message; + } +} + +sub standardize_freeside { + my $class = shift; + my $location = shift; + + my $url = 'https://ws.freeside.biz/normalize'; + + #free freeside.biz normalization only for US + if ( $location->{country} ne 'US' ) { + # soft failure + #why? something else could have cleaned it $location->{addr_clean} = ''; + return $location; + } + + my $ua = LWP::UserAgent->new( + 'ssl_opts' => { + verify_hostname => 0, + SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, + }, + ); + my $response = $ua->request( POST $url, [ + 'support-key' => scalar($conf->config('support-key')), + %$location, + ]); + + die "Address normalization error: ". $response->message + unless $response->is_success; + + local $@; + my $content = eval { decode_json($response->content) }; + if ( $@ ) { + warn $response->content; + die "Address normalization JSON error : $@\n"; + } + + die $content->{error}."\n" + if $content->{error}; + + { 'addr_clean' => 'Y', + map { $_ => $content->{$_} } + qw( address1 address2 city state zip country ) + }; + +} =back =cut - 1;