From: Mark Wells Date: Fri, 4 Oct 2013 22:25:20 +0000 (-0700) Subject: improvements to TomTom address standardization, #13763 X-Git-Url: http://git.freeside.biz/gitweb/?p=freeside.git;a=commitdiff_plain;h=7d967f5ac6929fddc08cc077bcd44ea48a3937f2 improvements to TomTom address standardization, #13763 --- diff --git a/FS/FS/Conf.pm b/FS/FS/Conf.pm index f0f2b4696..baca21dbd 100644 --- a/FS/FS/Conf.pm +++ b/FS/FS/Conf.pm @@ -4212,7 +4212,7 @@ and customer address. Include units.', { 'key' => 'tomtom-userid', 'section' => 'UI', - 'description' => 'TomTom geocoding service API key. See the TomTom website to obtain a key.', + 'description' => 'TomTom geocoding service API key. See the TomTom website to obtain a key. This is recommended for addresses in the United States only.', 'type' => 'text', }, diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm index 4dd6dc6e0..b5cc325d1 100644 --- a/FS/FS/Misc/Geo.pm +++ b/FS/FS/Misc/Geo.pm @@ -424,9 +424,15 @@ sub standardize_tomtom { or die "no tomtom-userid configured\n"; my $country = code2country($location->{country}); + my ($address1, $address2) = ($location->{address1}, $location->{address2}); + # try to fix some cases of the address fields being switched + if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) { + $address2 = $address1; + $address1 = $location->{address2}; + } my $result = $class->query( key => $key, - T => $location->{address1}, + T => $address1, L => $location->{city}, AA => $location->{state}, PC => $location->{zip}, @@ -439,24 +445,121 @@ sub standardize_tomtom { if (!$match) { die "Location not found.\n"; } - warn "tomtom returned match:\n".Dumper($match) if $DEBUG > 1; - my $tract = join('.', $match->{censusTract} =~ /(....)(..)/); + my $type = $match->{type}; + warn "tomtom returned $type match\n" if $DEBUG; + warn Dumper($match) if $DEBUG > 1; + my $tract = ''; + if ( defined $match->{censusTract} ) { + $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}. + join('.', $match->{censusTract} =~ /(....)(..)/); + } + # match levels below "intersection" should not be considered clean + my $clean = ($type eq 'addresspoint' || + $type eq 'poi' || + $type eq 'house' || + $type eq 'intersection' + ) ? 'Y' : ''; + + $address2 = normalize_address2($address2, $location->{country}); + + $address1 = ''; + $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber}); + $address1 .= $match->{street} if $match->{street}; + return +{ - address1 => join(' ', $match->{houseNumber}, $match->{street}), - address2 => $location->{address2}, # XXX still need a solution to this + address1 => $address1, + address2 => $address2, city => $match->{city}, - state => $match->{state}, - country => country2code($match->{country}, LOCALE_CODE_ALPHA_2), + state => $location->{state}, # this will never change + country => $location->{country}, # ditto zip => ($match->{standardPostalCode} || $match->{postcode}), latitude => $match->{latitude}, longitude => $match->{longitude}, - censustract => $match->{censusStateCode}. - $match->{censusFipsCountyCode}. - $tract, - addr_clean => 'Y', + censustract => $tract, + addr_clean => $clean, }; } +=iten normalize_address2 STRING, COUNTRY + +Given an 'address2' STRING, normalize it for COUNTRY postal standards. +Currently only works for US and CA. + +=cut + +# XXX really ought to be a separate module +my %address2_forms = ( + # Postal Addressing Standards, Appendix C + # (plus correction of "hanger" to "hangar") + US => {qw( + APARTMENT APT + BASEMENT BSMT + BUILDING BLDG + DEPARTMENT DEPT + FLOOR FL + FRONT FRNT + HANGAR HNGR + HANGER HNGR + KEY KEY + LOBBY LBBY + LOT LOT + LOWER LOWR + OFFICE OFC + PENTHOUSE PH + PIER PIER + REAR REAR + ROOM RM + SIDE SIDE + SLIP SLIP + SPACE SPC + STOP STOP + SUITE STE + TRAILER TRLR + UNIT UNIT + UPPER UPPR + )}, + # Canada Post Addressing Guidelines 4.3 + CA => {qw( + APARTMENT APT + APPARTEMENT APP + BUREAU BUREAU + SUITE SUITE + UNIT UNIT + UNITÉ UNITÉ + )}, +); + +sub normalize_address2 { + # Some things seen in the address2 field: + # Whitespace + # The complete address (with address1 containing part of the company name, + # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite + # number, etc.) + my ($addr2, $country) = @_; + $addr2 = uc($addr2); + if ( exists($address2_forms{$country}) ) { + my $dict = $address2_forms{$country}; + # protect this + $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g? + my @words; + # remove all punctuation and spaces + foreach my $w (split(/\W+/, $addr2)) { + if ( exists($dict->{$w}) ) { + push @words, $dict->{$w}; + } else { + push @words, $w; + } + } + my $result = join(' ', @words); + # correct spacing of pound sign + number + $result =~ s/NUMBER(\d)/# $1/; + warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1; + $addr2 = $result; + } + $addr2; +} + + =back =cut diff --git a/FS/FS/cust_location.pm b/FS/FS/cust_location.pm index b98ade157..11e97ecfe 100644 --- a/FS/FS/cust_location.pm +++ b/FS/FS/cust_location.pm @@ -10,6 +10,8 @@ use FS::Conf; use FS::prospect_main; use FS::cust_main; use FS::cust_main_county; +use FS::GeocodeCache; +use Date::Format qw( time2str ); $import = 0; @@ -677,6 +679,13 @@ sub process_censustract_update { return; } +=item process_set_coord + +Queueable function to find and fill in coordinates for all locations that +lack them. Because this uses the Google Maps API, it's internally rate +limited and must run in a single process. + +=cut sub process_set_coord { my $job = shift; @@ -716,6 +725,67 @@ sub process_set_coord { return; } +=item process_standardize [ LOCATIONNUMS ] + +Performs address standardization on locations with unclean addresses, +using whatever method you have configured. If the standardize_* method +returns a I address match, the location will be updated. This is +always an in-place update (because the physical location is the same, +and is just being referred to by a more accurate name). + +Disabled locations will be skipped, as nobody cares. + +If any LOCATIONNUMS are provided, only those locations will be updated. + +=cut + +sub process_standardize { + my $job = shift; + my @others = qsearch('queue', { + 'status' => 'locked', + 'job' => $job->job, + 'jobnum' => {op=>'!=', value=>$job->jobnum}, + }); + return if @others; + my @locationnums = grep /^\d+$/, @_; + my $where = "AND locationnum IN(".join(',',@locationnums).")" + if scalar(@locationnums); + my @locations = qsearch({ + table => 'cust_location', + hashref => { addr_clean => '', disabled => '' }, + extra_sql => $where, + }); + my $n_todo = scalar(@locations); + my $n_done = 0; + + # special: log this + my $log; + eval "use Text::CSV"; + open $log, '>', "$FS::UID::cache_dir/process_standardize-" . + time2str('%Y%m%d',time) . + ".csv"; + my $csv = Text::CSV->new({binary => 1, eol => "\n"}); + + foreach my $cust_location (@locations) { + $job->update_statustext( int(100 * $n_done/$n_todo) . ",$n_done / $n_todo locations" ) if $job; + my $result = FS::GeocodeCache->standardize($cust_location); + if ( $result->{addr_clean} and !$result->{error} ) { + my @cols = ($cust_location->locationnum); + foreach (keys %$result) { + push @cols, $cust_location->get($_), $result->{$_}; + $cust_location->set($_, $result->{$_}); + } + # bypass immutable field restrictions + my $error = $cust_location->FS::Record::replace; + warn "location ".$cust_location->locationnum.": $error\n" if $error; + $csv->print($log, \@cols); + } + $n_done++; + dbh->commit; # so that we can resume if interrupted + } + close $log; +} + =head1 BUGS =head1 SEE ALSO diff --git a/bin/standardize-locations b/bin/standardize-locations new file mode 100755 index 000000000..6e5fd3c16 --- /dev/null +++ b/bin/standardize-locations @@ -0,0 +1,25 @@ +#!/usr/bin/perl -w + +use strict; +use FS::UID 'adminsuidsetup'; +use FS::Conf; +use FS::queue; + +my $user = shift or die "usage:\n standardize-locations user"; +adminsuidsetup($user); +my $conf = FS::Conf->new; +my $method = $conf->config('address_standardize_method') + or die "No address standardization method configured.\n"; +if ($method eq 'usps') { + # we're not supposed to do this + # (allow it anyway with a warning?) + die "USPS standardization does not allow batch processing.\n"; +} +my $job = FS::queue->new({ + job => 'FS::cust_location::process_standardize' +}); +my $error = $job->insert('_JOB'); +die $error if $error; +print "Address standardization job scheduled.\n"; + +1;