summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wells <mark@freeside.biz>2013-10-04 15:25:20 -0700
committerMark Wells <mark@freeside.biz>2013-10-04 15:25:20 -0700
commit7d967f5ac6929fddc08cc077bcd44ea48a3937f2 (patch)
tree985256b6f9970c6ff148d587900fc421dfa11561
parent5d1f486c543c2e61cea6c050bed86c0c9815085e (diff)
improvements to TomTom address standardization, #13763
-rw-r--r--FS/FS/Conf.pm2
-rw-r--r--FS/FS/Misc/Geo.pm125
-rw-r--r--FS/FS/cust_location.pm70
-rwxr-xr-xbin/standardize-locations25
4 files changed, 210 insertions, 12 deletions
diff --git a/FS/FS/Conf.pm b/FS/FS/Conf.pm
index f0f2b46..baca21d 100644
--- a/FS/FS/Conf.pm
+++ b/FS/FS/Conf.pm
@@ -4212,7 +4212,7 @@ and customer address. Include units.',
{
'key' => 'tomtom-userid',
'section' => 'UI',
- 'description' => 'TomTom geocoding service API key. See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key.',
+ 'description' => 'TomTom geocoding service API key. See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key. This is recommended for addresses in the United States only.',
'type' => 'text',
},
diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm
index 4dd6dc6..b5cc325 100644
--- a/FS/FS/Misc/Geo.pm
+++ b/FS/FS/Misc/Geo.pm
@@ -424,9 +424,15 @@ sub standardize_tomtom {
or die "no tomtom-userid configured\n";
my $country = code2country($location->{country});
+ my ($address1, $address2) = ($location->{address1}, $location->{address2});
+ # try to fix some cases of the address fields being switched
+ if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
+ $address2 = $address1;
+ $address1 = $location->{address2};
+ }
my $result = $class->query(
key => $key,
- T => $location->{address1},
+ T => $address1,
L => $location->{city},
AA => $location->{state},
PC => $location->{zip},
@@ -439,24 +445,121 @@ sub standardize_tomtom {
if (!$match) {
die "Location not found.\n";
}
- warn "tomtom returned match:\n".Dumper($match) if $DEBUG > 1;
- my $tract = join('.', $match->{censusTract} =~ /(....)(..)/);
+ my $type = $match->{type};
+ warn "tomtom returned $type match\n" if $DEBUG;
+ warn Dumper($match) if $DEBUG > 1;
+ my $tract = '';
+ if ( defined $match->{censusTract} ) {
+ $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
+ join('.', $match->{censusTract} =~ /(....)(..)/);
+ }
+ # match levels below "intersection" should not be considered clean
+ my $clean = ($type eq 'addresspoint' ||
+ $type eq 'poi' ||
+ $type eq 'house' ||
+ $type eq 'intersection'
+ ) ? 'Y' : '';
+
+ $address2 = normalize_address2($address2, $location->{country});
+
+ $address1 = '';
+ $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
+ $address1 .= $match->{street} if $match->{street};
+
return +{
- address1 => join(' ', $match->{houseNumber}, $match->{street}),
- address2 => $location->{address2}, # XXX still need a solution to this
+ address1 => $address1,
+ address2 => $address2,
city => $match->{city},
- state => $match->{state},
- country => country2code($match->{country}, LOCALE_CODE_ALPHA_2),
+ state => $location->{state}, # this will never change
+ country => $location->{country}, # ditto
zip => ($match->{standardPostalCode} || $match->{postcode}),
latitude => $match->{latitude},
longitude => $match->{longitude},
- censustract => $match->{censusStateCode}.
- $match->{censusFipsCountyCode}.
- $tract,
- addr_clean => 'Y',
+ censustract => $tract,
+ addr_clean => $clean,
};
}
+=iten normalize_address2 STRING, COUNTRY
+
+Given an 'address2' STRING, normalize it for COUNTRY postal standards.
+Currently only works for US and CA.
+
+=cut
+
+# XXX really ought to be a separate module
+my %address2_forms = (
+ # Postal Addressing Standards, Appendix C
+ # (plus correction of "hanger" to "hangar")
+ US => {qw(
+ APARTMENT APT
+ BASEMENT BSMT
+ BUILDING BLDG
+ DEPARTMENT DEPT
+ FLOOR FL
+ FRONT FRNT
+ HANGAR HNGR
+ HANGER HNGR
+ KEY KEY
+ LOBBY LBBY
+ LOT LOT
+ LOWER LOWR
+ OFFICE OFC
+ PENTHOUSE PH
+ PIER PIER
+ REAR REAR
+ ROOM RM
+ SIDE SIDE
+ SLIP SLIP
+ SPACE SPC
+ STOP STOP
+ SUITE STE
+ TRAILER TRLR
+ UNIT UNIT
+ UPPER UPPR
+ )},
+ # Canada Post Addressing Guidelines 4.3
+ CA => {qw(
+ APARTMENT APT
+ APPARTEMENT APP
+ BUREAU BUREAU
+ SUITE SUITE
+ UNIT UNIT
+ UNITÉ UNITÉ
+ )},
+);
+
+sub normalize_address2 {
+ # Some things seen in the address2 field:
+ # Whitespace
+ # The complete address (with address1 containing part of the company name,
+ # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
+ # number, etc.)
+ my ($addr2, $country) = @_;
+ $addr2 = uc($addr2);
+ if ( exists($address2_forms{$country}) ) {
+ my $dict = $address2_forms{$country};
+ # protect this
+ $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
+ my @words;
+ # remove all punctuation and spaces
+ foreach my $w (split(/\W+/, $addr2)) {
+ if ( exists($dict->{$w}) ) {
+ push @words, $dict->{$w};
+ } else {
+ push @words, $w;
+ }
+ }
+ my $result = join(' ', @words);
+ # correct spacing of pound sign + number
+ $result =~ s/NUMBER(\d)/# $1/;
+ warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
+ $addr2 = $result;
+ }
+ $addr2;
+}
+
+
=back
=cut
diff --git a/FS/FS/cust_location.pm b/FS/FS/cust_location.pm
index b98ade1..11e97ec 100644
--- a/FS/FS/cust_location.pm
+++ b/FS/FS/cust_location.pm
@@ -10,6 +10,8 @@ use FS::Conf;
use FS::prospect_main;
use FS::cust_main;
use FS::cust_main_county;
+use FS::GeocodeCache;
+use Date::Format qw( time2str );
$import = 0;
@@ -677,6 +679,13 @@ sub process_censustract_update {
return;
}
+=item process_set_coord
+
+Queueable function to find and fill in coordinates for all locations that
+lack them. Because this uses the Google Maps API, it's internally rate
+limited and must run in a single process.
+
+=cut
sub process_set_coord {
my $job = shift;
@@ -716,6 +725,67 @@ sub process_set_coord {
return;
}
+=item process_standardize [ LOCATIONNUMS ]
+
+Performs address standardization on locations with unclean addresses,
+using whatever method you have configured. If the standardize_* method
+returns a I<clean> address match, the location will be updated. This is
+always an in-place update (because the physical location is the same,
+and is just being referred to by a more accurate name).
+
+Disabled locations will be skipped, as nobody cares.
+
+If any LOCATIONNUMS are provided, only those locations will be updated.
+
+=cut
+
+sub process_standardize {
+ my $job = shift;
+ my @others = qsearch('queue', {
+ 'status' => 'locked',
+ 'job' => $job->job,
+ 'jobnum' => {op=>'!=', value=>$job->jobnum},
+ });
+ return if @others;
+ my @locationnums = grep /^\d+$/, @_;
+ my $where = "AND locationnum IN(".join(',',@locationnums).")"
+ if scalar(@locationnums);
+ my @locations = qsearch({
+ table => 'cust_location',
+ hashref => { addr_clean => '', disabled => '' },
+ extra_sql => $where,
+ });
+ my $n_todo = scalar(@locations);
+ my $n_done = 0;
+
+ # special: log this
+ my $log;
+ eval "use Text::CSV";
+ open $log, '>', "$FS::UID::cache_dir/process_standardize-" .
+ time2str('%Y%m%d',time) .
+ ".csv";
+ my $csv = Text::CSV->new({binary => 1, eol => "\n"});
+
+ foreach my $cust_location (@locations) {
+ $job->update_statustext( int(100 * $n_done/$n_todo) . ",$n_done / $n_todo locations" ) if $job;
+ my $result = FS::GeocodeCache->standardize($cust_location);
+ if ( $result->{addr_clean} and !$result->{error} ) {
+ my @cols = ($cust_location->locationnum);
+ foreach (keys %$result) {
+ push @cols, $cust_location->get($_), $result->{$_};
+ $cust_location->set($_, $result->{$_});
+ }
+ # bypass immutable field restrictions
+ my $error = $cust_location->FS::Record::replace;
+ warn "location ".$cust_location->locationnum.": $error\n" if $error;
+ $csv->print($log, \@cols);
+ }
+ $n_done++;
+ dbh->commit; # so that we can resume if interrupted
+ }
+ close $log;
+}
+
=head1 BUGS
=head1 SEE ALSO
diff --git a/bin/standardize-locations b/bin/standardize-locations
new file mode 100755
index 0000000..6e5fd3c
--- /dev/null
+++ b/bin/standardize-locations
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FS::UID 'adminsuidsetup';
+use FS::Conf;
+use FS::queue;
+
+my $user = shift or die "usage:\n standardize-locations user";
+adminsuidsetup($user);
+my $conf = FS::Conf->new;
+my $method = $conf->config('address_standardize_method')
+ or die "No address standardization method configured.\n";
+if ($method eq 'usps') {
+ # we're not supposed to do this
+ # (allow it anyway with a warning?)
+ die "USPS standardization does not allow batch processing.\n";
+}
+my $job = FS::queue->new({
+ job => 'FS::cust_location::process_standardize'
+});
+my $error = $job->insert('_JOB');
+die $error if $error;
+print "Address standardization job scheduled.\n";
+
+1;