TomTom address standardization, #13763

author Mark Wells <mark@freeside.biz>

Fri, 4 Oct 2013 22:40:43 +0000 (15:40 -0700)

committer Mark Wells <mark@freeside.biz>

Fri, 4 Oct 2013 22:40:43 +0000 (15:40 -0700)
author Mark Wells <mark@freeside.biz>
Fri, 4 Oct 2013 22:40:43 +0000 (15:40 -0700)
committer Mark Wells <mark@freeside.biz>
Fri, 4 Oct 2013 22:40:43 +0000 (15:40 -0700)
diff --git a/FS/FS/Conf.pm b/FS/FS/Conf.pm

index 517395d..ed8a117 100644 (file)
--- a/FS/FS/Conf.pm
+++ b/FS/FS/Conf.pm
@@ -4195,8 +4195,9 @@ and customer address. Include units.',
      'description' => 'Method for standardizing customer addresses.',
      'type'        => 'select',
      'select_hash' => [ '' => '', 
      'description' => 'Method for standardizing customer addresses.',
      'type'        => 'select',
      'select_hash' => [ '' => '', 
-                       'usps' => 'U.S. Postal Service',
+                       'usps'     => 'U.S. Postal Service',
                         'ezlocate' => 'EZLocate',
                         'ezlocate' => 'EZLocate',
+                       'tomtom'   => 'TomTom',
                       ],
    },
  
                       ],
    },
  
@@ -4215,6 +4216,13 @@ and customer address. Include units.',
    },
  
    {
    },
  
    {
+    'key'         => 'tomtom-userid',
+    'section'     => 'UI',
+    'description' => 'TomTom geocoding service API key.  See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key.  This is recommended for addresses in the United States only.',
+    'type'        => 'text',
+  },
+
+  {
      'key'         => 'ezlocate-userid',
      'section'     => 'UI',
      'description' => 'User ID for EZ-Locate service.  See <a href="http://www.geocode.com/">the TomTom website</a> for access and pricing information.',
      'key'         => 'ezlocate-userid',
      'section'     => 'UI',
      'description' => 'User ID for EZ-Locate service.  See <a href="http://www.geocode.com/">the TomTom website</a> for access and pricing information.',
diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm

index a93d98f..b5cc325 100644 (file)
--- a/FS/FS/Misc/Geo.pm
+++ b/FS/FS/Misc/Geo.pm
@@ -10,6 +10,7 @@ use HTML::TokeParser;
  use URI::Escape 3.31;
  use Data::Dumper;
  use FS::Conf;
  use URI::Escape 3.31;
  use Data::Dumper;
  use FS::Conf;
+use Locale::Country;
  
  FS::UID->install_callback( sub {
    $conf = new FS::Conf;
  
  FS::UID->install_callback( sub {
    $conf = new FS::Conf;
@@ -410,6 +411,155 @@ sub standardize_ezlocate {
    \%result;
  }
  
    \%result;
  }
  
+sub standardize_tomtom {
+  # post-2013 TomTom API
+  # much better, but incompatible with ezlocate
+  my $self = shift;
+  my $location = shift;
+  my $class = 'Geo::TomTom::Geocoding';
+  eval "use $class";
+  die $@ if $@;
+
+  my $key = $conf->config('tomtom-userid')
+    or die "no tomtom-userid configured\n";
+
+  my $country = code2country($location->{country});
+  my ($address1, $address2) = ($location->{address1}, $location->{address2});
+  # try to fix some cases of the address fields being switched
+  if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
+    $address2 = $address1;
+    $address1 = $location->{address2};
+  }
+  my $result = $class->query(
+    key => $key,
+    T   => $address1,
+    L   => $location->{city},
+    AA  => $location->{state},
+    PC  => $location->{zip},
+    CC  => country2code($country, LOCALE_CODE_ALPHA_3),
+  );
+  unless ( $result->is_success ) {
+    die "TomTom geocoding error: ".$result->message."\n";
+  }
+  my ($match) = $result->locations;
+  if (!$match) {
+    die "Location not found.\n";
+  }
+  my $type = $match->{type};
+  warn "tomtom returned $type match\n" if $DEBUG;
+  warn Dumper($match) if $DEBUG > 1;
+  my $tract = '';
+  if ( defined $match->{censusTract} ) {
+    $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
+             join('.', $match->{censusTract} =~ /(....)(..)/);
+  }
+  # match levels below "intersection" should not be considered clean
+  my $clean = ($type eq 'addresspoint'  ||
+               $type eq 'poi'           ||
+               $type eq 'house'         ||
+               $type eq 'intersection'
+              ) ? 'Y' : '';
+
+  $address2 = normalize_address2($address2, $location->{country});
+
+  $address1 = '';
+  $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
+  $address1 .= $match->{street} if $match->{street};
+
+  return +{
+    address1    => $address1,
+    address2    => $address2,
+    city        => $match->{city},
+    state       => $location->{state},    # this will never change
+    country     => $location->{country},  # ditto
+    zip         => ($match->{standardPostalCode} || $match->{postcode}),
+    latitude    => $match->{latitude},
+    longitude   => $match->{longitude},
+    censustract => $tract,
+    addr_clean  => $clean,
+  };
+}
+
+=iten normalize_address2 STRING, COUNTRY
+
+Given an 'address2' STRING, normalize it for COUNTRY postal standards.
+Currently only works for US and CA.
+
+=cut
+
+# XXX really ought to be a separate module
+my %address2_forms = (
+  # Postal Addressing Standards, Appendix C
+  # (plus correction of "hanger" to "hangar")
+  US => {qw(
+    APARTMENT     APT
+    BASEMENT      BSMT
+    BUILDING      BLDG
+    DEPARTMENT    DEPT
+    FLOOR         FL
+    FRONT         FRNT
+    HANGAR        HNGR
+    HANGER        HNGR
+    KEY           KEY
+    LOBBY         LBBY
+    LOT           LOT
+    LOWER         LOWR
+    OFFICE        OFC
+    PENTHOUSE     PH
+    PIER          PIER
+    REAR          REAR
+    ROOM          RM
+    SIDE          SIDE
+    SLIP          SLIP
+    SPACE         SPC
+    STOP          STOP
+    SUITE         STE
+    TRAILER       TRLR
+    UNIT          UNIT
+    UPPER         UPPR
+  )},
+  # Canada Post Addressing Guidelines 4.3
+  CA => {qw(
+    APARTMENT     APT
+    APPARTEMENT   APP
+    BUREAU        BUREAU
+    SUITE         SUITE
+    UNIT          UNIT
+    UNITÉ         UNITÉ
+  )},
+);
+ 
+sub normalize_address2 {
+  # Some things seen in the address2 field:
+  # Whitespace
+  # The complete address (with address1 containing part of the company name, 
+  # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
+  # number, etc.)
+  my ($addr2, $country) = @_;
+  $addr2 = uc($addr2);
+  if ( exists($address2_forms{$country}) ) {
+    my $dict = $address2_forms{$country};
+    # protect this
+    $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
+    my @words;
+    # remove all punctuation and spaces
+    foreach my $w (split(/\W+/, $addr2)) {
+      if ( exists($dict->{$w}) ) {
+        push @words, $dict->{$w};
+      } else {
+        push @words, $w;
+      }
+    }
+    my $result = join(' ', @words);
+    # correct spacing of pound sign + number
+    $result =~ s/NUMBER(\d)/# $1/;
+    warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
+    $addr2 = $result;
+  }
+  $addr2;
+}
+
+
  =back
  
  =cut
  =back
  
  =cut
diff --git a/FS/FS/cust_location.pm b/FS/FS/cust_location.pm

index b98ade1..11e97ec 100644 (file)
--- a/FS/FS/cust_location.pm
+++ b/FS/FS/cust_location.pm
@@ -10,6 +10,8 @@ use FS::Conf;
  use FS::prospect_main;
  use FS::cust_main;
  use FS::cust_main_county;
  use FS::prospect_main;
  use FS::cust_main;
  use FS::cust_main_county;
+use FS::GeocodeCache;
+use Date::Format qw( time2str );
  
  $import = 0;
  
  
  $import = 0;
  
@@ -677,6 +679,13 @@ sub process_censustract_update {
    return;
  }
  
    return;
  }
  
+=item process_set_coord
+
+Queueable function to find and fill in coordinates for all locations that 
+lack them.  Because this uses the Google Maps API, it's internally rate
+limited and must run in a single process.
+
+=cut
  
  sub process_set_coord {
    my $job = shift;
  
  sub process_set_coord {
    my $job = shift;
@@ -716,6 +725,67 @@ sub process_set_coord {
    return;
  }
  
    return;
  }
  
+=item process_standardize [ LOCATIONNUMS ]
+
+Performs address standardization on locations with unclean addresses,
+using whatever method you have configured.  If the standardize_* method 
+returns a I<clean> address match, the location will be updated.  This is 
+always an in-place update (because the physical location is the same, 
+and is just being referred to by a more accurate name).
+
+Disabled locations will be skipped, as nobody cares.
+
+If any LOCATIONNUMS are provided, only those locations will be updated.
+
+=cut
+
+sub process_standardize {
+  my $job = shift;
+  my @others = qsearch('queue', {
+      'status'  => 'locked',
+      'job'     => $job->job,
+      'jobnum'  => {op=>'!=', value=>$job->jobnum},
+  });
+  return if @others;
+  my @locationnums = grep /^\d+$/, @_;
+  my $where = "AND locationnum IN(".join(',',@locationnums).")"
+    if scalar(@locationnums);
+  my @locations = qsearch({
+      table     => 'cust_location',
+      hashref   => { addr_clean => '', disabled => '' },
+      extra_sql => $where,
+  });
+  my $n_todo = scalar(@locations);
+  my $n_done = 0;
+
+  # special: log this
+  my $log;
+  eval "use Text::CSV";
+  open $log, '>', "$FS::UID::cache_dir/process_standardize-" . 
+                  time2str('%Y%m%d',time) .
+                  ".csv";
+  my $csv = Text::CSV->new({binary => 1, eol => "\n"});
+
+  foreach my $cust_location (@locations) {
+    $job->update_statustext( int(100 * $n_done/$n_todo) . ",$n_done / $n_todo locations" ) if $job;
+    my $result = FS::GeocodeCache->standardize($cust_location);
+    if ( $result->{addr_clean} and !$result->{error} ) {
+      my @cols = ($cust_location->locationnum);
+      foreach (keys %$result) {
+        push @cols, $cust_location->get($_), $result->{$_};
+        $cust_location->set($_, $result->{$_});
+      }
+      # bypass immutable field restrictions
+      my $error = $cust_location->FS::Record::replace;
+      warn "location ".$cust_location->locationnum.": $error\n" if $error;
+      $csv->print($log, \@cols);
+    }
+    $n_done++;
+    dbh->commit; # so that we can resume if interrupted
+  }
+  close $log;
+}
+
  =head1 BUGS
  
  =head1 SEE ALSO
  =head1 BUGS
  
  =head1 SEE ALSO
diff --git a/bin/standardize-locations b/bin/standardize-locations

new file mode 100644 (file)

index 0000000..6e5fd3c
--- /dev/null
+++ b/bin/standardize-locations
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FS::UID 'adminsuidsetup';
+use FS::Conf;
+use FS::queue;
+
+my $user = shift or die "usage:\n  standardize-locations user";
+adminsuidsetup($user);
+my $conf = FS::Conf->new;
+my $method = $conf->config('address_standardize_method')
+  or die "No address standardization method configured.\n";
+if ($method eq 'usps') {
+  # we're not supposed to do this
+  # (allow it anyway with a warning?)
+  die "USPS standardization does not allow batch processing.\n";
+}
+my $job = FS::queue->new({
+  job => 'FS::cust_location::process_standardize'
+});
+my $error = $job->insert('_JOB');
+die $error if $error;
+print "Address standardization job scheduled.\n";
+
+1;
author	Mark Wells <mark@freeside.biz>
	Fri, 4 Oct 2013 22:40:43 +0000 (15:40 -0700)
committer	Mark Wells <mark@freeside.biz>
	Fri, 4 Oct 2013 22:40:43 +0000 (15:40 -0700)
FS/FS/Conf.pm		patch \| blob \| history
FS/FS/Misc/Geo.pm		patch \| blob \| history
FS/FS/cust_location.pm		patch \| blob \| history
bin/standardize-locations	[new file with mode: 0644]	patch \| blob