diff options
author | Mark Wells <mark@freeside.biz> | 2013-10-04 15:25:20 -0700 |
---|---|---|
committer | Mark Wells <mark@freeside.biz> | 2013-10-04 15:25:20 -0700 |
commit | 7d967f5ac6929fddc08cc077bcd44ea48a3937f2 (patch) | |
tree | 985256b6f9970c6ff148d587900fc421dfa11561 /FS/FS/Misc/Geo.pm | |
parent | 5d1f486c543c2e61cea6c050bed86c0c9815085e (diff) |
improvements to TomTom address standardization, #13763
Diffstat (limited to 'FS/FS/Misc/Geo.pm')
-rw-r--r-- | FS/FS/Misc/Geo.pm | 125 |
1 file changed, 114 insertions, 11 deletions
diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm index 4dd6dc6..b5cc325 100644 --- a/FS/FS/Misc/Geo.pm +++ b/FS/FS/Misc/Geo.pm @@ -424,9 +424,15 @@ sub standardize_tomtom { or die "no tomtom-userid configured\n"; my $country = code2country($location->{country}); + my ($address1, $address2) = ($location->{address1}, $location->{address2}); + # try to fix some cases of the address fields being switched + if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) { + $address2 = $address1; + $address1 = $location->{address2}; + } my $result = $class->query( key => $key, - T => $location->{address1}, + T => $address1, L => $location->{city}, AA => $location->{state}, PC => $location->{zip}, @@ -439,24 +445,121 @@ sub standardize_tomtom { if (!$match) { die "Location not found.\n"; } - warn "tomtom returned match:\n".Dumper($match) if $DEBUG > 1; - my $tract = join('.', $match->{censusTract} =~ /(....)(..)/); + my $type = $match->{type}; + warn "tomtom returned $type match\n" if $DEBUG; + warn Dumper($match) if $DEBUG > 1; + my $tract = ''; + if ( defined $match->{censusTract} ) { + $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}. + join('.', $match->{censusTract} =~ /(....)(..)/); + } + # match levels below "intersection" should not be considered clean + my $clean = ($type eq 'addresspoint' || + $type eq 'poi' || + $type eq 'house' || + $type eq 'intersection' + ) ? 'Y' : ''; + + $address2 = normalize_address2($address2, $location->{country}); + + $address1 = ''; + $address1 = $match->{houseNumber} . 
' ' if length($match->{houseNumber}); + $address1 .= $match->{street} if $match->{street}; + return +{ - address1 => join(' ', $match->{houseNumber}, $match->{street}), - address2 => $location->{address2}, # XXX still need a solution to this + address1 => $address1, + address2 => $address2, city => $match->{city}, - state => $match->{state}, - country => country2code($match->{country}, LOCALE_CODE_ALPHA_2), + state => $location->{state}, # this will never change + country => $location->{country}, # ditto zip => ($match->{standardPostalCode} || $match->{postcode}), latitude => $match->{latitude}, longitude => $match->{longitude}, - censustract => $match->{censusStateCode}. - $match->{censusFipsCountyCode}. - $tract, - addr_clean => 'Y', + censustract => $tract, + addr_clean => $clean, }; } +=iten normalize_address2 STRING, COUNTRY + +Given an 'address2' STRING, normalize it for COUNTRY postal standards. +Currently only works for US and CA. + +=cut + +# XXX really ought to be a separate module +my %address2_forms = ( + # Postal Addressing Standards, Appendix C + # (plus correction of "hanger" to "hangar") + US => {qw( + APARTMENT APT + BASEMENT BSMT + BUILDING BLDG + DEPARTMENT DEPT + FLOOR FL + FRONT FRNT + HANGAR HNGR + HANGER HNGR + KEY KEY + LOBBY LBBY + LOT LOT + LOWER LOWR + OFFICE OFC + PENTHOUSE PH + PIER PIER + REAR REAR + ROOM RM + SIDE SIDE + SLIP SLIP + SPACE SPC + STOP STOP + SUITE STE + TRAILER TRLR + UNIT UNIT + UPPER UPPR + )}, + # Canada Post Addressing Guidelines 4.3 + CA => {qw( + APARTMENT APT + APPARTEMENT APP + BUREAU BUREAU + SUITE SUITE + UNIT UNIT + UNITÉ UNITÉ + )}, +); + +sub normalize_address2 { + # Some things seen in the address2 field: + # Whitespace + # The complete address (with address1 containing part of the company name, + # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite + # number, etc.) 
+ my ($addr2, $country) = @_; + $addr2 = uc($addr2); + if ( exists($address2_forms{$country}) ) { + my $dict = $address2_forms{$country}; + # protect this + $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g? + my @words; + # remove all punctuation and spaces + foreach my $w (split(/\W+/, $addr2)) { + if ( exists($dict->{$w}) ) { + push @words, $dict->{$w}; + } else { + push @words, $w; + } + } + my $result = join(' ', @words); + # correct spacing of pound sign + number + $result =~ s/NUMBER(\d)/# $1/; + warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1; + $addr2 = $result; + } + $addr2; +} + + =back =cut |