+my %subloc_forms = (
+ # Postal Addressing Standards, Appendix C
+ # (plus correction of "hanger" to "hangar")
+ US => {qw(
+ APARTMENT APT
+ BASEMENT BSMT
+ BUILDING BLDG
+ DEPARTMENT DEPT
+ FLOOR FL
+ FRONT FRNT
+ HANGAR HNGR
+ HANGER HNGR
+ KEY KEY
+ LOBBY LBBY
+ LOT LOT
+ LOWER LOWR
+ OFFICE OFC
+ PENTHOUSE PH
+ PIER PIER
+ REAR REAR
+ ROOM RM
+ SIDE SIDE
+ SLIP SLIP
+ SPACE SPC
+ STOP STOP
+ SUITE STE
+ TRAILER TRLR
+ UNIT UNIT
+ UPPER UPPR
+ )},
+ # Canada Post Addressing Guidelines 4.3
+ CA => {qw(
+ APARTMENT APT
+ APPARTEMENT APP
+ BUREAU BUREAU
+ SUITE SUITE
+ UNIT UNIT
+ UNITÉ UNITÉ
+ )},
+);
+
+sub subloc_address2 {
+ # Some things seen in the address2 field:
+ # Whitespace
+ # The complete address (with address1 containing part of the company name,
+ # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
+ # number, etc.)
+
+ # try to parse sublocation parts from address1; if they are present we'll
+ # append them back to address1 after standardizing
+ my $subloc = '';
+ my ($addr1, $addr2, $country) = map uc, @_;
+ my $dict = $subloc_forms{$country} or return('', $addr2);
+
+ my $found_in = 0; # which address is the sublocation
+ my $h;
+ foreach my $string (
+ # patterns to try to parse
+ $addr1,
+ "$addr1 Nullcity, CA"
+ ) {
+ $h = Geo::StreetAddress::US->parse_location($addr1);
+ last if exists($h->{sec_unit_type});
+ }
+ if (exists($h->{sec_unit_type})) {
+ $found_in = 1
+ } else {
+ foreach my $string (
+ # more patterns
+ $addr2,
+ "$addr1, $addr2",
+ "$addr1, $addr2 Nullcity, CA"
+ ) {
+ $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
+ last if exists($h->{sec_unit_type});
+ }
+ if (exists($h->{sec_unit_type})) {
+ $found_in = 2;
+ }
+ }
+ if ( $found_in ) {
+ $subloc = $h->{sec_unit_type};
+ # special case: do not combine P.O. box sublocs with address1
+ if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
+ if ( $found_in == 2 ) {
+ $addr2 = "PO BOX ".$h->{sec_unit_num};
+ } # else it's in addr1, and leave it alone
+ return ('', $addr2);
+ } elsif ( exists($dict->{$subloc}) ) {
+ # substitute the official abbreviation
+ $subloc = $dict->{$subloc};
+ }
+ $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
+ } # otherwise $subloc = ''
+
+ if ( $found_in == 2 ) {
+ # address2 should be fully combined into address1
+ return ($subloc, '');
+ }
+ # else address2 is not the canonical sublocation, but do our best to
+ # clean it up
+ #
+ # protect this
+ $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
+ my @words;
+ # remove all punctuation and spaces
+ foreach my $w (split(/\W+/, $addr2)) {
+ if ( exists($dict->{$w}) ) {
+ push @words, $dict->{$w};
+ } else {
+ push @words, $w;
+ }
+ my $result = join(' ', @words);
+ # correct spacing of pound sign + number
+ $result =~ s/NUMBER(\d)/# $1/;
+ warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
+ $addr2 = $result;
+ }
+ ($subloc, $addr2);
+}
+
+
+=back
+
+=cut