From c16e8ca7ab01db1321d407a8eba212440b0c97c9 Mon Sep 17 00:00:00 2001 From: Mark Wells Date: Wed, 4 Mar 2015 12:10:04 -0800 Subject: [PATCH] switch to CSV API for better reliability, prepare 0.01 release --- Changes | 3 +- Geocoding.pm | 151 +++++++++++++++++++++++----------------------------- Geocoding/Match.pm | 54 ------------------- Geocoding/Result.pm | 23 ++++++++ MANIFEST | 4 +- Makefile.PL | 3 +- debian/control | 12 +++-- debian/copyright | 1 + t/01-lookup.t | 8 +-- t/02-fail.t | 3 +- 10 files changed, 114 insertions(+), 148 deletions(-) delete mode 100644 Geocoding/Match.pm create mode 100644 Geocoding/Result.pm diff --git a/Changes b/Changes index 3514195..67924d7 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,4 @@ Revision history for Geo-USCensus-Geocoding -unreleased +0.01 Mar 03 2015 + initial release diff --git a/Geocoding.pm b/Geocoding.pm index a7e5ba1..04605ad 100644 --- a/Geocoding.pm +++ b/Geocoding.pm @@ -4,9 +4,9 @@ use strict; use warnings; use LWP::UserAgent; -use JSON; -use URI; -use Geo::USCensus::Geocoding::Match; +use HTTP::Request::Common; +use Geo::USCensus::Geocoding::Result; +use Text::CSV; =head1 NAME @@ -39,11 +39,12 @@ our $DEBUG = 0; }; my $result = Geo::USCensus::Geocoding->query($request); - if ($result->matches) { - my $match = $result->match(0); - print $match->matchedAddress,"\n", - $match->coordinates->{x},',',$match->coordinates->{y},"\n", - $match->censustract,"\n"; + if ($result->is_match) { + print $result->address,"\n", + $result->latitude,", ",$result->longitude,"\n", + $result->censustract,"\n"; + } else { + print "No match.\n"; } =head1 CLASS METHODS @@ -52,20 +53,29 @@ our $DEBUG = 0; Send a request to the web service. See L for API documentation. This -package will always use the JSON data format and the Geographies return type. +package will always use the batch method (which seems to be more reliable, +as of 2015) and the Geographies return type. -Returns an object of class Geo::USCensus::Geocoding. +Returns an object of class Geo::USCensus::Geocoding::Result. =cut my $ua = LWP::UserAgent->new; -my $api_uri = 'http://geocoding.geo.census.gov/geocoder/geographies/address'; +my $url = 'http://geocoding.geo.census.gov/geocoder/geographies/addressbatch'; + +my $csv = Text::CSV->new({eol => "\n", binary => 1}); + +# for a current list of benchmark/vintage IDs, download +# http://geocoding.geo.census.gov/geocoder/benchmarks +# http://geocoding.geo.census.gov/geocoder/vintages?benchmark= +# with Accept: application/json sub query { my $class = shift; my %opt = ( - benchmark => 'Public_AR_Census2010', - vintage => 'Census2010_Census2010', + returntype => 'geographies', + benchmark => 4, # "Current" + vintage => 4, # "Current" ); if (ref $_[0] eq 'HASH') { %opt = (%opt, %{ $_[0] }); @@ -74,86 +84,61 @@ sub query { } $DEBUG = $opt{debug} || 0; - $opt{format} = 'json'; + my $result = Geo::USCensus::Geocoding::Result->new; + + my @row = ( 1 ); # first element = row identifier + # at some point support multiple rows in a single query? foreach (qw(street city state zip)) { - die "$_ required\n" unless length($opt{$_}); + if (!length($opt{$_})) { + $result->error_message("$_ required"); + return $result; + } + push @row, $opt{$_}; } - my $uri = URI->new($api_uri); - $uri->query_form(\%opt); - warn "$class->query\n$uri\n\n" if $DEBUG; - my $http_req = HTTP::Request->new(GET => $uri->as_string); - my $resp = $ua->request($http_req); - my $self = { addr_response => $resp }; - bless $self, $class; + $csv->combine(@row); + warn "Sending:\n".$csv->string."\n" if $DEBUG; + + # they are not picky about content types, Accept headers, etc., but + # the uploaded file must have a _name_. + my $resp = $ua->request(POST $url, + 'Content_Type' => 'form-data', + 'Content' => [ benchmark => $opt{benchmark}, + vintage => $opt{vintage}, + returntype => $opt{returntype}, + addressFile => [ undef, 'upload.csv', + Content => $csv->string + ], + ], + ); if ( $resp->is_success ) { - local $@; - my $tree = eval { from_json($resp->content) }; - if ($@) { - $self->message("Unable to parse response:\n$@"); - return $self; + $result->content($resp->content); + my $status = $csv->parse($resp->content); + my @fields = $csv->fields; + if (!$status or @fields < 3) { + $result->error_message("Unable to parse response:\n" . $resp->content); + return $result; } - if (!exists $tree->{result}) { - $self->message("Response does not contain geocoding results."); - warn $self->message. "\n".$resp->content."\n\n"; - return $self; + if ( $fields[2] eq 'Match' ) { + $result->is_match(1); + $result->match_level($fields[3]); + $result->address($fields[4]); + my ($lat, $long) = split(',', $fields[5]); + $result->latitude($lat); + $result->longitude($long); + $result->state($fields[8]); + $result->county($fields[9]); + $result->tract($fields[10]); + $result->block($fields[11]); + } else { + $result->is_match(0); } - $tree = $tree->{result}; - - my @matches; - if (exists( $tree->{addressMatches} )) { - foreach my $am (@{ $tree->{addressMatches} }) { - push @matches, Geo::USCensus::Geocoding::Match->new($am); - } - } # else what? does this happen if there's no match? a proper REST - # interface should throw a 404 - $self->{matches} = \@matches; } else { - $self->message( $resp->status_line ); - } - $self; -} - -=head1 METHODS - -=head2 message - -Sets/gets an explicit error status. - -=cut - -sub message { - my $self = shift; - if (@_) { - $self->{_message} = shift; + $result->error_message( $resp->status_line ); } - $self->{_message} || ''; -} - -=head2 matches - -Returns the number of matches found. - -=cut - -sub matches { - my $self = shift; - $self->{matches} ? scalar @{ $self->{matches} } : 0; -} - -=head2 match NUMBER - -Returns a specific match (starting from zero). Matches are returned -as L objects, in the order they were -returned by the service. - -=cut -sub match { - my $self = shift; - my $i = shift; - $self->{matches}->[$i]; + return $result; } =head1 AUTHOR diff --git a/Geocoding/Match.pm b/Geocoding/Match.pm deleted file mode 100644 index b4a7675..0000000 --- a/Geocoding/Match.pm +++ /dev/null @@ -1,54 +0,0 @@ -package Geo::USCensus::Geocoding::Match; - -use strict; -use Data::Dumper; - -sub new { - my $class = shift; - my $address = shift; - my $census = shift; - - my $self = { %$address }; - bless $self, $class; -} - -sub matchedAddress { - my $self = shift; - $self->{matchedAddress}; -} - -sub coordinates { - my $self = shift; - $self->{coordinates}; -} - -sub addressComponents { - my $self = shift; - $self->{addressComponents}; -} - -sub geographies { - my $self = shift; - $self->{geographies}; -} - -sub block_info { - my $self = shift; - my $geo = $self->{geographies}; - my $block_info = $geo->{'2010 Census Blocks'}; # XXX change this in 2020 - if ($block_info and exists($block_info->[0])) { - return $block_info->[0]; - } else { - warn "2010 Census Blocks element not found in response\n"; - warn Dumper $self->{geographies}; - return ''; - } -} - -sub censustract { - my $self = shift; - my $block = $self->block_info or return ''; - return $block->{STATE} . $block->{COUNTY} . $block->{TRACT}; -} - -1; diff --git a/Geocoding/Result.pm b/Geocoding/Result.pm new file mode 100644 index 0000000..e0e271a --- /dev/null +++ b/Geocoding/Result.pm @@ -0,0 +1,23 @@ +package Geo::USCensus::Geocoding::Result; + +use Moo; # just for attribute declaration + +has 'is_match' => ( is => 'rw', default => 0 ); +has [ 'content', + 'match_level', + 'address', + 'state', + 'county', + 'tract', + 'block', + 'error_message', + 'latitude', + 'longitude' + ] => ( is => 'rw', default => '' ); + +sub censustract { + my $self = shift; + return join('', $self->state, $self->county, $self->tract); +} + +1; diff --git a/MANIFEST b/MANIFEST index 2c57785..e634c5d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,7 +1,9 @@ Changes Geocoding.pm -Geocoding/Match.pm +Geocoding/Result.pm Makefile.PL MANIFEST This list of files README t/00-load.t +t/01-lookup.t +t/02-fail.t diff --git a/Makefile.PL b/Makefile.PL index fa04a9a..908534b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -14,8 +14,9 @@ WriteMakefile( PL_FILES => {}, PREREQ_PM => { 'Test::More' => 0, - 'XML::LibXML' => 2, 'LWP::UserAgent' => 0, + 'Text::CSV' => 0, + 'Moo' => 1, }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'Geo-USCensus-Geocoding-*' }, diff --git a/debian/control b/debian/control index 52804b3..2f2102a 100644 --- a/debian/control +++ b/debian/control @@ -1,15 +1,21 @@ Source: libgeo-uscensus-geocoding-perl Section: perl Priority: optional -Maintainer: Mark Wells +Maintainer: Mark Wells Build-Depends: debhelper (>= 9) -Build-Depends-Indep: perl +Build-Depends-Indep: libmoo-perl (>= 1), + libtext-csv-perl, + libwww-perl, + perl Standards-Version: 3.9.5 Homepage: https://metacpan.org/release/Geo-USCensus-Geocoding Package: libgeo-uscensus-geocoding-perl Architecture: all -Depends: ${misc:Depends}, ${perl:Depends} +Depends: ${misc:Depends}, ${perl:Depends}, + libmoo-perl (>= 1), + libtext-csv-perl, + libwww-perl Description: The U.S. Census Bureau geocoding service (no description was found) . diff --git a/debian/copyright b/debian/copyright index 0b919bf..dd11453 100644 --- a/debian/copyright +++ b/debian/copyright @@ -17,6 +17,7 @@ License: Files: debian/* Copyright: 2014, Mark Wells + 2015, Mark Wells License: Artistic or GPL-1+ License: Artistic diff --git a/t/01-lookup.t b/t/01-lookup.t index ad780e2..6a3fb15 100644 --- a/t/01-lookup.t +++ b/t/01-lookup.t @@ -12,8 +12,8 @@ my $result = Geo::USCensus::Geocoding->query( zip => '95814', ); -is( $result->message, '', 'error status' ); -is( $result->matches, 1, 'number of matches' ); -diag($result->match(0)->matchedAddress); -diag('Census tract '.$result->match(0)->censustract); +ok( $result->is_match ); +is( $result->error_message, '', 'error status' ); +diag($result->address); +diag('Census tract '.$result->censustract); diff --git a/t/02-fail.t b/t/02-fail.t index 72fc6da..39b9f1f 100644 --- a/t/02-fail.t +++ b/t/02-fail.t @@ -12,5 +12,6 @@ my $result = Geo::USCensus::Geocoding->query( zip => '95814', ); -is( $result->matches, 0, 'number of matches' ); +ok( !$result->is_match ); +diag( $result->content ); -- 2.11.0