diff options
Diffstat (limited to 'lib/HTML/AutoConvert/poppler.pm')
-rw-r--r-- | lib/HTML/AutoConvert/poppler.pm | 69 |
1 files changed, 44 insertions, 25 deletions
diff --git a/lib/HTML/AutoConvert/poppler.pm b/lib/HTML/AutoConvert/poppler.pm index cca5b0d..a75a54f 100644 --- a/lib/HTML/AutoConvert/poppler.pm +++ b/lib/HTML/AutoConvert/poppler.pm @@ -13,36 +13,55 @@ poppler can be downloaded from http://poppler.freedesktop.org/ use strict; use vars qw( %info ); use base 'HTML::AutoConvert::Run'; +use File::Temp qw( tempdir ); +use File::Slurp qw( slurp ); +use IPC::Run qw( run timeout ); %info = ( - 'types' => 'pdf', - 'weight' => 10, - 'url' => 'http://poppler.freedesktop.org/', + 'types' => 'pdf', + 'weight' => 10, + 'returns_images' => 1, + 'url' => 'http://poppler.freedesktop.org/', ); sub program { ( 'pdftohtml', '-stdout' ) } -#false laziness w/OpenOffice.pm -#sub html_convert { -# my( $self, $file ) = ( shift, shift ); -# my $opt = ref($_[0]) ? shift : { @_ }; -# -# my $program = 'pdftohtml'; -# -# my $timeout = 60; #? -# -# my($out, $err) = ( '', '' ); -# local($SIG{CHLD}) = sub {}; -# run( [ $program, $file ], \undef, \$out, \$err, timeout($timeout) ) -# or die "$program failed with exit status ". ( $? >> 8 ). ": $out\n"; -# -# ( my $outfile = $file ) =~ s/\.pdf$/.html/i -# or die "poppler.pm called with non-PDF file?!"; -# -# my $html = slurp($outfile); -# -# $html; -# -#} +#some false laziness "in spirit" w/OpenOffice.pm +sub html_convert { + my( $self, $file ) = ( shift, shift ); + my $opt = ref($_[0]) ? shift : { @_ }; + + my $html = $self->SUPER::html_convert($file, $opt); + return $html unless wantarray; + + my @images = $self->extract_images($file, $opt); + + ( $html, @images); +} + +sub extract_images { + my( $self, $file ) = ( shift, shift ); + my $opt = ref($_[0]) ? shift : { @_ }; + + my $imgdir = tempdir( CLEANUP=>1 ).'/'; + + #some false laziness w/Run::html_convert :( + my @program = ( 'pdfimages' ); + my $program = $program[0]; + + my $timeout = 60; #? + + my( $out, $err ) = ( '', ''); + local($SIG{CHLD}) = sub {}; + run( [ @program, $file, $imgdir ], \undef, \$out, \$err, timeout($timeout) ) + or die "$program failed with exit status ". ( $? >> 8 ). ": $err\n"; + + map { + ( my $filename = $_ ) =~ s/^.*\/\-?//; + [ $filename, scalar(slurp($_)) ]; + } + glob("$imgdir*"); + +} 1; |