diff options
Diffstat (limited to 'lib/HTML/AutoConvert')
-rw-r--r-- | lib/HTML/AutoConvert/.OpenOffice.pm.swp | bin | 0 -> 12288 bytes | |||
-rw-r--r-- | lib/HTML/AutoConvert/.Run.pm.swp | bin | 0 -> 12288 bytes | |||
-rw-r--r-- | lib/HTML/AutoConvert/.antiword.pm.swp | bin | 0 -> 12288 bytes | |||
-rw-r--r-- | lib/HTML/AutoConvert/.poppler.pm.swp | bin | 0 -> 12288 bytes | |||
-rw-r--r-- | lib/HTML/AutoConvert/.unrtf.pm.swp | bin | 0 -> 12288 bytes | |||
-rw-r--r-- | lib/HTML/AutoConvert/.wvWare.pm.swp | bin | 0 -> 12288 bytes | |||
-rw-r--r-- | lib/HTML/AutoConvert/OpenOffice.pm | 96 | ||||
-rw-r--r-- | lib/HTML/AutoConvert/Run.pm | 30 | ||||
-rw-r--r-- | lib/HTML/AutoConvert/antiword.pm | 31 | ||||
-rw-r--r-- | lib/HTML/AutoConvert/poppler.pm | 48 | ||||
-rw-r--r-- | lib/HTML/AutoConvert/unrtf.pm | 25 | ||||
-rw-r--r-- | lib/HTML/AutoConvert/wvWare.pm | 25 |
12 files changed, 255 insertions, 0 deletions
diff --git a/lib/HTML/AutoConvert/.OpenOffice.pm.swp b/lib/HTML/AutoConvert/.OpenOffice.pm.swp Binary files differnew file mode 100644 index 0000000..750ad68 --- /dev/null +++ b/lib/HTML/AutoConvert/.OpenOffice.pm.swp diff --git a/lib/HTML/AutoConvert/.Run.pm.swp b/lib/HTML/AutoConvert/.Run.pm.swp Binary files differnew file mode 100644 index 0000000..8a90a9a --- /dev/null +++ b/lib/HTML/AutoConvert/.Run.pm.swp diff --git a/lib/HTML/AutoConvert/.antiword.pm.swp b/lib/HTML/AutoConvert/.antiword.pm.swp Binary files differnew file mode 100644 index 0000000..79e92ef --- /dev/null +++ b/lib/HTML/AutoConvert/.antiword.pm.swp diff --git a/lib/HTML/AutoConvert/.poppler.pm.swp b/lib/HTML/AutoConvert/.poppler.pm.swp Binary files differnew file mode 100644 index 0000000..2c011bf --- /dev/null +++ b/lib/HTML/AutoConvert/.poppler.pm.swp diff --git a/lib/HTML/AutoConvert/.unrtf.pm.swp b/lib/HTML/AutoConvert/.unrtf.pm.swp Binary files differnew file mode 100644 index 0000000..9bc779c --- /dev/null +++ b/lib/HTML/AutoConvert/.unrtf.pm.swp diff --git a/lib/HTML/AutoConvert/.wvWare.pm.swp b/lib/HTML/AutoConvert/.wvWare.pm.swp Binary files differnew file mode 100644 index 0000000..39e4317 --- /dev/null +++ b/lib/HTML/AutoConvert/.wvWare.pm.swp diff --git a/lib/HTML/AutoConvert/OpenOffice.pm b/lib/HTML/AutoConvert/OpenOffice.pm new file mode 100644 index 0000000..e09a9e4 --- /dev/null +++ b/lib/HTML/AutoConvert/OpenOffice.pm @@ -0,0 +1,96 @@ +package HTML::AutoConvert::OpenOffice; + +=head1 NAME + +HTML::AutoConvert::OpenOffice - OpenOffice plugin for HTML::AutoConvert + +=head1 PREREQUISITES + +OpenOffice v2.3 or later + +(currently) +Python +Python-UNO + +(future) +Perl OpenOffice::UNO + +=head1 SECURITY NOTE + +This module starts and leaves an OpenOffice instance running. + +The OpenOffice instance binds to and listens to a port on localhost for +commands. Anything which can talk to this port can instruct OpenOffice to +read or write any file the current user has access to. 
+ +By default, port 8100 is used. You can choose a different port by passing an +option to the new() constructor: + + my $converter = HTML::AutoConvert->new('openoffice_port'=>5555); + +=cut + +use strict; +use vars qw( %info ); #$slept ); +use IPC::Run qw( run timeout io ); +use File::Slurp qw( slurp ); + +%info = ( + 'types' => [qw( doc rtf odt sxw )], + 'weight' => 80, + 'url' => 'http://wvware.sourceforge.net/', +); + +#$slept = 0; + +#sub program { ( 'openoffice', '-headless' ); } + +#half-ass using DocumentConverter.py for now +#need to recode with OpenOffice::UNO + +sub html_convert { + my( $self, $file ) = ( shift, shift ); + my $opt = ref($_[0]) ? shift : { @_ }; + + $self->start_openoffice($opt); + + my $program = 'DocumentConverter.py'; + + my $timeout = 60; #? + + use File::Temp qw/ tempfile /; + my($fh, $outfile) = tempfile(SUFFIX => '.html'); + #hmm, it gets overwritten so $fh is bunk + + my($out, $err) = ( '', '' ); + local($SIG{CHLD}) = sub {}; + run( [ $program, $file, $outfile ], \undef, \$out, \$err, timeout($timeout) ) + or die "$program failed with exit status ". ( $? >> 8 ). ": $out\n"; + + my $html = slurp($outfile); + + $html; + +} + +sub start_openoffice { + my( $self ) = ( shift, shift ); + my $opt = ref($_[0]) ? shift : { @_ }; + my $port = $opt->{'openoffice_port'} || 8100; + + my $cmd = [ 'openoffice', '-headless', + "-accept=socket,port=$port;urp", + #'-splash-pipe=5', + ]; + + local($SIG{CHLD}) = sub {}; + run $cmd, \undef, '>/dev/null', '2>/dev/null' + or die "can't launch openoffice: $@\n"; + + #it isn't ready to run commands right away :( + #it would be better if we could ping against UNO API somehow until ready... 
+ #sleep 5 unless $slept++; + +} + +1; diff --git a/lib/HTML/AutoConvert/Run.pm b/lib/HTML/AutoConvert/Run.pm new file mode 100644 index 0000000..2eada89 --- /dev/null +++ b/lib/HTML/AutoConvert/Run.pm @@ -0,0 +1,30 @@ +package HTML::AutoConvert::Run; + +=head1 NAME + +HTML::AutoConvert::Run - Base class for HTML::AutoConvert plugins that run an external program + +=cut + +use strict; +use IPC::Run qw( run timeout ); + +sub html_convert { + my( $self, $file ) = ( shift, shift ); + my $opt = ref($_[0]) ? shift : { @_ }; + + my @program = $self->program; + my $program = $program[0]; + + my $timeout = 60; #? + + my( $html, $err ) = ( '', ''); + local($SIG{CHLD}) = sub {}; + run( [ @program, $file ], \undef, \$html, \$err, timeout($timeout) ) + or die "$program failed with exit status ". ( $? >> 8 ). ": $err\n"; + + $html; + +} + +1; diff --git a/lib/HTML/AutoConvert/antiword.pm b/lib/HTML/AutoConvert/antiword.pm new file mode 100644 index 0000000..4622a79 --- /dev/null +++ b/lib/HTML/AutoConvert/antiword.pm @@ -0,0 +1,31 @@ +package HTML::AutoConvert::antiword; + +=head1 NAME + +HTML::AutoConvert::antiword - antiword plugin for HTML::AutoConvert + +=head1 URL + +Antiword can be downloaded from http://www.winfield.demon.nl/index.html + +=cut + +use strict; +use vars qw( %info ); +use base 'HTML::AutoConvert::Run'; + +%info = ( + 'types' => 'doc', + 'weight' => 90, + 'url' => 'http://www.winfield.demon.nl/index.html', +); + +sub program { ( 'antiword' ) } + +sub html_convert { + my $self = shift; + my $html = $self->SUPER::html_convert(@_); + "<HTML><HEAD></HEAD><BODY><PRE>\n$html\n</PRE></BODY></HTML>"; +} + +1; diff --git a/lib/HTML/AutoConvert/poppler.pm b/lib/HTML/AutoConvert/poppler.pm new file mode 100644 index 0000000..cca5b0d --- /dev/null +++ b/lib/HTML/AutoConvert/poppler.pm @@ -0,0 +1,48 @@ +package HTML::AutoConvert::poppler; + +=head1 NAME + +HTML::AutoConvert::poppler - poppler (pdftohtml) plugin for HTML::AutoConvert + +=head1 URL + +poppler can be 
downloaded from http://poppler.freedesktop.org/ + +=cut + +use strict; +use vars qw( %info ); +use base 'HTML::AutoConvert::Run'; + +%info = ( + 'types' => 'pdf', + 'weight' => 10, + 'url' => 'http://poppler.freedesktop.org/', +); + +sub program { ( 'pdftohtml', '-stdout' ) } + +#false laziness w/OpenOffice.pm +#sub html_convert { +# my( $self, $file ) = ( shift, shift ); +# my $opt = ref($_[0]) ? shift : { @_ }; +# +# my $program = 'pdftohtml'; +# +# my $timeout = 60; #? +# +# my($out, $err) = ( '', '' ); +# local($SIG{CHLD}) = sub {}; +# run( [ $program, $file ], \undef, \$out, \$err, timeout($timeout) ) +# or die "$program failed with exit status ". ( $? >> 8 ). ": $out\n"; +# +# ( my $outfile = $file ) =~ s/\.pdf$/.html/i +# or die "poppler.pm called with non-PDF file?!"; +# +# my $html = slurp($outfile); +# +# $html; +# +#} + +1; diff --git a/lib/HTML/AutoConvert/unrtf.pm b/lib/HTML/AutoConvert/unrtf.pm new file mode 100644 index 0000000..034440e --- /dev/null +++ b/lib/HTML/AutoConvert/unrtf.pm @@ -0,0 +1,25 @@ +package HTML::AutoConvert::unrtf; + +=head1 NAME + +HTML::AutoConvert::unrtf - unrtf plugin for HTML::AutoConvert + +=head1 URL + +unrtf can be downloaded from ftp://ftp.gnu.org/pub/gnu/unrtf/ + +=cut + +use strict; +use vars qw( %info ); +use base 'HTML::AutoConvert::Run'; + +%info = ( + 'types' => 'rtf', + 'weight' => 90, + 'url' => 'ftp://ftp.gnu.org/pub/gnu/unrtf/', +); + +sub program { ( 'unrtf' ) } + +1; diff --git a/lib/HTML/AutoConvert/wvWare.pm b/lib/HTML/AutoConvert/wvWare.pm new file mode 100644 index 0000000..26a57cd --- /dev/null +++ b/lib/HTML/AutoConvert/wvWare.pm @@ -0,0 +1,25 @@ +package HTML::AutoConvert::wvWare; + +=head1 NAME + +HTML::AutoConvert::wvWare - wvWare plugin for HTML::AutoConvert + +=head1 URL + +wvWare can be downloaded from http://wvware.sourceforge.net/ + +=cut + +use strict; +use vars qw( %info ); +use base 'HTML::AutoConvert::Run'; + +%info = ( + 'types' => 'doc', + 'weight' => 80, + 'url' => 
'http://wvware.sourceforge.net/', +); + +sub program { ( 'wvWare' ) } + +1; |