diff options
Diffstat (limited to 'lib/HTML/AutoConvert/OpenOffice.pm')
-rw-r--r-- | lib/HTML/AutoConvert/OpenOffice.pm | 96 |
1 files changed, 96 insertions, 0 deletions
package HTML::AutoConvert::OpenOffice;

=head1 NAME

HTML::AutoConvert::OpenOffice - OpenOffice plugin for HTML::AutoConvert

=head1 PREREQUISITES

OpenOffice v2.3 or later

(currently)
Python
Python-UNO

(future)
Perl OpenOffice::UNO

=head1 SECURITY NOTE

This module starts and leaves an OpenOffice instance running.

The OpenOffice instance binds to and listens to a port on localhost for
commands.  Anything which can talk to this port can instruct OpenOffice to
read or write any file the current user has access to.

By default, port 8100 is used.  You can choose a different port by passing an
option to the new() constructor:

  my $converter = HTML::AutoConvert->new('openoffice_port'=>5555);

=cut

use strict;
use warnings;

use File::Slurp qw( slurp );
use File::Temp qw( tempfile );
use IPC::Run qw( run timeout io );

# Plugin metadata: the file extensions handled, a numeric weight and a
# project URL.
our %info = (
  'types'  => [qw( doc rtf odt sxw )],
  'weight' => 80,
  'url'    => 'http://www.openoffice.org/',
);

# Conversion is currently delegated to the external DocumentConverter.py
# script.
# TODO: recode with OpenOffice::UNO.

# html_convert( $file [, \%options | %options ] )
#
# Converts $file to HTML and returns the HTML as a single string.
#
# Options may be given as a hash reference or as a flat key/value list; they
# are passed on to start_openoffice().
#
# Dies if DocumentConverter.py exits unsuccessfully; IPC::Run's timeout()
# limits the run to 60 seconds.
sub html_convert {
  my( $self, $file ) = ( shift, shift );
  my $opt = ref($_[0]) ? shift : { @_ };

  $self->start_openoffice($opt);

  my $program = 'DocumentConverter.py';
  my $timeout = 60; # seconds

  # Only the file name is needed: the converter overwrites the file, so the
  # handle is closed right away.  UNLINK => 1 is a safety net that removes
  # the file at program exit if run() throws before the explicit unlink.
  my( $fh, $outfile ) = tempfile( SUFFIX => '.html', UNLINK => 1 );
  close $fh;

  my( $out, $err ) = ( '', '' );

  # Install a no-op CHLD handler for the duration of the call; presumably
  # this keeps a caller-installed handler from interfering with IPC::Run.
  local($SIG{CHLD}) = sub {};

  unless ( run( [ $program, $file, $outfile ],
                \undef, \$out, \$err, timeout($timeout) ) ) {
    my $status = $? >> 8;
    unlink $outfile;
    die "$program failed with exit status $status: $out$err\n";
  }

  my $html = slurp($outfile);
  unlink $outfile;

  return $html;
}

# start_openoffice( [ \%options | %options ] )
#
# Launches a headless OpenOffice instance accepting UNO connections on a
# local socket.
#
# Options:
#   openoffice_port - port to listen on (default 8100)
#
# Returns true on success; dies if the openoffice command fails.
sub start_openoffice {
  my $self = shift;
  my $opt = ref($_[0]) ? shift : { @_ };
  my $port = $opt->{'openoffice_port'} || 8100;

  # NOTE(review): html_convert() runs DocumentConverter.py without a port
  # argument; confirm the converter connects to the same port when a
  # non-default openoffice_port is used.
  my $cmd = [ 'openoffice', '-headless',
              "-accept=socket,port=$port;urp",
            ];

  # Same no-op CHLD handler as in html_convert().
  local($SIG{CHLD}) = sub {};

  # run() reports a failing child through its return value and $?, not $@.
  run( $cmd, \undef, '>/dev/null', '2>/dev/null' )
    or die "can't launch openoffice: exit status ". ( $? >> 8 ). "\n";

  # OpenOffice isn't ready to accept commands right away; it would be better
  # to ping the UNO API until it is ready.

  return 1;
}

1;