summaryrefslogtreecommitdiff
path: root/lib/HTML/AutoConvert/OpenOffice.pm
blob: e09a9e43e7dc146e6fd5c8867807114a8f49cf05 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package HTML::AutoConvert::OpenOffice;

=head1 NAME

HTML::AutoConvert::OpenOffice - OpenOffice plugin for HTML::AutoConvert

=head1 PREREQUISITES

OpenOffice v2.3 or later

(currently)
Python
Python-UNO

(future)
Perl OpenOffice::UNO

=head1 SECURITY NOTE

This module starts and leaves an OpenOffice instance running.

The OpenOffice instance binds to and listens to a port on localhost for
commands.  Anything which can talk to this port can instruct OpenOffice to
read or write any file the current user has access to.

By default, port 8100 is used.  You can choose a different port by passing an
option to the new() constructor:

  my $converter = HTML::AutoConvert->new('openoffice_port'=>5555);

=cut

use strict;
use vars qw( %info ); #$slept );
use IPC::Run qw( run timeout io );
use File::Slurp qw( slurp );

# Plugin metadata.
#   types  - file extensions this plugin claims to convert
#   weight - numeric weight (presumably used by HTML::AutoConvert to rank
#            competing plugins -- confirm against the caller)
#   url    - home page of the underlying converter (OpenOffice, not wvWare)
%info = (
  'types'   => [qw( doc rtf odt sxw )],
  'weight'  => 80,
  'url'     => 'http://www.openoffice.org/',
);

#$slept = 0;

#sub program { ( 'openoffice', '-headless' ); }

#half-ass using DocumentConverter.py for now
#need to recode with OpenOffice::UNO

# Convert a document to HTML by running DocumentConverter.py.
#
# Arguments:
#   $file - path of the document to convert
#   options, either as a hash reference or as a flat key/value list; they
#   are handed on unchanged to start_openoffice()
#
# Returns the generated HTML as one string.
#
# Dies if the converter exits with a non-zero status (the message carries
# the exit status plus whatever the converter printed on stdout and stderr).
# The run is guarded by IPC::Run's timeout() set to $timeout seconds.
sub html_convert {
  my( $self, $file ) = ( shift, shift );
  my $opt = ref($_[0]) ? shift : { @_ };

  $self->start_openoffice($opt);

  my $program = 'DocumentConverter.py';

  my $timeout = 60; #?

  use File::Temp qw/ tempfile /;
  # UNLINK => 1 is only a safety net for the paths that die before we get to
  # remove the file ourselves (e.g. an exception raised by the timeout).
  my($fh, $outfile) = tempfile(SUFFIX => '.html', UNLINK => 1);
  # The converter is handed the file *name* and overwrites it, so our own
  # handle is of no further use; close it rather than leak the descriptor.
  close($fh);

  my($out, $err) = ( '', '' );
  # NOTE(review): presumably shields the child reaping below from a SIGCHLD
  # handler installed by the caller -- confirm.
  local($SIG{CHLD}) = sub {};
  unless ( run( [ $program, $file, $outfile ], \undef, \$out, \$err, timeout($timeout) ) ) {
    my $status = $? >> 8;  # grab it before doing anything else
    unlink $outfile;
    die "$program failed with exit status $status: $out$err\n";
  }

  my $html = slurp($outfile);

  # Done with the scratch file; remove it now instead of letting temp files
  # pile up for the lifetime of the process.
  unlink $outfile;

  return $html;

}

# Launch a headless OpenOffice that accepts UNO (urp) connections on a socket.
#
# Options, given as a hash reference or as a flat key/value list:
#   openoffice_port - port for the socket listener; defaults to 8100
#
# Dies if the openoffice command exits with a non-zero status.
#
# NOTE(review): the chosen port is not passed to DocumentConverter.py by
# html_convert, so a non-default port may still need matching configuration
# on the converter side -- confirm.
sub start_openoffice {
  # Shift only the invocant.  Shifting a second value here would throw away
  # the options hash reference that html_convert passes as the sole argument,
  # and the requested port would never be seen.
  my $self = shift;
  my $opt = ref($_[0]) ? shift : { @_ };
  my $port = $opt->{'openoffice_port'} || 8100;

  my $cmd = [ 'openoffice', '-headless',
                            "-accept=socket,port=$port;urp",
                            #'-splash-pipe=5',
            ];

  # NOTE(review): presumably shields the child reaping below from a SIGCHLD
  # handler installed by the caller -- confirm.
  local($SIG{CHLD}) = sub {};
  # A false return from run() means a non-zero exit; the status is in $?,
  # not in $@ (which run() does not set in that case).
  run( $cmd, \undef, '>/dev/null', '2>/dev/null' )
    or die "can't launch openoffice: exit status ". ( $? >> 8 ). "\n";

  #it isn't ready to run commands right away :(
  #it would be better if we could ping against UNO API somehow until ready...
  #sleep 5 unless $slept++;

}

1;