initial import
[HTML-AutoConvert.git] / bin / DocumentConverter.py
1 #!/usr/bin/python
2 #
3 # PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
4 #
5 # This script converts a document from one office format to another by
6 # connecting to an OpenOffice.org instance via Python-UNO bridge.
7 #
8 # Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
9 # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
10 # - or any later version.
11 #
# TCP port on localhost used to reach OpenOffice.org when DocumentConverter
# is constructed without an explicit port.
DEFAULT_OPENOFFICE_PORT = 8100
13
14 import uno
15 from os.path import abspath, isfile, splitext
16 from com.sun.star.beans import PropertyValue
17 from com.sun.star.task import ErrorCodeIOException
18 from com.sun.star.connection import NoConnectException
19
# Document families. DocumentConverter._detectFamily returns one of these
# based on the services the loaded document supports; they are also the
# inner keys of FILTER_MAP.
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Maps an output file extension (lower case, without the dot) to the export
# filter name to use for each document family. A family missing from an
# extension's entry means that conversion is not supported.
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    "odt": {FAMILY_TEXT: "writer8"},
    "doc": {FAMILY_TEXT: "MS Word 97"},
    "rtf": {FAMILY_TEXT: "Rich Text Format"},
    "txt": {FAMILY_TEXT: "Text"},
    "ods": {FAMILY_SPREADSHEET: "calc8"},
    "xls": {FAMILY_SPREADSHEET: "MS Excel 97"},
    "odp": {FAMILY_PRESENTATION: "impress8"},
    "ppt": {FAMILY_PRESENTATION: "MS PowerPoint 97"},
    "swf": {FAMILY_PRESENTATION: "impress_flash_Export"},
}
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters
49
50
class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is available as ``message`` and is also what
    ``str()`` returns.
    """

    def __init__(self, message):
        # Initialise the base class as well so that ``args``, ``repr()`` and
        # pickling carry the message instead of being empty.
        super(DocumentConversionException, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message
58
59
class DocumentConverter(object):
    """Convert documents between office formats via a running OpenOffice.org.

    An instance holds a ``com.sun.star.frame.Desktop`` obtained over a UNO
    socket connection to ``localhost``. ``convert`` uses it to load a document
    and store it with the export filter that FILTER_MAP associates with the
    output file's extension and the document's family.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance listening on *port*.

        Raises:
            DocumentConversionException: if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load *inputFile* and store it as *outputFile*.

        The output format is chosen from the extension of *outputFile*.

        Raises:
            DocumentConversionException: if the input cannot be loaded, the
                output extension is unknown, or the document family cannot
                be exported to that format.

        Errors raised by the office while loading or storing (such as the
        ErrorCodeIOException handled by the command-line entry point) are
        not caught here.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # Without this check the AttributeError handler below would hide
            # the failed load and the error would surface later, unrelated
            # to its cause.
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under the finally so the
        # document is closed even when the requested format is rejected.
        try:
            try:
                # Best effort: components without a refresh() method are
                # converted as loaded.
                document.refresh()
            except AttributeError:
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for *document* and *outputExt*.

        Raises:
            DocumentConversionException: if *outputExt* is not in FILTER_MAP
                or has no filter for the document's family.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services *document* supports.

        Raises:
            DocumentConversionException: if none of the known services match.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of *path* without the leading dot.

        Returns an empty string when *path* has no extension.
        """
        # splitext always returns a string for the extension (possibly ""),
        # so no None check is needed before slicing off the dot.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of *path*."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)
130
131
def main(argv):
    """Run a conversion from command-line arguments and return the exit status.

    *argv* is the full argument vector: the program name, then the input
    file, then the output file.

    Returns:
        255 if fewer than two file arguments are given, 1 if the input file
        does not exist or the conversion fails, 0 on success.
    """
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        return 255
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        return 1

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        return 1
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        return 1
    return 0


if __name__ == "__main__":
    from sys import argv, exit

    exit(main(argv))
151