# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
# Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
# TCP port used by DocumentConverter when the caller does not pass one.
DEFAULT_OPENOFFICE_PORT = 8100
from os.path import abspath, isfile, splitext

# NOTE(review): ``import uno`` was not present in this copy although the code
# calls uno.getComponentContext() and uno.systemPathToFileUrl(). It is placed
# ahead of the com.sun.star imports on the assumption that the PyUNO bridge
# must be loaded before those modules can be resolved - confirm.
import uno
from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException
from com.sun.star.task import ErrorCodeIOException
# Document families: the coarse kind of document that has been loaded.
# They are used as the inner keys of FILTER_MAP below.
# NOTE(review): the FAMILY_TEXT definition was missing from this copy although
# the name is used below; its value is reconstructed by analogy with the other
# three families - confirm.
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Maps an output file extension to the export filter name to use for each
# document family that can be stored in that format.
# NOTE(review): the enclosing ``FILTER_MAP = {`` literal and the "pdf" and
# "html" keys were missing from this copy. They are reconstructed from the
# ``FILTER_MAP[outputExt]`` lookup and from the filter names themselves -
# confirm.
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    "odt": {FAMILY_TEXT: "writer8"},
    "doc": {FAMILY_TEXT: "MS Word 97"},
    "rtf": {FAMILY_TEXT: "Rich Text Format"},
    "txt": {FAMILY_TEXT: "Text"},
    "ods": {FAMILY_SPREADSHEET: "calc8"},
    "xls": {FAMILY_SPREADSHEET: "MS Excel 97"},
    "odp": {FAMILY_PRESENTATION: "impress8"},
    "ppt": {FAMILY_PRESENTATION: "MS PowerPoint 97"},
    "swf": {FAMILY_PRESENTATION: "impress_flash_Export"},
}
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters
class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is stored in the ``message`` attribute and is
    also what ``str()`` of the exception yields.
    """

    def __init__(self, message):
        # Initialise the base class explicitly so that ``args`` (and therefore
        # ``str()``) carries the message by contract, not by accident.
        Exception.__init__(self, message)
        self.message = message
class DocumentConverter:
    """Converts documents between office formats through OpenOffice.org.

    The converter connects to an OpenOffice.org instance listening on a local
    socket, asks it to load the input document, and stores the document again
    with the export filter that matches the output file's extension.
    """

    # NOTE(review): this copy of the class had lost its indentation and several
    # short lines (``try:``, ``return``, loop headers). They are restored
    # below; places where the restored statement is a judgement call carry
    # their own NOTE(review).

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance on ``localhost:port``.

        Raises:
            DocumentConversionException: if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Convert ``inputFile`` into the format implied by ``outputFile``.

        The target format is chosen from the extension of ``outputFile``.

        Raises:
            DocumentConversionException: if the input cannot be loaded, the
                output extension is unknown, or the loaded document's family
                cannot be exported to that format.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # NOTE(review): assumes a failed load can come back as a null
            # reference rather than an exception - confirm against the
            # XComponentLoader documentation. Without this guard the failure
            # surfaces later as an unrelated AttributeError.
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        try:
            # NOTE(review): the statement guarded by ``except AttributeError``
            # was missing from this copy. It is reconstructed as a best-effort
            # refresh(), which documents lacking that method reject with
            # AttributeError - confirm against the upstream script.
            try:
                document.refresh()
            except AttributeError:
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            # Release the loaded document whether or not the export succeeded,
            # so failed conversions do not leave it open in the office process.
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for ``document`` and ``outputExt``.

        Raises:
            DocumentConversionException: if ``outputExt`` is not a key of
                FILTER_MAP, or the document's family has no filter for it.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services ``document`` supports.

        Raises:
            DocumentConversionException: if none of the known services match.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without its leading dot.

        A path without an extension yields the empty string.
        """
        ext = splitext(path)[1]
        return ext[1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return the keyword arguments as a tuple of PropertyValue structs.

        NOTE(review): the loop header, the ``Name`` assignment and the return
        statement were missing from this copy and are reconstructed - confirm.
        """
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)
if __name__ == "__main__":
    # Command-line entry point: python <script> <input-file> <output-file>
    from sys import argv, exit

    # NOTE(review): the argument-count guard, the ``try:`` line and the exit()
    # calls were missing from this copy and are reconstructed. The exit codes
    # (255 for bad usage, 1 for failures) should be confirmed.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        # A space now follows "ERROR!" so the message does not run into the
        # prefix, matching the ErrorCodeIOException message below.
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)