diff options
Diffstat (limited to 'bin/DocumentConverter.py')
-rwxr-xr-x | bin/DocumentConverter.py | 151 |
1 files changed, 151 insertions, 0 deletions
#!/usr/bin/python
#
# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
# Provenance: bin/DocumentConverter.py (new file, mode 100755, blob 2b4d2b3).
#
# NOTE: the syntax used below (raise E(...), "except E as name", print(...)
# with a single argument) is valid on both Python 2.6+ and Python 3.
#
DEFAULT_OPENOFFICE_PORT = 8100

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

# Document families, as reported by DocumentConverter._detectFamily().
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Maps output file extension -> document family -> export filter name.
# A family missing from an extension's entry means that conversion is
# not supported.
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export"
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export"
    },
    "odt": { FAMILY_TEXT: "writer8" },
    "doc": { FAMILY_TEXT: "MS Word 97" },
    "rtf": { FAMILY_TEXT: "Rich Text Format" },
    "txt": { FAMILY_TEXT: "Text" },
    "ods": { FAMILY_SPREADSHEET: "calc8" },
    "xls": { FAMILY_SPREADSHEET: "MS Excel 97" },
    "odp": { FAMILY_PRESENTATION: "impress8" },
    "ppt": { FAMILY_PRESENTATION: "MS PowerPoint 97" },
    "swf": { FAMILY_PRESENTATION: "impress_flash_Export" }
}
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters


class DocumentConversionException(Exception):
    """Raised when a conversion cannot be set up or carried out.

    The human-readable reason is available as ``message`` and via ``str()``.
    """

    def __init__(self, message):
        # Initialise the base class as well so that ``args`` and ``repr()``
        # carry the message instead of being empty.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message


class DocumentConverter(object):
    """Converts documents through an OpenOffice.org instance reached over UNO."""

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance listening on localhost:port.

        Raises DocumentConversionException if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load inputFile and store it as outputFile.

        The export filter is chosen from the extension of outputFile and the
        family of the loaded document.

        Raises DocumentConversionException if the input cannot be loaded, the
        output extension is unknown, or the family/extension pair is not in
        FILTER_MAP.  Exceptions raised by the UNO calls themselves propagate
        unchanged.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # The loader can hand back no component when it cannot open the
            # input; fail with a clear error instead of an AttributeError
            # further down.
            raise DocumentConversionException(
                "failed to load input file: '%s'" % inputFile)

        # Everything after a successful load happens inside try/finally so
        # the hidden document is closed even when the filter lookup fails.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Best effort: not every loaded component offers refresh().
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for document and outputExt.

        Raises DocumentConversionException if outputExt is not a key of
        FILTER_MAP or the document's family has no filter for it.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services document supports.

        Raises DocumentConversionException if none of the known services match.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of path without the leading dot.

        Returns an empty string when path has no extension.
        """
        # splitext() always returns a string for the extension part, so no
        # None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of path."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)


if __name__ == "__main__":
    from sys import argv, exit

    # Exit codes: 255 for bad usage, 1 for a missing input or failed conversion.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)