summaryrefslogtreecommitdiff
path: root/bin/DocumentConverter.py
diff options
context:
space:
mode:
authorivan <ivan>2008-08-11 07:38:09 +0000
committerivan <ivan>2008-08-11 07:38:09 +0000
commitb5e00dd5534d7f6ec852b537f551bb872931fa04 (patch)
tree3091d93f430e74f823397559d324d7f4ce882f7d /bin/DocumentConverter.py
initial importSTART
Diffstat (limited to 'bin/DocumentConverter.py')
-rwxr-xr-xbin/DocumentConverter.py151
1 files changed, 151 insertions, 0 deletions
diff --git a/bin/DocumentConverter.py b/bin/DocumentConverter.py
new file mode 100755
index 0000000..2b4d2b3
--- /dev/null
+++ b/bin/DocumentConverter.py
@@ -0,0 +1,151 @@
+#!/usr/bin/python
+#
+# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
+#
+# This script converts a document from one office format to another by
+# connecting to an OpenOffice.org instance via Python-UNO bridge.
+#
+# Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
+# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
+# - or any later version.
+#
+# Port on localhost that DocumentConverter connects to when no port is given.
+DEFAULT_OPENOFFICE_PORT = 8100
+
+import uno
+from os.path import abspath, isfile, splitext
+from com.sun.star.beans import PropertyValue
+from com.sun.star.task import ErrorCodeIOException
+from com.sun.star.connection import NoConnectException
+
+# Document families, as returned by DocumentConverter._detectFamily() and
+# used as the inner keys of FILTER_MAP.
+FAMILY_TEXT = "Text"
+FAMILY_SPREADSHEET = "Spreadsheet"
+FAMILY_PRESENTATION = "Presentation"
+FAMILY_DRAWING = "Drawing"
+
+# Export filter lookup used by DocumentConverter._filterName():
+#   output file extension -> { document family -> filter name }
+# An extension missing from the outer dict is reported as an unknown output
+# format; a family missing from an inner dict is reported as an unsupported
+# conversion.
+FILTER_MAP = {
+ "pdf": {
+ FAMILY_TEXT: "writer_pdf_Export",
+ FAMILY_SPREADSHEET: "calc_pdf_Export",
+ FAMILY_PRESENTATION: "impress_pdf_Export",
+ FAMILY_DRAWING: "draw_pdf_Export"
+ },
+ "html": {
+ FAMILY_TEXT: "HTML (StarWriter)",
+ FAMILY_SPREADSHEET: "HTML (StarCalc)",
+ FAMILY_PRESENTATION: "impress_html_Export"
+ },
+ "odt": { FAMILY_TEXT: "writer8" },
+ "doc": { FAMILY_TEXT: "MS Word 97" },
+ "rtf": { FAMILY_TEXT: "Rich Text Format" },
+ "txt": { FAMILY_TEXT: "Text" },
+ "ods": { FAMILY_SPREADSHEET: "calc8" },
+ "xls": { FAMILY_SPREADSHEET: "MS Excel 97" },
+ "odp": { FAMILY_PRESENTATION: "impress8" },
+ "ppt": { FAMILY_PRESENTATION: "MS PowerPoint 97" },
+ "swf": { FAMILY_PRESENTATION: "impress_flash_Export" }
+}
+# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
+# for more available filters
+
+
+class DocumentConversionException(Exception):
+    """Raised when a document cannot be converted.
+
+    The message passed to the constructor is stored in ``message`` and is
+    what str() returns.
+    """
+
+    def __init__(self, message):
+        # Initialise the base class as well, so that ``args`` is populated
+        # and repr() / tracebacks carry the message.
+        Exception.__init__(self, message)
+        self.message = message
+
+    def __str__(self):
+        return self.message
+
+
+class DocumentConverter:
+    """Converts office documents through a running OpenOffice.org instance.
+
+    The instance is reached over the Python-UNO bridge via a socket on
+    localhost; the output format is derived from the output file extension.
+    """
+
+    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
+        """Connect to the OpenOffice.org instance listening on `port`.
+
+        Raises DocumentConversionException if the connection is refused.
+        """
+        localContext = uno.getComponentContext()
+        resolver = localContext.ServiceManager.createInstanceWithContext(
+            "com.sun.star.bridge.UnoUrlResolver", localContext)
+        try:
+            context = resolver.resolve(
+                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
+        except NoConnectException:
+            raise DocumentConversionException(
+                "failed to connect to OpenOffice.org on port %s" % port)
+        self.desktop = context.ServiceManager.createInstanceWithContext(
+            "com.sun.star.frame.Desktop", context)
+
+    def convert(self, inputFile, outputFile):
+        """Load `inputFile` and store it as `outputFile`.
+
+        Raises DocumentConversionException if the input cannot be loaded,
+        if the extension of `outputFile` is not in FILTER_MAP, or if the
+        loaded document's family has no filter for that extension.
+        Exceptions raised by the UNO calls themselves propagate unchanged.
+        """
+        inputUrl = self._toFileUrl(inputFile)
+        outputUrl = self._toFileUrl(outputFile)
+
+        document = self.desktop.loadComponentFromURL(
+            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
+        if document is None:
+            # No component came back: report it here instead of failing
+            # later with an AttributeError on None.
+            raise DocumentConversionException(
+                "failed to load input document: '%s'" % inputFile)
+
+        # The document is open from here on, so every exit path must close
+        # it -- including an unknown or unsupported output format.
+        try:
+            try:
+                document.refresh()
+            except AttributeError:
+                # best effort: documents without refresh() are converted as-is
+                pass
+
+            outputExt = self._getFileExt(outputFile)
+            filterName = self._filterName(document, outputExt)
+            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
+        finally:
+            document.close(True)
+
+    def _filterName(self, document, outputExt):
+        """Return the export filter name for `document` and `outputExt`.
+
+        Raises DocumentConversionException for an extension missing from
+        FILTER_MAP or a family that extension does not support.
+        """
+        family = self._detectFamily(document)
+        try:
+            filterByFamily = FILTER_MAP[outputExt]
+        except KeyError:
+            raise DocumentConversionException(
+                "unknown output format: '%s'" % outputExt)
+        try:
+            return filterByFamily[family]
+        except KeyError:
+            raise DocumentConversionException(
+                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))
+
+    def _detectFamily(self, document):
+        """Return the FAMILY_* constant matching the services `document` supports.
+
+        Raises DocumentConversionException if none of the known services match.
+        """
+        if document.supportsService("com.sun.star.text.GenericTextDocument"):
+            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
+            # but this further distinction doesn't seem to matter for conversions
+            return FAMILY_TEXT
+        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
+            return FAMILY_SPREADSHEET
+        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
+            return FAMILY_PRESENTATION
+        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
+            return FAMILY_DRAWING
+        raise DocumentConversionException("unknown document family: %s" % document)
+
+    def _getFileExt(self, path):
+        """Return the lower-cased extension of `path` without the dot ('' if none)."""
+        # splitext() always yields a string for the extension, never None,
+        # so no None check is needed.
+        return splitext(path)[1][1:].lower()
+
+    def _toFileUrl(self, path):
+        """Return the file URL for the absolute form of `path`."""
+        return uno.systemPathToFileUrl(abspath(path))
+
+    def _toProperties(self, **args):
+        """Return a tuple of PropertyValue objects, one per keyword argument."""
+        props = []
+        for key in args:
+            prop = PropertyValue()
+            prop.Name = key
+            prop.Value = args[key]
+            props.append(prop)
+        return tuple(props)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: convert <input-file> into <output-file>.
+    # Exit status: 255 on wrong usage, 1 on a missing input file or a
+    # failed conversion.
+    from sys import argv, exit
+
+    if len(argv) < 3:
+        print "USAGE: python %s <input-file> <output-file>" % argv[0]
+        exit(255)
+    if not isfile(argv[1]):
+        print "no such input file: %s" % argv[1]
+        exit(1)
+
+    try:
+        converter = DocumentConverter()
+        converter.convert(argv[1], argv[2])
+    except DocumentConversionException, exception:
+        # separate the prefix from the message, as the report below does
+        print "ERROR! " + str(exception)
+        exit(1)
+    except ErrorCodeIOException, exception:
+        print "ERROR! ErrorCodeIOException %d" % exception.ErrCode
+        exit(1)
+