Created
January 4, 2010 14:47
-
-
Save psd/268550 to your computer and use it in GitHub Desktop.
OpenOffice Document Conversion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Experiments in running a headless OpenOffice as a document converter for TiddlyDocs, etc.
Useful links:
http://wiki.services.openoffice.org/wiki/Python
Headless OpenOffice:
Mac OS X:
$ /Applications/OpenOffice.org.app/Contents/soffice.bin -headless -nofirststartwizard -accept="socket,port=8100;urp;"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
# Port on localhost that DocumentConverter connects to when no port is given.
DEFAULT_OPENOFFICE_PORT = 8100
import uno | |
from os.path import abspath, isfile, splitext | |
from com.sun.star.beans import PropertyValue | |
from com.sun.star.task import ErrorCodeIOException | |
from com.sun.star.connection import NoConnectException | |
# Document family labels. DocumentConverter._detectFamily returns one of
# these, and they are the inner keys of EXPORT_FILTER_MAP and the keys of
# PAGE_STYLE_OVERRIDE_PROPERTIES.
FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"
#---------------------#
# Configuration Start #
#---------------------#
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# most formats are auto-detected; only those requiring options are defined here
# Extra load properties keyed by input file extension (lower-case, without
# the leading dot). DocumentConverter.convert merges the matching entry into
# the properties it passes when loading the input document.
IMPORT_FILTER_MAP = {
    "txt": {
        "FilterName": "Text (encoded)",
        "FilterOptions": "utf8",
    },
    "csv": {
        "FilterName": "Text - txt - csv (StarCalc)",
        "FilterOptions": "44,34,0",
    },
}
# Store properties keyed first by output file extension (lower-case, without
# the leading dot) and then by document family. A missing extension means
# "unknown output format"; a missing family under a known extension means
# "unsupported conversion" (see DocumentConverter._getStoreProperties).
EXPORT_FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: {"FilterName": "writer_pdf_Export"},
        FAMILY_WEB: {"FilterName": "writer_web_pdf_Export"},
        FAMILY_SPREADSHEET: {"FilterName": "calc_pdf_Export"},
        FAMILY_PRESENTATION: {"FilterName": "impress_pdf_Export"},
        FAMILY_DRAWING: {"FilterName": "draw_pdf_Export"},
    },
    "html": {
        FAMILY_TEXT: {"FilterName": "HTML (StarWriter)"},
        FAMILY_SPREADSHEET: {"FilterName": "HTML (StarCalc)"},
        FAMILY_PRESENTATION: {"FilterName": "impress_html_Export"},
    },
    "odt": {
        FAMILY_TEXT: {"FilterName": "writer8"},
        FAMILY_WEB: {"FilterName": "writerweb8_writer"},
    },
    "doc": {
        FAMILY_TEXT: {"FilterName": "MS Word 97"},
    },
    "rtf": {
        FAMILY_TEXT: {"FilterName": "Rich Text Format"},
    },
    "txt": {
        FAMILY_TEXT: {
            "FilterName": "Text",
            "FilterOptions": "utf8",
        },
    },
    "ods": {
        FAMILY_SPREADSHEET: {"FilterName": "calc8"},
    },
    "xls": {
        FAMILY_SPREADSHEET: {"FilterName": "MS Excel 97"},
    },
    "csv": {
        FAMILY_SPREADSHEET: {
            "FilterName": "Text - txt - csv (StarCalc)",
            "FilterOptions": "44,34,0",
        },
    },
    "odp": {
        FAMILY_PRESENTATION: {"FilterName": "impress8"},
    },
    "ppt": {
        FAMILY_PRESENTATION: {"FilterName": "MS PowerPoint 97"},
    },
    "swf": {
        FAMILY_DRAWING: {"FilterName": "draw_flash_Export"},
        FAMILY_PRESENTATION: {"FilterName": "impress_flash_Export"},
    },
}
# Page-style properties forced onto every page style of a loaded document,
# keyed by document family (see DocumentConverter._overridePageStyleProperties).
# Families without an entry are left untouched.
PAGE_STYLE_OVERRIDE_PROPERTIES = {
    FAMILY_SPREADSHEET: {
        #--- Scale options: uncomment 1 of the 3 ---
        # a) 'Reduce / enlarge printout': 'Scaling factor'
        "PageScale": 100,
        # b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
        #"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
        # c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
        #"ScaleToPages": 1,
        "PrintGrid": False,
    },
}
#-------------------#
# Configuration End #
#-------------------#
class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is available as ``message`` and is what
    ``str()`` returns.
    """

    def __init__(self, message):
        # Initialise the base class so that ``args``, ``repr()`` and pickling
        # carry the message instead of being empty.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message
class DocumentConverter:
    """Convert documents between office formats through OpenOffice.org.

    An instance holds a ``com.sun.star.frame.Desktop`` obtained from an
    OpenOffice.org process listening on a local socket, and uses it to load
    a document and store it again in another format.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance on ``localhost:port``.

        Raises:
            DocumentConversionException: if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load ``inputFile`` and store it as ``outputFile``.

        The import filter is chosen from IMPORT_FILTER_MAP by the input
        extension (if listed there); the export filter is chosen from
        EXPORT_FILTER_MAP by the output extension and the document family.

        Raises:
            DocumentConversionException: if the document cannot be loaded,
                its family is unknown, the output format is unknown, or the
                conversion is unsupported for that family.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        loadProperties = {"Hidden": True}
        inputExt = self._getFileExt(inputFile)
        if inputExt in IMPORT_FILTER_MAP:
            loadProperties.update(IMPORT_FILTER_MAP[inputExt])

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(loadProperties))
        if document is None:
            # Report a failed load as a conversion error rather than letting
            # it surface later as an AttributeError on None.
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under the finally clause so
        # the document is closed even when family detection or filter lookup
        # fails, not only when storing fails.
        try:
            try:
                document.refresh()
            except AttributeError:
                # The loaded document has no refresh(); nothing to do.
                pass

            family = self._detectFamily(document)
            self._overridePageStyleProperties(document, family)

            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(storeProperties))
        finally:
            document.close(True)

    def _overridePageStyleProperties(self, document, family):
        """Apply PAGE_STYLE_OVERRIDE_PROPERTIES[family] to every page style.

        Does nothing when ``family`` has no entry in the override map.
        """
        if family not in PAGE_STYLE_OVERRIDE_PROPERTIES:
            return
        properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
        pageStyles = document.getStyleFamilies().getByName('PageStyles')
        for styleName in pageStyles.getElementNames():
            pageStyle = pageStyles.getByName(styleName)
            for name, value in properties.items():
                pageStyle.setPropertyValue(name, value)

    def _getStoreProperties(self, document, outputExt):
        """Return the store-properties dict for ``document`` and ``outputExt``.

        Raises:
            DocumentConversionException: if ``outputExt`` is not a key of
                EXPORT_FILTER_MAP, or the document's family has no entry
                under that extension.
        """
        family = self._detectFamily(document)
        try:
            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return propertiesByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* label for the first service ``document`` supports.

        Raises:
            DocumentConversionException: if none of the known services match.
        """
        # Checked in this order; the web service is tested before the generic
        # text service. NOTE(review): presumably web documents also report
        # the generic text service - confirm before reordering.
        serviceFamilies = (
            ("com.sun.star.text.WebDocument", FAMILY_WEB),
            # must be TextDocument or GlobalDocument
            ("com.sun.star.text.GenericTextDocument", FAMILY_TEXT),
            ("com.sun.star.sheet.SpreadsheetDocument", FAMILY_SPREADSHEET),
            ("com.sun.star.presentation.PresentationDocument", FAMILY_PRESENTATION),
            ("com.sun.star.drawing.DrawingDocument", FAMILY_DRAWING),
        )
        for serviceName, family in serviceFamilies:
            if document.supportsService(serviceName):
                return family
        raise DocumentConversionException(
            "unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the extension of ``path`` in lower case, without the dot.

        Returns an empty string when ``path`` has no extension.
        """
        # splitext always returns a string for the extension (possibly empty),
        # so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, dict):
        """Return a tuple of PropertyValue objects, one per item of ``dict``."""
        # The parameter name shadows the builtin; it is kept unchanged so the
        # method signature stays the same.
        props = []
        for key, value in dict.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)
# Command-line entry point: python <script> <input-file> <output-file>
# Exit status: 255 on wrong usage, 1 on a missing input file or a failed
# conversion.
if __name__ == "__main__":
    from sys import argv, exit

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    # print(...) with a single argument and "except ... as" are valid in both
    # Python 2.6+ and Python 3, unlike the statement forms used previously.
    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment