# Created July 24, 2015 14:44
# Convert a word (doc/docx) file to markdown
#!/usr/bin/env python
"""Convert a word (doc/docx) file to markdown"""
import sys
import os
import subprocess
# Path to the LibreOffice command-line binary.  A bare '/Applications/'
# directory cannot be executed, so point at the soffice executable inside the
# standard macOS application bundle; adjust this for other install locations.
SOFFICE = r'/Applications/LibreOffice.app/Contents/MacOS/soffice'
# Pandoc executable; given as a bare name so it is looked up on PATH.
PANDOC = r'pandoc'
def convert(infile, outfile):
    """Convert the given infile to the given outfile in markdown format, via
    LibreOffice and Pandoc.

    The conversion runs in two steps: LibreOffice (SOFFICE) first exports
    ``infile`` to an HTML file next to it, then Pandoc (PANDOC) turns that
    HTML file into markdown written to ``outfile``.  The intermediate HTML
    file is left on disk.

    Args:
        infile: Path of the word (doc/docx) document to convert.
        outfile: Path of the markdown file to create.

    Raises:
        subprocess.CalledProcessError: If either external tool exits with a
            non-zero status.
        OSError: If either executable cannot be started.
    """
    root, __ = os.path.splitext(infile)
    htmlfile = root + ".html"
    # LibreOffice writes converted files to the current working directory
    # unless --outdir is given, so direct it to the directory where htmlfile
    # is expected to appear.
    outdir = os.path.dirname(htmlfile) or os.curdir
    # check_call stops the pipeline if LibreOffice fails, rather than running
    # Pandoc on an HTML file that was never produced.
    subprocess.check_call([SOFFICE, '--invisible', '--convert-to',
                           'html', '--outdir', outdir, infile])
    subprocess.check_call([PANDOC, '-f', 'html', '-t', 'markdown',
                           '-o', outfile, htmlfile])
def main(argv=None):
    """Command-line entry point: convert DOCFILE to a markdown file.

    The output path is the input path with its extension replaced by
    ``.md``.

    Args:
        argv: Full argument list including the program name at index 0.
            Defaults to ``sys.argv`` when None.

    Raises:
        SystemExit: Raised through ``OptionParser.error`` when no DOCFILE
            argument is supplied.
    """
    from optparse import OptionParser

    if argv is None:
        argv = sys.argv

    arg_parser = OptionParser(
        usage="usage: %prog DOCFILE",
        description=__doc__)
    # No options are defined, so only the positional arguments matter.
    __, args = arg_parser.parse_args(argv)

    # args[0] is the program name (argv is passed unsliced), so the document
    # path is the second positional argument.
    try:
        docfile = args[1]
    except IndexError:
        arg_parser.error("must give DOCFILE")

    root, __ = os.path.splitext(docfile)
    mdfile = root + ".md"
    convert(docfile, mdfile)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())