Created
July 24, 2015 14:44
-
-
Save goerz/8897c2d8a602af2a45d4 to your computer and use it in GitHub Desktop.
Convert a word (doc/docx) file to markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Convert a word (doc/docx) file to markdown"""
import sys
import os
import subprocess

# Path of the LibreOffice command-line executable (default macOS install
# location); adjust it when LibreOffice lives elsewhere.
SOFFICE = r'/Applications/LibreOffice.app/Contents/MacOS/soffice'
# Name of the Pandoc executable, resolved through PATH.
PANDOC = r'pandoc'
def convert(infile, outfile):
    """Convert the given infile to the given outfile in markdown format, via
    LibreOffice and Pandoc.

    LibreOffice first renders `infile` to an intermediate HTML file placed
    next to `infile`; Pandoc then translates that HTML into markdown at
    `outfile`. The intermediate HTML file is removed afterwards, even when
    the Pandoc step fails.

    Args:
        infile: Path of the doc/docx file to convert.
        outfile: Path of the markdown file to write.

    Raises:
        subprocess.CalledProcessError: If LibreOffice or Pandoc exits with a
            non-zero status.
        OSError: If one of the executables cannot be started.
    """
    root, __ = os.path.splitext(infile)
    htmlfile = root + ".html"
    # soffice writes converted files to the current working directory unless
    # --outdir is given. Point it at the directory that holds `infile` so the
    # HTML really ends up at `htmlfile`, wherever the script is run from.
    outdir = os.path.dirname(infile) or os.curdir
    subprocess.check_call([SOFFICE, '--invisible', '--convert-to', 'html',
                           '--outdir', outdir, infile])
    try:
        subprocess.check_call([PANDOC, '-f', 'html', '-t', 'markdown',
                               '-o', outfile, htmlfile])
    finally:
        # Clean up the intermediate file whether or not Pandoc succeeded; the
        # existence check keeps a missing file from masking Pandoc's error.
        if os.path.exists(htmlfile):
            os.remove(htmlfile)
def main(argv=None):
    """Command-line entry point: convert DOCFILE to a markdown file.

    The markdown file is written next to DOCFILE, with DOCFILE's extension
    replaced by ``.md``.

    Args:
        argv: Full argument vector, including the program name at index 0.
            Defaults to ``sys.argv``.

    Returns:
        None. When DOCFILE is missing, the option parser's ``error`` method
        prints a usage message and exits the process.
    """
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv
    arg_parser = OptionParser(
        usage="usage: %prog DOCFILE",
        description=__doc__)
    # The program name is still part of `argv`, so it comes back as args[0]
    # and DOCFILE is expected at args[1].
    _options, args = arg_parser.parse_args(argv)
    if len(args) < 2:
        arg_parser.error("must give DOCFILE")
    docfile = args[1]
    root, __ = os.path.splitext(docfile)
    mdfile = root + ".md"
    convert(docfile, mdfile)
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment