Skip to content

Instantly share code, notes, and snippets.

@johnbeard
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnbeard/1574e0f4e451c9869e5e to your computer and use it in GitHub Desktop.
Save johnbeard/1574e0f4e451c9869e5e to your computer and use it in GitHub Desktop.
ODT to MD processor
import base64
import hashlib
import md5
import os
import re
import subprocess
import sys
import zipfile
def get_image_data_from_odt(odt, zipdir="/tmp/ktmp"):
    """
    Unpack an ODT file and index its embedded pictures by content hash.

    Takes path to ODT file and, optionally, the directory the archive is
    extracted into (defaults to "/tmp/ktmp")
    Returns dict mapping md5(base64(image bytes)) hex digest -> path of the
    extracted image file; empty if the document has no "Pictures" directory
    """
    # NOTE: single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.

    # An ODT is a zip archive; close it as soon as it has been unpacked.
    with zipfile.ZipFile(odt) as zf:
        zf.extractall(zipdir)
    print("Unzipped %s" % odt)

    image_dir = os.path.join(zipdir, "Pictures")
    images = {}

    # Documents without embedded pictures have no "Pictures" directory.
    if not os.path.isdir(image_dir):
        print("No image directory found: %s" % image_dir)
        return images

    print("Collecting base64 of images in: %s" % image_dir)
    for image in os.listdir(image_dir):
        path = os.path.join(image_dir, image)
        if not os.path.isfile(path):
            # Only regular files can be read and hashed.
            continue
        print("\t%s" % image)

        # Images are binary: read them as bytes so the base64 text (and
        # therefore the hash) is computed over the file's exact content.
        with open(path, 'rb') as image_file:
            b64 = base64.b64encode(image_file.read())

        # The key is the md5 of the base64 *text*, not of the raw bytes.
        hsh = hashlib.md5(b64).hexdigest()
        print("\t\t%s" % hsh)
        images[hsh] = path

    print("Base64 hash -> image path pairs collected")
    return images
def generate_md_from_odt(odt):
    """
    Takes path to ODT file
    Returns path to markdown file

    The document is first converted to HTML with LibreOffice, then the HTML
    is converted to markdown with pandoc. The markdown file is written next
    to the ODT file (same name, ".md" extension); the HTML intermediate is
    removed afterwards.

    Raises subprocess.CalledProcessError if libreoffice or pandoc exits with
    a non-zero status, and OSError if either executable cannot be started.
    """
    # NOTE: single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print("Converting to HTML")
    root = os.path.splitext(odt)[0]
    print(root)

    tmpdir = "/tmp"
    # Dedicated profile directory passed via -env:UserInstallation
    # (presumably so the headless run does not clash with an already
    # running LibreOffice instance).
    conversion_profile = os.path.join(tmpdir, "LibO_Conversion")

    cmd = ["libreoffice",
           "-env:UserInstallation=file://%s" % conversion_profile,
           "--headless",
           "--convert-to", "html",
           "--outdir", tmpdir,
           odt]
    print(cmd)
    # check_call: stop here if the conversion failed instead of failing
    # later on a missing HTML file.
    subprocess.check_call(cmd)

    # LibreOffice writes "<basename>.html" into --outdir, which is not
    # necessarily the directory the ODT file lives in.
    html = os.path.join(tmpdir, os.path.basename(root) + ".html")
    md = root + ".md"

    print("Converting from HTML to markdown")
    cmd = ["pandoc", html, "-t", "markdown", "-o", md]
    subprocess.check_call(cmd)

    print("Removing HTML intermediate")
    os.remove(html)

    return md
def sub_images(md, images):
    """
    replace base64 encode images with links to real images here.

    Takes path to the markdown file (rewritten in place) and a dict mapping
    md5(base64 text) hex digest -> image path
    Returns nothing

    Data URIs whose hash is not in the dict are left untouched.

    Warning: may contain ugly hacks
    """
    def b64_replace(match):
        """
        Replace a base64 blob with a matching image if found
        """
        b64 = match.group(1)
        # Hash the base64 text itself; it is pure ASCII, so encoding it
        # yields the same bytes on Python 2 and Python 3.
        hsh = hashlib.md5(b64.encode("ascii")).hexdigest()

        if hsh in images:
            print("\tImage %s -> %s" % (hsh, images[hsh]))
            return "(%s)" % images[hsh]

        # re.sub requires a string from the callback: hand back the
        # original text unchanged when no image matches.
        return match.group(0)

    print("Replacing images in %s" % md)

    with open(md, 'r') as inf:
        original = inf.read()

    # Matches "(data:image/<subtype>;base64,<blob>)" for any image subtype
    # (png, jpeg, gif, ...); group 1 is the base64 blob.
    mdtext = re.sub(
        r"\(data:image/[A-Za-z0-9.+-]+;base64,([A-Za-z0-9+/]+=*)\)",
        b64_replace, original)

    with open(md, 'w') as outf:
        outf.write(mdtext)
if __name__ == "__main__":
    # Usage: script.py [path/to/document.odt]
    # Without an argument the original sample document path is used.
    # The module-level name INFILE is kept because other code in this file
    # may read it.
    INFILE = sys.argv[1] if len(sys.argv) > 1 else "/tmp/kicad_EN.odt"

    # 1. Index the pictures stored inside the ODT by content hash.
    odtimg = get_image_data_from_odt(INFILE)
    # 2. Convert the document to markdown.
    md_path = generate_md_from_odt(INFILE)
    # 3. Swap the inline base64 images for links to the extracted files.
    sub_images(md_path, odtimg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment