Skip to content

Instantly share code, notes, and snippets.

@cflewis
Created September 27, 2011 01:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cflewis/1244092 to your computer and use it in GitHub Desktop.
Save cflewis/1244092 to your computer and use it in GitHub Desktop.
Docgen, a silly personal script to handle pre- and post-processing of Pandoc files
#! /usr/bin/env python
import re
import subprocess
import tempfile
import sys
import os
class Processor(object):
    """Base class with small file helpers shared by document processors."""

    def __init__(self):
        pass

    def get_file_lines(self, file_name):
        """Return the contents of ``file_name`` as a list of lines.

        Line endings are kept, exactly as ``file.readlines()`` returns them.
        """
        # ``with`` closes the handle even if reading raises part-way through.
        with open(file_name, 'r') as file_handle:
            return file_handle.readlines()

    def write_temp_file(self, lines, file_name='docgen'):
        """Write ``lines`` to ``file_name`` and return the written file's name.

        Despite the method name, this does not use the ``tempfile`` module:
        the file is created (or overwritten) at exactly ``file_name``.
        ``lines`` is handed to ``writelines``, so no line endings are added.
        """
        # ``with`` closes the handle even if writing raises part-way through.
        with open(file_name, 'w') as f:
            f.writelines(lines)
            return f.name
class LaTeXProcessor(Processor):
    """Build a PDF from a Pandoc source file using pandoc, pdflatex and bibtex."""

    # LaTeX template handed to pandoc via ``--template``. Override it on a
    # subclass or an instance to use a different template.
    pandoc_template = '/Users/cflewis/Library/Pandoc/acm.tex'

    # A single-key Pandoc citation such as ``[@key]``. The key may not contain
    # whitespace or ``]``, so adjacent citations (``[@a][@b]``) match separately.
    _CITATION_RE = re.compile(r'\[@([^\]\s]+)\]')

    def output(self, input_file_name):
        """Run the whole pipeline for ``input_file_name``.

        The file's citations are rewritten, the result is piped through
        pandoc, and the LaTeX that comes back is written next to the input
        as ``<jobname>.tex`` (the input name with its extension removed).
        pdflatex, bibtex and then pdflatex twice more are run on that file;
        the output of the last two pdflatex runs is printed.
        """
        # Use the argument that was passed in rather than re-reading sys.argv.
        jobname = os.path.splitext(input_file_name)[0]
        formatted_input = self.format_citations(
            self.get_file_lines(input_file_name))
        pandoc_output = self.get_pandoc_output(formatted_input)
        # pdflatex is given a real file rather than piped input; keeping the
        # generated .tex file around is useful for inspection anyway.
        tex_file_name = self.write_temp_file(pandoc_output,
                                             file_name=jobname + '.tex')
        self.run_pdflatex(tex_file_name, jobname=jobname)
        self.run_bibtex(jobname=jobname)
        # Two more pdflatex passes after bibtex: one to pick up the
        # citations, one to get the labelling right.
        for _ in range(2):
            print(self.run_pdflatex(tex_file_name, jobname=jobname))

    def format_citations(self, lines):
        r"""Return ``lines`` with Pandoc citations translated to LaTeX ones.

        Every ``[@key]`` becomes ``\cite{key}``. The doubled backslashes in
        the examples below are only how Python displays a single backslash.

        >>> LaTeXProcessor().format_citations(['foo [@bar]'])
        ['foo \\cite{bar}']
        >>> LaTeXProcessor().format_citations(['foo', '[@bar] baz'])
        ['foo', '\\cite{bar} baz']

        It also works with more complicated keys.

        >>> LaTeXProcessor().format_citations(['[@foo:bar]'])
        ['\\cite{foo:bar}']
        """
        # The replacement is a raw string with an explicitly escaped
        # backslash, so it does not depend on how ``re`` treats the unknown
        # escape ``\c``.
        return [self._CITATION_RE.sub(r'\\cite{\g<1>}', line) for line in lines]

    def run_text_processor(self, execution_method, stdin=None):
        """Run the command ``execution_method`` and return its standard output.

        ``execution_method`` is an argument list for ``subprocess.Popen``.
        ``stdin``, when given and non-empty, is a sequence of strings that is
        joined and fed to the process on standard input. Anything the process
        writes to standard error is printed with a "Text processor error: "
        prefix; the exit status is not checked.
        """
        # universal_newlines=True opens the pipes in text mode so that input
        # and output are plain strings that can be joined and concatenated.
        processor = subprocess.Popen(execution_method, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)
        payload = ''.join(stdin) if stdin else None
        output, errors = processor.communicate(input=payload)
        if errors:
            print("Text processor error: " + errors)
        return output

    def get_pandoc_output(self, lines):
        """Feed ``lines`` to pandoc and return the standalone LaTeX it emits."""
        return self.run_text_processor(['pandoc',
                                        '-s', '-t', 'latex',
                                        '--template=' + self.pandoc_template],
                                       stdin=lines)

    def run_pdflatex(self, file_name, jobname='docgen'):
        """Run pdflatex in batch mode on ``file_name`` and return its output."""
        return self.run_text_processor(['pdflatex',
                                        '--interaction=batchmode',
                                        '--jobname=' + jobname,
                                        file_name])

    def run_bibtex(self, jobname='docgen'):
        """Run bibtex on ``<jobname>.aux`` and print what it outputs.

        A non-zero exit status is reported as a "BibTeX error: " message
        instead of being raised.
        """
        try:
            print(subprocess.check_output(['bibtex', jobname + '.aux'],
                                          universal_newlines=True))
        except subprocess.CalledProcessError as e:
            # str() is required: a string cannot be concatenated with an
            # exception object.
            print("BibTeX error: " + str(e))
# Script entry point: build the document named by the first command-line
# argument.
latex = LaTeXProcessor()
input_path = sys.argv[1]
latex.output(input_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment