Created
September 27, 2011 01:49
-
-
Save cflewis/1244092 to your computer and use it in GitHub Desktop.
Docgen, a silly personal script to handle pre- and post-processing of Pandoc files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import re | |
import subprocess | |
import tempfile | |
import sys | |
import os | |
class Processor(object):
    """Base class holding the small file helpers used by document processors."""

    def __init__(self):
        pass

    def get_file_lines(self, file_name):
        """Return the contents of ``file_name`` as a list of lines.

        The lines are returned exactly as ``readlines()`` yields them, so
        each one keeps its trailing newline.
        """
        # ``with`` closes the handle even when reading raises.
        with open(file_name, 'r') as file_handle:
            return file_handle.readlines()

    def write_temp_file(self, lines, file_name='docgen'):
        """Write ``lines`` to ``file_name`` and return the file's name.

        ``lines`` is passed straight to ``writelines()``, so it may be a
        list of strings or a single string; no newlines are added. Any
        existing file of that name is overwritten.
        """
        # ``with`` closes (and flushes) the handle even when writing raises.
        with open(file_name, 'w') as f:
            f.writelines(lines)
        return f.name
class LaTeXProcessor(Processor):
    """Turn a Pandoc source file into a PDF using pandoc, pdflatex and bibtex."""

    # Template handed to pandoc when the caller does not supply one.
    DEFAULT_TEMPLATE = '/Users/cflewis/Library/Pandoc/acm.tex'

    # Matches a Pandoc citation such as ``[@key]``. The key may not contain
    # whitespace or ``]``, so two adjacent citations are matched separately
    # instead of being swallowed as one key.
    CITATION_PATTERN = re.compile(r'\[@([^\s\]]+)\]')

    def output(self, input_file_name):
        """Build a PDF from ``input_file_name``.

        The job name is the input file name without its extension. The
        pandoc output is written to ``<jobname>.tex``, then pdflatex, bibtex
        and pdflatex (twice more) are run on it. The output of the last two
        pdflatex runs is printed.
        """
        # Use the argument that was passed in; do not re-read sys.argv here.
        jobname = os.path.splitext(input_file_name)[0]
        formatted_input = self.format_citations(
            self.get_file_lines(input_file_name))
        pandoc_output = self.get_pandoc_output(formatted_input)

        # Output a temp file, even though this is a pain, it seems to be
        # the only way to make pdflatex see the output correctly.
        # Having the output written out is useful anyway.
        temp_file_name = self.write_temp_file(pandoc_output,
                                              file_name=jobname + '.tex')

        self.run_pdflatex(temp_file_name, jobname=jobname)
        self.run_bibtex(jobname=jobname)

        # Rerun pdflatex to fix bibliography citations: once for it to
        # notice the citations, once for it to get the labelling right.
        for _ in range(2):
            print(self.run_pdflatex(temp_file_name, jobname=jobname))

    def format_citations(self, lines):
        r"""Return lines with Pandoc citations translated to LaTeX ones.

        Note: the doubled backslashes below are how Python's interpreter
        displays a single backslash; the actual output contains one
        ``\cite`` command per citation.

        >>> LaTeXProcessor().format_citations(['foo [@bar]'])
        ['foo \\cite{bar}']
        >>> LaTeXProcessor().format_citations(['foo', '[@bar] baz'])
        ['foo', '\\cite{bar} baz']

        It should also work with more complicated citations.

        >>> LaTeXProcessor().format_citations(['[@foo:bar]'])
        ['\\cite{foo:bar}']

        Adjacent citations are translated one by one.

        >>> LaTeXProcessor().format_citations(['[@foo][@bar]'])
        ['\\cite{foo}\\cite{bar}']
        """
        # In the replacement template ``\\`` is a literal backslash and
        # ``\g<1>`` is the captured citation key.
        return [self.CITATION_PATTERN.sub(r'\\cite{\g<1>}', line)
                for line in lines]

    def run_text_processor(self, execution_method, stdin=None):
        """Run the command ``execution_method`` and return its standard output.

        ``execution_method`` is an argument list for ``subprocess.Popen``.
        ``stdin``, when given and non-empty, is an iterable of strings that
        is joined and fed to the command's standard input. Anything the
        command writes to standard error is printed with a
        "Text processor error: " prefix.
        """
        # universal_newlines=True makes the pipes carry text rather than
        # bytes on Python 3, so the join and concatenation below work there.
        processor = subprocess.Popen(execution_method,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)
        text_input = ''.join(stdin) if stdin else None
        output, errors = processor.communicate(input=text_input)

        if errors:
            print("Text processor error: " + errors)

        return output

    def get_pandoc_output(self, lines, template=None):
        """Return pandoc's standalone LaTeX rendering of ``lines``.

        ``template`` is the pandoc template file to use; it defaults to
        ``DEFAULT_TEMPLATE``.
        """
        if template is None:
            template = self.DEFAULT_TEMPLATE
        return self.run_text_processor(['pandoc',
                                        '-s', '-t', 'latex',
                                        '--template=' + template],
                                       stdin=lines)

    def run_pdflatex(self, file_name, jobname='docgen'):
        """Run pdflatex in batch mode on ``file_name`` and return its output."""
        return self.run_text_processor(['pdflatex',
                                        '--interaction=batchmode',
                                        '--jobname=' + jobname,
                                        file_name])

    def run_bibtex(self, jobname='docgen'):
        """Run bibtex on ``<jobname>.aux`` and print what it reports.

        A non-zero exit status from bibtex is reported with a
        "BibTeX error: " prefix instead of being raised.
        """
        try:
            print(subprocess.check_output(['bibtex', jobname + '.aux'],
                                          universal_newlines=True))
        except subprocess.CalledProcessError as e:
            # The exception must be converted explicitly; concatenating it
            # to a string directly raises TypeError.
            print("BibTeX error: " + str(e))
            if e.output:
                # check_output stores the captured standard output on the
                # exception; show it so the failure can be diagnosed.
                print(e.output)
if __name__ == '__main__':
    # Only run when executed as a script, so that importing this module
    # does not start a document build.
    if len(sys.argv) < 2:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit('usage: ' + sys.argv[0] + ' INPUT_FILE')

    latex = LaTeXProcessor()
    latex.output(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment