Skip to content

Instantly share code, notes, and snippets.

@pshriwise
Created May 13, 2016 18:27
Show Gist options
  • Save pshriwise/c13472bedaf9e2f7a7da366420da1713 to your computer and use it in GitHub Desktop.
Save pshriwise/c13472bedaf9e2f7a7da366420da1713 to your computer and use it in GitHub Desktop.
A Python script for scraping the TOC from a TeX file with color values gathered from metadata hidden in LaTeX comments.
import os
import re
import subprocess
import traceback

from pyPdf import PdfFileReader
# CSS colour used to render a TOC entry, keyed by the lower-cased status
# value attached to that entry ("no status" is used when none is given).
status_colors = {
    "done": "green",
    "in progress": "darkorange",
    "pending": "red",
    "no status": "black",
}
class html_generator:
    """Tiny helper that assembles HTML fragments as plain strings.

    Element contents are inserted verbatim (they may themselves be HTML
    produced by this class); only attribute values are escaped.
    """

    def __init__(self):
        pass

    def open_tag(self, tag_type, attribs=None):
        """Return the opening tag ``<tag_type ...>``.

        ``attribs`` is an optional dict of attribute name -> value.
        """
        assert isinstance(tag_type, str)
        attribs = "" if attribs is None else self.attribs_to_str(attribs)
        return "<" + tag_type + attribs + ">"

    def close_tag(self, tag_type):
        """Return the closing tag ``</tag_type>``.

        Every tag except ``span`` is followed by a newline so that inline
        spans do not break the line they sit on.
        """
        assert isinstance(tag_type, str)
        val = "</" + tag_type + ">"
        val += "" if tag_type == "span" else "\n"
        return val

    def attribs_to_str(self, attribs):
        """Render a dict of attributes as `` name="value"`` pairs.

        Values are double-quoted, with ``&`` and ``"`` escaped, so values
        containing spaces or quotes cannot spill into neighbouring
        attributes.  Each pair is preceded by a single space.
        """
        assert isinstance(attribs, dict)
        parts = []
        for key, value in attribs.items():
            # Escape '&' first so the entities added for '"' stay intact.
            escaped = str(value).replace("&", "&amp;").replace('"', "&quot;")
            parts.append(' %s="%s"' % (key, escaped))
        return "".join(parts)

    def paragraph(self, contents, attribs=None):
        """Wrap ``contents`` in a ``<p>`` element."""
        return self.open_tag("p", attribs) + contents + self.close_tag("p")

    def lineitem(self, contents, attribs=None):
        """Wrap ``contents`` in an ``<li>`` element."""
        return self.open_tag("li", attribs) + contents + self.close_tag("li")

    def span(self, contents, attribs=None):
        """Wrap ``contents`` in a ``<span>`` element (no trailing newline)."""
        return self.open_tag("span", attribs) + contents + self.close_tag("span")

    def unorderedlist(self, contents, attribs=None):
        """Wrap ``contents`` in a ``<ul>`` element padded with newlines."""
        return (self.open_tag("ul", attribs) + "\n" + contents
                + self.close_tag("ul") + "\n")

    def color_attrib(self, color):
        """Return an attribute dict that sets the CSS text colour."""
        return {'style': "color:" + color + ";"}
def generate_html():
    """Refresh the prelim document, convert Markdown files, build the report.

    Steps:
      1. Run ``update_prelim.sh`` through bash (best effort: a failure to
         launch it is reported and otherwise ignored).
      2. Run the ``markdown`` command on every ``*.md`` file in the current
         directory, writing its stdout to the matching ``*.html`` file.
      3. Regenerate the status report via ``prelim_status_report()``.
    """
    try:
        subprocess.call(["bash", "update_prelim.sh"])
    except Exception:
        # Best effort only; ``Exception`` (rather than a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        traceback.print_exc()
        print("Could not update prelim doc.")

    for name in os.listdir("./"):
        # Match on the real extension so names such as "notes.md.bak"
        # are not treated as Markdown sources.
        if not name.endswith(".md"):
            continue
        html_name = os.path.splitext(name)[0] + ".html"
        # The context manager closes the output even if the call raises.
        with open(html_name, 'wb') as out:
            subprocess.call(["markdown", name], stdout=out)

    prelim_status_report()
def status_header(f):
    """Write the report title and the colour legend to the file object ``f``.

    The title and one paragraph per legend entry are assembled first and
    emitted with a single ``f.write`` call.
    """
    gen = html_generator()
    # (label shown, CSS colour of the label, explanatory text)
    legend = (
        ("Green", "green", " - complete (for now)"),
        ("Orange", "darkorange", " - in progress"),
        ("Red", "red", " - haven't begun"),
    )
    pieces = [gen.paragraph("Prelim Table of Contents & Status Report")]
    for label, color, meaning in legend:
        swatch = gen.span(label, attribs=gen.color_attrib(color))
        pieces.append(gen.paragraph(swatch + meaning))
    f.write("".join(pieces))
def status_footer(f):
    """Append a paragraph with the page count of ``prelim.pdf`` to ``f``.

    If the PDF cannot be opened or parsed, the traceback is printed and a
    fallback paragraph is written instead, so the report is still produced.
    """
    try:
        # The context manager guarantees the PDF handle is closed.
        with open('prelim.pdf', 'rb') as pdf:
            pages = PdfFileReader(pdf).getNumPages()
    except Exception:
        traceback.print_exc()
        f.write("<p>Number of Pages: Could not read pdf file </p>")
    else:
        # Written outside the try so a failure writing the report itself is
        # not misreported as an unreadable PDF.
        f.write("<p>Number of Pages: " + str(pages) + "</p>")
def html_from_tex():
    """Read ``prelim.tex`` and build its HTML table of contents.

    Returns whatever ``gen_html_toc`` returns for the ``section`` level: a
    ``(html, line)`` tuple whose first element is the generated markup.
    """
    # Text mode so the lines are ``str`` and can be searched with the
    # ``str`` keys gen_html_toc uses; ``with`` closes the file on error too.
    with open("prelim.tex", 'r') as f:
        lines = f.readlines()
    # None is the end-of-file sentinel gen_html_toc looks for.
    lines.append(None)
    return gen_html_toc(lines, "section")
def _braced_text(text):
    """Return the contents of the first ``{...}`` group in ``text``.

    Nested braces are balanced and backslash-escaped characters (such as
    ``\\{``) are skipped.  Returns ``""`` when there is no opening brace and
    the right-stripped remainder when the group is never closed.
    """
    start = text.find("{")
    if start == -1:
        return ""
    depth = 0
    pos = start
    while pos < len(text):
        ch = text[pos]
        if ch == "\\":
            # Skip the escaped character so "\{" / "\}" are not counted.
            pos += 2
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start + 1:pos]
        pos += 1
    return text[start + 1:].rstrip()


def parse_line(l):
    """Extract the title and status from a sectioning line of a TeX file.

    The title is the text of the first brace group on the line.  The status
    is read from a ``%%Status:<status_val>%%`` marker; surrounding
    whitespace is ignored.  When the marker is absent or empty the status
    is ``"no status"``.

    Returns:
        A ``(title, status)`` tuple of strings.
    """
    title = _braced_text(l)
    # Require the full marker, not just the word "Status", so a title such
    # as "Status of the field" is not mistaken for status metadata.
    match = re.search(r"%%\s*Status\s*:(.*?)%%", l)
    if match is None:
        return title, "no status"
    # Keep only the text after the last colon, as the marker format implies.
    status = match.group(1).split(":")[-1].strip()
    return title, status or "no status"
def gen_html_toc(lines, search_key, return_key=None):
    """Build a nested HTML ``<ul>`` table of contents from TeX lines.

    Consumes ``lines`` from the front (the list is mutated) and emits one
    ``<li>`` per line containing ``\\<search_key>{``.  When a line of the
    next level down is met, the function recurses and nests the resulting
    list inside the current item.

    Args:
        lines: list of text lines, normally terminated by a ``None``
            sentinel; running out of lines is treated the same way.
        search_key: one of "chapter", "section", "subsection",
            "subsubsection" - the level listed by this call.
        return_key: ``None``, a single key string such as ``"\\\\section{"``,
            or a list/tuple of such strings.  Meeting any of them ends this
            list and hands the line back to the caller.

    Returns:
        ``(html, line)`` where ``line`` is the line that ended the list
        (``None`` at end of input).
    """
    # Hierarchy of sectioning commands, outermost first.
    search_keys = ["chapter", "section", "subsection", "subsubsection"]
    assert search_key in search_keys
    level = search_keys.index(search_key)

    # Name and search token of the next level down (None at the bottom).
    down_name = search_keys[level + 1] if level + 1 < len(search_keys) else None
    down_key = None if down_name is None else "\\" + down_name + "{"
    search_key = "\\" + search_key + "{"

    # Normalise return_key to a tuple so every enclosing level can be
    # checked, not just the immediate parent.
    if return_key is None:
        return_keys = ()
    elif isinstance(return_key, (list, tuple)):
        return_keys = tuple(return_key)
    else:
        return_keys = (return_key,)

    h = html_generator()
    fallback_color = status_colors["no status"]
    html_out = "\n" + h.open_tag("ul") + "\n"
    item_open = False  # True once an <li> has been opened at this level

    while True:
        # An exhausted list is treated like the None end-of-file sentinel.
        line = lines.pop(0) if lines else None

        if down_key is not None and line is not None and down_key in line:
            # Put the line back so the recursive call sees it as its first item.
            lines.insert(0, line)
            # The child must stop at this level's key and at every key this
            # call itself stops at; otherwise e.g. a \section following a
            # \subsubsection would be swallowed by the innermost list.
            out, line = gen_html_toc(lines, down_name,
                                     return_key=(search_key,) + return_keys)
            html_out += out

        if line is None or any(key in line for key in return_keys):
            # Close the pending item (if any) and this list, then hand the
            # terminating line back to the caller.
            if item_open:
                html_out += h.close_tag("li")
            html_out += h.close_tag("ul")
            return html_out, line

        if search_key in line:
            # Close the previous item at this level before opening a new one.
            if item_open:
                html_out += h.close_tag("li")
            item_open = True
            title, status = parse_line(line)
            # Unknown status values fall back to the "no status" colour
            # instead of aborting the whole report.
            color = status_colors.get(status.strip().lower(), fallback_color)
            html_out += h.open_tag("li") + h.span(title, attribs=h.color_attrib(color))
def prelim_status_report():
    """Write the status report to ``prelim_status.html``.

    The report consists of the header/legend, the table of contents
    generated from ``prelim.tex`` and a footer with the PDF page count.
    Progress messages are printed to stdout; "Done" is printed only after
    the file has been completely written and closed.
    """
    print("Opening file for writing...")
    # The context manager closes the report even if a step below raises.
    with open("prelim_status.html", 'w') as report_file:
        print("Generating html and writing to file...")
        status_header(report_file)
        # html_from_tex returns (html, terminating_line); only the markup
        # is needed here.
        html = html_from_tex()
        report_file.write(html[0])
        # write num pages to html
        status_footer(report_file)
    print("Done")
# Script entry point: build the status report when run directly.
if __name__ == "__main__":
    prelim_status_report()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment