Skip to content

Instantly share code, notes, and snippets.

@xziyue
Created December 11, 2019 17:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xziyue/b0b77842f54f3fcb029a02333fdd3602 to your computer and use it in GitHub Desktop.
Save xziyue/b0b77842f54f3fcb029a02333fdd3602 to your computer and use it in GitHub Desktop.
Converting colored LaTeX listing to HTML
from TexSoup.data import TexNode, TexEnv, TexCmd, RArg
from TexSoup import TexSoup
import re
import copy
import html as htmlib
class Tex2HTML:
    """Convert a colored LaTeX ``lstlisting`` into HTML markup.

    The conversion runs in three passes:

    1. ``_process_latex_1`` walks the parsed document, records every
       ``\\definecolor`` and hands the text of each ``lstlisting``
       environment to pass 2.
    2. ``_process_latex_2`` splits the listing text into plain segments and
       escaped segments (delimited by ``%*`` and ``*)``).
    3. ``_process_latex_3`` interprets the LaTeX inside an escaped segment
       (``\\color``, ``\\colorbox``, ``\\bfseries``, ...) and emits styled
       entities.

    An *entity* is a dict with a ``'data'`` string plus the optional style
    keys ``'color'``, ``'bg-color'`` and ``'bold'``; ``_result_to_html``
    renders a list of entities to HTML.

    Attributes:
        colorDef: maps the first argument of each ``\\definecolor`` seen so
            far to its third argument (used as the HTML color value).
        env: initialised to an empty list; not read anywhere in this class.
        htmlResult: HTML of the most recently converted listing, or ``None``
            when no listing has been converted yet.
        groupTemplate: style entity currently in effect while an escaped
            segment is being interpreted.
    """

    # One escaped region inside a listing: "%*" <latex> "*)".  A raw string
    # is required here, "\*" and "\)" are not valid string escapes.
    _ESCAPE_RE = re.compile(r'%\*(.*?)\*\)')

    # Wrapper document used by convert_for_website.  The "%s" lines carry no
    # indentation on purpose: the HTML one sits inside a <pre> element.
    _DOCUMENT_TEMPLATE = (
        '\n'
        '\\begin{document}\n'
        '\\lstconsolestyle\n'
        '%s\n'
        '\\end{document}\n'
    )
    _HTML_TEMPLATE = (
        '\n'
        '<pre class="console-pre">\n'
        '%s\n'
        '</pre>\n'
    )

    def __init__(self):
        self.colorDef = dict()
        self.env = []
        # Defined up front so convert() can tell "no listing found" apart
        # from a real result instead of failing with AttributeError.
        self.htmlResult = None
        self.groupTemplate = self._get_default_entity()

    def _get_obj_type(self, node):
        """Classify an item obtained while iterating a parsed TeX tree.

        Returns:
            ``'comment'`` for a plain ``str``, ``'group'`` for an ``RArg``,
            and for a ``TexNode`` one of ``'cmd'`` (expression is a
            ``TexCmd``), ``'env'`` (``TexEnv``) or ``'text'`` (anything
            else).  ``None`` for any other object.
        """
        if isinstance(node, str):
            return 'comment'
        if isinstance(node, RArg):
            return 'group'
        if isinstance(node, TexNode):
            expr = node.expr
            if isinstance(expr, TexCmd):
                return 'cmd'
            if isinstance(expr, TexEnv):
                return 'env'
            return 'text'
        return None

    def _get_default_entity(self):
        """Return a fresh, unstyled entity (``'data'`` is ``None``)."""
        return {
            'data': None
        }

    def _process_latex_1(self, node):
        """Pass 1: collect color definitions and convert listings.

        Recurses into every environment other than ``lstlisting`` and into
        every brace group.  Each ``lstlisting`` found overwrites
        ``self.htmlResult``, so the last one in the document wins.
        """
        for obj in node:
            objType = self._get_obj_type(obj)
            if objType == 'cmd':
                if obj.name == 'definecolor':
                    # presumably \definecolor{name}{HTML}{RRGGBB}; only the
                    # first and third arguments are used.
                    latexColorName = obj.args[0]
                    htmlColor = obj.args[2]
                    self.colorDef[latexColorName] = htmlColor
            elif objType == 'env':
                if obj.expr.begin == '\\begin{lstlisting}':
                    self.htmlResult = self._process_latex_2(''.join(obj.text))
                else:
                    self._process_latex_1(obj)
            elif objType == 'group':
                self._process_latex_1(TexSoup(obj.value))

    def _split_escapes(self, lstData):
        """Split listing text into plain and escaped segments.

        Returns:
            A list of ``(isEscape, text)`` tuples in source order.  For an
            escaped segment ``text`` is the content between ``%*`` and
            ``*)``; empty plain segments are not reported.
        """
        segments = []
        cursor = 0
        for match in self._ESCAPE_RE.finditer(lstData):
            if match.start() > cursor:
                segments.append((False, lstData[cursor:match.start()]))
            segments.append((True, match.group(1)))
            cursor = match.end()
        if cursor < len(lstData):
            segments.append((False, lstData[cursor:]))
        return segments

    def _process_latex_2(self, lstData):
        """Pass 2: turn the raw text of a listing into HTML.

        Plain segments become unstyled entities; escaped segments are parsed
        as LaTeX and interpreted by ``_process_latex_3``, starting from a
        fresh style template.  Entities whose data is empty are dropped
        before rendering.
        """
        result = []
        # Segments are derived from the matches themselves, so an escape
        # that spans the whole listing is still interpreted as an escape.
        for isEscape, text in self._split_escapes(lstData):
            if isEscape:
                newSoup = TexSoup(text)
                self.groupTemplate = self._get_default_entity()
                self._process_latex_3(newSoup, result)
            else:
                entity = self._get_default_entity()
                entity['data'] = text
                result.append(entity)
        return self._result_to_html([e for e in result if len(e['data']) > 0])

    def _process_nested(self, texSource, result, style=None):
        """Interpret ``texSource`` in a style scope of its own.

        ``style`` entries are applied to the current template for the
        duration of the nested run; afterwards the template is reset to a
        copy taken before they were applied.
        """
        oldGroupTemplate = copy.copy(self.groupTemplate)
        if style:
            self.groupTemplate.update(style)
        self._process_latex_3(TexSoup(texSource), result)
        self.groupTemplate = oldGroupTemplate

    def _emit(self, result, data):
        """Append an entity carrying the current style and ``data``."""
        entity = copy.copy(self.groupTemplate)
        entity['data'] = data
        result.append(entity)

    def _process_latex_3(self, node, result):
        """Pass 3: interpret the LaTeX of an escaped segment.

        Appends styled entities to ``result``.  Commands other than the ones
        handled below are ignored.

        Raises:
            KeyError: a color name was used that has no ``\\definecolor``.
            NotImplementedError: an environment appears inside an escape.
        """
        for obj in node:
            objType = self._get_obj_type(obj)
            if objType == 'group':
                # Style changes made inside {...} end with the group.
                self._process_nested(obj.value, result)
            elif objType == 'cmd':
                name = obj.name
                if name == 'color':
                    color = self.colorDef[obj.args[0]]
                    if len(obj.args) == 2:
                        # A second argument is treated as the text to color.
                        self._process_nested(obj.args[1], result, {'color': color})
                    else:
                        # Declaration form: applies to whatever follows.
                        self.groupTemplate['color'] = color
                elif name == 'bfseries':
                    self.groupTemplate['bold'] = True
                elif name == 'colorbox':
                    bgColor = self.colorDef[obj.args[0]]
                    self._process_nested(obj.args[1], result, {'bg-color': bgColor})
                elif name == 'smash':
                    # Contents are processed in the current scope, so style
                    # changes made inside \smash stay in effect afterwards.
                    self._process_latex_3(TexSoup(obj.args[0]), result)
                elif name == 'space':
                    # NOTE(review): this emits an empty entity, which
                    # _process_latex_2 drops again; confirm that \space is
                    # meant to produce no output.
                    self._emit(result, '')
                elif name == 'unichar':
                    # The argument is a hexadecimal code point, optionally
                    # wrapped in double quotes.
                    charCode = int(obj.args[0].strip('"'), 16)
                    self._emit(result, chr(charCode))
            elif objType == 'env':
                raise NotImplementedError('this is not allowed so far')
            else:
                self._emit(result, str(obj))

    def _result_to_html(self, result):
        """Render a list of entities to an HTML string.

        Background color, foreground color and bold are nested in that order
        (outermost first).  Entity data is HTML-escaped, and so are the color
        values placed in ``style`` attributes.  The first entity of every
        run of consecutive bold entities has its leading whitespace removed.
        Entities are not modified.
        """
        pieces = []
        firstBolded = False
        for entity in result:
            data = entity['data']
            pre = []
            suf = []
            if 'bg-color' in entity:
                bgColor = htmlib.escape(str(entity['bg-color']))
                pre.append('<span style="background-color:#{};">'.format(bgColor))
                suf.insert(0, '</span>')
            if 'color' in entity:
                color = htmlib.escape(str(entity['color']))
                pre.append('<span style="color:#{};">'.format(color))
                suf.insert(0, '</span>')
            if 'bold' in entity:
                pre.append('<b>')
                if not firstBolded:
                    firstBolded = True
                    data = data.lstrip()
                suf.insert(0, '</b>')
            else:
                firstBolded = False
            pieces.append(''.join(pre) + htmlib.escape(data) + ''.join(suf))
        return ''.join(pieces)

    def convert(self, texContent):
        """Convert a complete LaTeX document and return the listing's HTML.

        Color definitions collected by earlier calls on this instance are
        kept; the previous result is discarded.

        Raises:
            ValueError: the document contains no ``lstlisting`` environment.
        """
        self.htmlResult = None  # never hand back a previous call's result
        soup = TexSoup(texContent)
        self._process_latex_1(soup.document)
        if self.htmlResult is None:
            raise ValueError('no lstlisting environment found in the LaTeX input')
        return self.htmlResult

    def convert_for_website(self, texInner):
        """Convert a document body and wrap the HTML in a ``<pre>`` element.

        ``texInner`` is placed inside a ``document`` environment before
        conversion.  Trailing whitespace of the converted HTML is removed.
        """
        texContent = self._DOCUMENT_TEMPLATE % texInner
        html = self.convert(texContent).rstrip()
        htmlContent = self._HTML_TEMPLATE % html
        return htmlContent.lstrip()
if __name__ == '__main__':
    # Script entry point: start the wxPython front end.
    import wx

    class MyFrame(wx.Frame):
        """Main window: LaTeX input on top, HTML output below, one button."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.SetTitle('LaTeX2HTML')
            self.SetSize(wx.Size(800, 600))

            self.panel = wx.Panel(self)
            layout = wx.BoxSizer(wx.VERTICAL)

            # Both text areas share the vertical space equally.
            self.textIn = wx.TextCtrl(self.panel, style=wx.TE_MULTILINE)
            self.textOut = wx.TextCtrl(
                self.panel, style=wx.TE_MULTILINE | wx.TE_READONLY
            )
            for textCtrl in (self.textIn, self.textOut):
                layout.Add(textCtrl, 1, wx.ALL | wx.EXPAND, 10)

            # The button keeps its natural size and is centered.
            self.btnConv = wx.Button(self.panel, label='Convert')
            self.btnConv.Bind(wx.EVT_BUTTON, self.evtBtn)
            layout.Add(self.btnConv, 0, wx.ALL | wx.ALIGN_CENTER, 5)

            self.panel.SetSizerAndFit(layout)
            self.Show()

        def evtBtn(self, evt):
            """Convert the input text; report any failure in a dialog."""
            source = self.textIn.GetValue()
            try:
                converted = Tex2HTML().convert_for_website(source)
            except Exception as err:
                wx.MessageBox(
                    'An exception occured during conversion: {}'.format(repr(err)),
                    'Exception',
                    wx.OK | wx.ICON_ERROR,
                )
                return
            if converted is not None:
                self.textOut.SetValue(converted)

    app = wx.App()
    MyFrame(None)
    app.MainLoop()
@xziyue
Copy link
Author

xziyue commented Dec 11, 2019

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment