Created
December 11, 2019 17:28
-
-
Save xziyue/b0b77842f54f3fcb029a02333fdd3602 to your computer and use it in GitHub Desktop.
Converting colored LaTeX listing to HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from TexSoup.data import TexNode, TexEnv, TexCmd, RArg | |
from TexSoup import TexSoup | |
import re | |
import copy | |
import html as htmlib | |
class Tex2HTML:
    """Convert a colored LaTeX ``lstlisting`` into an HTML fragment.

    The converter walks a TexSoup parse tree, records ``\\definecolor``
    definitions, and renders the text of a ``lstlisting`` environment.
    Inside the listing, spans of the form ``%*...*)`` are parsed as LaTeX and
    may carry ``\\color``, ``\\colorbox``, ``\\bfseries``, ``\\smash``,
    ``\\space`` and ``\\unichar`` commands; everything else is plain text.

    Attributes:
        colorDef: maps a LaTeX color name to the value given as the third
            argument of ``\\definecolor``. The value is emitted after ``#``
            in CSS, so it is presumably a hex string -- verify against input.
        env: unused by this class; kept for backward compatibility.
        htmlResult: HTML of the most recently processed ``lstlisting``,
            or ``None`` if none has been processed yet.
        groupTemplate: style attributes currently in effect while an escape
            span is being processed; copied into every emitted entity.
    """

    # One listings escape span: ``%*`` ... ``*)`` (non-greedy, single line).
    _ESCAPE_RE = re.compile(r'%\*(.*?)\*\)')

    def __init__(self):
        self.colorDef = dict()
        self.env = []
        # Initialised here so that reading it before/without a listing does
        # not raise AttributeError.
        self.htmlResult = None
        self.groupTemplate = self._get_default_entity()

    def _get_obj_type(self, node):
        """Classify a parse-tree item.

        Returns one of ``'comment'`` (plain ``str``), ``'group'`` (``RArg``),
        ``'cmd'`` / ``'env'`` (``TexNode`` wrapping a ``TexCmd`` / ``TexEnv``)
        or ``'text'`` for anything else.
        """
        if isinstance(node, str):
            return 'comment'
        if isinstance(node, RArg):
            return 'group'
        if isinstance(node, TexNode):
            expr = node.expr
            if isinstance(expr, TexCmd):
                return 'cmd'
            if isinstance(expr, TexEnv):
                return 'env'
        # Every caller treats "unclassified" exactly like plain text, so a
        # single fall-through keeps the function total.
        return 'text'

    def _get_default_entity(self):
        """Return a fresh entity dict with no style and no data."""
        return {
            'data': None
        }

    def _process_latex_1(self, node):
        """Walk the document: collect colors and render ``lstlisting``.

        Recurses into other environments and brace groups. When more than
        one ``lstlisting`` is present, the last one processed wins because
        each one overwrites ``self.htmlResult``.
        """
        for obj in node:
            objType = self._get_obj_type(obj)
            if objType == 'cmd':
                if obj.name == 'definecolor':
                    # First argument is the color name, third is its value.
                    self.colorDef[obj.args[0]] = obj.args[2]
            elif objType == 'env':
                if obj.expr.begin == '\\begin{lstlisting}':
                    self.htmlResult = self._process_latex_2(''.join(obj.text))
                else:
                    self._process_latex_1(obj)
            elif objType == 'group':
                self._process_latex_1(TexSoup(obj.value))

    def _process_latex_2(self, lstData):
        """Render the raw text of a listing to HTML.

        The text is cut at the boundaries of every ``%*...*)`` escape span.
        Escape spans are parsed as LaTeX and styled via ``_process_latex_3``;
        the pieces in between become unstyled entities. Entities with empty
        data are dropped before rendering.
        """
        matchGroups = {m.span() for m in self._ESCAPE_RE.finditer(lstData)}

        # Cut points: both ends of the text plus both ends of every escape.
        ticks = {0, len(lstData)}
        for span in matchGroups:
            ticks.update(span)
        ticks = sorted(ticks)

        result = []
        # Consecutive cut points delimit either an escape span or plain text.
        # This also covers the degenerate cases (empty text, no escapes, a
        # listing that is exactly one escape) without special-casing.
        for sePair in zip(ticks, ticks[1:]):
            if sePair in matchGroups:
                # Strip the two-character ``%*`` / ``*)`` delimiters.
                escapedData = lstData[sePair[0] + 2: sePair[1] - 2]
                # Styles never leak from one escape span into the next.
                self.groupTemplate = self._get_default_entity()
                self._process_latex_3(TexSoup(escapedData), result)
            else:
                entity = self._get_default_entity()
                entity['data'] = lstData[sePair[0]:sePair[1]]
                result.append(entity)

        return self._result_to_html([e for e in result if e['data']])

    def _process_scoped(self, tex, result, overrides=None):
        """Process ``tex`` with temporary style ``overrides``.

        The current ``groupTemplate`` is saved, updated with ``overrides``,
        used for the nested content, and restored afterwards.
        """
        saved = copy.copy(self.groupTemplate)
        if overrides:
            self.groupTemplate.update(overrides)
        self._process_latex_3(TexSoup(tex), result)
        self.groupTemplate = saved

    def _process_latex_3(self, node, result):
        """Translate the contents of one escape span into styled entities.

        Appends entity dicts (copies of ``groupTemplate`` plus ``'data'``)
        to ``result``. Commands not listed below are silently ignored.

        Raises:
            KeyError: a ``\\color`` / ``\\colorbox`` names an undefined color.
            NotImplementedError: an environment appears inside the escape.
        """
        for obj in node:
            objType = self._get_obj_type(obj)
            if objType == 'group':
                # Style changes inside ``{...}`` are local to the group.
                self._process_scoped(obj.value, result)
            elif objType == 'cmd':
                name = obj.name
                if name == 'color':
                    color = self.colorDef[obj.args[0]]
                    if len(obj.args) == 2:
                        # \color{name}{content}: applies to content only.
                        self._process_scoped(
                            obj.args[1], result, {'color': color})
                    else:
                        # \color{name}: applies to the rest of the scope.
                        self.groupTemplate['color'] = color
                elif name == 'bfseries':
                    self.groupTemplate['bold'] = True
                elif name == 'colorbox':
                    self._process_scoped(
                        obj.args[1], result,
                        {'bg-color': self.colorDef[obj.args[0]]})
                elif name == 'smash':
                    # No style change and nothing to restore.
                    self._process_latex_3(TexSoup(obj.args[0]), result)
                elif name == 'space':
                    entity = copy.copy(self.groupTemplate)
                    # NOTE(review): the data is the empty string, so this
                    # entity is later dropped by the empty-data filter in
                    # _process_latex_2. If \space is meant to emit a blank,
                    # this should be ' ' -- confirm the intent.
                    entity['data'] = ''
                    result.append(entity)
                elif name == 'unichar':
                    # Argument is a hex code point, optionally quoted.
                    charCode = int(obj.args[0].strip('\"'), 16)
                    entity = copy.copy(self.groupTemplate)
                    entity['data'] = chr(charCode)
                    result.append(entity)
            elif objType == 'env':
                raise NotImplementedError('this is not allowed so far')
            else:
                entity = copy.copy(self.groupTemplate)
                entity['data'] = str(obj)
                result.append(entity)

    def _result_to_html(self, result):
        """Render a list of entity dicts to an HTML string.

        Each entity's ``'data'`` is HTML-escaped and wrapped, outermost
        first, in a background-color span (``'bg-color'``), a color span
        (``'color'``) and ``<b>`` (``'bold'``). Leading whitespace is
        stripped from the first entity of every run of consecutive bold
        entities. The input entities are not modified.
        """
        pieces = []
        inBoldRun = False
        for entity in result:
            data = entity['data']
            opening = []
            closing = []
            if 'bg-color' in entity:
                # Escape so a color value cannot break out of the attribute.
                opening.append('<span style="background-color:#{};">'.format(
                    htmlib.escape(str(entity['bg-color']), quote=True)))
                closing.append('</span>')
            if 'color' in entity:
                opening.append('<span style="color:#{};">'.format(
                    htmlib.escape(str(entity['color']), quote=True)))
                closing.append('</span>')
            isBold = 'bold' in entity
            if isBold:
                opening.append('<b>')
                closing.append('</b>')
                if not inBoldRun:
                    data = data.lstrip()
            inBoldRun = isBold
            # Close tags in the reverse order they were opened.
            pieces.append(
                ''.join(opening) + htmlib.escape(data)
                + ''.join(reversed(closing)))
        return ''.join(pieces)

    def convert(self, texContent):
        """Convert a complete LaTeX document string to HTML.

        Returns the HTML of the ``lstlisting`` found in the document body
        (the last one, if several are present).

        Raises:
            ValueError: the document contains no ``lstlisting`` environment.
        """
        # Reset so a reused instance can never return a stale result.
        self.htmlResult = None
        soup = TexSoup(texContent)
        self._process_latex_1(soup.document)
        if self.htmlResult is None:
            raise ValueError('no lstlisting environment found in the input')
        return self.htmlResult

    def convert_for_website(self, texInner):
        """Convert a LaTeX snippet and wrap the result in a ``<pre>`` block.

        ``texInner`` is placed inside a minimal ``document`` environment
        before conversion; trailing whitespace of the converted HTML is
        stripped before it is inserted into ``<pre class="console-pre">``.
        """
        # Template lines start at column 0 on purpose: they are part of the
        # string, and whitespace inside <pre> would be rendered.
        texContent = r'''
\begin{document}
\lstconsolestyle
%s
\end{document}
''' % texInner
        html = self.convert(texContent).rstrip()
        htmlContent = r'''
<pre class="console-pre">
%s
</pre>
''' % html
        return htmlContent.lstrip()
if __name__ == '__main__':
    # run the GUI
    # wx is imported here so that importing this module as a library does
    # not require wxPython.
    import wx

    class MyFrame(wx.Frame):
        """Main window: LaTeX input box, HTML output box, Convert button."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            sizer = wx.BoxSizer(wx.VERTICAL)
            self.panel = wx.Panel(self)
            self.SetSize(wx.Size(800, 600))
            self.SetTitle('LaTeX2HTML')

            # Editable input on top, read-only output below; both are added
            # with proportion 1 and wx.EXPAND.
            self.textIn = wx.TextCtrl(self.panel, style=wx.TE_MULTILINE)
            self.textOut = wx.TextCtrl(
                self.panel, style=wx.TE_MULTILINE | wx.TE_READONLY)
            sizer.Add(self.textIn, 1, wx.ALL | wx.EXPAND, 10)
            sizer.Add(self.textOut, 1, wx.ALL | wx.EXPAND, 10)

            self.btnConv = wx.Button(self.panel, label='Convert')
            self.btnConv.Bind(wx.EVT_BUTTON, self.evtBtn)
            sizer.Add(self.btnConv, 0, wx.ALL | wx.ALIGN_CENTER, 5)

            self.panel.SetSizerAndFit(sizer)
            self.Show()

        def evtBtn(self, evt):
            """Convert the input text and show the HTML, or report the error."""
            inText = self.textIn.GetValue()
            result = None
            try:
                # A fresh converter per click.
                conv = Tex2HTML()
                result = conv.convert_for_website(inText)
            except Exception as e:
                # Top-level UI boundary: report any failure in a dialog
                # instead of letting it escape the event handler.
                wx.MessageBox(
                    'An exception occurred during conversion: {}'.format(repr(e)),
                    'Exception', wx.OK | wx.ICON_ERROR)
            # On failure result is still None and the output box is left
            # unchanged.
            if result is not None:
                self.textOut.SetValue(result)

    app = wx.App()
    MyFrame(None)
    app.MainLoop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the inverse of https://gist.github.com/xziyue/b73c9413fd40050fac3be3ec0b0f4a63.