Skip to content

Instantly share code, notes, and snippets.

@yymao
Last active April 13, 2017 16:44
Show Gist options
  • Save yymao/e803bf715a683b7e219f57284bdcb0f4 to your computer and use it in GitHub Desktop.
Save yymao/e803bf715a683b7e219f57284bdcb0f4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import print_function
import re
import argparse
def _find_ending_brace(s, open_brace=1):
escaping = False
output = ''
remain = ''
for i, c in enumerate(s):
if escaping:
escaping = False
else:
if c == '%':
break
elif c == '{':
open_brace += 1
elif c == '}':
open_brace -= 1
if open_brace == 0:
remain = s[i+1:].strip()
break
escaping = (c == '\\')
output += c
return open_brace, output, remain
def _print_counter(counter):
_counter = list(counter)
output = ''
while sum(_counter):
output += '{}.'.format(_counter.pop(0))
return output.rstrip('.')
def _iter_section(fp, re_sec):
    """Yield ``(level, skip, title)`` for each sectioning command in ``fp``.

    ``fp`` is any iterable of text lines (for example an open file).
    ``re_sec`` is a compiled pattern with three groups: the ``sub`` prefix,
    the optional ``*``, and the text after the opening ``{``.

    A title may span several lines; lines are consumed until the opening
    brace is balanced. Text left on a line after a title's closing brace is
    searched again, so several sectioning commands on one line are all found.

    Raises ``ValueError`` if the input ends while a title's braces are still
    open.
    """
    fp = iter(fp)  # allow lists and other iterables, not only iterators
    remain = ''
    open_brace = 0
    while True:
        try:
            if remain:
                # Leftover text after the previous title's closing brace.
                line = remain
                remain = ''
            else:
                line = next(fp)
            m = re_sec.search(line)
            if m is None:
                continue
            level, skip, title = m.groups()
            open_brace, title, remain = _find_ending_brace(title.strip() + ' ', 1)
            while open_brace:
                # Title continues on the following line(s).
                open_brace, title_this, remain = _find_ending_brace(next(fp).strip() + ' ', open_brace)
                title += title_this
            yield level, skip, title
        except StopIteration:
            # Input exhausted; a non-zero depth means a title was cut off.
            if open_brace:
                raise ValueError('misformed tex file')
            break
def iter_tex_sections(files, max_sublevel=2, no_number='*'):
    """Yield numbered section titles found in one or more TeX files.

    ``files`` is a single path or an iterable of paths; files are read in
    order and numbering continues across them. ``max_sublevel`` is the
    largest number of ``sub`` prefixes recognised (2 covers ``\\section``,
    ``\\subsection`` and ``\\subsubsection``). ``no_number`` is the marker
    used in place of a number for starred (unnumbered) commands.

    Each yielded item is a string of the form ``'<number> <title>'``.
    """
    try:
        string_types = basestring  # noqa: F821 -- Python 2: str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    counter = [0]*(max_sublevel+1)
    re_sec = re.compile(r'\\((?:sub){{0,{}}})section(\*?){{(.*)'.format(max_sublevel))
    if isinstance(files, string_types):
        files = [files]
    for this_file in files:
        with open(this_file) as f:
            for level, skip, title in _iter_section(f, re_sec):
                if not skip:
                    # Each 'sub' prefix is three characters long.
                    level = len(level) // 3
                    counter[level] += 1
                    # A new section restarts numbering of all deeper levels.
                    for i in range(level+1, len(counter)):
                        counter[i] = 0
                yield '{} {}'.format(no_number if skip else _print_counter(counter), title).strip()
def main():
    """Command-line entry point: print the section titles of the given files."""
    parser = argparse.ArgumentParser(description='Extract section titles from a TeX file')
    parser.add_argument('files', metavar='TEX_FILE', nargs='+')
    parser.add_argument('--max-sublevel', default=2, type=int, help='maximal subsection level (default: 2)')
    parser.add_argument('--no-number', default='*', help='Bullet point for sections without numbering')
    args = parser.parse_args()
    # Pass options by name rather than splatting the namespace's __dict__,
    # so a future extra CLI option cannot leak into the call.
    sections = iter_tex_sections(
        args.files,
        max_sublevel=args.max_sublevel,
        no_number=args.no_number,
    )
    for line in sections:
        print(line)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment