Created
July 30, 2012 18:32
-
-
Save taldcroft/3208992 to your computer and use it in GitHub Desktop.
Find header character sets used in a list of RST files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Find the sets of characters used in RST section headers.  For input like
::

  ##########
   header
  ##########

  Section
  ========

  Subsection
  -----------

  Section 2
  ==========

then the get_header_chars() routine will return '#=-'.  It also checks that the
ordering is valid.

Usage::

  % find . -name "*.rst" | xargs ./header_chars.py
  ...  # bunch of files and their section orderings
  #*=
  *=-"
  *=-^"
  -^".
  -^">
  =-^"
  =-`

  % find . -name "*.rst" | xargs ./header_chars.py | grep '=-`'
  ./wcs/history.rst =-`
  =-`
""" | |
import sys | |
def get_header_chars(filename):
    """Return the section-header adornment characters used in an RST file.

    The file is scanned line by line.  Any stripped line longer than two
    characters that consists of a single repeated character is treated as a
    header adornment (overline or underline).  Each distinct adornment
    character is recorded in order of first appearance, so the returned
    string lists the characters from the outermost level to the innermost,
    e.g. ``'#=-'``.

    Parameters
    ----------
    filename : str
        Path of the RST file to read.

    Returns
    -------
    str
        The adornment characters in nesting order; ``''`` if none are found.

    Raises
    ------
    ValueError
        If a new adornment character appears while the current section is
        not at the deepest known level, or if an already-known character
        would skip more than one level down from the current level.
    """
    header_chars = ''
    # Index into ``header_chars`` of the current section; -1 means that no
    # header has been seen yet.
    level = -1

    # The context manager closes the file even when a misorder error is
    # raised part-way through the scan.
    with open(filename, 'r') as rst_file:
        for raw_line in rst_file:
            line = raw_line.strip()
            if not line:
                continue

            char = line[0]
            # Only lines of 3+ copies of one character count as adornments.
            if len(line) <= 2 or line != char * len(line):
                continue

            if char not in header_chars:
                # A previously unseen character opens a new, deeper level,
                # which is only allowed from the deepest level so far.
                if level != len(header_chars) - 1:
                    raise ValueError('ERROR misorder {} at level {} from {}'
                                     .format(char, level, header_chars))
                header_chars += char
                level += 1
            else:
                new_level = header_chars.index(char)
                # Going up any number of levels is fine; going down is
                # limited to one level at a time.
                if new_level > level + 1:
                    raise ValueError('ERROR misorder new_level={} level={} '
                                     'char={} header_chars={}'
                                     .format(new_level, level, char,
                                             header_chars))
                level = new_level

    return header_chars
def main():
    """Report the header character ordering of each file named in sys.argv.

    For every filename on the command line, print the filename followed by
    either its header characters (from ``get_header_chars``) or the
    ``ValueError`` message describing a misordered header.

    After all files are processed, print a summary of the distinct header
    orderings, one per line.  The orderings are sorted, and an entry is
    left out when the next entry in sorted order starts with it.

    Nothing is printed for the summary when no file yielded a valid
    ordering (for example when no filenames are given).
    """
    header_chars_set = set()
    for filename in sys.argv[1:]:
        try:
            header_chars = get_header_chars(filename)
        except ValueError as err:
            # Single-argument print() behaves the same on Python 2 and 3.
            print('{} {}'.format(filename, err))
        else:
            header_chars_set.add(header_chars)
            print('{} {}'.format(filename, header_chars))

    header_chars_list = sorted(header_chars_set)
    # Keep an entry only if its sorted successor does not extend it.
    all_header_chars = [
        hc0
        for hc0, hc1 in zip(header_chars_list, header_chars_list[1:])
        if not hc1.startswith(hc0)
    ]
    # The last sorted entry has no successor and is always kept.  Taking it
    # from the list (not from the loop variable) also covers the cases of
    # zero or one distinct orderings, where the loop body never runs.
    if header_chars_list:
        all_header_chars.append(header_chars_list[-1])
        print('\n'.join(all_header_chars))
# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment