Skip to content

Instantly share code, notes, and snippets.

@ict4eo
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ict4eo/9546725 to your computer and use it in GitHub Desktop.
Save ict4eo/9546725 to your computer and use it in GitHub Desktop.
Convert a document in Google Code wiki page format to reStructuredText (reST)
"""
Convert a wikipage in Google wiki format to reStructuredText
Version 1.0.2
19 March 2014
Derek Hohls, Meraka, CSIR
Handles
=======
* internal comments (#name - no space)
* bold / italics text
* inline code
* code blocks
* internal cross-reference [[example]]
* bullets
* numbered list
* headers (up to 4 levels deep)
* tables
* standalone hyperlinks (these stay as is)
TODO
====
* indented bullets
* external hyperlinks with embedded link: `Python web site <http://www.python.org>`__
* named internal cross-reference: [[FutureModuleRoadmap | Future Roadmap]]
"""
import pprint
def process_header(txt):
    """Convert a Google Code wiki header line into a reST section title.

    A wiki header is wrapped in ``=`` markers (``== Title ==``); the number
    of leading markers is the nesting level, which selects the reST
    underline character: ``=``, ``-``, ``^`` and ``~`` for levels 1 to 4.
    Deeper levels are rendered as level 4.

    Args:
        txt: the header line, optionally surrounded by blanks and
            optionally still carrying its line ending.

    Returns:
        A three-item list: the title text (with the line ending of ``txt``
        re-attached), an underline exactly as long as the title (without a
        line ending) and a single-space filler string.

    Raises:
        ValueError: if ``txt`` does not start with a ``=`` marker.
    """
    underline_chars = '=-^~'  # one underline character per header level

    # Set the line ending aside so that it is not counted in the title
    # length and can be handed back unchanged to the caller.
    body = txt.rstrip('\r\n')
    line_ending = txt[len(body):]
    body = body.strip()

    level = len(body) - len(body.lstrip('='))
    if level == 0:
        raise ValueError('not a wiki header line: %r' % txt)
    level = min(level, len(underline_chars))

    # Only the markers at both ends are removed; a '=' inside the title
    # text (e.g. "x=1") is part of the title and must survive.
    title = body.strip('=').strip()

    # reST requires the underline to be at least as long as the title.
    underline = underline_chars[level - 1] * len(title)
    return [title + line_ending, underline, ' ']
def process_table(rows):
    """Render Google Code wiki table rows as a reST grid table.

    Args:
        rows: wiki table lines such as ``|| a || b ||``. A trailing
            newline, outer blanks and the outer pipes are ignored; the
            blanks inside each cell are kept.

    Returns:
        The lines of the grid table, without line endings: a ``+---+``
        separator first, then every row followed by another separator.
        Rows with fewer cells than the widest row are padded with empty
        cells. An empty ``rows`` gives an empty list.
    """
    if not rows:
        return []

    # clean data: one list of cell strings per row
    table = [r.strip('\n').strip(' ').strip('|').split('||') for r in rows]

    # A grid table needs the same number of cells in every row, so size
    # the table by its widest row instead of trusting the first one.
    n_cols = max(len(cells) for cells in table)
    table = [cells + [''] * (n_cols - len(cells)) for cells in table]

    # size of cells: each column is as wide as its longest cell
    widths = [max(len(cells[col]) for cells in table)
              for col in range(n_cols)]

    # splitter row
    split = '+' + ''.join('-' * width + '+' for width in widths)

    # results
    results = [split]
    for cells in table:
        padded = (cell.ljust(width) + '|'
                  for cell, width in zip(cells, widths))
        results.append('|' + ''.join(padded))
        results.append(split)
    return results
def process_bullet(text, text_prior):
    """Placeholder for bullet conversion, which is not implemented yet.

    The bullet line is handed back untouched; ``text_prior`` (the input
    line that came before it) is accepted but not used so far.
    """
    unchanged = text
    return unchanged
def _apply_inline_markup(text):
    """Translate wiki inline markup on one line into its reST form."""
    text = text.replace('__', '**')  # bold / strong
    text = text.replace('_', '*')  # italics
    text = text.replace('`', '``')  # inline
    text = text.replace('{{', '``').replace('}}', '``')  # inline
    text = text.replace('[', '[[').replace(']', ']]')  # cross-ref
    return text


def _table_lines(table_rows):
    """Return the collected wiki table rows as newline-terminated reST lines."""
    return ["%s\n" % row for row in process_table(table_rows)]


def process(file_in, file_out=None):
    """Convert a Google Code wiki page to reStructuredText.

    The input is handled one line at a time: ``{{{`` / ``}}}`` delimit a
    code block, a leading ``#`` marks a comment, ``=`` a header, ``||`` a
    table row and ``*`` a bullet; every other line only gets the inline
    markup translated.

    Args:
        file_in: path of the wiki file to read.
        file_out: path of the reST file to write. When omitted (or empty)
            the result is printed to the screen instead.
    """
    # Read everything first: the input is closed again before any output
    # is opened, so a failed conversion never truncates the output file.
    with open(file_in) as inputfile:
        text_in = inputfile.readlines()

    # defaults
    text_out = []
    table_rows = []
    is_code = False
    txt_prior = ''

    # process rows
    for txt in text_in:
        _txt = txt.strip(' ')
        is_table_row = not is_code and txt.startswith('||')

        if table_rows and not is_table_row:
            # The table ended on the previous line. Emit it before this
            # line, whatever its type, so the output keeps the input order.
            text_out.extend(_table_lines(table_rows))
            table_rows = []

        if is_code and not txt.startswith('}}}'):  # indent code
            # Use the raw line: stripping its own leading blanks would
            # flatten the nesting of the code being quoted.
            text_out.append(' %s' % txt)
        elif txt.startswith('{{{'):  # code start
            is_code = True
            # reST needs a blank line between '::' and the literal block.
            text_out.append('::\n\n')
        elif txt.startswith('}}}'):  # code end
            text_out.append('\n')
            is_code = False
        else:
            if _txt.startswith('# '):  # numbered list
                # Only the leading marker; a later '# ' is ordinary text.
                _txt = _txt.replace('# ', '#. ', 1)
            if txt.startswith('*'):  # bullets
                _txt = process_bullet(txt, txt_prior)
            _txt = _apply_inline_markup(_txt)
            if txt.startswith('#'):  # comment
                text_out.append('.. .. %s' % _txt)
            elif txt.startswith('='):  # header
                # The title item carries its own line ending, so the title
                # and its underline must be emitted back to back.
                text_out.extend(process_header(_txt))
                text_out.append('\n')
            elif is_table_row:  # table
                table_rows.append(_txt)
            else:
                text_out.append(_txt)
        txt_prior = txt

    if table_rows:
        # A table that runs up to the end of the file.
        text_out.extend(_table_lines(table_rows))

    # write output
    if file_out:
        with open(file_out, 'w') as outputfile:
            outputfile.writelines(text_out)
    else:
        for text in text_out:
            # Parenthesised single argument: valid in Python 2 and 3.
            print(text.strip('\n'))
# Script entry: convert the wiki page and write the reST result to a file.
# For output to the screen instead, leave out the second argument:
#   process('text.wiki')
process(file_in='text.wiki', file_out='text.rst')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment