Skip to content

Instantly share code, notes, and snippets.

@arp242
Created April 25, 2020 07:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arp242/ef0574aa2bf9d3ea31673223af98bef7 to your computer and use it in GitHub Desktop.
Save arp242/ef0574aa2bf9d3ea31673223af98bef7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# Convert Vim help files to Markdown.
#
# No copyright; do what you will with this.
import sys, pprint, re, argparse, subprocess, urllib.parse
def parse_first_line(line):
    """Split the first line of a Vim help file into tag name and blurb.

    The line is expected to look like one of:

        *usr_23.txt*  For Vim version 8.0.  Last change: 2006 Apr 24
        *xdg_open.txt*  Run xdg-open from Vim; re-implements netrw's gx.

    Returns a dict with two keys: 'name' is the text between the leading
    pair of asterisks, 'blurb' is everything after the closing asterisk
    with surrounding whitespace stripped.

    When the line does not start with a complete *tag*, 'name' is the empty
    string and 'blurb' is the whole line, stripped.
    """
    closing = line.find('*', 1) if line.startswith('*') else -1
    if closing == -1:
        # str.find() reports "not found" as -1; slicing with that value
        # would silently chop the last character off the name.
        return {'name': '', 'blurb': line.strip()}
    return {
        'name': line[1:closing],
        'blurb': line[closing + 1:].strip(),
    }
def parse_intro(section):
    """Convert the intro chunk (everything before the first section).

    section is the list of lines of that chunk. Its first line (the
    "*name.txt* blurb" line) is skipped; the remaining lines are converted
    with parse_text() and the resulting Markdown string is returned.

    The table of contents can have two forms. The "new" style:

        1. Search commands                  |search-commands|
        2. The definition of a pattern      |search-pattern|

    and the "old" style:

        |21.1|  Suspend and resume
        |21.2|  Executing shell commands

    Neither form gets special treatment yet: TOC lines are converted like
    any other line of text.
    """
    # TODO: recognise "new"-style TOC entries. Intended pattern, in pieces:
    #   ^[0-9]+\.\s+?    .*    \s+?\|[\w-]\|$
    # This lives in a comment on purpose: as bare non-raw string literals the
    # pieces were no-op statements whose "\." / "\s" / "\|" / "\w" escapes
    # trigger invalid-escape-sequence warnings on recent Python versions.
    lines = list(section[1:])
    return parse_text(lines)
def parse_section(text):
    """Parse one section of a help file into a dict.

    text is the list of lines of the section. The first line is the header,
    which can have two forms:

        *23.2*  Header
        1. Search commands          *search-commands*

    Returns a dict with three keys:

        'tag'   the text between the asterisks,
        'name'  the header title without the tag and without a leading
                "1. "-style section number,
        'text'  the remaining lines, converted by parse_text().

    A header that carries no complete *tag* yields an empty 'tag' and the
    whole header as 'name'.
    """
    header = text[0].strip()
    tag, name = '', header

    if header.startswith('*'):
        # Leading form: *tag* Title
        closing = header.find('*', 1)
        if closing != -1:
            tag = header[1:closing]
            name = header[closing + 1:].strip()
    elif header.endswith('*'):
        # Trailing form: Title *tag* -- look for the star that opens the
        # last tag, ignoring the final character (the closing star).
        opening = header.rfind('*', 0, len(header) - 1)
        if opening != -1:
            tag = header[opening + 1:len(header) - 1]
            name = header[:opening].strip()

    # Remove leading section number, if any
    name = re.sub(r'^[0-9]+\.\s+', '', name)

    return {
        'tag': tag,
        'name': name,
        'text': parse_text(text[1:]),
    }
def parse_inline(line):
    """Convert Vim help inline markup in one line of text to Markdown.

    The following are all rewritten to an inline code span:

        |foo| -> `foo`
        *foo* -> `foo`
        {foo} -> `foo`
        <foo> -> `foo`

    Returns the converted string.
    """
    # Alternative replacement for |tag| references that links to the online
    # help instead of emitting a plain code span:
    #
    #   |n| -> [`n`](http://vimhelp.appspot.com/pattern.txt.html#n)
    #
    # It is deliberately NOT wired in: it starts one vim process per tag,
    # which is not very fast. To try it, pass `repl` as the replacement in
    # the |...| substitution below.
    # NOTE(review): assumes vim reports the opened help file on stdout in
    # the form shown in the comments below -- confirm before enabling.
    def repl(m):
        tag = m.group(1)
        out = subprocess.run(
            ['vim', '-u', 'NONE', '+:help ' + tag, '+:q', '+:q'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = out.stdout.decode()

        # "pattern.txt" [readonly] 1395 lines, 58382 characters
        # E149: Sorry, no help for foooooobarrrrr
        if ' [readonly] ' not in stdout:
            return '`{}`'.format(tag)

        # The help file name is whatever sits between the first pair of
        # double quotes.
        start = stdout.find('"') + 1
        helpfile = stdout[start:stdout.find('"', start)]
        url = 'http://vimhelp.appspot.com/{}.html#{}'.format(
            helpfile, urllib.parse.quote(tag))
        return '[`{}`]({})'.format(tag, url)

    md = re.sub(r'\|(.*?)\|', r'`\1`', line)
    # One pass is enough for *foo*: after it at most one unpaired "*" can be
    # left on a line, so a second identical pass never matches anything.
    md = re.sub(r'\*(.*?)\*', r'`\1`', md)
    md = re.sub(r'{(.*?)}', r'`\1`', md)
    md = re.sub(r'<(.*?)>', r'`\1`', md)
    return md
def parse_text(text):
    """Convert a list of help-file lines to a Markdown string.

    Steps, in order:

    1. Drop a modeline (a line starting with "vim:", optionally preceded by
       whitespace) if one is found among the last three lines. The list
       passed in is left untouched.
    2. Convert inline markup on every line with parse_inline().
    3. Strip the leading spaces of every line after the first.
    4. Turn each example delimited by a line holding only ">" and a line
       holding only "<" into a Markdown code block indented by four spaces
       and surrounded by blank lines.

    Returns the result with leading and trailing whitespace stripped.
    """
    lines = list(text)

    # Remove modeline which is typically set on the last section
    # vim:tw=78:ts=8:ft=help:norl:expandtab
    for idx in range(max(len(lines) - 3, 0), len(lines)):
        if lines[idx].lstrip().startswith('vim:'):
            del lines[idx]
            break

    md = '\n'.join(parse_inline(l) for l in lines)

    # Replace much of the indentation that we don't need
    md = re.sub(r'\n +', r'\n', md)

    # Code blocks, also add indentation again. Markdown only treats a
    # paragraph as an indented code block from four spaces upwards.
    def indent_block(m):
        body = '\n'.join('    ' + l for l in m.group(1).split('\n'))
        return '\n\n' + body + '\n\n'

    # Non-greedy so that every ">" ... "<" pair becomes its own block instead
    # of one block spanning from the first ">" to the last "<". The extra
    # newline lets a block whose "<" is the very last line match as well; it
    # is removed again by the final strip().
    md = re.sub(r'\n+>\n(.*?)\n<\n+', indent_block, md + '\n',
                flags=re.DOTALL)
    return md.strip()
def parse_document(vim):
    """Parse the complete text of a Vim help file.

    The text is cut at every run of 78 "=" characters. The first chunk is
    the intro, every following chunk is one section.

    Returns a dict with the keys 'name' and 'blurb' (from the first line of
    the intro, see parse_first_line()), 'intro' (see parse_intro()) and
    'sections' (a list with one parse_section() result per section).
    """
    separator = '=' * 78
    chunks = [chunk.strip().split('\n') for chunk in vim.split(separator)]
    intro, rest = chunks[0], chunks[1:]

    first_line = parse_first_line(intro[0])
    return {
        'name': first_line['name'],
        'blurb': first_line['blurb'],
        'intro': parse_intro(intro),
        'sections': [parse_section(chunk) for chunk in rest],
    }
if __name__ == '__main__':
    # Command-line entry point: read one help file and print either the list
    # of its sections (-S) or the Markdown for the selected sections (-s).
    parser = argparse.ArgumentParser(description='Convert Vim help files to Markdown.')
    parser.add_argument('-S', '--show-sections', action='store_true',
                        help='Print out the section names')
    # Note the trailing space in the first fragment: adjacent string literals
    # are joined without any separator.
    parser.add_argument('-s', '--sections', default='ALL',
                        help='List of sections to include; comma-separated. '
                             'Use ALL for all sections; INTRO for the intro text.')
    parser.add_argument('doc', nargs=1,
                        help='Vim help document')
    args = parser.parse_args()

    with open(args.doc[0], 'r') as fp:
        doc = parse_document(fp.read())

    if args.show_sections:
        for s in doc['sections']:
            print('tag: {} – text: {}'.format(s['tag'], s['name']))
    else:
        show_all = args.sections == 'ALL'
        sections = [s.strip() for s in args.sections.split(',')]

        # Test against the parsed list rather than the raw option string, so
        # that a section called e.g. "INTRODUCTION" does not pull in the
        # intro text by accident.
        if show_all or 'INTRO' in sections:
            print(doc['intro'])
            print('\n')

        for s in doc['sections']:
            # A section can be selected by either its name or its tag.
            if not (show_all or s['name'] in sections or s['tag'] in sections):
                continue

            print(s['name'])
            print('=' * len(s['name']))
            print(s['text'])
            print('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment