arp242/vimtomd.py

## vimtomd.py
#!/usr/bin/env python3
#
# Convert Vim help files to Markdown.
#
# No copyright; do what you will with this.

import sys, pprint, re, argparse, subprocess, urllib.parse

def parse_first_line(line):
	"""Split the first line of a help file into its tag name and blurb.

	The line is expected to look like one of:

		*usr_23.txt*	For Vim version 8.0.  Last change: 2006 Apr 24
		*xdg_open.txt* Run xdg-open from Vim; re-implements netrw's gx.

	Returns a dict with 'name' (the text between the first character and the
	next '*') and 'blurb' (everything after that '*', stripped of surrounding
	whitespace).
	"""
	# Start searching at index 1 so the opening star is skipped.
	closing = line.find('*', 1)
	name = line[1:closing]
	blurb = line[closing + 1:].strip()
	return {'name': name, 'blurb': blurb}

def parse_intro(section):
	"""Convert the intro of a help file (the part before the first section
	separator) to Markdown.

	section is the list of lines of the intro. The first line (the file's
	tag/blurb line) is skipped and the rest is converted with parse_text().

	The intro usually contains a table of contents, which can have two forms.

	"New" style:
		1. Search commands		|search-commands|
		2. The definition of a pattern	|search-pattern|

	And the "Old" style:
		|21.1|	Suspend and resume
		|21.2|	Executing shell commands

	Neither form is treated specially yet; the lines are converted like any
	other text.
	"""
	# TODO: recognise "new" style TOC entries; the pattern sketched so far is
	#   ^[0-9]+\.\s+?   .*   \s+?\|[\w-]\|$
	#
	# Pass a fresh list so parse_text() never sees the caller's list object.
	return parse_text(list(section[1:]))

def parse_section(text):
	"""Parse one section, given as a list of lines, into a dict.

	The first line is the header, which can have two forms:

		*23.2*	Header
		1. Search commands				*search-commands*

	Returns a dict with 'tag' (the text between the stars), 'name' (the
	header text without the tag and without a leading "1. " style section
	number) and 'text' (the remaining lines converted by parse_text()).
	"""
	header = text[0].strip()
	if header.startswith('*'):
		# Tag comes first: it runs up to the second star.
		star = header.find('*', 1)
		section = {
			'tag': header[1:star],
			'name': header[star+1:].strip(),
		}
	else:
		# Tag comes last: find the star that opens it, skipping the closing
		# star which is assumed to be the final character of the header.
		star = header.rfind('*', 0, len(header)-1)
		section = {
			'tag': header[star+1:len(header)-1],
			'name': header[:star].strip(),
		}

	# Remove leading section number, if any. This must be a raw string:
	# '\.' and '\s' are regex escapes, not valid string escapes.
	section['name'] = re.sub(r'^[0-9]+\.\s+', '', section['name'])

	section['text'] = parse_text(text[1:])
	return section

def parse_inline(line):
	"""Convert the inline Vim help markup on a single line to Markdown.

	|foo|, *foo*, {foo} and <foo> are all rewritten to `foo`.
	Returns the converted line.
	"""
	# Alternative replacement for |tag| that asks Vim where the tag lives and
	# links to the online help:
	#   |n|   -> [`n`](http://vimhelp.appspot.com/pattern.txt.html#n)
	#   |foo| -> `foo` (when Vim has no help for it)
	# Not enabled by default since starting Vim for every tag is not very
	# fast; swap the two re.sub() lines below to use it.
	def repl(m):
		tag = m.group(1)
		out = subprocess.run(['vim', '-u', 'NONE', '+:help ' + tag, '+:q', '+:q'],
			stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		stdout = out.stdout.decode()

		# "pattern.txt" [readonly] 1395 lines, 58382 characters
		# E149: Sorry, no help for foooooobarrrrr
		if ' [readonly] ' not in stdout:
			return '`{}`'.format(tag)

		# The help file name is the text between the first pair of quotes.
		start = stdout.find('"') + 1
		url = 'http://vimhelp.appspot.com/{}.html#{}'.format(
			stdout[start:stdout.find('"', start)], urllib.parse.quote(tag))
		return '[`{}`]({})'.format(tag, url)

	md = re.sub(r'\|(.*?)\|', r'`\1`', line)
	#md = re.sub(r'\|(.*?)\|', repl, line)

	# *foo* -> `foo`
	# {foo} -> `foo`
	# <foo> -> `foo`
	md = re.sub(r'\*(.*?)\*', r'`\1`', md)
	md = re.sub(r'{(.*?)}', r'`\1`', md)
	md = re.sub(r'<(.*?)>', r'`\1`', md)

	return md

def parse_text(text):
	"""Convert a list of help-file lines to a Markdown string.

	Works on a copy, so the caller's list is left untouched. The steps are:
	drop a Vim modeline found in the last three lines, convert every line
	with parse_inline(), remove leading spaces from lines, and turn blocks
	delimited by a line with only '>' and a line with only '<' into
	Markdown code blocks (indented by four spaces).

	Returns the result with surrounding whitespace stripped.
	"""
	lines = list(text)

	# Remove modeline which is typically set on the last section
	# vim:tw=78:ts=8:ft=help:norl:expandtab
	# It may be preceded by whitespace, so ignore leading blanks when matching.
	for i in range(max(len(lines) - 3, 0), len(lines)):
		if lines[i].lstrip().startswith('vim:'):
			del lines[i]
			break

	md = '\n'.join(parse_inline(ln) for ln in lines)

	# Replace much of the indentation that we don't need
	md = re.sub(r'\n +', r'\n', md)

	# Code blocks, also add indentation again
	def repl(m):
		code = '\n'.join('    ' + ln for ln in m.group(1).split('\n'))
		return '\n\n' + code + '\n\n'
	# Non-greedy so that every >...< pair becomes its own code block instead
	# of everything between the first '>' and the last '<' being merged.
	md = re.sub(r'\n+>\n(.*?)\n<\n+', repl, md, flags=re.DOTALL)

	return md.strip()

def parse_document(vim):
	"""Parse the full text of a Vim help file.

	The text is cut at every run of 78 '=' characters; each piece is stripped
	and split into lines. The first piece is the intro, the others are
	sections.

	Returns a dict with 'name' and 'blurb' (from the very first line),
	'intro' (Markdown text) and 'sections' (a list of parse_section() dicts).
	"""
	separator = 78 * '='
	chunks = [chunk.strip().split('\n') for chunk in vim.split(separator)]
	intro_lines, section_chunks = chunks[0], chunks[1:]

	first = parse_first_line(intro_lines[0])
	return {
		'name': first['name'],
		'blurb': first['blurb'],
		'intro': parse_intro(intro_lines),
		'sections': [parse_section(chunk) for chunk in section_chunks],
	}

# Command-line entry point: parse the given help file and print either the
# list of sections or the selected sections as Markdown.
if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='Convert Vim help files to Markdown.')
	parser.add_argument('-S', '--show-sections', action='store_true',
		help='Print out the section names')
	parser.add_argument('-s', '--sections', default='ALL',
		help='List of sections to include; comma-separated. '
		     'Use ALL for all sections; INTRO for the intro text.')
	parser.add_argument('doc', nargs=1,
		help='Vim help document')
	args = parser.parse_args()

	with open(args.doc[0], 'r') as fp:
		doc = parse_document(fp.read())

	if args.show_sections:
		for s in doc['sections']:
			print('tag: {} – text: {}'.format(s['tag'], s['name']))
	else:
		# Compare against the individual names; a substring test on the raw
		# argument would also match e.g. a section called INTRODUCTION.
		sections = [ s.strip() for s in args.sections.split(',') ]
		show_all = 'ALL' in sections

		if show_all or 'INTRO' in sections:
			print(doc['intro'])
			print('\n')

		for s in doc['sections']:
			if show_all or s['name'] in sections or s['tag'] in sections:
				# Section name underlined with '=' is a Markdown header.
				print(s['name'])
				print('=' * len(s['name']))
				print(s['text'])
				print('\n')
	#!/usr/bin/env python3
	#
	# Convert Vim help files to Markdown.
	#
	# No copyright; do what you will with this.

	import sys, pprint, re, argparse, subprocess, urllib.parse

	def parse_first_line(line):
		"""Split the first line of a help file into its tag name and blurb.

		The line is expected to look like one of:

			*usr_23.txt*	For Vim version 8.0.  Last change: 2006 Apr 24
			*xdg_open.txt* Run xdg-open from Vim; re-implements netrw's gx.

		Returns a dict with 'name' (the text between the first character and
		the next '*') and 'blurb' (everything after that '*', stripped of
		surrounding whitespace).
		"""
		# Start searching at index 1 so the opening star is skipped.
		star = line.find('*', 1)
		return {
			'name': line[1:star],
			'blurb': line[star+1:].strip(),
		}

	def parse_intro(section):
		"""Convert the intro of a help file (the part before the first section
		separator) to Markdown.

		section is the list of lines of the intro. The first line (the file's
		tag/blurb line) is skipped and the rest is converted with parse_text().

		The intro usually contains a table of contents, which can have two
		forms.

		"New" style:
			1. Search commands		|search-commands|
			2. The definition of a pattern	|search-pattern|

		And the "Old" style:
			|21.1|	Suspend and resume
			|21.2|	Executing shell commands

		Neither form is treated specially yet; the lines are converted like
		any other text.
		"""
		# TODO: recognise "new" style TOC entries; the pattern sketched so far is
		#   ^[0-9]+\.\s+?   .*   \s+?\|[\w-]\|$
		#
		# Pass a fresh list so parse_text() never sees the caller's list object.
		return parse_text(list(section[1:]))

	def parse_section(text):
		"""Parse one section, given as a list of lines, into a dict.

		The first line is the header, which can have two forms:

			*23.2*	Header
			1. Search commands				*search-commands*

		Returns a dict with 'tag' (the text between the stars), 'name' (the
		header text without the tag and without a leading "1. " style section
		number) and 'text' (the remaining lines converted by parse_text()).
		"""
		header = text[0].strip()
		if header.startswith('*'):
			# Tag comes first: it runs up to the second star.
			star = header.find('*', 1)
			section = {
				'tag': header[1:star],
				'name': header[star+1:].strip(),
			}
		else:
			# Tag comes last: find the star that opens it, skipping the
			# closing star which is assumed to be the final character.
			star = header.rfind('*', 0, len(header)-1)
			section = {
				'tag': header[star+1:len(header)-1],
				'name': header[:star].strip(),
			}

		# Remove leading section number, if any. This must be a raw string:
		# '\.' and '\s' are regex escapes, not valid string escapes.
		section['name'] = re.sub(r'^[0-9]+\.\s+', '', section['name'])

		section['text'] = parse_text(text[1:])
		return section

	def parse_inline(line):
		"""Convert the inline Vim help markup on a single line to Markdown.

		|foo|, *foo*, {foo} and <foo> are all rewritten to `foo`.
		Returns the converted line.
		"""
		# Alternative replacement for |tag| that asks Vim where the tag lives
		# and links to the online help:
		#   |n|   -> [`n`](http://vimhelp.appspot.com/pattern.txt.html#n)
		#   |foo| -> `foo` (when Vim has no help for it)
		# Not enabled by default since starting Vim for every tag is not very
		# fast; swap the two re.sub() lines below to use it.
		def repl(m):
			tag = m.group(1)
			out = subprocess.run(['vim', '-u', 'NONE', '+:help ' + tag, '+:q', '+:q'],
				stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			stdout = out.stdout.decode()

			# "pattern.txt" [readonly] 1395 lines, 58382 characters
			# E149: Sorry, no help for foooooobarrrrr
			if ' [readonly] ' not in stdout:
				return '`{}`'.format(tag)

			# The help file name is the text between the first pair of quotes.
			start = stdout.find('"') + 1
			url = 'http://vimhelp.appspot.com/{}.html#{}'.format(
				stdout[start:stdout.find('"', start)], urllib.parse.quote(tag))
			return '[`{}`]({})'.format(tag, url)

		md = re.sub(r'\|(.*?)\|', r'`\1`', line)
		#md = re.sub(r'\|(.*?)\|', repl, line)

		# *foo* -> `foo`
		# {foo} -> `foo`
		# <foo> -> `foo`
		md = re.sub(r'\*(.*?)\*', r'`\1`', md)
		md = re.sub(r'{(.*?)}', r'`\1`', md)
		md = re.sub(r'<(.*?)>', r'`\1`', md)

		return md

	def parse_text(text):
		"""Convert a list of help-file lines to a Markdown string.

		Works on a copy, so the caller's list is left untouched. The steps
		are: drop a Vim modeline found in the last three lines, convert every
		line with parse_inline(), remove leading spaces from lines, and turn
		blocks delimited by a line with only '>' and a line with only '<'
		into Markdown code blocks (indented by four spaces).

		Returns the result with surrounding whitespace stripped.
		"""
		lines = list(text)

		# Remove modeline which is typically set on the last section
		# vim:tw=78:ts=8:ft=help:norl:expandtab
		# It may be preceded by whitespace, so ignore leading blanks.
		for i in range(max(len(lines) - 3, 0), len(lines)):
			if lines[i].lstrip().startswith('vim:'):
				del lines[i]
				break

		md = '\n'.join(parse_inline(ln) for ln in lines)

		# Replace much of the indentation that we don't need
		md = re.sub(r'\n +', r'\n', md)

		# Code blocks, also add indentation again
		def repl(m):
			code = '\n'.join('    ' + ln for ln in m.group(1).split('\n'))
			return '\n\n' + code + '\n\n'
		# Non-greedy so that every >...< pair becomes its own code block.
		md = re.sub(r'\n+>\n(.*?)\n<\n+', repl, md, flags=re.DOTALL)

		return md.strip()

	def parse_document(vim):
		"""Parse the full text of a Vim help file.

		The text is cut at every run of 78 '=' characters; each piece is
		stripped and split into lines. The first piece is the intro, the
		others are sections.

		Returns a dict with 'name' and 'blurb' (from the very first line),
		'intro' (Markdown text) and 'sections' (a list of parse_section()
		dicts).
		"""
		header_marker = '=' * 78
		vim = [ s.strip().split('\n') for s in vim.split(header_marker) ]

		fl = parse_first_line(vim[0][0])
		doc = {
			'name': fl['name'],
			'blurb': fl['blurb'],
			'intro': parse_intro(vim[0]),
			'sections': [],
		}

		for section in vim[1:]:
			doc['sections'].append(parse_section(section))

		return doc

	# Command-line entry point: parse the given help file and print either the
	# list of sections or the selected sections as Markdown.
	if __name__ == '__main__':
		parser = argparse.ArgumentParser(description='Convert Vim help files to Markdown.')
		parser.add_argument('-S', '--show-sections', action='store_true',
			help='Print out the section names')
		parser.add_argument('-s', '--sections', default='ALL',
			help='List of sections to include; comma-separated. '
			     'Use ALL for all sections; INTRO for the intro text.')
		parser.add_argument('doc', nargs=1,
			help='Vim help document')
		args = parser.parse_args()

		with open(args.doc[0], 'r') as fp:
			doc = parse_document(fp.read())

		if args.show_sections:
			for s in doc['sections']:
				print('tag: {} – text: {}'.format(s['tag'], s['name']))
		else:
			# Compare against the individual names; a substring test on the
			# raw argument would also match e.g. a section called INTRODUCTION.
			sections = [ s.strip() for s in args.sections.split(',') ]
			show_all = 'ALL' in sections

			if show_all or 'INTRO' in sections:
				print(doc['intro'])
				print('\n')

			for s in doc['sections']:
				if show_all or s['name'] in sections or s['tag'] in sections:
					# Section name underlined with '=' is a Markdown header.
					print(s['name'])
					print('=' * len(s['name']))
					print(s['text'])
					print('\n')