Skip to content

Instantly share code, notes, and snippets.

@benoit-pierre
Created September 12, 2016 19:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benoit-pierre/3d415adf9b4299de9e59ef3f9046e24d to your computer and use it in GitHub Desktop.
Save benoit-pierre/3d415adf9b4299de9e59ef3f9046e24d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from collections import namedtuple
import sys
import re
from plover.dictionary.base import create_dictionary
# Tokenizer pattern for the RTF input. The alternatives are:
#   cchar  - a backslash followed by one of the symbols - _ ~ \ { } *
#   cword  - a backslash followed by ASCII letters, with an optional signed
#            integer parameter (cparam) and at most one trailing space
#   group  - a literal "{" or "}"
#   text   - a run of characters other than newlines, backslashes and braces
#   nl     - a run of line-feed / carriage-return characters
RTF_TOKEN = re.compile(r'\\((?P<cchar>[-_~\\{}*])|(?P<cword>[A-Za-z]+)(?P<cparam>-?[0-9]+)? ?)|(?P<group>[{}])|(?P<text>[^\n\r\\{}]+)|(?P<nl>[\n\r]+)')
# Generic token: a (kind, value) pair.
Token = namedtuple('Token', 'kind value')
class ControlWord(namedtuple('ControlWord', 'kind name param')):
    """An RTF control word token: its kind, its name and an optional parameter.

    Comparison only looks at ``name``; the other operand may be another
    ``ControlWord`` or a bare name, so ``word == 'cxs'`` is supported.
    """

    def __eq__(self, other):
        """Return True when *other* (a ControlWord or a name) has the same name."""
        if isinstance(other, ControlWord):
            other = other.name
        return self.name == other

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Without this override the tuple implementation is inherited, which
        compares all fields and therefore disagrees with ``__eq__``.
        """
        return not self == other

    def __hash__(self):
        """Hash on the name only, consistent with the name-only equality."""
        return hash(self.name)
class Group(object):
    """Bookkeeping record for one RTF group.

    Stores the ``destination`` and ``ignorable`` values it was created with,
    plus a ``text`` attribute that starts out as the empty string.
    """

    def __init__(self, destination=None, ignorable=False):
        # Nothing has been collected for this group yet.
        self.text = ''
        self.ignorable = ignorable
        self.destination = destination
# Command line: <input RTF file> <output dictionary file>.
if len(sys.argv) < 3:
    sys.exit('usage: %s INPUT_RTF OUTPUT_DICTIONARY' % sys.argv[0])
# Read the raw bytes and decode them as cp1252; the context manager closes
# the file right away instead of leaking the handle.
with open(sys.argv[1], 'rb') as rtf_file:
    text = rtf_file.read().decode('cp1252')
# NOTE(review): presumably creates a new, empty Plover dictionary bound to
# the output path - confirm against plover.dictionary.base.
dictionary = create_dictionary(sys.argv[2])
# Split the decoded RTF text into a flat list of Token / ControlWord items.
pos = 0
token_list = []
while pos < len(text):
    m = RTF_TOKEN.match(text, pos)
    if m is None:
        # Explicit error (instead of an assert) so the check survives
        # "python -O" and reports where the input could not be tokenized.
        raise ValueError('cannot tokenize RTF input at offset %d: %r'
                         % (pos, text[pos:pos + 20]))
    pos = m.end()
    # Find which alternative matched; a control word is fetched together
    # with its optional parameter, hence the (None, None) case below.
    for group_names in (
        ('cchar',),
        ('cword', 'cparam'),
        ('group',),
        ('nl',),
        ('text',),
    ):
        token_value = m.group(*group_names)
        if token_value not in (None, (None, None)):
            token_kind = group_names[0]
            break
    else:
        raise ValueError('no token kind matched at offset %d' % m.start())
    if token_kind == 'nl':
        # Line breaks produce no token.
        continue
    if token_kind == 'cword':
        name, param = token_value
        if param is not None:
            param = int(param)
        token = ControlWord('cword', name, param)
    else:
        token = Token(token_kind, token_value)
    token_list.append(token)
# The whole document must be one top-level group that starts with the "rtf"
# control word. This is checked explicitly (rather than with assert) so the
# validation is not stripped by "python -O", and so an empty or truncated
# token list yields a clear error instead of an IndexError.
if not (
    len(token_list) >= 3
    and token_list[0] == Token('group', '{')
    and token_list[1] == ControlWord('cword', 'rtf', 1)
    and token_list[-1] == Token('group', '}')
):
    raise ValueError('input is not an RTF document (expected "{\\rtf ... }")')
# Drop the opening brace, the "rtf" control word and the closing brace.
token_list = token_list[2:-1]
# Walk the token list, collecting the text of each group and storing one
# dictionary entry per "cxs" (steno) group: the strokes come from the "cxs"
# group itself, the translation is whatever the root group collects until
# the next "cxs" group (or the end of the document).


def _token_at(index):
    """Return the token at *index*, or None when *index* is past the end."""
    return token_list[index] if index < len(token_list) else None


group = Group(None)
group_stack = [group]
steno = None
n = 0
while n < len(token_list):
    token = token_list[n]
    if token.kind == 'cchar':
        char = token.value
        if char == '*':
            # A "\*" that was not consumed as part of a group header.
            pass
        elif char == '~':
            group.text += '{^ ^}'
        elif char == '_':
            group.text += '{^-^}'
        else:
            # Escaped backslash, brace or hyphen: keep the symbol itself.
            group.text += char
    elif token.kind == 'cword':
        # Control words not listed here are ignored.
        if token.name == 'par':
            group.text += '{#Return}{#Return}'
        elif token.name == 'cxds':
            group.text += '{^}'
        elif token.name == 'cxfc':
            group.text += '{-|}'
        elif token.name == 'cxfing':
            next_token = _token_at(n + 1)
            if next_token is None or next_token.kind != 'text':
                raise ValueError('"cxfing" control word is not followed by text')
            group.text += '{&' + next_token.value + '}'
            # The text token has been consumed as well.
            n += 1
    elif token.kind == 'group':
        if token.value == '{':
            # A group may start with a control word (its destination),
            # optionally preceded by "\*" which marks it as ignorable.
            ignorable = False
            destination = None
            next_token = _token_at(n + 1)
            if next_token is not None:
                if next_token.kind == 'cword':
                    n += 1
                    destination = next_token
                elif next_token == Token('cchar', '*'):
                    ignorable = True
                    next_token = _token_at(n + 2)
                    if next_token is not None and next_token.kind == 'cword':
                        n += 2
                        destination = next_token
            if destination == 'cxs':
                # A new entry starts: flush the previous one first.
                if len(group_stack) != 1:
                    raise ValueError('"cxs" group nested inside another group')
                if steno is not None:
                    dictionary[steno] = group.text
                group.text = ''
            group = Group(destination, ignorable)
            group_stack.append(group)
        elif token.value == '}':
            if len(group_stack) < 2:
                raise ValueError('unbalanced "}" in RTF input')
            # Text handed over to the enclosing group when this one closes.
            closed_text = ''
            if group.destination == 'cxs':
                steno = tuple(group.text.split('/'))
            elif group.destination == 'cxp':
                stripped = group.text.strip()
                if stripped in ('.', '!', '?', ',', ';', ':'):
                    closed_text = '{' + stripped + '}'
                elif stripped == "'":
                    closed_text = "{^'}"
                elif stripped:
                    # "-", "/" and any unknown punctuation are shown as
                    # given (the original referenced an undefined name here).
                    closed_text = '{^' + stripped + '^}'
            elif group.destination == 'cxfing':
                closed_text = '{&' + group.text + '}'
            elif not group.ignorable:
                closed_text = group.text
            group_stack.pop()
            group = group_stack[-1]
            group.text += closed_text
        else:
            raise ValueError()
    elif token.kind == 'text':
        group.text += token.value
    else:
        raise ValueError()
    n += 1
if len(group_stack) != 1:
    raise ValueError('unbalanced "{" in RTF input')
# Flush the last pending entry, then write the dictionary out.
if steno is not None:
    dictionary[steno] = group.text
dictionary.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment