Skip to content

Instantly share code, notes, and snippets.

@necrobuffalo
Created April 1, 2017 02:21
Show Gist options
  • Save necrobuffalo/8db52abbf74c0dcb72e656122dbf39ed to your computer and use it in GitHub Desktop.
Save necrobuffalo/8db52abbf74c0dcb72e656122dbf39ed to your computer and use it in GitHub Desktop.
wordgen
import random
import collections
import re
# A lexical token: `type` is the token kind name, `value` is the matched text.
Token = collections.namedtuple('Token', ('type', 'value'))

# Character pools that a pattern's "C" and "V" placeholders are drawn from.
consonants = "bcdfghjklmnpqrstvwxzy"
vowels = "aeiou"

# Demo pattern: up to three optional consonants, one mandatory vowel,
# then up to five optional consonants.
wordblock = "(C)(C)(C)V(C)(C)(C)(C)(C)"
def tokenize(pattern):
    """Split a phonotactic pattern into a stream of ``Token`` objects.

    Every character of *pattern* produces exactly one token:

    * ``"C"`` -> type ``'CONSONANT'``
    * ``"V"`` -> type ``'VOWEL'``
    * ``"("`` -> type ``'OPEN_PARENS'``
    * ``")"`` -> type ``'CLOSE_PARENS'``
    * anything else (including a newline) -> type ``'LITERAL'``

    Args:
        pattern: The pattern string to scan.

    Yields:
        Token: ``Token(type, value)`` where ``value`` is the matched character.
    """
    # Order matters: alternatives are tried left to right, so the catch-all
    # LITERAL entry must stay last or it would shadow the specific ones.
    token_specification = [
        ('CONSONANT', r'C'),
        ('VOWEL', r'V'),
        ('OPEN_PARENS', r'\('),
        ('CLOSE_PARENS', r'\)'),
        ('LITERAL', r'.'),
    ]
    token_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    # DOTALL makes "." match "\n" too; without it a newline in the pattern
    # matches no alternative and is silently dropped from the token stream.
    for item in re.finditer(token_regex, pattern, re.DOTALL):
        kind = item.lastgroup
        yield Token(kind, item.group(kind))
def parse(pattern):
    """Generate one random word from a phonotactic pattern.

    Pattern syntax:

    * ``C``     -> one random character from ``consonants``
    * ``V``     -> one random character from ``vowels``
    * ``(...)`` -> optional group, kept or dropped with equal probability
    * any other character is copied to the output unchanged

    Groups cannot be nested.

    Args:
        pattern: The pattern string to expand.

    Returns:
        str: The generated word.

    Raises:
        ValueError: If a "(" appears inside an already open group, a ")"
            appears with no open group, or the pattern ends while a group
            is still open.
    """
    mismatch = 'You have mismatched parentheses in your phonotactic constraints.'
    pieces = []
    in_group = False
    discard = False
    for token in tokenize(pattern):
        # Parentheses are validated before the discard check so that a
        # malformed pattern is rejected regardless of the random outcome.
        if token.type == 'OPEN_PARENS':
            if in_group:
                raise ValueError(mismatch)
            in_group = True
            # One coin flip per group decides whether its contents are kept.
            discard = random.getrandbits(1) == 1
        elif token.type == 'CLOSE_PARENS':
            if not in_group:
                raise ValueError(mismatch)
            in_group = False
            discard = False
        elif discard:
            # Inside a dropped group: emit nothing.
            continue
        elif token.type == 'CONSONANT':
            pieces.append(random.choice(consonants))
        elif token.type == 'VOWEL':
            pieces.append(random.choice(vowels))
        elif token.type == 'LITERAL':
            pieces.append(token.value)
    if in_group:
        # The pattern ended without closing its last group.
        raise ValueError(mismatch)
    return ''.join(pieces)
# Demo: print ten words generated from the default pattern.
for _ in range(10):
    word = parse(wordblock)
    print(word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment