Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@Ceasar
Created March 29, 2021 02:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ceasar/61138d16f3daeb35ce2c6cb8af7cdd36 to your computer and use it in GitHub Desktop.
Save Ceasar/61138d16f3daeb35ce2c6cb8af7cdd36 to your computer and use it in GitHub Desktop.
Markov chain generator
import collections
import random
def gen_ngrams(letters, n=2):
    """Yield every length-``n`` sliding window over ``letters`` as a tuple.

    The window starts out filled with ``None`` and one extra ``None`` is
    pushed after the last item, so the first tuples are left-padded with
    ``None`` and the final tuple ends in ``None``. An empty ``letters``
    therefore still yields one tuple consisting only of ``None``.

    Args:
        letters: Iterable of items (e.g. the characters of a word). It is
            consumed lazily, one item per yielded tuple.
        n: Window length.

    Yields:
        One tuple per item of ``letters``, plus one for the trailing ``None``.
    """
    window = collections.deque(maxlen=n)
    window.extend([None] * n)

    def slide(symbol):
        # Pushing onto a full bounded deque drops the oldest entry.
        window.append(symbol)
        return tuple(window)

    for symbol in letters:
        yield slide(symbol)
    # A trailing None marks the end of the sequence.
    yield slide(None)
def get_ngram_weights(words, n=2):
    """Count, for each (n-1)-item context, how often each item follows it.

    Every word is expanded with ``gen_ngrams``; each n-gram is split into
    its first ``n - 1`` items (the context) and its last item (the
    follower), and the follower's count under that context is incremented.

    Args:
        words: Iterable of sequences to learn from.
        n: N-gram length passed to ``gen_ngrams``.

    Returns:
        A ``defaultdict`` mapping context tuples to ``Counter`` objects of
        follower counts.
    """
    table = collections.defaultdict(collections.Counter)
    for word in words:
        for ngram in gen_ngrams(word, n=n):
            context, follower = ngram[:-1], ngram[-1]
            table[context][follower] += 1
    return table
def choose_weighted_key(dict_items):
    """Pick one key at random, with probability proportional to its weight.

    Args:
        dict_items: Non-empty iterable of ``(key, weight)`` pairs, such as
            the ``.items()`` view of a ``Counter``.

    Returns:
        A single key drawn with ``random.choices`` using the paired weights.
    """
    candidates, counts = zip(*dict_items)
    # random.choices returns a list; with the default k=1 it has one element.
    (picked,) = random.choices(candidates, weights=counts)
    return picked
def make_name(lines, n=2):
    """Generate one random word from an n-gram model of ``lines``.

    The follower counts are built with ``get_ngram_weights`` on every call.
    Starting from a context of ``n - 1`` ``None`` values, a follower is drawn
    with ``choose_weighted_key``, appended to the word and pushed into the
    context, until the end marker ``None`` is drawn.

    Args:
        lines: Iterable of training words.
        n: N-gram length; the context holds the previous ``n - 1`` items.

    Returns:
        The generated word as a string. An empty string is returned when
        ``lines`` provides no training data.
    """
    weights = get_ngram_weights(lines, n=n)
    # Bounded deque: appending a letter drops the oldest one automatically.
    context = collections.deque([None] * (n - 1), n - 1)
    word = []
    while True:
        # .get() neither raises nor inserts for a context that was never
        # observed (including the start context of an empty corpus).
        followers = weights.get(tuple(context))
        if not followers:
            break
        letter = choose_weighted_key(followers.items())
        if letter is None:
            # None is the end-of-word marker recorded during training.
            break
        word.append(letter)
        context.append(letter)
    return ''.join(word)
def gen_names(filename, n=2):
    """Yield random names modelled on the words listed in a file.

    The file is read once, one word per line, with surrounding whitespace
    stripped. Each yielded name comes from ``make_name(words, n=n)``.

    Args:
        filename: Path of the text file holding the training words.
        n: N-gram length forwarded to ``make_name``.

    Yields:
        Generated names, endlessly. If the file contains no non-blank
        lines, nothing is yielded.
    """
    with open(filename) as fp:
        # Blank lines would otherwise be learned as zero-length words and
        # come back out as empty names.
        words = [word for word in map(str.strip, fp) if word]
    if not words:
        # Nothing to learn from: finish instead of sampling an empty model.
        return
    while True:
        yield make_name(words, n=n)
def main(filename, n=2):
    """Print generated names, one per line, flushing after each.

    Names come from ``gen_names(filename, n)``; printing continues until
    that generator is exhausted or the process is interrupted.

    Args:
        filename: Path of the training word list.
        n: N-gram length forwarded to ``gen_names``.
    """
    names = gen_names(filename, n)
    for generated in names:
        # Flush per name so each one reaches the consumer immediately.
        print(generated, flush=True)
if __name__ == '__main__':
    import sys

    # Command line: FILENAME [N]. N is optional so that main()'s own default
    # n-gram length applies when it is omitted.
    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit('usage: {} FILENAME [N]'.format(sys.argv[0]))
    if len(cli_args) > 1:
        main(cli_args[0], int(cli_args[1]))
    else:
        main(cli_args[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment