Skip to content

Instantly share code, notes, and snippets.

@pqcfox
Created January 19, 2015 06:50
Show Gist options
  • Save pqcfox/19ff57ece74caeb0685d to your computer and use it in GitHub Desktop.
Save pqcfox/19ff57ece74caeb0685d to your computer and use it in GitHub Desktop.
A simple Markov chain to generate nicknames or fake English words.
import collections
import random
import re
import string
alphabet = string.ascii_lowercase

# Defaults matching the values the script has always used.
WORDS_PATH = '/usr/share/dict/words'
NAME_COUNT = 20
NAME_LENGTH = 6


def load_words(path=WORDS_PATH):
    """Return the words listed one per line in *path*, cleaned for counting.

    Each word is lowercased and every character outside a-z is removed, so
    a word may come back shorter than it was written, or even empty.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as handle:
        raw_words = handle.read().splitlines()
    return [re.sub(r'[^a-z]', r'', word.lower()) for word in raw_words]


def build_counters(words):
    """Count, for each lowercase letter, which letters follow it in *words*.

    *words* must contain only lowercase a-z characters (as produced by
    ``load_words``); any other character raises ``KeyError``.

    Returns a dict mapping every letter of ``alphabet`` to a
    ``collections.Counter`` of the letters seen immediately after it.
    """
    counters = {letter: collections.Counter() for letter in alphabet}
    for word in words:
        # Pair each letter with its successor; empty and one-letter words
        # yield no pairs and are skipped naturally.
        for current, following in zip(word, word[1:]):
            counters[current][following] += 1
    return counters


def weighted_choice(counter, rng=random):
    """Pick a key of *counter* with probability proportional to its count.

    This has the same distribution as choosing uniformly from
    ``counter.elements()`` but never materialises that (potentially huge)
    list. Keys whose count is not positive are never picked.

    Returns ``None`` when *counter* holds no positive counts.
    """
    total = sum(count for count in counter.values() if count > 0)
    if total == 0:
        return None
    # Walk the cumulative counts until the drawn offset falls inside a key.
    remaining = rng.randrange(total)
    for key, count in counter.items():
        if count <= 0:
            continue
        remaining -= count
        if remaining < 0:
            return key
    return None


def generate_name(counters, length=NAME_LENGTH, rng=random):
    """Generate one name by walking the letter-transition counts.

    The first letter is chosen uniformly from ``alphabet``; each later
    letter is drawn according to how often it followed the previous one.

    The result always has at least one letter. It is shorter than *length*
    only when the walk reaches a letter with no recorded followers.
    """
    letters = [rng.choice(alphabet)]
    while len(letters) < length:
        following = weighted_choice(counters[letters[-1]], rng)
        if following is None:
            # Dead end: nothing ever followed this letter in the word list.
            break
        letters.append(following)
    return ''.join(letters)


def main():
    """Print NAME_COUNT generated names built from the system word list."""
    counters = build_counters(load_words())
    for _ in range(NAME_COUNT):
        # The call form of print works on both Python 2 and Python 3.
        print(generate_name(counters))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment