Skip to content

Instantly share code, notes, and snippets.

@nvanderw
Created April 25, 2013 21:52
Show Gist options
  • Save nvanderw/5463517 to your computer and use it in GitHub Desktop.
Save nvanderw/5463517 to your computer and use it in GitHub Desktop.
Zalgo
from __future__ import division
import sys
from random import SystemRandom
# A list of unicode combining characters: one entry per code point from
# U+0300 up to and including U+036F (the upper bound 0x370 is exclusive).
COMBINING = [unichr(c) for c in xrange(0x300, 0x370)]
# Alias for KeyError. gen_markov catches it when the current word has no
# entry in its transition table and then restarts from a random word.
ZalgoError = KeyError
def zalgoify(source, rand):
    """
    Decorate every character of ``source`` with random combining marks.

    ``source`` is any iterable of characters and ``rand`` is a random
    number generator offering ``random()`` and ``sample(population, k)``.
    This is a generator: each input character is yielded unchanged and is
    followed by zero or more distinct entries drawn from COMBINING.
    """
    # After each mark we stop with this probability (1 in 5), so the number
    # of marks per character follows a (truncated) geometric distribution.
    stop_probability = 1.0 / 5

    for base in source:
        yield base  # the undecorated character always comes first

        limit = len(COMBINING)  # sample() cannot draw more than this many
        count = 0
        while True:
            # The random draw is made before the cap is checked, so the
            # generator is consulted once more even when the cap is reached.
            if rand.random() <= stop_probability or count >= limit:
                break
            count += 1

        for mark in rand.sample(COMBINING, count):
            yield mark
def gen_markov(source, rand):
    """
    Given an input source of tokens, generate an infinite sequence of words
    using a Markov model

    Parameters:
        source: iterable of hashable tokens (for example a list of words).
            It is read in full the first time the generator is advanced.
        rand: random number generator offering ``random()`` and
            ``choice(seq)``.

    Yields:
        Tokens taken from ``source``, without end. Each token is picked
        according to how often it followed the previously yielded token.

    Raises:
        IndexError: if ``source`` contains no transition at all (fewer than
            two tokens), because ``rand.choice`` is then handed an empty
            list (``random.Random.choice`` raises IndexError for that).
    """
    def get_transition_map(source):
        """
        Given a source, which is an iterable sequence of words, generate a
        dictionary of transitions.
        The transitions dictionary maps a word to another dictionary of
        the possible words we could emit next.
        This second-level dictionary maps "next words" to the number of
        times this transition has occurred in the text.
        """
        transitions = {}
        last_token = None
        for token in source:
            if last_token is not None:
                successors = transitions.setdefault(last_token, {})
                successors[token] = successors.get(token, 0) + 1
            last_token = token
        return transitions

    def get_frequency_map(transitions):
        """
        Given a map of absolute transition occurrences like the
        output of get_transition_map, scales all of the transitions
        from each word to sum to 1 so that each word has a
        probability mass function of possible transitions.
        """
        freqs = {}
        for token, successors in transitions.items():
            # float() keeps this a true division even without
            # "from __future__ import division" in effect.
            total = float(sum(successors.values()))
            pmf = {}
            for next_token, count in successors.items():
                pmf[next_token] = count / total
            freqs[token] = pmf
        return freqs

    def select_from_pmf(pmf):
        """
        Given a probability mass function, which is a dictionary mapping
        items to their probabilities, randomly choose one
        """
        cumulative = 0
        selector = rand.random()
        item = None
        for item, prob in pmf.items():
            cumulative += prob
            if cumulative > selector:
                return item
        # Floating-point rounding can leave the accumulated total a hair
        # below 1 (ten probabilities of 0.1 add up to 0.9999999999999999),
        # so a selector close to 1 may never be exceeded. Fall back to the
        # last item instead of implicitly returning None.
        return item

    freqs = get_frequency_map(get_transition_map(source))
    # Only words with at least one recorded successor can start a chain.
    start_words = list(freqs)

    # Choose a first word randomly and begin transitioning
    word = rand.choice(start_words)
    while True:
        yield word
        try:
            pmf = freqs[word]
        except ZalgoError:
            # Dead end: this word was never followed by anything in the
            # source, so restart from a random word.
            word = rand.choice(start_words)
        else:
            word = select_from_pmf(pmf)
# Training text for the Markov model; main() splits it on whitespace and
# passes the resulting words to gen_markov.
TEXT = """oh god the horror it comes it lurks in the shadows oh god oh god
oh no why why oh why no no the fear not the oh it can't be why me parsing
XML with regex it is torture the madness it cannot be fear as it is"""
def main():
    """
    Write an endless stream of zalgo text to standard output.

    Words are generated from TEXT by gen_markov, flattened into characters
    separated by spaces, decorated by zalgoify and written out one piece at
    a time. gen_markov never stops yielding, so this runs until the process
    is interrupted.
    """
    def spell_out(words):
        # Flatten words into single characters, ending each word with a space.
        for word in words:
            for letter in word:
                yield letter
            yield " "

    rand = SystemRandom()
    word_stream = gen_markov(TEXT.split(), rand)
    for piece in zalgoify(spell_out(word_stream), rand):
        sys.stdout.write(piece)
# Start generating only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment