Created
October 15, 2012 15:48
-
-
Save dtuominen/3893204 to your computer and use it in GitHub Desktop.
markov.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""markov generator to make u feel bad | |
Usage: | |
markov.py [<length>] [--sms=<number>] | |
Options: | |
--help -h show this menu | |
-s NUMBER, --sms=<number> send an sms to a number | |
""" | |
import random | |
import string | |
from collections import defaultdict | |
import sms | |
try: | |
from docopt import docopt | |
from xerox import copy as _copy | |
except ImportError: | |
from lib.docopt import docopt | |
from lib.xerox import copy as _copy | |
def markov(table, triplets, length):
    """Generate a space-joined chain of words by walking the prefix table.

    Args:
        table: mapping of ``(word1, word2)`` prefix tuples to the list of
            words recorded after that prefix.
        triplets: sequence of ``((word1, word2), suffix)`` entries. Only the
            prefix of the first entry is used, to seed the chain.
        length: target word count. The seed contributes two words and the
            loop runs ``length - 2`` times; each restart from a random
            prefix adds two extra words, so the result can be longer.

    Returns:
        The generated words joined by single spaces.

    Raises:
        IndexError: if ``triplets`` is empty, or if a restart is needed
            while ``table`` has no prefixes at all.
    """
    w1, w2 = triplets[0][0]
    output = [w1, w2]
    for _ in range(length - 2):
        if not table.get((w1, w2)):
            # Dead end: the current pair has no recorded suffix, so restart
            # from a random known prefix. Checking the table (rather than
            # searching the last word for the text 'with') avoids spurious
            # restarts on words such as "without". list() keeps
            # random.choice working where dict.keys() is a view.
            w1, w2 = random.choice(list(table))
            output.extend((w1, w2))
        choices = table.get((w1, w2))
        if choices:
            current_word = random.choice(choices)
            output.append(current_word)
            # Slide the two-word window forward by one word.
            w1, w2 = w2, current_word
    return ' '.join(output)
def make_table(words, triplets, chains):
    """Fill ``chains`` with the suffixes observed after each prefix.

    Args:
        words: unused; kept so existing callers keep working.
        triplets: iterable of entries whose item 0 is the prefix and whose
            item 1 is the word that followed it.
        chains: mapping of prefix -> list of suffixes. It is updated in
            place; prefixes that are not present yet are created.

    Returns:
        The same ``chains`` object that was passed in.
    """
    for entry in triplets:
        # setdefault avoids a KeyError when the caller did not pre-seed the
        # prefix with an empty list.
        chains.setdefault(entry[0], []).append(entry[1])
    return chains
def construct(words):
    """Build the empty prefix table and the list of (prefix, suffix) entries.

    Every adjacent pair of words becomes a prefix. Its suffix is the word
    two positions after the start of the pair, or the placeholder 'with'
    when the pair sits at the very end of ``words``.

    Returns:
        A ``(chains, trips)`` tuple: ``chains`` maps each prefix pair to a
        fresh empty list, and ``trips`` holds ``(prefix, suffix)`` tuples in
        input order.
    """
    chains = defaultdict()
    trips = []
    total = len(words)
    for start, pair in enumerate(zip(words, words[1:])):
        follower_at = start + 2
        # Only the final pair has no following word.
        follower = words[follower_at] if follower_at < total else 'with'
        trips.append((pair, follower))
        chains[pair] = []
    return chains, trips
def get_text(fname='fyad.txt'):
    """Read ``fname`` and return its contents as a list of whitespace-separated words."""
    with open(fname) as source:
        # Newlines count as whitespace, so one split covers the whole file.
        return source.read().split()
def main(length):
    """Load the default corpus, build the prefix table and return a generated chain.

    ``length`` is passed straight through to ``markov``.
    """
    corpus = get_text()
    chains, trips = construct(corpus)
    chains = make_table(corpus, trips, chains)
    return markov(chains, trips, length)
# Longest message body handed to sms.sms(); longer output is truncated.
_SMS_MAX_CHARS = 160


def _finish_sentence(text):
    """Return ``text`` without trailing whitespace/punctuation, ending in one '.'."""
    return text.strip().rstrip(string.punctuation + string.whitespace) + '.'


if __name__ == '__main__':
    # Command-line entry point: generate a chain, print it, optionally
    # copy it and/or send it by SMS.
    args = docopt(__doc__)
    length = 25
    if args.get('<length>'):
        try:
            length = int(args.get('<length>'))
        except ValueError:
            print('length option must be a value, using default({})'.format(length))

    # Drop whatever punctuation the chain happened to end on before adding
    # the final full stop (previously the stripped value was discarded).
    output = _finish_sentence(main(length))
    print(output)
    print('generated output length: {} chars | {} words'.format(len(output), len(output.split())))

    # NOTE(review): '--copy' is not declared in the usage text of the module
    # docstring, so this branch presumably never fires -- confirm.
    if args.get('--copy'):
        _copy(output)

    phone = args['--sms']
    if phone:
        # The error message promises "10 digits ONLY", so check the
        # characters as well as the length.
        if len(phone) == 10 and phone.isdigit():
            if len(output) <= _SMS_MAX_CHARS:
                message = output
                sms.sms(phone, message)
                print('sent sms to {}. body: {}'.format(phone, message))
            else:
                # Only send the first 160 chars.
                message = output[:_SMS_MAX_CHARS]
                sms.sms(phone, message)
                print('Message sent to {}. body: {}'.format(phone, message))
        else:
            print('incorrect phone number. 10 digits ONLY.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment