Skip to content

Instantly share code, notes, and snippets.

@nachowski
Forked from grantslatton/hngen.py
Last active December 30, 2015 01:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nachowski/7754528 to your computer and use it in GitHub Desktop.
import urllib2
import re
import sys
from collections import defaultdict
from random import random
import json
from collections import namedtuple
"""
Introducing: Face-Smash!
Create a file called statuses.json with the following data:
https://developers.facebook.com/tools/explorer/145634995501895/?method=GET&path=me%2Fstatuses%3Ffields%3Dmessage%26limit%3D400
You can alternatively call the Graph API directly using
https://graph.facebook.com/me/statuses?fields=message&limit=400
Based on this wonderful piece of code:
https://gist.github.com/grantslatton/7694811
"""
# Load the exported Facebook statuses (see the module docstring for how to
# fetch statuses.json via the Graph API) and collect each status message text.
# Using a with-statement guarantees the file is closed even if json parsing
# raises, instead of relying on a trailing close() call.
with open("statuses.json") as archive:
    data = json.loads(archive.read())[u'data']
# One entry per status update; each status dict carries its text under 'message'.
titles = [e[u'message'] for e in data]
# markov_map[prefix][word] counts how often `word` follows the (up to
# `lookback`-word) `prefix` across all statuses.
markov_map = defaultdict(lambda:defaultdict(int))
lookback = 2
#Generate map in the form word1 -> word2 -> occurrences of word2 after word1
# NOTE(review): titles[:-1] skips the last status — likely carried over from
# the forked HN script where the final entry was a nav link; confirm intended.
for title in titles[:-1]:
    title = title.split()
    if len(title) > lookback:
        # i runs to len(title) inclusive: at i == len(title) the successor
        # slice title[i:i+1] is empty, so ''.join gives '' — the end-of-sentence
        # sentinel consumed by the generation loop below.
        for i in xrange(len(title)+1):
            markov_map[' '.join(title[max(0,i-lookback):i])][' '.join(title[i:i+1])] += 1
# Turn the raw successor counts into conditional probabilities, in place:
# markov_map[prefix][word] becomes P(word | prefix).
for prefix, successors in markov_map.items():
    denom = float(sum(successors.values()))
    for nxt in successors:
        successors[nxt] /= denom
#Typical sampling from a categorical distribution
def sample(items):
    """Draw one key from (key, weight) pairs by weighted reservoir sampling.

    Walks the pairs once, keeping a running weight total; each item replaces
    the current choice with probability weight/total-so-far, which yields a
    draw proportional to the weights. Returns None if the total never becomes
    truthy (e.g. empty input or all-zero weights).
    """
    chosen = None
    cumulative = 0.0
    for key, weight in items:
        cumulative += weight
        if not cumulative:
            continue
        if random() < weight / cumulative:
            chosen = key
    return chosen
# Generate 100 novel sentences by walking the Markov chain.
sentences = []
while len(sentences) < 100:
    sentence = []
    # '' is the empty-prefix key, i.e. the distribution over opening words.
    next_word = sample(markov_map[''].items())
    # '' is also the end-of-sentence sentinel inserted during map building.
    # NOTE(review): if a prefix has no successors, sample() returns None and
    # the join on the next iteration would raise; also, if every generated
    # sentence is a substring of a real status this loop never terminates.
    while next_word != '':
        sentence.append(next_word)
        next_word = sample(markov_map[' '.join(sentence[-lookback:])].items())
    sentence = ' '.join(sentence)
    flag = True
    for title in titles: #Prune titles that are substrings of actual titles
        if sentence in title:
            flag = False
            break
    if flag:
        sentences.append(sentence)
# Emit each sentence followed by a blank line (Python 2 print statement).
# Encoding with errors='replace' keeps characters the console encoding cannot
# represent from raising UnicodeEncodeError.
for sentence in sentences:
    print (sentence + '\n').encode(sys.stdout.encoding, errors='replace')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment