Skip to content

Instantly share code, notes, and snippets.

@impshum
Last active February 1, 2018 06:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save impshum/7ed755b84f980490b5ed2df15f363d77 to your computer and use it in GitHub Desktop.
Save impshum/7ed755b84f980490b5ed2df15f363d77 to your computer and use it in GitHub Desktop.
Crap markov chain thing
# pip3 install tweepy markovify text_cleaner
# Create the 5 text files mentioned below
# Run - python3 run.py
import markovify
import tweepy
from text_cleaner import keep
from text_cleaner.processor.common import ASCII
from text_cleaner.processor.misc import URL, ESCAPED_WHITESPACE
import re
# Run-time switches (0 = off, any truthy value = on).
# test_mode: checked by the __main__ block before the download/clean/merge steps.
test_mode = 0
# post_mode: when truthy, silly() also posts the generated sentence via the API.
post_mode = 0
# Twitter API credentials -- placeholders, replace with real values before running.
consumer_key = 'XXXX'
consumer_secret = 'XXXX'
access_key = 'XXXX-XXXX'
access_secret = 'XXXX'
# Authenticated tweepy client shared by get_tweets() and silly().
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
# Raw tweet dumps. get_tweets() writes to '<screen_name>.txt', so these names
# must match the screen names passed to it in the __main__ block.
in1 = 'officialjaden.txt'
in2 = 'realDonaldTrump.txt'
# Cleaned text produced by sieve() from each raw dump.
out1 = 'officialjaden_out.txt'
out2 = 'realDonaldTrump_out.txt'
# Combined corpus written by merge() and read by silly().
out3 = 'markov.txt'
def get_tweets(twatter):
    """Download a user's timeline and save it to ``<twatter>.txt``.

    Pages through ``api.user_timeline`` 200 tweets at a time until a page
    comes back empty, then writes each tweet's text followed by a newline
    to a file named after the screen name (overwriting any existing file).

    An account with no tweets yields an empty file instead of raising
    ``IndexError``.

    Args:
        twatter: Twitter screen name to fetch.
    """
    print('Getting tweets')
    alltweets = []
    # The first request carries no max_id; later ones continue below the
    # oldest tweet collected so far.
    new_tweets = api.user_timeline(screen_name=twatter, count=200)
    while new_tweets:
        alltweets.extend(new_tweets)
        # One below the oldest id so the next page does not repeat it.
        oldest = alltweets[-1].id - 1
        new_tweets = api.user_timeline(
            screen_name=twatter, count=200, max_id=oldest)
    with open(twatter + '.txt', 'w') as f:
        for tweet in alltweets:
            f.write(tweet.text + '\n')
def sieve(filein, fileout):
    """Clean a raw tweet dump and append the result to ``fileout``.

    Each input line is passed through ``keep(..., [ASCII])`` and
    ``URL.remove`` (presumably dropping non-ASCII characters and URLs --
    see the text_cleaner package), then hashtags, @mentions, $tags and the
    characters ``# @ $ " |`` are replaced by spaces, whitespace is
    collapsed, and a fixed table of leftovers (retweet prefixes, colons,
    underscores, ...) is stripped. Cleaned lines are appended to
    ``fileout`` separated by single spaces, not newlines.

    Args:
        filein: Path of the raw tweet file to read.
        fileout: Path of the file to append cleaned text to.
    """
    print('Processing tweets')

    # Raw string so the backslashes reach the regex engine untouched;
    # the non-raw form relied on invalid string escapes (\#, \@, \$, \w).
    noise = re.compile(r'(\#[a-zA-Z0-9]+)|(\@[A-Za-z0-9]+)|\$(\w+)|([#@$"|])')

    # Replacements run in insertion order, so earlier keys can consume text
    # that later, longer keys would otherwise have matched. The order is
    # kept as-is to preserve the existing output.
    reps = {'RT : ': '', 'RT ': '', 'RT _: ': '',
            'RT : . : ': '', 'RT : - ': '', ': : ': '', ': ': '', ':': '',
            '_': '', 'Soo': '', '!!!': '!',
            'http': '', 'https': '', 'http:': '', 'http://': '',
            'https:': '', 'https://': ''}

    def replace_all(text, dic):
        # Apply every old -> new substitution of dic to text, in order.
        for old, new in dic.items():
            text = text.replace(old, new)
        return text

    with open(filein, 'r') as x, open(fileout, 'a') as y:
        for line in x:
            k = keep(
                line,
                [ASCII],
            )
            k = URL.remove(k)
            # split()/join collapses the runs of spaces left by the regex.
            k = ' '.join(noise.sub(" ", k).split())
            y.write(replace_all(k, reps) + ' ')
def merge(fileout1, fileout2, fileout3):
    """Pair up the lines of two files and write them to a third.

    Line *n* of the output is line *n* of ``fileout2`` followed by a space
    and line *n* of ``fileout1``, both with trailing whitespace removed.
    Pairing stops at the end of the shorter input. ``fileout3`` is
    overwritten.

    Args:
        fileout1: Path of the file supplying the second half of each line.
        fileout2: Path of the file supplying the first half of each line.
        fileout3: Path of the output file.
    """
    with open(fileout1) as first, open(fileout2) as second, \
            open(fileout3, 'w') as combined:
        for lead, trail in zip(second, first):
            combined.write(lead.rstrip() + ' ' + trail.rstrip() + '\n')
def silly():
    """Generate one Markov-chain sentence from the merged corpus.

    Reads ``out3``, builds a ``markovify.Text`` model with a state size of
    2 and asks it for a sentence of at most 200 characters. The sentence
    is printed, and also posted with ``api.update_status`` when
    ``post_mode`` is truthy.

    If the model cannot produce a sentence, a notice is printed and
    nothing is posted.
    """
    with open(out3, 'r') as f:
        text = f.read()
    text_model = markovify.Text(text, state_size=2)
    twonk = text_model.make_short_sentence(200)
    if twonk is None:
        # make_short_sentence returns None when it fails to build a
        # sentence; never send that to the API.
        print('Could not generate a sentence from ' + out3)
        return
    if post_mode:
        api.update_status(twonk)
    print(twonk)
def clean():
    """Create or empty the five working files used by the pipeline."""
    for path in (in1, in2, out1, out2, out3):
        # Opening in 'w' mode truncates (or creates) the file; nothing
        # needs to be written.
        with open(path, 'w'):
            pass
if __name__ == '__main__':
    # NOTE(review): the nesting below is reconstructed -- the refresh steps
    # are taken to be guarded by test_mode and silly() to run always;
    # confirm against the original script.
    if not test_mode:
        # Full refresh: reset files, download, clean, then combine.
        clean()
        for handle in ("officialjaden", "realDonaldTrump"):
            get_tweets(handle)
        for raw, cleaned in ((in1, out1), (in2, out2)):
            sieve(raw, cleaned)
        merge(out1, out2, out3)
    silly()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment