impshum/markovify.py

## markovify.py
# pip3 install tweepy markovify text_cleaner
# Create the 5 text files mentioned below
# Run - python3 run.py

import markovify
import tweepy
from text_cleaner import keep
from text_cleaner.processor.common import ASCII
from text_cleaner.processor.misc import URL, ESCAPED_WHITESPACE
import re

# Run-mode switches (0 = off, any truthy value = on).
# test_mode: when set, the entry point skips clean/get_tweets/sieve/merge and
#            only calls silly() on whatever out3 already contains.
test_mode = 0
# post_mode: when set, silly() also posts the generated sentence through
#            api.update_status(); the sentence is printed either way.
post_mode = 0

# Twitter API credentials.
# NOTE(review): the 'XXXX' values look like placeholders to be replaced
# before running - confirm.
consumer_key = 'XXXX'
consumer_secret = 'XXXX'
access_key = 'XXXX-XXXX'
access_secret = 'XXXX'

# Module-level tweepy client used by get_tweets() and silly().
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

# Working files. in1/in2 match the '<screen_name>.txt' paths that get_tweets()
# writes for the two accounts fetched by the entry point; out1/out2 receive
# the sieve() output for each; out3 is the merged corpus that silly() reads.
in1 = 'officialjaden.txt'
in2 = 'realDonaldTrump.txt'
out1 = 'officialjaden_out.txt'
out2 = 'realDonaldTrump_out.txt'
out3 = 'markov.txt'


def get_tweets(twatter):
    """Download a user's timeline and save it to ``<twatter>.txt``.

    The timeline is requested in pages of 200 through ``api.user_timeline``.
    After each non-empty page the next request is capped with ``max_id`` set
    to one below the id of the last tweet received, and paging stops at the
    first empty page.  The text of every tweet is then written to
    ``twatter + '.txt'``, one tweet per line; the file is overwritten.

    An account that returns no tweets at all produces an empty file instead
    of raising ``IndexError``.

    Args:
        twatter: Screen name passed to ``api.user_timeline``; also used as
            the stem of the output file name.
    """
    print('Getting tweets')
    alltweets = []

    new_tweets = api.user_timeline(screen_name=twatter, count=200)
    while new_tweets:
        alltweets.extend(new_tweets)
        # Ask only for tweets strictly older than the last one received.
        oldest = new_tweets[-1].id - 1
        new_tweets = api.user_timeline(
            screen_name=twatter, count=200, max_id=oldest)

    with open(twatter + '.txt', 'w') as f:
        f.writelines(tweet.text + '\n' for tweet in alltweets)


def sieve(filein, fileout):
    """Clean the raw tweets in *filein* and append them to *fileout*.

    Every input line goes through these steps:

    1. ``keep(line, [ASCII])`` and ``URL.remove`` from ``text_cleaner``
       (presumably dropping non-ASCII characters and URLs - confirm against
       the text_cleaner documentation).
    2. Hashtags, @mentions, $cashtags and stray ``# @ $ " |`` characters are
       replaced by a space and all whitespace runs are collapsed.
    3. A fixed list of literal fragments (retweet prefixes, leftover colons
       and underscores, URL scheme fragments, ``Soo``) is removed and
       ``!!!`` is shortened to ``!``.

    The cleaned line is appended to *fileout* followed by a single space, so
    the output gains no newlines.

    Args:
        filein: Path of the raw tweet file to read.
        fileout: Path of the file the cleaned text is appended to.
    """
    print('Processing tweets')

    # Raw string: the backslash escapes are meant for the regex engine, not
    # for Python's string-literal parser.  Compiled once, outside the loop.
    noise = re.compile(r'(\#[a-zA-Z0-9]+)|(\@[A-Za-z0-9]+)|\$(\w+)|([#@$"|])')

    # Applied in order, longest fragment first: a fragment must be handled
    # before any shorter fragment it contains (e.g. 'https://' before
    # 'http' and ':'), otherwise the shorter one eats part of it and the
    # longer one can never match.
    reps = (
        ('RT : . : ', ''),
        ('https://', ''),
        ('RT : - ', ''),
        ('http://', ''),
        ('RT _: ', ''),
        ('https:', ''),
        ('RT : ', ''),
        ('https', ''),
        ('http:', ''),
        (': : ', ''),
        ('http', ''),
        ('RT ', ''),
        ('Soo', ''),
        ('!!!', '!'),
        (': ', ''),
        (':', ''),
        ('_', ''),
    )

    def replace_all(text, pairs):
        """Apply each (old, new) literal replacement to *text* in order."""
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    with open(filein, 'r') as x, open(fileout, 'a') as y:
        for line in x:
            k = keep(
                line,
                [ASCII],
            )
            k = URL.remove(k)
            k = ' '.join(noise.sub(" ", k).split())
            y.write(replace_all(k, reps) + ' ')


def merge(fileout1, fileout2, fileout3):
    """Interleave two text files line by line into *fileout3*.

    Output line *n* is line *n* of *fileout2*, a space, then line *n* of
    *fileout1*, each with trailing whitespace stripped.  When the files have
    different lengths, the surplus lines of the longer file are written on
    their own instead of being dropped, so an empty file on one side no
    longer empties the merged result.

    Args:
        fileout1: Path of the file whose lines form the second half of each
            merged line.
        fileout2: Path of the file whose lines form the first half of each
            merged line.
        fileout3: Path of the merged file; it is overwritten.
    """
    from itertools import zip_longest

    with open(fileout1) as xh, open(fileout2) as yh:
        xlines = xh.readlines()
        ylines = yh.readlines()

    with open(fileout3, 'w') as zh:
        # None marks the side that has run out of lines.
        for line1, line2 in zip_longest(ylines, xlines):
            if line1 is None:
                merged = line2.rstrip()
            elif line2 is None:
                merged = line1.rstrip()
            else:
                merged = "{} {}".format(line1.rstrip(), line2.rstrip())
            zh.write(merged + "\n")


def silly():
    """Generate one Markov sentence from the merged corpus and print it.

    Reads the whole of ``out3``, builds a ``markovify.Text`` model with
    ``state_size=2`` and asks it for a sentence of at most 200 characters.
    The result is always printed.  It is also posted with
    ``api.update_status`` when ``post_mode`` is set and a sentence was
    actually produced.
    """
    with open(out3, 'r') as f:
        text = f.read()

    text_model = markovify.Text(text, state_size=2)
    twonk = text_model.make_short_sentence(200)
    # make_short_sentence() returns None when it cannot build a sentence;
    # never send that to the API.
    if post_mode and twonk:
        api.update_status(twonk)
    print(twonk)


def clean():
    """Reset the five working files (in1, in2, out1, out2, out3) to empty.

    Each file is opened in write mode, which creates it if missing and
    truncates it otherwise, and an empty string is written to it.
    """
    for path in (in1, in2, out1, out2, out3):
        with open(path, 'w') as handle:
            handle.write('')


if __name__ == '__main__':
    # Full pipeline unless test_mode is set: reset the working files, fetch
    # both timelines, clean each one, then merge them into the corpus.
    if not test_mode:
        clean()
        for screen_name in ("officialjaden", "realDonaldTrump"):
            get_tweets(screen_name)
        for raw_path, cleaned_path in ((in1, out1), (in2, out2)):
            sieve(raw_path, cleaned_path)
        merge(out1, out2, out3)
    # Always generate (and optionally post) a sentence from the corpus.
    silly()
	# pip3 install tweepy markovify text_cleaner
	# Create the 5 text files mentioned below
	# Run - python3 run.py

	import markovify
	import tweepy
	from text_cleaner import keep
	from text_cleaner.processor.common import ASCII
	from text_cleaner.processor.misc import URL, ESCAPED_WHITESPACE
	import re

	# Run-mode switches (0 = off, any truthy value = on).
	# test_mode: when set, the entry point skips clean/get_tweets/sieve/merge
	#            and only calls silly().
	test_mode = 0
	# post_mode: when set, silly() also posts the generated sentence through
	#            api.update_status().
	post_mode = 0

	# Twitter API credentials.
	# NOTE(review): the 'XXXX' values look like placeholders - confirm.
	consumer_key = 'XXXX'
	consumer_secret = 'XXXX'
	access_key = 'XXXX-XXXX'
	access_secret = 'XXXX'

	# Module-level tweepy client used by get_tweets() and silly().
	auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	auth.set_access_token(access_key, access_secret)
	api = tweepy.API(auth)

	# Working files: raw tweets (in1, in2), cleaned tweets (out1, out2) and
	# the merged corpus (out3).
	in1 = 'officialjaden.txt'
	in2 = 'realDonaldTrump.txt'
	out1 = 'officialjaden_out.txt'
	out2 = 'realDonaldTrump_out.txt'
	out3 = 'markov.txt'


	def get_tweets(twatter):
		"""Download a user's timeline and save it to ``<twatter>.txt``.

		The timeline is requested in pages of 200 through
		``api.user_timeline``.  After each non-empty page the next request is
		capped with ``max_id`` set to one below the id of the last tweet
		received, and paging stops at the first empty page.  The text of
		every tweet is written to ``twatter + '.txt'``, one per line; the
		file is overwritten.  An account with no tweets yields an empty file.

		Args:
			twatter: Screen name passed to ``api.user_timeline``; also the
				stem of the output file name.
		"""
		print('Getting tweets')
		alltweets = []

		new_tweets = api.user_timeline(screen_name=twatter, count=200)
		while new_tweets:
			alltweets.extend(new_tweets)
			# Ask only for tweets strictly older than the last one received.
			oldest = new_tweets[-1].id - 1
			new_tweets = api.user_timeline(
				screen_name=twatter, count=200, max_id=oldest)

		with open(twatter + '.txt', 'w') as f:
			f.writelines(tweet.text + '\n' for tweet in alltweets)


	def sieve(filein, fileout):
		"""Clean the raw tweets in *filein* and append them to *fileout*.

		Every input line is passed through ``keep(line, [ASCII])`` and
		``URL.remove`` from ``text_cleaner`` (presumably dropping non-ASCII
		characters and URLs - confirm).  Hashtags, @mentions, $cashtags and
		stray ``# @ $ " |`` characters are then replaced by a space,
		whitespace is collapsed, and a fixed list of literal fragments is
		removed.  The cleaned line is appended to *fileout* followed by a
		single space, so the output gains no newlines.

		Args:
			filein: Path of the raw tweet file to read.
			fileout: Path of the file the cleaned text is appended to.
		"""
		print('Processing tweets')

		# Raw string with real alternation bars: an escaped bar (\|) matches
		# a literal pipe and would collapse the alternatives into a single
		# sequence.  Compiled once, outside the loop.
		noise = re.compile(r'(\#[a-zA-Z0-9]+)|(\@[A-Za-z0-9]+)|\$(\w+)|([#@$"|])')

		# Applied in order, longest fragment first, so that a fragment is
		# handled before any shorter fragment it contains.
		reps = (
			('RT : . : ', ''),
			('https://', ''),
			('RT : - ', ''),
			('http://', ''),
			('RT _: ', ''),
			('https:', ''),
			('RT : ', ''),
			('https', ''),
			('http:', ''),
			(': : ', ''),
			('http', ''),
			('RT ', ''),
			('Soo', ''),
			('!!!', '!'),
			(': ', ''),
			(':', ''),
			('_', ''),
		)

		def replace_all(text, pairs):
			"""Apply each (old, new) literal replacement to *text* in order."""
			for old, new in pairs:
				text = text.replace(old, new)
			return text

		with open(filein, 'r') as x, open(fileout, 'a') as y:
			for line in x:
				k = keep(
					line,
					[ASCII],
				)
				k = URL.remove(k)
				k = ' '.join(noise.sub(" ", k).split())
				y.write(replace_all(k, reps) + ' ')


	def merge(fileout1, fileout2, fileout3):
		"""Interleave two text files line by line into *fileout3*.

		Output line *n* is line *n* of *fileout2*, a space, then line *n* of
		*fileout1*, each with trailing whitespace stripped.  Surplus lines of
		the longer file are written on their own instead of being dropped.

		Args:
			fileout1: Path of the file supplying the second half of each line.
			fileout2: Path of the file supplying the first half of each line.
			fileout3: Path of the merged file; it is overwritten.
		"""
		from itertools import zip_longest

		with open(fileout1) as xh, open(fileout2) as yh:
			xlines = xh.readlines()
			ylines = yh.readlines()

		with open(fileout3, 'w') as zh:
			# None marks the side that has run out of lines.
			for line1, line2 in zip_longest(ylines, xlines):
				if line1 is None:
					merged = line2.rstrip()
				elif line2 is None:
					merged = line1.rstrip()
				else:
					merged = "{} {}".format(line1.rstrip(), line2.rstrip())
				zh.write(merged + "\n")


	def silly():
		"""Generate one Markov sentence from the merged corpus and print it.

		Reads the whole of ``out3``, builds a ``markovify.Text`` model with
		``state_size=2`` and asks it for a sentence of at most 200
		characters.  The result is always printed.  It is also posted with
		``api.update_status`` when ``post_mode`` is set and a sentence was
		actually produced.
		"""
		with open(out3, 'r') as f:
			text = f.read()

		text_model = markovify.Text(text, state_size=2)
		twonk = text_model.make_short_sentence(200)
		# make_short_sentence() returns None when it cannot build a sentence;
		# never send that to the API.
		if post_mode and twonk:
			api.update_status(twonk)
		print(twonk)


	def clean():
		"""Reset the five working files (in1, in2, out1, out2, out3) to empty.

		Each file is opened in write mode, which creates it if missing and
		truncates it otherwise, and an empty string is written to it.
		"""
		for path in (in1, in2, out1, out2, out3):
			with open(path, 'w') as handle:
				handle.write('')


	if __name__ == '__main__':
		# Full pipeline unless test_mode is set: reset the working files,
		# fetch both timelines, clean each one, then merge them.
		if not test_mode:
			clean()
			get_tweets("officialjaden")
			get_tweets("realDonaldTrump")
			sieve(in1, out1)
			sieve(in2, out2)
			merge(out1, out2, out3)
		# Always generate (and optionally post) a sentence from the corpus.
		silly()