Skip to content

Instantly share code, notes, and snippets.

@AlphaGit
Created July 29, 2018 18:13
Show Gist options
  • Save AlphaGit/6f42eeccc3ee56ca39f3f73f74fde8fb to your computer and use it in GitHub Desktop.
Save AlphaGit/6f42eeccc3ee56ca39f3f73f74fde8fb to your computer and use it in GitHub Desktop.
Generating shitposts from tumblr/Gutenberg corpora
import markovify
import tumblr_client
import settings
import re
import nltk
class NltkText(markovify.Text):
    """Markov text model whose tokens carry a part-of-speech tag.

    Every word is stored in the chain as ``word::TAG``, where ``TAG`` is the
    tag assigned by ``nltk.pos_tag``.  ``word_join`` removes the tags again
    when a generated token sequence is turned back into text.
    """

    def word_split(self, sentence):
        """Split *sentence* into a list of ``word::TAG`` tokens.

        The sentence is split with ``self.word_split_pattern`` (inherited
        from the base class).  When the first piece of the split is the empty
        string (e.g. an empty sentence) an empty list is returned and the
        tagger is not called at all.
        """
        words = re.split(self.word_split_pattern, sentence)
        if words[0] == "":
            return []
        return ["::".join(tagged) for tagged in nltk.pos_tag(words)]

    def word_join(self, words):
        """Join ``word::TAG`` tokens into a plain, space-separated string."""
        return " ".join(self._strip_pos_tag(token) for token in words)

    @staticmethod
    def _strip_pos_tag(token):
        """Return the word part of a ``word::TAG`` token.

        The word itself may contain colons (``":"`` is tagged ``":"`` and is
        therefore encoded as ``"::::"``), so cutting at the first ``"::"``
        would lose it.  The tag is never empty, hence the separator is the
        right-most ``"::"`` that still leaves at least one character after
        it.  A token without a separator is returned unchanged.
        """
        cut = token.rfind("::", 0, len(token) - 1)
        return token if cut == -1 else token[:cut]
# Model 1: every text of NLTK's Gutenberg corpus, flattened into one
# space-separated string of its word tokens.
print('loading gutenberg corpora...')
gutenberg_texts = " ".join(
    " ".join(nltk.corpus.gutenberg.words(fileid))
    for fileid in nltk.corpus.gutenberg.fileids()
)
gutenberg_model = NltkText(gutenberg_texts)

# Model 2: built from the open file handle of text_source.txt.
with open('text_source.txt', 'r', encoding='utf8') as f:
    print('loading tumblr corpora...')
    tumblr_text_model = NltkText(f)

# Combine both models; the weights favour the tumblr model 2:1.
text_model = markovify.combine([tumblr_text_model, gutenberg_model], [2, 1])

post = text_model.make_sentence()
if post is None:
    # make_sentence() gives up after a limited number of tries and returns
    # None; abort with a non-zero exit status instead of posting that.
    raise SystemExit('could not generate a sentence; nothing was posted')

tumblr_client.create_post('screaming-bot', post, settings.tags_to_post)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment