Skip to content

Instantly share code, notes, and snippets.

@emre
Created September 25, 2018 13:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save emre/86f3003df0d77f8a080ad3bfd8e26e62 to your computer and use it in GitHub Desktop.
Save emre/86f3003df0d77f8a080ad3bfd8e26e62 to your computer and use it in GitHub Desktop.
random sentence generator
import re
from bs4 import BeautifulSoup
from markdown import markdown
from lightsteem.client import Client
import markovify
def markdown_to_text(markdown_string):
    """Convert a markdown string to plain text with code snippets removed.

    The markdown is rendered to HTML, every ``<pre>`` and ``<code>`` element
    is replaced by a single space, and the remaining text nodes are joined.

    Args:
        markdown_string: Markdown source text.

    Returns:
        The concatenated text content of the rendered HTML.
    """
    # md -> html -> text since BeautifulSoup can extract text cleanly
    html = markdown(markdown_string)

    # Remove code snippets. DOTALL lets ``.`` cross newlines so multi-line
    # blocks are matched, and the closing tag is spelled exactly ``</code>``
    # (a stray space there would keep the pattern from ever matching).
    html = re.sub(r'<pre\b[^>]*>.*?</pre>', ' ', html, flags=re.DOTALL)
    html = re.sub(r'<code\b[^>]*>.*?</code>', ' ', html, flags=re.DOTALL)

    # extract text
    soup = BeautifulSoup(html, "html.parser")
    return ''.join(soup.find_all(string=True))
def clear_noise(text):
    """Strip raw HTML tags from *text*, then reduce the markdown to plain text."""
    # Drop anything that looks like an HTML tag before rendering the markdown.
    without_tags = re.sub(r'<[^>]+>', '', text)
    return markdown_to_text(without_tags)
def random_post_generator(username):
    """Print up to ten Markov-generated sentences modelled on a user's writing.

    Fetches the user's blog entries and comments, keeps only the items
    authored by ``username``, cleans each body with ``clear_noise`` and feeds
    the combined text to a ``markovify.Text`` model.

    Args:
        username: Account name whose posts and comments are used as corpus.

    Returns:
        None. Generated sentences are written to stdout.
    """
    client = Client()
    print("Fetching main posts...")
    main_posts = client.get_discussions_by_blog(
        {"limit": 100, "tag": username})
    main_comments = client.get_discussions_by_comments(
        {"limit": 100, "start_author": username})

    # Only the user's own writing belongs in the corpus; the blog feed can
    # contain entries by other authors.
    bodies = [
        clear_noise(post["body"])
        for post in main_posts + main_comments
        if post["author"] == username
    ]

    # Join with newlines so the last word of one post does not fuse with the
    # first word of the next one.
    all_text_data = "\n".join(bodies)
    if not all_text_data.strip():
        # Nothing to train the model on.
        print("No text found for {}.".format(username))
        return

    # build the model
    text_model = markovify.Text(all_text_data)
    for _ in range(10):
        sentence = text_model.make_short_sentence(140)
        # make_short_sentence yields None when no sentence satisfying the
        # constraints could be produced; skip instead of printing "None".
        if sentence is not None:
            print(sentence)
# Script entry point: print sample sentences for the 'ned' account.
if __name__ == "__main__":
    random_post_generator(username="ned")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment