Skip to content

Instantly share code, notes, and snippets.

@skurmedel
Created January 23, 2015 12:03
Show Gist options
  • Save skurmedel/1bf7e224dbb7c466ada7 to your computer and use it in GitHub Desktop.
Save skurmedel/1bf7e224dbb7c466ada7 to your computer and use it in GitHub Desktop.
Quotes timecube.com
from urllib.request import *
from html.parser import HTMLParser
import sys
import os
import random
class _TimecubeParser(HTMLParser):
def __init__(self):
super().__init__(self)
self.in_span = False
self.pieces = []
def handle_starttag(self, tag, attrs):
if tag.lower() == "span":
self.in_span = True
def handle_endtag(self, tag):
if tag.lower() == "span":
self.in_span = False
def handle_data(self, data):
if not self.in_span:
return
if data == "\r\n\r\n" or data == "\r\n" or data == " ":
# The markup is pretty strange so lets ignore
# some weird occurances.
pass
else:
self.pieces.append(data.replace("\r\n", " ").strip())
def get_page_bytes():
    """Download http://www.timecube.com/ and return the raw response body.

    Returns:
        The undecoded bytes read from the HTTP response.
    """
    with urlopen("http://www.timecube.com/") as response:
        return response.read()
def get_random_text(n=3):
    """Return ``n`` distinct random sentences taken from the page's spans.

    The page is downloaded, decoded as UTF-8 (undecodable bytes are dropped),
    and the text inside its ``<span>`` elements is joined and split on ".".
    Each non-empty fragment gets its trailing "." back.

    Args:
        n: Number of sentences to pick.

    Returns:
        A list of ``n`` unique sentences in random order.

    Raises:
        ValueError: If ``n`` is negative or larger than the number of
            distinct sentences found (raised by ``random.sample``).
    """
    page = get_page_bytes()
    # Assume utf-8
    html = page.decode("utf-8", "ignore")

    parser = _TimecubeParser()
    parser.feed(html)
    # Flush any text the parser is still buffering at end of input.
    parser.close()

    fragments = " ".join(parser.pieces).split(".")
    # Skip empty fragments (e.g. after the final "."), which would otherwise
    # turn into a bare "." quote. The set removes duplicate sentences.
    sentences = {
        fragment.strip() + "." for fragment in fragments if fragment.strip()
    }

    # random.sample() requires a sequence; passing a set raises TypeError on
    # Python 3.11+. Sorting also makes results reproducible under a seed.
    return random.sample(sorted(sentences), n)
if __name__ == '__main__':
    # Run as a script: print the random quotes separated by blank lines.
    quotes = get_random_text()
    print("\n\n".join(quotes))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment