Skip to content

Instantly share code, notes, and snippets.

@thurask thurask/clickbait.py
Last active Apr 14, 2016

Embed
What would you like to do?
import functools
import time
from itertools import product
from string import ascii_lowercase

import requests
from bs4 import BeautifulSoup
def RateLimited(maxPerSecond):
    """Build a decorator that throttles a function to ``maxPerSecond`` calls per second.

    Adapted from
    http://stackoverflow.com/questions/667508/whats-a-good-rate-limiting-algorithm/667706#667706

    Args:
        maxPerSecond: Maximum number of calls allowed per second. The minimum
            gap enforced between calls is ``1.0 / maxPerSecond`` seconds.

    Returns:
        A decorator. The function it produces sleeps, when necessary, until
        the minimum interval has passed since the previous call finished,
        then forwards its arguments to the wrapped function and returns the
        wrapped function's result.

    Raises:
        ZeroDivisionError: If ``maxPerSecond`` is zero.
    """
    minInterval = 1.0 / float(maxPerSecond)

    def decorate(func):
        # A one-element list lets the closure update the timestamp without
        # ``nonlocal``. None means "not called yet", so the first call
        # never waits.
        lastTimeCalled = [None]

        # Preserve the wrapped function's __name__ / __doc__ for debugging
        # and introspection.
        @functools.wraps(func)
        def rateLimitedFunction(*args, **kargs):
            if lastTimeCalled[0] is not None:
                # Use the monotonic clock: wall-clock adjustments must not
                # shorten or stretch the enforced gap.
                elapsed = time.monotonic() - lastTimeCalled[0]
                leftToWait = minInterval - elapsed
                if leftToWait > 0:
                    time.sleep(leftToWait)
            try:
                return func(*args, **kargs)
            finally:
                # Record the time even when the call raises, so a failed
                # call still counts against the rate limit.
                lastTimeCalled[0] = time.monotonic()

        return rateLimitedFunction

    return decorate
def capper(somestr):
    """Return the first three characters of ``somestr`` with the first two upper-cased.

    The third character is kept exactly as given and anything after it is
    dropped. Raises ``IndexError`` when ``somestr`` has fewer than three
    characters.
    """
    shouted_prefix = somestr[:2].upper()
    third_char = somestr[2]
    return shouted_prefix + third_char
@RateLimited(5)
def souper(combo):
    """Fetch the bit.ly page for ``combo`` and return its first article link.

    The page URL is built by inserting ``combo`` into
    ``http://bit.ly/1U{0}85+``. The response is parsed as HTML and searched
    for ``<a>`` elements with the CSS class ``article-title``. Calls are
    throttled to at most 5 per second by the ``RateLimited`` decorator.

    Args:
        combo: Text inserted into the bit.ly URL template.

    Returns:
        The ``href`` of the first matching anchor (also printed), or ``None``
        when the page contains no matching anchor.

    Raises:
        SystemExit: If the first matching anchor has no ``href``; the page
            URL is printed first so the offending combo can be identified.
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
    """
    page_url = "http://bit.ly/1U{0}85+".format(combo)
    # Without a timeout a single stalled connection would hang the whole run.
    req = requests.get(page_url, timeout=30)
    soup = BeautifulSoup(req.content, "html.parser")
    articles = soup.find_all("a", class_="article-title")
    if not articles:
        return None
    try:
        url = articles[0]["href"]
    except (IndexError, KeyError):
        # A tag without the attribute raises KeyError; IndexError is kept
        # only for compatibility with the original handler.
        print(page_url)
        raise SystemExit
    print(url)
    return url
if __name__ == "__main__":
    # Every three-letter lowercase combination, run through capper()
    # (first two letters upper-cased), e.g. "abc" -> "ABc".
    keywords = [
        capper("".join(letters))
        for letters in product(ascii_lowercase, repeat=3)
    ]
    # souper() returns None when a page has no article link; drop those so
    # str.join() does not raise TypeError after all requests have been made.
    urls = [url for url in map(souper, keywords) if url is not None]
    # Explicit encoding so the output does not depend on the platform's
    # default locale encoding.
    with open("this_is_stupid.txt", "w", encoding="utf-8") as afile:
        afile.write("\n".join(urls))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.