Skip to content

Instantly share code, notes, and snippets.

@minecrafter
Created July 1, 2012 03:51
Show Gist options
  • Save minecrafter/3026737 to your computer and use it in GitHub Desktop.
Save minecrafter/3026737 to your computer and use it in GitHub Desktop.
#!/usr/bin/python2.7
# SuperCoolPosideonShit.py!
# Now with more shit.
# Written by a really cool Debian GNU/Linux user called Tux on freenode ;)
# - http://the.leet.la
import feedparser, sys, urllib, urllib2, re, time
# Pattern for http/https/ftp URLs, reproduced character-for-character from the
# original script and only split across lines for readability.
# NOTE(review): the "&amp;" sequences look like an HTML-escaped "&" left over
# from a copy/paste; they are kept as-is so matching behaviour does not change.
# Confirm the intended character class before editing.
URL_PATTERN = re.compile(
    r"(http|https|ftp)\://"
    r"([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&amp;%\$\-]+)*@)*"
    r"((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\."
    r"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\."
    r"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\."
    r"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"
    r"|localhost"
    r"|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\."
    r"(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))"
    r"(\:[0-9]+)*"
    r"(/($|[a-zA-Z0-9\.\,\?\'\\\+&amp;%\$#\=~_\-]+\:))*"
)

# Pause, in seconds, taken before every page fetch.
FETCH_DELAY = 0.2

# URLs seen this many times or more are left out of the report.
RARE_THRESHOLD = 5


def edited_links(entries):
    """Return the links of feed entries that are not new-account notices.

    An entry is dropped when its summary contains "New user account".
    Entries without a summary are kept; entries without a link are skipped
    because there is nothing to fetch for them.
    """
    links = []
    for entry in entries:
        if "New user account" in getattr(entry, "summary", ""):
            continue
        link = getattr(entry, "link", None)
        if link is not None:
            links.append(link)
    return links


def fetch_pages(links, delay=FETCH_DELAY):
    """Yield the body of each page in ``links``, sleeping ``delay`` first.

    A page that cannot be fetched is reported on stderr and skipped, so one
    bad link does not abort the whole run.
    """
    for link in links:
        time.sleep(delay)
        try:
            response = urllib2.urlopen(link)
            try:
                body = response.read()
            finally:
                response.close()
        except IOError as exc:
            # urllib2.URLError / HTTPError and socket errors derive from IOError.
            sys.stderr.write("skipping %s: %s\n" % (link, exc))
            continue
        yield body


def count_urls(pages):
    """Count every URL_PATTERN match across ``pages``.

    Returns a list of ``(url, count)`` pairs in first-seen order.
    """
    first_seen = []
    counts = {}
    for page in pages:
        for match in URL_PATTERN.finditer(page):
            url = match.group()
            if url in counts:
                counts[url] += 1
            else:
                counts[url] = 1
                first_seen.append(url)
    return [(url, counts[url]) for url in first_seen]


def rare_hosts(url_counts, threshold=RARE_THRESHOLD):
    """Return the host part of every URL counted fewer than ``threshold`` times.

    The host is the text between the "//" and the next "/", so it still
    carries any user-info or port that was part of the match. One host is
    returned per qualifying URL, so a host can appear more than once.
    """
    return [url.split("/")[2] for url, count in url_counts if count < threshold]


def main(argv):
    """Print the rarely-seen link hosts for the feed URL given in ``argv[1]``.

    Returns the process exit status: 1 when no feed URL was supplied,
    0 otherwise.
    """
    if len(argv) <= 1:
        print("I need a URL")
        return 1
    # Only the entry links are needed from the parsed feed.
    feed = feedparser.parse(argv[1])
    pages = fetch_pages(edited_links(feed.entries))
    for host in rare_hosts(count_urls(pages)):
        print(host)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment