Created
November 19, 2010 00:48
-
-
Save admalledd/705967 to your computer and use it in GitHub Desktop.
A short program I wrote to download the top five songs in a given section of newgrounds.com, using urllib2 and BeautifulSoup.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[main] | |
## Easy Listening | |
Classical=3 | |
Jazz=18 | |
## Electronic | |
Ambient=5 | |
Dance=6 | |
Drum N Bass=7 | |
Dubstep=41 | |
House=9 | |
Industrial=8 | |
New Wave=20 | |
Techno=10 | |
Trance=11 | |
Video Game=12 | |
## Hip Hop, Rap, R&B | |
HipHop-Modern=17 | |
HipHop-Olskool=16 | |
R&B=21 | |
## Metal, Rock | |
Brit-Pop=22 | |
Classical-Rock=23 | |
General-Rock=24 | |
Grunge=25 | |
Heavy-Metal=15 | |
Indie=26 | |
Pop=27 | |
Punk=28 | |
## Miscellaneous | |
Funk=13 | |
Goth=14 | |
Latin=19 | |
Miscellaneous=39 | |
Ska=29 | |
Voice Acting=40 | |
## Southern Flavor | |
Bluegrass=1 | |
Blues=2 | |
Country=4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from urllib2 import urlopen, URLError, HTTPError | |
import ConfigParser | |
import BeautifulSoup as bs | |
# Lookup table mapping a genre name to its numeric list id; getindex() reads
# it through config.get('main', <name>).
config = ConfigParser.ConfigParser()
# read() returns the list of files it managed to parse and silently skips
# missing ones. If nothing was found in the current directory, fall back to
# the copy that sits next to this script so it also works when launched from
# another directory.
if not config.read('indexes.cfg'):
    config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'indexes.cfg'))
def _pick_filename(disposition, url):
    """Choose a safe local file name for a download.

    Prefers the ``filename`` parameter of a Content-Disposition header value
    and falls back to the last path segment of *url* when the header is
    missing or carries no file name. Directory components are dropped because
    both inputs are controlled by the remote side.
    """
    name = ''
    for param in (disposition or '').split(';'):
        key, _, value = param.strip().partition('=')
        if key.strip().lower() == 'filename':
            name = value.strip().strip('"')
            break
    if not name:
        name = url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
    # Normalise backslashes first so a Windows-style path is stripped too.
    name = os.path.basename(name.replace('\\', '/'))
    if name in ('', '.', '..'):
        return 'download'
    return name


def dlfile(url):
    """Download *url* into the ``tmp`` directory below the current directory.

    The local file name is taken from the response's Content-Disposition
    header when present, otherwise from the URL. The ``tmp`` directory is
    created if it does not exist yet.

    HTTPError and URLError are reported on stdout instead of being raised;
    any other exception propagates to the caller.
    """
    target_dir = os.path.join(os.getcwd(), 'tmp')
    try:
        response = urlopen(url)
        try:
            # .get() is the mapping-style header lookup; it yields None when
            # the header is absent instead of failing.
            disposition = response.info().get('Content-Disposition')
            filename = _pick_filename(disposition, url)
            target = os.path.join(target_dir, filename)
            print("downloading %s to %s" % (filename, target))
            # check for tmp dir, make if needed
            if not os.path.isdir(target_dir):
                os.mkdir(target_dir)
            # Copy in chunks so a large song is never held in memory at once.
            with open(target, "wb") as local_file:
                while True:
                    chunk = response.read(64 * 1024)
                    if not chunk:
                        break
                    local_file.write(chunk)
        finally:
            # Always release the connection, even if writing fails.
            response.close()
    # handle errors
    except HTTPError as e:
        print("HTTP Error: %s %s" % (e.code, url))
    except URLError as e:
        print("URL Error: %s %s" % (e.reason, url))
def getindex(section='trance', count=5):
    """Download the top-ranked songs of one Newgrounds audio list.

    Args:
        section: either a numeric list id (anything ``int()`` accepts) or a
            genre name that is looked up in the ``[main]`` section of the
            module-level ``config``.
        count: highest rank to download; defaults to 5.

    Raises:
        Whatever ``config.get`` raises when *section* is not a number and is
        not a known genre name, and whatever ``urlopen`` raises when the list
        page itself cannot be fetched.
    """
    try:
        index = int(section)
    except ValueError:
        index = config.get('main', section.strip())
    indexurl = 'http://www.newgrounds.com/audio/list/%s' % index
    page = urlopen(indexurl)
    try:
        soup = bs.BeautifulSoup(page.read())
    finally:
        page.close()
    for tag in soup.findAll('tr', {'class': ('odd', 'even')}):
        rank_cell = tag.find('td', {'class': 'listnum'})
        title_cell = tag.find('td', {'class': 'listtitle'})
        if rank_cell is None or title_cell is None:
            # Not a song row; ignore it rather than crash.
            continue
        try:
            # The last character of the cell text is dropped before parsing
            # (presumably a trailing "." after the rank -- confirm against
            # the live markup).
            rank = int(rank_cell.string[:-1])
        except (TypeError, ValueError):
            continue
        if rank > count:
            break
        # Look the anchor up explicitly: relying on the cell's first parse
        # node breaks as soon as whitespace precedes the link.
        link = title_cell.find('a')
        if link is None or not link.get('href'):
            continue
        # The song number is the last path segment of the listen link,
        # whatever its length.
        song_number = link['href'].rstrip('/').rsplit('/', 1)[-1]
        dl_url = 'http://www.newgrounds.com/audio/download/%s' % (song_number)
        dlfile(dl_url)
if __name__ == '__main__':
    # Script entry point: ask which list to scrape, then fetch its top songs.
    prompt = 'enter a section number or title (see indexes.cfg)\n>>>'
    getindex(raw_input(prompt))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment