Skip to content

Instantly share code, notes, and snippets.

@admalledd
Created November 19, 2010 00:48
Show Gist options
  • Save admalledd/705967 to your computer and use it in GitHub Desktop.
Save admalledd/705967 to your computer and use it in GitHub Desktop.
A short program I wrote to download the top five songs in a given section of newgrounds.com, using urllib2 and BeautifulSoup.
[main]
## Easy Listening
Classical=3
Jazz=18
## Electronic
Ambient=5
Dance=6
Drum N Bass=7
Dubstep=41
House=9
Industrial=8
New Wave=20
Techno=10
Trance=11
Video Game=12
## Hip Hop, Rap, R&B
HipHop-Modern=17
HipHop-Olskool=16
R&B=21
## Metal, Rock
Brit-Pop=22
Classical-Rock=23
General-Rock=24
Grunge=25
Heavy-Metal=15
Indie=26
Pop=27
Punk=28
## Miscellaneous
Funk=13
Goth=14
Latin=19
Miscellaneous=39
Ska=29
Voice Acting=40
## Southern Flavor
Bluegrass=1
Blues=2
Country=4
import os
from urllib2 import urlopen, URLError, HTTPError
import ConfigParser
import BeautifulSoup as bs
# Shared parser holding the section-name -> numeric-index table that getindex()
# looks up under the [main] section. 'indexes.cfg' is a relative path, so it is
# resolved against the current working directory when this module is loaded.
config = ConfigParser.ConfigParser()
config.read('indexes.cfg')
def _filename_from_disposition(header):
    """Return the ``filename`` parameter of a Content-Disposition value, or None."""
    if not header:
        return None
    # The first ';'-separated piece is the disposition type (e.g. "attachment");
    # everything after it is a key=value parameter.
    for param in header.split(';')[1:]:
        key, sep, value = param.strip().partition('=')
        if sep and key.strip().lower() == 'filename':
            return value.strip().strip('"')
    return None


def dlfile(url):
    """Download *url* into the ``tmp`` directory under the current working directory.

    The local file name is taken from the response's ``Content-Disposition``
    header when it carries a ``filename`` parameter; otherwise the last path
    segment of *url* is used. The ``tmp`` directory is created if missing.

    HTTP and URL errors are reported on stdout instead of being raised, so a
    failed download does not abort the caller. Returns None.
    """
    try:
        # Open the url
        f = urlopen(url)
        try:
            filename = _filename_from_disposition(
                f.info().get('Content-Disposition'))
            if not filename:
                # No usable header: fall back to the URL's last path segment.
                filename = url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
            # The name is chosen by the remote server, so keep only its final
            # component; otherwise "../x" could write outside the tmp directory.
            filename = os.path.basename(filename)
            if filename in ('', '.', '..'):
                filename = 'download'

            tmp_dir = os.path.join(os.getcwd(), 'tmp')
            target = os.path.join(tmp_dir, filename)
            print("downloading %s to %s" % (filename, target))

            # check for tmp dir, make if needed
            if not os.path.isdir(tmp_dir):
                os.mkdir(tmp_dir)

            # Copy in chunks so a large file is never held fully in memory.
            with open(target, "wb") as local_file:
                while True:
                    chunk = f.read(64 * 1024)
                    if not chunk:
                        break
                    local_file.write(chunk)
        finally:
            # Always release the connection, even if writing failed.
            f.close()
    # handle errors (HTTPError first: it is the more specific of the two)
    except HTTPError as e:
        print("HTTP Error: %s %s" % (e.code, url))
    except URLError as e:
        print("URL Error: %s %s" % (e.reason, url))
def getindex(section='trance', limit=5):
    """Download the top *limit* songs listed for a Newgrounds audio section.

    section -- either a numeric section index (int or digit string), used
               as-is, or a section title that is looked up in the [main]
               section of the module-level ``config``.
    limit   -- highest list rank to download (default 5).

    Each matching entry is handed to dlfile(). Returns None.
    """
    try:
        int(section)
    except ValueError:
        # Not a number: treat it as a title and translate it via the config.
        index = config.get('main', section)
    else:
        index = section

    indexurl = 'http://www.newgrounds.com/audio/list/%s' % index
    page = urlopen(indexurl)
    try:
        soup = bs.BeautifulSoup(page.read())
    finally:
        # Release the connection as soon as the markup has been read.
        page.close()

    for tag in soup.findAll('tr', {'class': ('odd', 'even')}):
        rank_cell = tag.find('td', {'class': 'listnum'})
        if rank_cell is None or not rank_cell.string:
            # Row without a rank cell; nothing to download from it.
            continue
        # The last character of the cell text is dropped before parsing the
        # rank (presumably a trailing "." as in "1." -- verify against the page).
        if int(rank_cell.string[:-1]) > limit:
            break
        # Assumes the element right after the title cell is the link to the
        # song's listen page -- TODO confirm against current page markup.
        listen = tag.find('td', {'class': 'listtitle'}).next['href']
        # The song id is the final path segment of the link; splitting on the
        # last '/' works for ids of any length.
        song_number = listen.rstrip('/').rsplit('/', 1)[-1]
        dl_url = 'http://www.newgrounds.com/audio/download/%s' % (song_number)
        dlfile(dl_url)
if __name__ == '__main__':
    # Script entry point: prompt for a section (number or title) and pass the
    # answer straight to getindex().
    getindex(raw_input('enter a section number or title (see indexes.cfg)\n>>>'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment