@scturtle
Created February 6, 2012 02:05
Download DoubanFM favourite songs
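Four small Python 2 scripts that work together: the crawler at the bottom walks the logged-in douban.fm "liked" pages and writes the song list to favs.txt; the Baidu ting script looks each of those songs up and downloads it, recording misses in faillist.txt; the retry script then fetches the remaining tracks straight from Douban's playlist API through the first file (fmdownload.py), which also writes ID3 tags with eyeD3. The cookie values ("XXXX") are placeholders to be filled in from a logged-in douban.fm session.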
# coding: utf-8
# fmdownload.py (the retry script below imports it under that name):
# fetch the direct MP3 URLs of one Douban album and tag the downloaded files.
import os, urllib, urllib2, re, json
import eyeD3  # the old eyeD3 0.6.x tagging API (module name is case-sensitive)

baseurl = 'http://douban.fm/j/mine/playlist?type=n&h=&channel=0&context=channel:0|subject_id:%s'
musicdir = os.path.abspath('./music')
num_pattern = re.compile(r'(\d+)')

def get_json_by_album(href):
    ''' return the song objects (with direct urls) for an album url '''
    # the subject id is the first run of digits in the album url
    subject_id = num_pattern.search(href).groups()[0]
    content = urllib2.urlopen(baseurl % subject_id).read()
    jsob = json.loads(content)
    # keep only the songs that actually belong to this album
    return filter(lambda song: song['album'].endswith(subject_id + '/'), jsob['song'])
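# Fields of each song object that this script relies on (anything else in the
# playlist response is ignored): 'title', 'artist', 'albumtitle',
# 'album' (a path ending in the subject id), 'url' (the direct MP3 link) and
# 'picture' (the cover-art URL).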
def download(song):
    ''' download a song from its json object, then write correct ID3 information '''
    if not os.path.exists(musicdir):
        os.mkdir(musicdir)
    filename = '%s.mp3' % song['title']
    filepath = os.path.join(musicdir, filename)
    if os.path.exists(filepath):
        return  # already downloaded
    urllib.urlretrieve(song['url'], filepath)
    # fetch the cover art next to the mp3, temporarily
    picname = song['picture'][1 + song['picture'].rindex('/'):]
    picpath = os.path.join(musicdir, picname)
    urllib.urlretrieve(song['picture'], picpath)
    # write the ID3 tag
    tag = eyeD3.Tag()
    tag.link(filepath)
    tag.header.setVersion(eyeD3.ID3_V2_3)
    tag.encoding = '\x01'  # UTF-16 text encoding
    tag.setTitle(song['title'])
    tag.setAlbum(song['albumtitle'])
    tag.setArtist(song['artist'])
    tag.addImage(3, picpath, u'')  # 3 = front cover (APIC picture type)
    tag.update()
    os.remove(picpath)  # the cover file is no longer needed once the tag is written

if __name__ == '__main__':
    from pprint import pprint
    songs = get_json_by_album('http://music.douban.com/subject/3649821/')
    #pprint(songs)
    print 'Found: %d' % len(songs)
    for song in songs:
        download(song)
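Because the retry script below does `from fmdownload import *`, this first file is evidently saved as fmdownload.py and can also be reused as a module. A minimal sketch, reusing the sample album URL above and nothing beyond the two functions already defined:

from fmdownload import get_json_by_album, download

for song in get_json_by_album('http://music.douban.com/subject/3649821/'):
    print song['title'], '-', song['artist']
    download(song)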
# coding: utf-8
# Retry script: for every song still listed in faillist.txt (written by the
# Baidu downloader below), find it again on the douban.fm "liked" pages and
# download it through fmdownload.
# =======================================================
faillist = file('faillist.txt').read().split('\n')
faillist = map(lambda line: line.decode('utf-8'), faillist)
from fmdownload import *
# =======================================================
import sys, urllib2, cookielib
from BeautifulSoup import BeautifulSoup
from time import sleep
import HTMLParser

hp = HTMLParser.HTMLParser()
ue = lambda s: hp.unescape(s)

# cookies: copy these values from a logged-in douban.fm browser session
cookie = 'flag="ok";ck="XXXX";dbcl2="XXXXXXXXXXXXXXX";bid="XXXXXXXXX";'
# songs per page on the "liked" listing
spp = 15
baseurl = 'http://douban.fm/mine?start=%d&type=liked'
pages = int(raw_input('number of pages: '))

opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
urllib2.install_opener(opener)

for i in range(pages):
    sleep(1)  # be polite to the server
    req = urllib2.Request(baseurl % (i * spp,))
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')
    req.add_header('Cookie', cookie)
    content = urllib2.urlopen(req).read()
    soup = BeautifulSoup(str(content))
    divs = soup.findAll('div', {'class': 'song_info'})
    for div in divs:
        p = div.findAll('p')
        # the "<title> <artist>" form matches the lines written to faillist.txt
        name = u'%s %s' % (ue(p[0].string), ue(p[1].string))
        if name not in faillist:
            continue
        found = False
        for attempt in range(10):
            # the playlist endpoint does not seem to return every track of the
            # album on every call, hence the retries; re-query on each attempt
            # (p[2].a links to the album page; only its digits are needed)
            songs = get_json_by_album(str(p[2].a))
            for song in songs:
                if ue(p[0].string) == song['title']:
                    download(song)
                    #print 'DONE:', name.encode('utf-8')
                    found = True
                    break
            if found:
                break
        if not found:
            print 'FAIL:', name.encode('utf-8')
# coding: utf-8
# Baidu ting downloader: look every favourite up on Baidu music and grab the
# MP3 from there; anything that cannot be found goes into faillist.txt.
# Based on: https://github.com/alswl/tingdownload
import os, urllib, urllib2, json, re

SEARCH_URL = u'http://openapi.baidu.com/public/2.0/mp3/info/suggestion?' \
             'format=json&word=%s&callback=window.baidu.sug'
DOWNLOAD_URL = u'http://ting.baidu.com/song/%s/download'
TARGET_URL = u'http://ting.baidu.com%s'
pattern = re.compile(r'<a href="([^"]*)"')  # first link on the download page
MUSICS_DIR = os.path.abspath('./musics')

def search(word):
    ''' search for songs, return the parsed json information '''
    word = urllib2.quote(word)
    response = urllib2.urlopen(SEARCH_URL % word).read()
    # strip the JSONP wrapper: window.baidu.sug(...);
    json_text = response.strip()[17:-2]
    return json.loads(json_text)
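# The suggestion endpoint answers with JSONP roughly of the form
#   window.baidu.sug({"song": [{"songid": "...", ...}], ...});
# only the 'song' list and each entry's 'songid' are used below.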
def geturl(songid):
    ''' get the real download url for a song id '''
    page = urllib2.urlopen(DOWNLOAD_URL % songid).read()
    link = pattern.search(page).groups()[0]
    return TARGET_URL % link

def download(url):
    ''' download a song by url, keeping the file name from the url '''
    if not os.path.exists(MUSICS_DIR):
        os.mkdir(MUSICS_DIR)
    name = urllib2.unquote(url)[1 + urllib2.unquote(url).rindex('/'):]
    #name = name.decode('utf-8')
    # unquote() gave back a unicode string whose code points are really raw
    # utf-8 bytes; turn it into a byte string again, character by character
    name = ''.join(map(chr, map(ord, list(name))))  # Orz
    print 'downloading:', name
    filepath = os.path.join(MUSICS_DIR, name)
    if not os.path.exists(filepath):
        urllib.urlretrieve(url, filepath)

if __name__ == '__main__':
    print 'Downloading DoubanFM favs...'
    # favs.txt is produced by the "liked" page crawler at the bottom of the gist
    songs = file('favs.txt').read().strip().split('\n')
    print len(songs), 'songs'
    faillist = []
    for line in songs:
        res = search(line)
        if not res['song']:
            print 'FAIL:', line
            faillist.append(line)
            continue
        download(geturl(res['song'][0]['songid']))
        print 'DONE:', line
    # the failures feed the douban-side retry script above
    with file('faillist.txt', 'w') as f:
        f.write('\n'.join(faillist))
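A single entry can be checked by hand before running the whole favs.txt batch; the query string below is only an illustration:

res = search('Some Title Some Artist')  # hypothetical "<title> <artist>" query
if res['song']:
    print geturl(res['song'][0]['songid'])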
# coding: utf-8
# Crawler: walk the logged-in douban.fm "liked" pages and dump
# "<title> <artist>" lines into favs.txt.
import sys, urllib2, cookielib
from BeautifulSoup import BeautifulSoup
from time import sleep
import HTMLParser

hp = HTMLParser.HTMLParser()
ue = lambda s: hp.unescape(s)

# cookies: copy these values from a logged-in douban.fm browser session
cookie = 'flag="ok";ck="XXXX";dbcl2="XXXXXXXXXXXXXXX";bid="XXXXXXXXX";'
# songs per page on the "liked" listing
spp = 15
baseurl = 'http://douban.fm/mine?start=%d&type=liked'
pages = int(raw_input('number of pages: '))
result = 0

# redirect stdout to favs.txt so the prints below end up in the file
reload(sys)
sys.setdefaultencoding(sys.getfilesystemencoding())
originout = sys.stdout
sys.stdout = open('favs.txt', 'w')

opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
urllib2.install_opener(opener)

for i in range(pages):
    sleep(1)  # be polite to the server
    req = urllib2.Request(baseurl % (i * spp,))
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')
    req.add_header('Cookie', cookie)
    content = urllib2.urlopen(req).read()
    soup = BeautifulSoup(str(content))
    divs = soup.findAll('div', {'class': 'song_info'})
    result += len(divs)
    for div in divs:
        p = div.findAll('p')
        # first <p> is the title, second is the artist
        print ue(p[0].string), ue(p[1].string)  #, ue(p[2].a.string)

sys.stdout = originout
print 'Done with %d results.' % result
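# favs.txt now holds one "<title> <artist>" line per liked song, which is the
# format the Baidu downloader above reads back in.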