Skip to content

Instantly share code, notes, and snippets.

@selfboot
Created May 19, 2013 15:23
Show Gist options
  • Save selfboot/5607978 to your computer and use it in GitHub Desktop.
Save selfboot/5607978 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import eyed3
import re
import urllib
import os
# Matches one or more consecutive decimal digits (captured as group 1).
num_p = re.compile(r'(\d+)')

# Directory, relative to the working directory, that downloads are saved in.
songs_dir = 'songs'

# Playlist endpoint requested by get_songs_information().
base_url = (
    'http://douban.fm/j/mine/playlist'
    '?type=n&sid=&pt=0.0&channel=0&from=mainsite'
)

# Per-song detail lookup; the placeholder is filled with the song id.
songinfo_url = 'http://dbfmdb.sinaapp.com/api/song.php?sid=%s'

# Characters removed from artist/title text before it is used in a file name.
invalid = list('/\\:*?"<>|')

# Browser-like User-Agent sent with the requests that pass ``headers``.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) "
    "AppleWebKit/537.31 (KHTML, like Gecko) "
    "Chrome/26.0.1410.65 Safari/537.31"
)
headers = {"User-Agent": user_agent}
def valid_filename(s, invalid_chars=None):
    """Return *s* with every file-name-unsafe character removed.

    Args:
        s: Text to sanitise (a byte string or a unicode string).
        invalid_chars: Optional container of single characters to drop.
            Defaults to the module-level ``invalid`` list.

    Returns:
        A string of the same type as *s* containing only the characters
        that are not in ``invalid_chars``, in their original order.
    """
    if invalid_chars is None:
        invalid_chars = invalid
    # ``filter()`` only hands back a string on Python 2; joining is portable.
    # Joining on an empty slice of ``s`` keeps the result the same string
    # type as the input, even when nothing survives the filter.
    return s[:0].join(ch for ch in s if ch not in invalid_chars)
def cookie_prase(cookie):
    """Parse a raw ``Cookie`` header string into a ``{name: value}`` dict.

    Pairs are separated by ``;`` (surrounding whitespace is ignored) and
    each pair is split on its FIRST ``=`` only, so values that themselves
    contain ``=`` (base64 padding, quoted values) are kept intact.

    Args:
        cookie: Text such as ``'bid="abc=="; ck=xyz'``.

    Returns:
        dict mapping cookie names to values. Empty fragments and fragments
        without an ``=`` are skipped, so an empty string yields ``{}``.
    """
    cookies = {}
    for item in cookie.split(";"):
        name, sep, value = item.strip().partition("=")
        if not sep:
            # Blank fragment (e.g. trailing ";") or a bare token with no
            # value: nothing usable to store.
            continue
        cookies[name] = value
    return cookies
def get_songs_information(sid, timeout=30):
    """Fetch a playlist from douban.fm after issuing a "start" request for *sid*.

    Three HTTP requests are made, in order:

    1. ``songinfo_url % sid`` is fetched and its JSON decoded to read the
       song's ``ssid``.
    2. ``http://douban.fm?start=<sid>g<ssid>g`` is requested; the response
       is discarded.
    3. ``base_url`` is fetched and its JSON decoded.

    NOTE(review): step 2 presumably makes the service put the requested
    song into the playlist returned by step 3 -- confirm against the
    service's behaviour; the code itself only shows the call order.

    Args:
        sid: Song id, interpolated into the request URLs.
        timeout: Seconds to wait on each request before giving up. Without
            a timeout a stalled connection would block the run forever.

    Returns:
        The value stored under the ``"song"`` key of the playlist JSON.

    Raises:
        KeyError: If ``"ssid"`` or ``"song"`` is missing from a response.
        Exception: Whatever ``requests`` raises for connection errors,
            timeouts or a body that is not valid JSON.
    """
    detail = requests.get(
        songinfo_url % sid, headers=headers, timeout=timeout).json()

    # Response intentionally ignored; only the request itself matters here.
    requests.get(
        'http://douban.fm?start=%sg%sg' % (sid, detail['ssid']),
        timeout=timeout)

    playlist = requests.get(
        base_url, headers=headers, timeout=timeout).json()
    return playlist['song']
def download(song):
    """Download one song into ``songs_dir`` and write its ID3 tag.

    The file is named ``<artist>-<title>.mp3`` (with file-name-unsafe
    characters stripped). If that file already exists nothing is done.
    The cover image is downloaded to a temporary file next to the song,
    embedded into the tag, and removed again.

    Args:
        song: Mapping with the keys ``artist``, ``title``, ``url``,
            ``picture``, ``albumtitle`` and ``public_time``.

    Raises:
        OSError: If ``songs_dir`` cannot be created.
        KeyError: If *song* lacks one of the keys listed above.
        Exception: Whatever the download or tagging calls raise.
    """
    # Create the target directory on first use. Unlike a blanket
    # ``except: pass``, real failures (e.g. permissions) are reported.
    if not os.path.isdir(songs_dir):
        try:
            os.makedirs(songs_dir)
        except OSError:
            # Tolerate only the case where it appeared in the meantime.
            if not os.path.isdir(songs_dir):
                raise

    filename = '%s-%s.mp3' % (valid_filename(song['artist']),
                              valid_filename(song['title']))
    filepath = os.path.join(songs_dir, filename)
    if os.path.exists(filepath):
        return

    try:
        urllib.urlretrieve(song['url'], filepath)
    except BaseException:
        # A half-written file would make the exists() check above skip
        # this song on every later run, so drop it before re-raising.
        if os.path.exists(filepath):
            os.remove(filepath)
        raise

    picture_url = song['picture']
    picname = picture_url[picture_url.rindex('/') + 1:]
    picpath = os.path.join(songs_dir, picname)
    try:
        # Fetch the 'lpic' variant of the cover instead of 'mpic'.
        urllib.urlretrieve(picture_url.replace('mpic', 'lpic'), picpath)

        # NOTE(review): these tag calls look like the legacy eyeD3 0.6
        # interface -- confirm they match the installed eyed3 version.
        tag = eyed3.Tag()
        tag.link(filepath)
        tag.header.setVersion(eyed3.ID3_V2_3)
        # Presumably ID3v2 text-encoding byte 0x01 (UTF-16) -- TODO confirm.
        tag.encoding = '\x01'
        tag.setTitle(song['title'])
        tag.setAlbum(song['albumtitle'])
        tag.setArtist(song['artist'])
        tag.setDate(song['public_time'])
        # 3 is the ID3 picture type for "Cover (front)".
        tag.addImage(3, picpath)
        tag.update()
    finally:
        # The cover is only a temporary file; never leave it behind, even
        # when tagging fails.
        if os.path.exists(picpath):
            os.remove(picpath)
def get(myurl, cookies):
    """Download every song listed on one "liked songs" page.

    The page at *myurl* is fetched and parsed. For each
    ``div.info_wrapper`` entry the song id is read from the ``sid``
    attribute of its ``div.action`` element, a short description is
    printed, and the song is downloaded. A failure on one song is printed
    and does not stop the remaining songs on the page.

    Args:
        myurl: URL of the listing page.
        cookies: dict of cookies sent with the page request.
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    list_req = requests.get(myurl, headers=headers, cookies=cookies,
                            timeout=30)
    list_soup = BeautifulSoup(list_req.content)
    for div_tag in list_soup.findAll('div', class_='info_wrapper'):
        p_tag = div_tag.find('div', class_='song_info').findAll("p")
        sid = div_tag.find('div', class_='action')['sid']
        try:
            print("song:%s\nsinger:%s\nalbum:%s" % (
                p_tag[0].string, p_tag[1].string, p_tag[2].a.string))
        except Exception:
            # The description is purely informational; fall back to a
            # placeholder, but let Ctrl-C / SystemExit through.
            print("song...")
        try:
            succeeded = _download_by_sid(sid)
            if succeeded:
                print('succeed!\n\n')
            else:
                print('fail!\n\n')
        except Exception as e:
            # ``e.message`` may be absent or not a string, which would
            # raise again inside this handler; format the exception itself.
            try:
                text = '%s' % (e,)
            except UnicodeError:
                text = repr(e)
            print(text + '\n')


def _download_by_sid(sid, attempts=10):
    """Request playlists up to *attempts* times until one contains *sid*.

    Returns True after downloading the matching song, False if no fetched
    playlist contained it.
    """
    for _ in range(attempts):
        for song in get_songs_information(sid):
            if sid == song['sid']:
                download(song)
                return True
    return False
def main():
    """Prompt for a cookie string and a page range, then fetch each page.

    Pages are numbered from 1 and both ends of the range are inclusive.
    """
    page_url = 'http://douban.fm/mine?start=%d&type=liked'
    cookies = cookie_prase(raw_input('cookie:'))
    print('you should enter the pages you want to download')
    first_page = int(raw_input('page from:'))
    last_page = int(raw_input('page to:'))
    for page in range(first_page, last_page + 1):
        # ``start`` is an item offset; presumably 15 songs are listed per
        # page -- TODO confirm against the site.
        get(page_url % ((page - 1) * 15), cookies)


# Run the interactive downloader only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment