Last active
January 14, 2016 09:15
-
-
Save lotabout/cbd4993e164ba71983b5 to your computer and use it in GitHub Desktop.
Get a list of Douban starred songs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import requests | |
from PIL import Image | |
import io | |
import pickle | |
import os | |
import json | |
import time | |
import logging | |
import urllib | |
import sys, argparse | |
# Root logger starts at DEBUG so every request/response dump below is visible.
logging.basicConfig(level=logging.DEBUG)

# Path of the pickled cookie store reused between runs.
cookie_file = 'cookies.txt'

# Single shared HTTP session: it carries the cookies for every request.
bot = requests.session()
def save_cookies(session, filename):
    """Pickle the session's internal cookie mapping into *filename*.

    The file is opened in binary write mode, so any previous content is
    replaced. What gets stored is ``session.cookies._cookies``, not the
    cookie jar object itself.
    """
    with open(filename, 'wb') as sink:
        # The file is already empty after opening with 'wb'.
        sink.truncate()
        sink.write(pickle.dumps(session.cookies._cookies))
def load_cookies(session, filename):
    """Restore cookies pickled in *filename* onto *session*.

    If the unpickled value is empty (falsy), the session is left untouched.
    Otherwise a fresh ``RequestsCookieJar`` is created, its internal
    ``_cookies`` mapping is replaced with the stored one, and the jar is
    assigned to ``session.cookies``.
    """
    # NOTE: unpickling can run arbitrary code; only load files written by
    # this script.
    with open(filename, 'rb') as source:
        stored = pickle.load(source)

    if not stored:
        return

    restored_jar = requests.cookies.RequestsCookieJar()
    restored_jar._cookies = stored
    session.cookies = restored_jar
def show_captcha():
    """Request a new captcha, display its image, and return the captcha id.

    Two requests go through the shared ``bot`` session: one for a new
    captcha id and one for the medium-size ('m') image of that id. The
    image is opened from memory with PIL and shown in the default viewer.
    """
    # The id is the response text with surrounding double quotes stripped.
    id_response = bot.get('http://douban.fm/j/new_captcha')
    captcha_id = id_response.text.strip('"')

    image_response = bot.get('http://douban.fm/misc/captcha',
                             params={'size': 'm', 'id': captcha_id})

    # Decode straight from the response bytes; nothing is written to disk.
    buffer = io.BytesIO(image_response.content)
    buffer.seek(0)
    Image.open(buffer).show()

    return captcha_id
def single_login(username, password):
    """Make one login attempt and return the raw response.

    A captcha is shown first and the user is prompted for its solution.
    The credentials and captcha answer are then posted as form data
    through the shared ``bot`` session. The caller decides whether the
    attempt succeeded.
    """
    login_url = 'http://douban.fm/j/login'

    captcha_id = show_captcha()
    solution = input('Captcha: ').strip()

    # Key order is kept as-is so the encoded form body does not change.
    form = {
        'source': 'radio',
        'alias': username,
        'form_password': password,
        'captcha_id': captcha_id,
        'captcha_solution': solution,
    }
    return bot.post(login_url, data=form)
def login(username, password):
    """Log in to douban.fm, retrying until an attempt succeeds.

    An attempt succeeds when the response is OK and its JSON body has no
    ``'err_no'`` key. On success the ``ck`` token and the played / liked /
    banned counters from ``user_info`` are stored as cookies on the shared
    ``bot`` session, and the cookies are saved to ``cookie_file``.

    Any ``Exception`` raised during an attempt (including while saving the
    cookies) is printed and the whole attempt is retried. Retries use a
    loop, so repeated failures no longer grow the call stack.
    ``KeyboardInterrupt`` is not caught and aborts the retry loop.

    Args:
        username: account alias sent as the ``alias`` form field.
        password: account password sent as the ``form_password`` field.
    """
    while True:
        try:
            response = single_login(username, password)
            if not response.ok:
                continue

            # Parse the body once and reuse it.
            payload = response.json()
            if 'err_no' in payload:
                continue

            logging.debug('login_info:')
            logging.debug(payload)

            # Read every value first, so a missing key sets no cookies.
            user_info = payload['user_info']
            play_record = user_info['play_record']
            ck = user_info['ck']
            counters = [(kind, str(play_record[kind]))
                        for kind in ('played', 'liked', 'banned')]

            bot.cookies.set('ck', ck)
            for kind, value in counters:
                bot.cookies.set(kind, value)

            # Save the cookies so the next run can skip the login.
            save_cookies(bot, cookie_file)
            return
        except Exception as e:
            print("Exception: {0}".format(e))
def main():
    """Export a douban.fm play record (liked/played/banned songs) to JSON.

    Command line:
        file        output path for the JSON song list
        -t/--type   which record to fetch: 'liked' (default), 'played'
                    or 'banned'
        -l/--log    root log level, 'INFO' or 'DEBUG'

    Cookies are loaded from ``cookie_file`` when it exists; otherwise the
    user is prompted for credentials and logged in. The record is then
    fetched page by page (15 songs per page, one second between requests)
    and written to the output file as UTF-8 JSON.
    """
    # ``import urllib`` alone does not guarantee the ``parse`` submodule is
    # loaded, so import it explicitly before use.
    import urllib.parse

    parser = argparse.ArgumentParser()
    parser.add_argument('file', help="specify the output file")
    parser.add_argument('-t', '--type', choices=['liked', 'played', 'banned'],
                        default='liked',
                        help="The type of songs: 'liked', 'played' or 'banned'")
    parser.add_argument('-l', '--log', choices=['INFO', 'DEBUG'],
                        help="specify the log level")
    args = parser.parse_args()

    # Apply the requested log level; it was previously parsed and ignored.
    if args.log:
        logging.getLogger().setLevel(args.log)

    fetch_type = args.type
    output_file = args.file

    if os.path.exists(cookie_file):
        load_cookies(bot, cookie_file)
    else:
        username = input('Username: ').strip()
        password = input('Password: ').strip()
        login(username, password)

    cookies = bot.cookies.get_dict()
    logging.debug(cookies)

    ck = cookies['ck']
    spbid = '::' + cookies['bid'].strip('"')
    logging.debug({'type': fetch_type, 'ck': ck, 'spbid': spbid, 'start': '0'})

    # The query string is built by hand so the wire format stays exactly as
    # before; only ``spbid`` is percent-encoded. The page offset is appended
    # per request.
    star_url = ('http://douban.fm/j/play_record'
                '?ck={0}&spbid={1}&type={2}&start=').format(
                    ck, urllib.parse.quote(spbid), fetch_type)

    # The record size comes from the counter cookie of the same name.
    total = int(cookies[fetch_type])
    print("Total =", total)
    per_page = 15

    bot.headers.update({'referer': 'http://douban.fm/mine/',
                        'User-Agent': 'Mozilla/5.0',
                        'Host': 'douban.fm',
                        'X-Requested-With': 'XMLHttpRequest'})

    songs = []
    for current in range(0, total, per_page):
        # Floor division keeps the page number an integer on Python 3.
        print("Fetching page", current // per_page)
        time.sleep(1)

        page_url = star_url + str(current)
        print('url = ', page_url)
        star_page = bot.get(url=page_url)

        # Only parse JSON from successful responses, so an error page with
        # a non-JSON body does not abort the export.
        if not star_page.ok:
            logging.warning('skipping start=%d: HTTP %s',
                            current, star_page.status_code)
            continue

        page = star_page.json()
        logging.debug(page)
        songs.extend(page['songs'])

    # ensure_ascii=False writes raw non-ASCII text, so the encoding is set
    # explicitly instead of depending on the platform locale.
    with open(output_file, 'w', encoding='utf-8') as fp:
        json.dump(songs, fp, indent=4, sort_keys=True, ensure_ascii=False)
    print('Save Done')
# Run the exporter only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment