Last active
August 29, 2015 14:07
-
-
Save Winiex/d6a3f10eced5e6912233 to your computer and use it in GitHub Desktop.
用于备份虾米音乐收藏乐库的脚本。Quick and dirty, but it works。依赖 keyring、requests、pycrypto、beautifulsoup4,运行于 python2.7。暂时只支持 Mac OS X。它会读取 Chrome 浏览器对于 xiami 域名的加密 cookie(所以你必须使用 Chrome 浏览器登录过虾米),解密后形成 cookies,进而使用 requests 请求虾米的页面,使用 beautifulsoup4 提取歌曲信息,最终 dump 成一份包含歌曲信息的 sqlite3 数据库文件。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf8 -*- | |
import time | |
import argparse | |
import os | |
import requests | |
import sqlite3 | |
import keyring | |
from Crypto.Cipher import AES | |
from Crypto.Protocol.KDF import PBKDF2 | |
from bs4 import BeautifulSoup | |
# URL template for a user's song-library listing; the page number is
# appended by the caller (see fetch_library).
_base_url = 'http://www.xiami.com/space/lib-song/u/%s/page/'
# Location of Chrome's cookie store on Mac OS X -- the only platform
# this script supports.
mac_chrome_cookie_path = os.path.expanduser(
    '~/Library/Application Support/Google/Chrome/Default/Cookies'
)
def clean(padded):
    """Strip trailing PKCS#7-style padding from a decrypted block.

    The last byte encodes how many padding bytes were appended.
    """
    pad = ord(padded[-1])
    return padded[:-pad]
def decrypt_cookie_value(encrypted_value):
    """Decrypt one Chrome cookie value from the Mac OS X cookie store.

    Chrome prefixes each encrypted value with a 3-byte version tag and
    encrypts the rest with AES-CBC; the key is derived (PBKDF2) from the
    'Chrome Safe Storage' password kept in the OS X keychain.
    """
    payload = encrypted_value[3:]  # drop the version prefix (e.g. 'v10')
    password = keyring.get_password('Chrome Safe Storage', 'Chrome')
    password = password.encode('utf8')
    # Key-derivation parameters used by Chrome on Mac OS X.
    key = PBKDF2(password, b'saltysalt', 16, 1003)
    cipher = AES.new(key, AES.MODE_CBC, IV=b' ' * 16)
    return clean(cipher.decrypt(payload))
def load_xiami_cookies():
    """Build a 'name=value; name=value' cookie string for xiami.com.

    Reads Chrome's cookie database, decrypts every cookie whose host
    matches xiami, and joins them in HTTP Cookie-header form.
    """
    conn = sqlite3.connect(mac_chrome_cookie_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT name, encrypted_value FROM'
            ' cookies WHERE host_key LIKE \'%xiami%\''
        )
        rows = cursor.fetchall()
    finally:
        # BUG FIX: the original never closed the connection, leaking the
        # handle on every page fetched.
        conn.close()
    # join produces the same result as appending '; ' then stripping the
    # trailing separator, without quadratic string concatenation.
    return '; '.join(
        '%s=%s' % (name, decrypt_cookie_value(value))
        for name, value in rows
    )
def fetch_library(url): | |
page = 1 | |
data = [] | |
while True: | |
cookies = load_xiami_cookies() | |
headers = { | |
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) ' | |
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124' | |
' Safari/537.36', | |
'Cookies': cookies, | |
} | |
html = requests.get( | |
url + str(page), headers=headers | |
).text | |
soup = BeautifulSoup(html) | |
song_tds = soup.findChildren( | |
'td', | |
{'class': 'song_name'} | |
) | |
if not song_tds: | |
if 'document.location.reload()' in unicode(html): | |
while 'document.location.reload()' in unicode(html): | |
print '被虾米发现了,低调 1 分钟再试试。:)' | |
time.sleep(60) | |
print '再试试。' | |
cookies = load_xiami_cookies() | |
headers = { | |
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) ' | |
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124' | |
' Safari/537.36', | |
'Cookies': cookies, | |
} | |
html = requests.get( | |
url + str(page), headers=headers | |
).text | |
song_tds = soup.findChildren( | |
'td', | |
{'class': 'song_name'} | |
) | |
else: | |
break | |
for song_td in song_tds: | |
a_elements = song_td.findChildren('a') | |
song_a = a_elements[0] | |
artist_a = a_elements[1] | |
song_title = song_a.text | |
song_xiami_url = song_a['href'] | |
artist_name = artist_a.text | |
artist_xiami_url = artist_a['href'] | |
data_item = { | |
'title': unicode(song_title), | |
'song_xiami_url': unicode(song_xiami_url), | |
'artist_name': unicode(artist_name), | |
'artist_xiami_url': unicode(artist_xiami_url), | |
} | |
data.append(data_item) | |
print '搞定了第 %s 页。' % page | |
page += 1 | |
return data | |
def store_data(data):
    """Dump song records into xiami_backup.sqlite3 next to this script.

    data is a list of dicts with keys title, song_xiami_url,
    artist_name, artist_xiami_url (as produced by fetch_library).
    """
    curdir = os.path.dirname(os.path.abspath(__file__))
    db_file = os.path.join(curdir, 'xiami_backup.sqlite3')
    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()
        # BUG FIX: IF NOT EXISTS lets the backup be re-run without
        # crashing on an existing table.
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS songs ("
            "title TEXT NOT NULL, xiami_url TEXT NOT NULL, "
            "artist TEXT NOT NULL, artist_xiami_url TEXT NOT NULL)"
        )
        cursor.executemany(
            "INSERT INTO songs values(?,?,?,?)",
            [(item['title'], item['song_xiami_url'],
              item['artist_name'], item['artist_xiami_url'])
             for item in data]
        )
        conn.commit()
        cursor.close()
    finally:
        # Close even if an insert fails (the original leaked on error).
        conn.close()
if __name__ == '__main__':
    # (Removed a dead os.path.dirname(...) call whose result was unused.)
    parser = argparse.ArgumentParser(description='备份你的虾米曲库')
    parser.add_argument('--uid', type=int, help='你的用户 id。'
                        '你可以在你的主页的地址中找到它。',
                        required=True,)
    args = parser.parse_args()
    url = _base_url % args.uid
    data = fetch_library(url)
    if data:
        store_data(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
beautifulsoup4==4.3.2 | |
keyring==4.0 | |
pycrypto==2.6.1 | |
requests==2.4.3 | |
wsgiref==0.1.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. 备份数据: | |
backup_xiami_songs.py --uid 你的用户 id | |
在依赖库完整安装的前提下,脚本将会一直运行,直到在脚本所在目录下生成名字为 xiami_backup.sqlite3 的 sqlite3 数据库文件。 | |
你的用户 id 可以在你自己的虾米个人主页的地址中找到。譬如: | |
http://www.xiami.com/u/1795453 | |
表示我的用户 id 是 1795453。 | |
2. 查看命令行帮助: | |
backup_xiami_songs.py -h |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment