Skip to content

Instantly share code, notes, and snippets.

@Winiex
Last active August 29, 2015 14:07
Show Gist options
  • Save Winiex/d6a3f10eced5e6912233 to your computer and use it in GitHub Desktop.
Save Winiex/d6a3f10eced5e6912233 to your computer and use it in GitHub Desktop.
用于备份虾米音乐收藏乐库的脚本。Quick and dirty, but it works。依赖 keyring、requests、pycrypto、beautifulsoup4,运行于 python2.7。暂时只支持 Mac OS X。它会读取 Chrome 浏览器对于 xiami 域名的加密 cookie(所以你必须使用 Chrome 浏览器登录过虾米),解密后形成 cookies,进而使用 requests 请求虾米的页面,使用 beautifulsoup4 提取歌曲信息,最终 dump 成一份包含歌曲信息的 sqlite3 数据库文件。
#!/usr/bin/env python
# -*- coding:utf8 -*-
import time
import argparse
import os
import requests
import sqlite3
import keyring
from Crypto.Cipher import AES
from Crypto.Protocol.KDF import PBKDF2
from bs4 import BeautifulSoup
# URL template for a user's song-library listing; '%s' is filled with the
# user id, and a page number is appended to the result when fetching.
_base_url = 'http://www.xiami.com/space/lib-song/u/%s/page/'
# Location of Chrome's cookie SQLite database on Mac OS X — the only
# platform this script supports.
mac_chrome_cookie_path = os.path.expanduser(
'~/Library/Application Support/Google/Chrome/Default/Cookies'
)
def clean(x):
    """Strip PKCS#7 padding from a decrypted plaintext and return it."""
    # The last byte of the plaintext encodes how many padding bytes
    # were appended; drop exactly that many from the end.
    pad_len = ord(x[-1])
    return x[:-pad_len]
def decrypt_cookie_value(encrypted_value):
    """Decrypt one Chrome cookie blob using the OS X keychain secret.

    Takes the raw ``encrypted_value`` column from Chrome's cookie DB and
    returns the plaintext cookie value with padding removed.
    """
    # Chrome prefixes every encrypted cookie with a 3-byte version tag
    # (e.g. 'v10'); only the bytes after it are ciphertext.
    payload = encrypted_value[3:]
    # Fixed key-derivation parameters used by Chrome on OS X.
    salt = b'saltysalt'
    iv = b' ' * 16
    key_length = 16
    iterations = 1003
    # The AES key is derived from the 'Chrome Safe Storage' keychain entry.
    secret = keyring.get_password('Chrome Safe Storage', 'Chrome').encode('utf8')
    derived_key = PBKDF2(secret, salt, key_length, iterations)
    aes = AES.new(derived_key, AES.MODE_CBC, IV=iv)
    plaintext = aes.decrypt(payload)
    return clean(plaintext)
def load_xiami_cookies():
    """Read Chrome's cookie database and return the xiami cookies.

    Returns a '; '-joined "name=value" string suitable for use as an
    HTTP Cookie header value.
    """
    conn = sqlite3.connect(mac_chrome_cookie_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT name, encrypted_value FROM'
            ' cookies WHERE host_key LIKE \'%xiami%\''
        )
        rows = cursor.fetchall()
    finally:
        # The original leaked the connection; always release the DB handle.
        conn.close()
    # join() replaces the original quadratic "+=" loop and the trailing
    # "; " slice in one step.
    return '; '.join(
        '%s=%s' % (name, decrypt_cookie_value(encrypted_value))
        for name, encrypted_value in rows
    )
def fetch_library(url):
page = 1
data = []
while True:
cookies = load_xiami_cookies()
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) '
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124'
' Safari/537.36',
'Cookies': cookies,
}
html = requests.get(
url + str(page), headers=headers
).text
soup = BeautifulSoup(html)
song_tds = soup.findChildren(
'td',
{'class': 'song_name'}
)
if not song_tds:
if 'document.location.reload()' in unicode(html):
while 'document.location.reload()' in unicode(html):
print '被虾米发现了,低调 1 分钟再试试。:)'
time.sleep(60)
print '再试试。'
cookies = load_xiami_cookies()
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) '
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124'
' Safari/537.36',
'Cookies': cookies,
}
html = requests.get(
url + str(page), headers=headers
).text
song_tds = soup.findChildren(
'td',
{'class': 'song_name'}
)
else:
break
for song_td in song_tds:
a_elements = song_td.findChildren('a')
song_a = a_elements[0]
artist_a = a_elements[1]
song_title = song_a.text
song_xiami_url = song_a['href']
artist_name = artist_a.text
artist_xiami_url = artist_a['href']
data_item = {
'title': unicode(song_title),
'song_xiami_url': unicode(song_xiami_url),
'artist_name': unicode(artist_name),
'artist_xiami_url': unicode(artist_xiami_url),
}
data.append(data_item)
print '搞定了第 %s 页。' % page
page += 1
return data
def store_data(data):
    """Dump collected songs into xiami_backup.sqlite3 next to this script.

    ``data`` is a list of dicts with keys title, song_xiami_url,
    artist_name, artist_xiami_url, as produced by fetch_library.
    """
    curdir = os.path.dirname(os.path.abspath(__file__))
    db_file = os.path.join(curdir, 'xiami_backup.sqlite3')
    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()
        # IF NOT EXISTS so a re-run of the backup does not crash on the
        # already-created table (the original raised OperationalError).
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS songs ("
            "title TEXT NOT NULL, xiami_url TEXT NOT NULL, "
            "artist TEXT NOT NULL, artist_xiami_url TEXT NOT NULL)"
        )
        # executemany batches all rows in one C-level call.
        cursor.executemany(
            "INSERT INTO songs values(?,?,?,?)",
            [(item['title'], item['song_xiami_url'],
              item['artist_name'], item['artist_xiami_url'])
             for item in data],
        )
        conn.commit()
        cursor.close()
    finally:
        # Always release the connection, even if an insert fails.
        conn.close()
if __name__ == '__main__':
    # (Removed a dead statement whose os.path.dirname(...) result was
    # discarded — it had no effect.)
    parser = argparse.ArgumentParser(description='备份你的虾米曲库')
    parser.add_argument('--uid', type=int, help='你的用户 id。'
                        '你可以在你的主页的地址中找到它。',
                        required=True,)
    args = parser.parse_args()
    url = _base_url % args.uid
    data = fetch_library(url)
    # Only create the database when the crawl actually produced songs.
    if data:
        store_data(data)
beautifulsoup4==4.3.2
keyring==4.0
pycrypto==2.6.1
requests==2.4.3
wsgiref==0.1.2
1. 备份数据:
backup_xiami_songs.py --uid 你的用户 id
在依赖库完整安装的前提下,脚本将会一直运行,直到在脚本所在目录下生成名字为 xiami_backup.sqlite3 的 sqlite3 数据库文件。
你的用户 id 可以在你自己的虾米个人主页的地址中找到。譬如:
http://www.xiami.com/u/1795453
表示我的用户 id 是 1795453。
2. 查看命令行帮助:
backup_xiami_songs.py -h
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment