Skip to content

Instantly share code, notes, and snippets.

@pandada8
Last active December 24, 2015 11:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pandada8/6789316 to your computer and use it in GitHub Desktop.
Save pandada8/6789316 to your computer and use it in GitHub Desktop.
A Python script that parses Baidu Music. Poor HC design. The output is an input file for aria2c. If you want to use other tools, change the code yourself :)
import threading
import queue
import sys
import requests
import json
import re
from bs4 import BeautifulSoup as bs
class multidown(object):
    """Apply a callable to every queued item using a pool of worker threads.

    Successful return values are collected in ``self.result``; items whose
    processing raised are collected in ``self.error`` and can be drained
    with :meth:`geterror`. ``self.now`` counts the items that completed
    successfully.
    """

    def __init__(self, data, method=None):
        """Queue every element of *data* for processing.

        Args:
            data: Iterable of work items (song ids when the default
                ``_parse`` method is used).
            method: Callable invoked with one work item. Falls back to
                :meth:`_parse` when omitted or falsy.
        """
        self.data = queue.Queue()
        for item in data:
            self.data.put(item)
        self.lock = threading.Lock()
        self.now = 0
        self._method = method if method else self._parse
        self.result = queue.Queue()
        self.error = queue.Queue()
        self._threadpool = []

    def start(self, num):
        """Run *num* worker threads and block until all of them finish."""
        # Only start/join the threads created by this call, so a second
        # call does not try to re-start threads that have already run.
        workers = [threading.Thread(target=self._do) for _ in range(num)]
        self._threadpool.extend(workers)
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    def _do(self):
        """Worker loop: process queued items until the queue is drained."""
        while True:
            # A non-blocking get avoids the race where another worker takes
            # the last item between an ``empty()`` check and a blocking
            # ``get()``, which would leave this thread waiting forever.
            try:
                item = self.data.get_nowait()
            except queue.Empty:
                return
            try:
                result = self._method(item)
            except Exception as exc:
                # Worker boundary: report the failure, remember the item,
                # and keep the thread alive for the remaining work.
                sys.stderr.write("{}\n".format(exc))
                self.error.put(item)
            else:
                self.result.put(result)
                with self.lock:
                    self.now += 1
            finally:
                self.data.task_done()

    def _parse(self, _id):
        """Resolve one song id to its download link and record it.

        Fetches the ``fmlink`` JSON for *_id*, prints the id, file name and
        link to stdout, and appends an entry (link plus ``out=`` option) to
        the file named by ``sys.argv[2]``.

        Returns:
            A ``(_id, result_url, filename)`` tuple.

        Raises:
            KeyError, IndexError, TypeError: if the response carries no
                ``songLink`` for the first entry of ``songList``.
        """
        url_to_go = "http://music.baidu.com/data/music/fmlink?songIds={}&type=mp3&rate=128".format(_id)
        req = requests.get(url_to_go)
        if "verify.baidu.com" in req.url:
            # The request was redirected to a verification page: show the
            # URL, wait for the user to press Enter, then retry once. The
            # lock keeps concurrent workers from interleaving prompts.
            with self.lock:
                print(req.url)
                input()
                req = requests.get(url_to_go)
        data = json.loads(req.text)
        try:
            song = data["data"]["songList"][0]
            filename = "{} - {}.mp3".format(song["artistName"], song["songName"])
        except (KeyError, IndexError, TypeError):
            # Metadata missing: fall back to naming the file after the id.
            filename = "{}.mp3".format(_id)
        result_url = data["data"]["songList"][0]["songLink"]
        sys.stdout.write("{}\tout={}\n{}\n".format(_id, filename, result_url))
        sys.stdout.flush()
        # Serialize appends so entries from different workers do not mix.
        with self.lock:
            with open(sys.argv[2], "a") as fp:
                fp.write("{}\n out={}\n".format(result_url, filename))
        return (_id, result_url, filename)

    def geterror(self):
        """Drain the error queue and return the failed items as a list."""
        failed = []
        while True:
            try:
                failed.append(self.error.get_nowait())
            except queue.Empty:
                return failed
            self.error.task_done()
def getsongs(Singer, max_attempts=20):
    """Collect the song ids listed for an artist on music.baidu.com.

    The artist page is fetched to read the total song count, then the
    ``getsongs`` endpoint is queried in steps of 20 and the first run of
    digits in each song-title link is taken as the song id.

    Args:
        Singer: Artist identifier, used both in the artist page URL and as
            the ``ting_uid`` query parameter.
        max_attempts: Maximum number of requests per page before giving up
            when the endpoint keeps returning a non-success ``errorCode``.

    Returns:
        A list of song id strings.

    Raises:
        ValueError: if the song counter cannot be found on the artist page.
        RuntimeError: if a page still fails after *max_attempts* requests.
    """
    artist_page = requests.get("http://music.baidu.com/artist/{}".format(Singer)).text
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs(artist_page, "html.parser")
    counter = soup.find("a", class_="list")
    num = counter.string if counter is not None else None
    if not num or num[:2] != "歌曲":
        raise ValueError("song counter not found on artist page for {!r}".format(Singer))
    # The label is the two-character prefix, one delimiter, the number and
    # one closing delimiter; slice the number out.
    totalnum = int(num[3:-1])

    song_id = re.compile(r"\d+")
    ALL_SONGS = []
    for start in range(0, totalnum, 20):
        url = "http://music.baidu.com/data/user/getsongs?start={}&ting_uid={}&order=time".format(start, Singer)
        # errorCode 22000 is the only value treated as success; retry a
        # bounded number of times instead of spinning forever.
        for _ in range(max_attempts):
            data = json.loads(requests.get(url).text)
            if data["errorCode"] == 22000:
                break
        else:
            raise RuntimeError(
                "no successful response for {} after {} attempts".format(url, max_attempts)
            )
        fragment = bs(data["data"]["html"], "html.parser")
        ALL_SONGS.extend(
            song_id.search(node.a["href"]).group()
            for node in fragment.find_all(class_="song-title")
        )
        print(start)
    print(len(ALL_SONGS))
    return ALL_SONGS
if __name__ == '__main__':
    # Expected arguments: a numeric artist id (passed to getsongs) and the
    # path of the output file that the download entries are appended to.
    # Validate explicitly: ``assert`` is stripped under ``python -O`` and a
    # missing argument would otherwise surface as a bare IndexError.
    if len(sys.argv) < 3 or not sys.argv[1].isdigit() or not sys.argv[2]:
        sys.exit("usage: {} <artist_id> <output_file>".format(sys.argv[0]))
    songs = getsongs(sys.argv[1])
    multi = multidown(songs)
    # Three worker threads, then report whatever items failed.
    multi.start(3)
    print(multi.geterror())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment