Skip to content

Instantly share code, notes, and snippets.

@mpco
Last active December 26, 2021 04:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mpco/d2a3acec383eff203bd6b0558341528f to your computer and use it in GitHub Desktop.
Save mpco/d2a3acec383eff203bd6b0558341528f to your computer and use it in GitHub Desktop.
使用Aria2下载喜马拉雅的专辑

使用方法:

python XmlyDownloadByAria2.py http://www.ximalaya.com/19539305/album/3635803 

原理:

喜马拉雅的专辑页面中,存在如下结构:

<div class="personal_body" sound_ids="11935510,11935447,11935430,11935391,11935369,11935344,11935340,11935103,11934788,11932228,11932086">

这些 id 是当前页面上各个音频的 id。http://www.ximalaya.com/tracks/${id}.json 返回该 id 对应音频的信息(JSON 格式),其中的 play_path_64 字段即音频的获取地址,例如: http://audio.xmcdn.com/group16/M06/F4/D3/xxxxdfds5.m4a

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Todo: download dir
import re
import json
import urllib2
import sys
from time import sleep
from bs4 import BeautifulSoup
# Aria2c RPC secret: sent as "token:<TOKEN>" in the aria2.addUri request built
# in sendToAria2c.
# NOTE(review): presumably must match aria2c's --rpc-secret setting - confirm.
TOKEN = "lalala"
def get_ids(albumUrl):
    """Return the track ids listed on an album page.

    The page is fetched with a browser-like User-Agent, and the
    comma-separated ``sound_ids`` attribute of the
    ``<div class="personal_body">`` element is split into individual ids.

    Args:
        albumUrl: URL of the album page.

    Returns:
        A list of id strings. The list is empty when the page cannot be
        fetched, the expected element is missing, or it carries no ids.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    try:
        request = urllib2.Request(albumUrl, headers=headers)
        response = urllib2.urlopen(request)
    except urllib2.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        # There is no response to read after a failed request; falling
        # through would raise NameError on the unbound ``response``.
        return []
    try:
        content = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    soup = BeautifulSoup(content, 'lxml')
    container = soup.find("div", {"class": "personal_body"})
    if container is None:
        # Page layout differs from what is expected; nothing to download.
        return []
    allids = container.get("sound_ids")
    if not allids:
        return []
    # Drop empty fragments so callers never request "tracks/.json".
    return [part.strip() for part in allids.split(",") if part.strip()]
def dlone(id):
    """Fetch the JSON description of one track and queue it for download.

    Args:
        id: Track id; it is interpolated into
            ``http://www.ximalaya.com/tracks/<id>.json``.

    Returns:
        None. On a network or HTTP error the error is printed and the
        track is skipped.
    """
    base = "http://www.ximalaya.com/tracks/"
    url = base + str(id) + ".json"
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
    except urllib2.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        # Skip this track: ``response`` is unbound after a failed request.
        return
    try:
        content = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    json_paser(content)
def json_paser(content):
    """Decode a track's JSON description and hand it to aria2c.

    Args:
        content: JSON text containing at least the ``title`` and
            ``play_path_64`` keys.
    """
    track = json.loads(content)
    # The output file name is the track title with an .m4a extension.
    file_name = track["title"] + ".m4a"
    sendToAria2c(file_name, track["play_path_64"])
def sendToAria2c(itemTitle, itemUrl):
    """Ask the local aria2c daemon to download one file.

    Sends an ``aria2.addUri`` JSON-RPC request to
    ``http://localhost:6800/jsonrpc``, authenticated with the module-level
    ``TOKEN``, and prints whether the task was accepted.

    Args:
        itemTitle: File name passed to aria2c as the ``out`` option.
        itemUrl: URL of the file to download.

    Returns:
        None. Failures are reported on stdout and do not raise, so one bad
        item does not abort the rest of the album.
    """
    jsonreq = json.dumps({'jsonrpc': '2.0', 'id': '1',
                          'method': 'aria2.addUri',
                          'params': ["token:" + TOKEN, [itemUrl],
                                     {"out": itemTitle}]})
    print("\nTitle: %s\nUrl: %s" % (itemTitle, itemUrl))
    try:
        c = urllib2.urlopen('http://localhost:6800/jsonrpc', jsonreq)
    except urllib2.URLError as e:
        # aria2c is not running, or it rejected the request (e.g. bad token).
        print("Failed to add download task: %s" % e)
        return
    try:
        body = c.read()
    finally:
        # Always release the connection, even if reading fails.
        c.close()

    # A successful aria2.addUri reply carries the new task's GID in "result".
    try:
        reply = json.loads(body)
    except ValueError:
        reply = None
    if isinstance(reply, dict) and reply.get("result"):
        print("Download Task added successfully.")
    else:
        print("Failed to add download task: %s" % body)
def main(albumUrl):
    """Queue every track of the album at *albumUrl* for download.

    The collected ids are printed first; each track is then processed
    after a three-second pause.
    """
    track_ids = get_ids(albumUrl)
    print(track_ids)
    for track_id in track_ids:
        # Wait between requests before handling the next track.
        sleep(3)
        dlone(track_id)
if __name__ == '__main__':
    # The album URL is mandatory; print usage instead of an IndexError
    # traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: python %s <album-url>" % sys.argv[0])
    main(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment