Skip to content

Instantly share code, notes, and snippets.

@a0x
Last active June 9, 2019 04:55
Show Gist options
  • Save a0x/b36b946c56ab466b0b1d43295338ff91 to your computer and use it in GitHub Desktop.
Save a0x/b36b946c56ab466b0b1d43295338ff91 to your computer and use it in GitHub Desktop.
网易公开课下载脚本
# https://www.twblogs.net/a/5bdf0e822b717720b51c3328/zh-cn
# https://www.bilibili.com/read/cv1624058/
import os
import sys
import requests
import time
from lxml import html
from you_get.common import r1, get_decoded_html
from win32com.client import Dispatch
def resource_url(url):
    """Derive the ``.mp4`` URL referenced by a lecture page.

    The page at *url* is fetched with ``get_decoded_html`` and searched for a
    quoted ``...-list.m3u8`` reference first, then for a plain quoted
    ``....m3u8`` reference. The captured prefix with ``.mp4`` appended is
    returned.

    Args:
        url: Address of the lecture page to inspect.

    Returns:
        The captured playlist prefix followed by ``.mp4``.

    Raises:
        ValueError: If neither pattern matches the page source.
    """
    # Named ``page`` so the module-level ``lxml.html`` import is not shadowed.
    page = get_decoded_html(url)
    # The dots are escaped so that only a literal ".m3u8" suffix matches.
    stem = (r1(r'["\'](.+)-list\.m3u8["\']', page)
            or r1(r'["\'](.+)\.m3u8["\']', page))
    if not stem:
        # Without this guard a page lacking a playlist reference would fail
        # on the string concatenation below with an uninformative TypeError.
        raise ValueError("no .m3u8 reference found in page: %s" % url)
    return stem + ".mp4"
def target_resource(course_url):
    """Scrape a course page into a list of download descriptors.

    The page at *course_url* is fetched and the first cell of every row of
    the second table inside ``div.m-mn`` is read: the cell's own text gives
    the lecture index, the link text gives the title and the link ``href``
    gives the lecture page.

    Args:
        course_url: Address of the course overview page.

    Returns:
        A list of ``{'filename': ..., 'url': ...}`` dicts, one per lecture,
        where ``filename`` is ``"<index> <title>.mp4"`` and ``url`` is the
        value produced by ``resource_url`` for the lecture page.
    """
    response = requests.get(course_url)
    document = html.fromstring(response.text)

    # Filename list
    raw_indices = document.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/text()')
    titles = document.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/a/text()')
    # Whitespace-only text nodes are dropped after stripping.
    indices = [text for text in (node.strip() for node in raw_indices) if text]
    filenames = [
        '%s %s.mp4' % (index, title.strip())
        for index, title in zip(indices, titles)
    ]

    # URL list
    hrefs = document.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/a//@href')
    page_urls = [
        href.strip().replace('open.163.com', 'v.163.com') for href in hrefs
    ]
    media_urls = [resource_url(page_url) for page_url in page_urls]

    # Pairing stops at the shorter of the two lists.
    return [
        {'filename': name, 'url': link}
        for name, link in zip(filenames, media_urls)
    ]
def dispatchXunLei(download_url, save_as, file_path):
    """Submit one download task to the ``ThunderAgent.Agent64.1`` COM object.

    The three arguments are printed and then forwarded, in order, as the
    first three parameters of the agent's ``AddTask`` call, after which the
    queued task is committed.

    Args:
        download_url: Address of the file to download.
        save_as: Second ``AddTask`` argument (the callers in this file pass
            the target filename).
        file_path: Third ``AddTask`` argument (the callers in this file pass
            the second command-line argument).
    """
    agent = Dispatch("ThunderAgent.Agent64.1")
    print(download_url, save_as, file_path)
    # NOTE(review): the trailing "", "", -1, 0, 5 are passed through verbatim;
    # their meaning is defined by the ThunderAgent API - confirm there.
    task_args = (download_url, save_as, file_path, "", "", -1, 0, 5)
    agent.AddTask(*task_args)
    agent.CommitTasks()
# Script entry: argv[1] is the course page URL, argv[2] is handed to
# dispatchXunLei as its file_path argument.
if len(sys.argv) < 3:
    # Fail before any network work instead of hitting an IndexError later.
    sys.exit("usage: %s <course_url> <file_path>" % sys.argv[0])
url = sys.argv[1]
data = target_resource(url)
for item in data:
    dispatchXunLei(item['url'], item['filename'], sys.argv[2])
    # Short pause between consecutive task submissions.
    time.sleep(0.5)
# pip install requests lxml you-get
import os
import requests
from lxml import html
from you_get.common import r1, get_decoded_html
def resource_url(url):
    """Derive the ``.mp4`` URL referenced by a lecture page.

    The page at *url* is fetched with ``get_decoded_html`` and searched for a
    quoted ``...-list.m3u8`` reference first, then for a plain quoted
    ``....m3u8`` reference. The captured prefix with ``.mp4`` appended is
    returned.

    Args:
        url: Address of the lecture page to inspect.

    Returns:
        The captured playlist prefix followed by ``.mp4``.

    Raises:
        ValueError: If neither pattern matches the page source.
    """
    # Named ``page`` so the module-level ``lxml.html`` import is not shadowed.
    page = get_decoded_html(url)
    # The dots are escaped so that only a literal ".m3u8" suffix matches.
    stem = (r1(r'["\'](.+)-list\.m3u8["\']', page)
            or r1(r'["\'](.+)\.m3u8["\']', page))
    if not stem:
        # Without this guard a page lacking a playlist reference would fail
        # on the string concatenation below with an uninformative TypeError.
        raise ValueError("no .m3u8 reference found in page: %s" % url)
    return stem + ".mp4"
# Course info
import subprocess  # imported here because only this script section needs it

course_path = 'http://open.163.com/special/opencourse/financialtheory.html'
ext = 'mp4'
res = requests.get(course_path)
tree = html.fromstring(res.text)

# Filename list: the cell text is the lecture index, the link text the title.
file_index_list = tree.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/text()')
filename_list = tree.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/a/text()')
# Drop text nodes that are empty after stripping.
file_index_list = [i for i in (x.strip() for x in file_index_list) if i]
filename_list = [
    '%s %s' % (i, x.strip()) for i, x in zip(file_index_list, filename_list)
]
print("Here are your courses:")
print(*filename_list, sep='\n')

# URL list
page_url_list = tree.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/a//@href')
page_url_list = [
    x.strip().replace('open.163.com', 'v.163.com') for x in page_url_list
]
resource_url_list = [resource_url(x) for x in page_url_list]
print("\nHere are the resources:")
print(*resource_url_list, sep='\n')

# zip() stops at the shorter list, so a length mismatch between names and
# resources no longer raises IndexError part-way through the downloads.
for filename, resource in zip(filename_list, resource_url_list):
    print("Download %s at %s" % (filename, resource))
    # The arguments go to curl as a list, with no shell in between, so
    # characters such as '&', spaces or quotes in the scraped URL or title
    # cannot break or alter the command.
    # Raises FileNotFoundError if curl is not installed.
    subprocess.run(['curl', resource, '--output', '%s.%s' % (filename, ext)])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment