Skip to content

Instantly share code, notes, and snippets.

@VincentSit
Created August 17, 2018 19:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VincentSit/a6ddda22ea05f4269878a48320245f0d to your computer and use it in GitHub Desktop.
Save VincentSit/a6ddda22ea05f4269878a48320245f0d to your computer and use it in GitHub Desktop.
Download youku video playlist. 批量下载优酷视频列表或保存列表视频链接.
# coding=utf-8
"""Download a Youku video playlist, or save the episode links to a file.

Prerequisites:
    install you-get
    pip3 install BeautifulSoup4
    pip3 install requests

Simple script; episode-list pagination is NOT handled — for paginated
lists you must click an episode on the later page and paste its URL here.
E.g. for a 40-episode show: open episode 1 to collect the first page's 30
links, then open episode 31 and paste its URL here to collect the last 10.
"""
import os
import shlex
import sys

import requests
from bs4 import BeautifulSoup

# URL of any episode belonging to the playlist you want to grab.
url = 'https://v.youku.com/v_show/id_XMTIwODg1NzQw.html?spm=a2h0j.11185381.listitem_page2.5~A'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
html = requests.get(url, headers=headers).text
soup = BeautifulSoup(html, features="html.parser")

# Episode links carry class "sn" on Youku's episode-list widget.
links = soup.find_all("a", class_="sn")
if not links:
    sys.exit("获取剧集列表失败")

# Guard against a missing <title> tag (soup.title is None) and against a
# title whose .string is None (e.g. when the tag has child elements) —
# the original crashed on both. Compute it once and reuse below.
title = soup.title.string if soup.title is not None and soup.title.string else ''

# If False, save the links to a text file instead of downloading.
download = True
if download:
    # Skip the first link: it is the episode currently being viewed.
    for link in links[1:]:
        addr = 'https:' + link.get('href')
        print("Downloading " + addr)
        # Whether to pass a cookies file to you-get; replace the path
        # with your own if you enable this.
        using_cookies = False
        cookies = '-c ~/Downloads/cookies.txt ' if using_cookies else ''
        # Quote the title so spaces/quotes in it can't break the command.
        output_path = '-o ' + shlex.quote(title) + ' ' if title else ''
        # NOTE(review): os.system with a concatenated string means the
        # scraped href ends up in a shell command — fine for trusted
        # Youku pages, but subprocess.run([...]) would be safer.
        cmd = '$(which you-get) ' + output_path + cookies + addr
        # cmd = '~/GitHub/you-get/you-get ' + output_path + cookies + addr
        os.system(cmd)
else:
    # Append every episode URL (including the current one) to a file
    # named after the page title, or a fallback name if there is none.
    filename = (title if title else 'youku_url_list') + '.txt'
    with open(filename, 'a') as out:
        for link in links:
            addr = 'https:' + link.get('href')
            print(addr)
            out.write(addr + '\n')
    print("保存完毕")
@nguyendupco
Copy link

如何使用這段程式碼?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment