Skip to content

Instantly share code, notes, and snippets.

@rebane2001
Created June 2, 2021 10:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rebane2001/fe22e091f2c1301e394dbb67ca3eef07 to your computer and use it in GitHub Desktop.
Save rebane2001/fe22e091f2c1301e394dbb67ca3eef07 to your computer and use it in GitHub Desktop.
Simple script to extract bilibili video IDs/URLs from a channel ID/URL
# Simple script to extract all video IDs from a bilibili channel
# Not properly cleaned up for public release, so you're on your own
import requests
import json
import re
def getChannelPageVideos(channelid, page, timeout=30):
    """Fetch one page of a channel's video list and return the BV IDs on it.

    Sends a GET request to bilibili's ``x/space/arc/search`` endpoint with
    browser-like headers, asking for up to 30 videos ordered by publish date.

    Args:
        channelid: Channel (space) ID; sent as the ``mid`` query parameter.
        page: Page number; sent as the ``pn`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        A list with the ``bvid`` of every entry in the response's
        ``data.list.vlist``; an empty list when that list is empty or null.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON body does not contain ``data.list.vlist``
            (for example when the API returns an error payload).
    """
    print("Getting page",page)
    headers = {
        'authority': 'api.bilibili.com',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
        'accept': 'application/json, text/plain, */*',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'origin': 'https://space.bilibili.com',
        'sec-fetch-site': 'same-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        # The referer must point at the channel actually being queried; the
        # channel ID belongs in the path and "tid" mirrors the query below.
        'referer': f'https://space.bilibili.com/{channelid}/video?tid=0&page={page}&keyword=&order=pubdate',
        'accept-language': 'en-US,en;q=0.9',
    }
    params = {
        'mid': channelid,
        'ps': '30',
        'tid': '0',
        'pn': page,
        'keyword': '',
        'order': 'pubdate',
        'jsonp': 'jsonp',
    }
    response = requests.get(
        'https://api.bilibili.com/x/space/arc/search',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    pagejson = json.loads(response.text)
    try:
        vlist = pagejson["data"]["list"]["vlist"]
    except (KeyError, TypeError) as err:
        # Fail loudly with context instead of a bare KeyError; returning an
        # empty list here would make the caller think the channel has ended.
        raise RuntimeError(
            f"Unexpected API response for channel {channelid} page {page}: "
            f"{response.text[:200]!r}"
        ) from err
    return [v["bvid"] for v in vlist or []]
def getChannelVideos(channelid):
    """Collect the video IDs of every page of a channel.

    Requests pages from getChannelPageVideos starting at page 1 and keeps
    going until a page comes back empty. Prints a progress line before the
    first request and the total count once finished.

    Args:
        channelid: Channel ID, passed through to getChannelPageVideos.

    Returns:
        A single list with the IDs from all pages, in the order received.
    """
    print("Getting videos from",channelid)
    collected = []
    page = 1
    batch = getChannelPageVideos(channelid, page)
    # An empty page marks the end of the channel's video list.
    while batch:
        collected += batch
        page += 1
        batch = getChannelPageVideos(channelid, page)
    print("Got",len(collected),"videos")
    return collected
# Matches a bilibili channel URL and captures its numeric ID. The ID uses
# "+" rather than "*" so a URL with no ID is skipped instead of producing an
# empty channel ID; the dots are escaped so they only match literal dots.
_CHANNEL_URL_RE = re.compile(r'https://space\.bilibili\.com/([0-9]+)', re.IGNORECASE)

# Read urls.txt line by line; for each line containing a channel URL, fetch
# that channel's video IDs and append one video URL per ID to urls_gen.txt.
# Lines without a channel URL are ignored.
with open("urls.txt", "r") as f, open("urls_gen.txt", "a") as w:
    for line in f:
        channelre = _CHANNEL_URL_RE.search(line)
        if not channelre:
            continue
        channelid = channelre.group(1)
        videos = getChannelVideos(channelid)
        w.writelines(f"https://www.bilibili.com/video/{video}\n" for video in videos)
print("Done!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment