Created
June 2, 2021 10:06
-
-
Save rebane2001/fe22e091f2c1301e394dbb67ca3eef07 to your computer and use it in GitHub Desktop.
Simple script to extract bilibili video IDs/URLs from a channel ID/URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simple script to extract all video IDs from a bilibili channel.
# Not properly cleaned up for public release, so you're on your own.
import requests | |
import json | |
import re | |
def getChannelPageVideos(channelid, page):
    """Fetch one page of a channel's uploads and return their ``bvid`` values.

    Sends a GET request to ``https://api.bilibili.com/x/space/arc/search``
    asking for 30 entries of page ``page`` for member ``channelid``, ordered
    by publication date, and collects the ``bvid`` field of every entry under
    ``data.list.vlist`` in the JSON response.

    Args:
        channelid: Channel (member) ID, sent as the ``mid`` query parameter.
        page: 1-based page number, sent as the ``pn`` query parameter.

    Returns:
        A list of ``bvid`` strings; empty when the page has no entries.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the response body lacks ``data.list.vlist``.
    """
    print("Getting page",page)
    headers = {
        'authority': 'api.bilibili.com',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
        'accept': 'application/json, text/plain, */*',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'origin': 'https://space.bilibili.com',
        'sec-fetch-site': 'same-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        # The referer must describe the channel actually being queried: the
        # channel ID belongs in the path and ``tid`` mirrors the request's 0.
        'referer': f'https://space.bilibili.com/{channelid}/video?tid=0&page={page}&keyword=&order=pubdate',
        'accept-language': 'en-US,en;q=0.9',
    }
    params = (
        ('mid', channelid),
        ('ps', '30'),
        ('tid', '0'),
        ('pn', page),
        ('keyword', ''),
        ('order', 'pubdate'),
        ('jsonp', 'jsonp'),
    )
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(
        'https://api.bilibili.com/x/space/arc/search',
        headers=headers,
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    pagejson = json.loads(response.text)
    try:
        vlist = pagejson["data"]["list"]["vlist"]
    except (KeyError, TypeError) as err:
        # Surface the payload instead of an opaque KeyError/TypeError so the
        # user can see what the API actually answered.
        raise RuntimeError(
            f"Unexpected response for channel {channelid} page {page}: "
            f"{response.text[:200]}"
        ) from err
    # A null ``vlist`` is treated the same as an empty page.
    return [v["bvid"] for v in (vlist or [])]
def getChannelVideos(channelid):
    """Collect the video IDs of every page of a channel.

    Requests pages from ``getChannelPageVideos`` starting at page 1 and
    stops at the first page that comes back empty.

    Args:
        channelid: Channel ID forwarded to ``getChannelPageVideos``.

    Returns:
        A list with the IDs of all fetched pages, in page order.
    """
    print("Getting videos from",channelid)
    collected = []
    page_number = 1
    batch = getChannelPageVideos(channelid, page_number)
    # Keep fetching until a page yields nothing.
    while len(batch) > 0:
        collected += batch
        page_number += 1
        batch = getChannelPageVideos(channelid, page_number)
    print("Got",len(collected),"videos")
    return collected
# Read channel URLs from urls.txt (one per line) and append the URL of every
# video found on each channel to urls_gen.txt. Lines that do not contain a
# channel URL are skipped.
with open("urls.txt", "r") as f, open("urls_gen.txt", "a") as w:
    for line in f:
        # Require at least one digit: with ``*`` a bare
        # "https://space.bilibili.com/" matched with an empty ID and started
        # a crawl for channel "". Dots are escaped so they match literally.
        channelre = re.search(r'https://space\.bilibili\.com/([0-9]+)', line, re.IGNORECASE)
        if not channelre:
            continue
        channelid = channelre.group(1)
        videos = getChannelVideos(channelid)
        for video in videos:
            w.write(f"https://www.bilibili.com/video/{video}\n")
print("Done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment