# rebane2001/bilibiliextract.py

## bilibiliextract.py
# Simple script to extract all video IDs from a bilibili channel
# Not properly cleaned up for public release, so you're on your own
import requests
import json
import re

def getChannelPageVideos(channelid, page, timeout=30):
    """Fetch one page of a channel's uploads and return their BV IDs.

    Sends a GET request to ``https://api.bilibili.com/x/space/arc/search``
    asking for page *page* of channel *channelid*, with 30 entries per
    page (``ps``) ordered by ``pubdate``.

    Args:
        channelid: Channel identifier, sent as the ``mid`` query parameter.
        page: Page number to request, sent as the ``pn`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list holding the ``bvid`` of every entry in the response's
        ``data.list.vlist`` array (empty when that array is empty).

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError, TypeError: If the JSON lacks the ``data.list.vlist``
            structure this function reads.
    """
    print("Getting page", page)
    # Headers imitating a browser request issued from space.bilibili.com.
    headers = {
        'authority': 'api.bilibili.com',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
        'accept': 'application/json, text/plain, */*',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'origin': 'https://space.bilibili.com',
        'sec-fetch-site': 'same-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        # The referer points at the requested channel's own video page; the
        # channel ID belongs in the path, and tid matches the query below.
        'referer': f'https://space.bilibili.com/{channelid}/video?tid=0&page={page}&keyword=&order=pubdate',
        'accept-language': 'en-US,en;q=0.9',
    }
    params = (
        ('mid', channelid),
        ('ps', '30'),
        ('tid', '0'),
        ('pn', page),
        ('keyword', ''),
        ('order', 'pubdate'),
        ('jsonp', 'jsonp'),
    )
    response = requests.get(
        'https://api.bilibili.com/x/space/arc/search',
        headers=headers,
        params=params,
        timeout=timeout,  # without a timeout a stalled connection hangs forever
    )
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    pagejson = response.json()
    return [v["bvid"] for v in pagejson["data"]["list"]["vlist"]]

def getChannelVideos(channelid):
    """Collect the video IDs from every page of a channel.

    Requests successive pages, starting at page 1, through
    ``getChannelPageVideos`` and stops at the first page that comes back
    empty. Progress is printed to stdout.

    Args:
        channelid: Channel identifier passed through to each page request.

    Returns:
        A list of all video IDs gathered, in the order the pages
        returned them.
    """
    print("Getting videos from", channelid)
    collected = []
    page = 0
    while True:
        page += 1
        batch = getChannelPageVideos(channelid, page)
        if not batch:
            # An empty page marks the end of the channel's uploads.
            break
        collected += batch
    print("Got", len(collected), "videos")
    return collected

# Read channel URLs from urls.txt (one per line) and append one video URL per
# upload of each channel to urls_gen.txt. Lines without a channel URL are
# skipped.
#
# The pattern requires at least one digit ("+" rather than "*"), so a line
# such as "https://space.bilibili.com/" with no channel ID is skipped instead
# of triggering an API request with an empty ID. Dots are escaped so they
# only match a literal ".".
channel_pattern = re.compile(r'https://space\.bilibili\.com/([0-9]+)', re.IGNORECASE)

with open("urls.txt", "r") as f, open("urls_gen.txt", "a") as w:
    for line in f:
        channelre = channel_pattern.search(line)
        if not channelre:
            continue
        channelid = channelre.group(1)
        for video in getChannelVideos(channelid):
            w.write(f"https://www.bilibili.com/video/{video}\n")
print("Done!")
	# Simple script to extract all video IDs from a bilibili channel
	# Not properly cleaned up for public release, so you're on your own
	import requests
	import json
	import re

	def getChannelPageVideos(channelid, page, timeout=30):
		"""Fetch one page of a channel's uploads and return their BV IDs.

		Sends a GET request to ``https://api.bilibili.com/x/space/arc/search``
		asking for page *page* of channel *channelid*, with 30 entries per
		page (``ps``) ordered by ``pubdate``.

		Args:
			channelid: Channel identifier, sent as the ``mid`` query parameter.
			page: Page number to request, sent as the ``pn`` query parameter.
			timeout: Seconds to wait for the server before giving up.

		Returns:
			A list holding the ``bvid`` of every entry in the response's
			``data.list.vlist`` array (empty when that array is empty).

		Raises:
			requests.RequestException: On connection errors, timeouts, or a
				4xx/5xx HTTP status.
			ValueError: If the response body is not valid JSON.
			KeyError, TypeError: If the JSON lacks the ``data.list.vlist``
				structure this function reads.
		"""
		print("Getting page", page)
		# Headers imitating a browser request issued from space.bilibili.com.
		headers = {
			'authority': 'api.bilibili.com',
			'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
			# "*/*" restored: this copy had it garbled down to a bare "/".
			'accept': 'application/json, text/plain, */*',
			'sec-ch-ua-mobile': '?0',
			'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
			'origin': 'https://space.bilibili.com',
			'sec-fetch-site': 'same-site',
			'sec-fetch-mode': 'cors',
			'sec-fetch-dest': 'empty',
			# The referer points at the requested channel's own video page; the
			# channel ID belongs in the path, and tid matches the query below.
			'referer': f'https://space.bilibili.com/{channelid}/video?tid=0&page={page}&keyword=&order=pubdate',
			'accept-language': 'en-US,en;q=0.9',
		}
		params = (
			('mid', channelid),
			('ps', '30'),
			('tid', '0'),
			('pn', page),
			('keyword', ''),
			('order', 'pubdate'),
			('jsonp', 'jsonp'),
		)
		response = requests.get(
			'https://api.bilibili.com/x/space/arc/search',
			headers=headers,
			params=params,
			timeout=timeout,  # without a timeout a stalled connection hangs forever
		)
		# Fail with a clear HTTP error instead of trying to parse an error page.
		response.raise_for_status()
		pagejson = response.json()
		return [v["bvid"] for v in pagejson["data"]["list"]["vlist"]]

	def getChannelVideos(channelid):
		"""Collect the video IDs from every page of a channel.

		Requests successive pages, starting at page 1, through
		``getChannelPageVideos`` and stops at the first page that comes back
		empty. Progress is printed to stdout.

		Args:
			channelid: Channel identifier passed through to each page request.

		Returns:
			A list of all video IDs gathered, in the order the pages
			returned them.
		"""
		print("Getting videos from", channelid)
		vidIDs = []
		i = 1
		while True:
			tempVidIDs = getChannelPageVideos(channelid, i)
			if not tempVidIDs:
				# An empty page marks the end of the channel's uploads.
				break
			vidIDs.extend(tempVidIDs)
			i += 1
		print("Got", len(vidIDs), "videos")
		return vidIDs

	# Read channel URLs from urls.txt (one per line) and append one video URL
	# per upload of each channel to urls_gen.txt. Lines without a channel URL
	# are skipped.
	#
	# The pattern requires at least one digit ("+" rather than "*"), so a line
	# such as "https://space.bilibili.com/" with no channel ID is skipped
	# instead of triggering an API request with an empty ID. Dots are escaped
	# so they only match a literal ".".
	channel_pattern = re.compile(r'https://space\.bilibili\.com/([0-9]+)', re.IGNORECASE)

	with open("urls.txt", "r") as f, open("urls_gen.txt", "a") as w:
		for line in f:
			channelre = channel_pattern.search(line)
			if not channelre:
				continue
			channelid = channelre.group(1)
			for video in getChannelVideos(channelid):
				w.write(f"https://www.bilibili.com/video/{video}\n")
	print("Done!")