Skip to content

Instantly share code, notes, and snippets.

@eternal-flame-AD
Created November 23, 2018 04:41
Show Gist options
  • Save eternal-flame-AD/b106ee648fde2954511747ad206fe0d4 to your computer and use it in GitHub Desktop.
Save eternal-flame-AD/b106ee648fde2954511747ad206fe0d4 to your computer and use it in GitHub Desktop.
Format a Bilibili novel (a single article or a whole series) as HTML
import sys
import os
from xml.sax.saxutils import escape
import bs4
import requests
class BiliArticle:
    """An article described by a title, an author and an HTML body fragment.

    Instances are built directly, downloaded with :meth:`from_bili_cv`, or
    combined from several articles with :meth:`merge`.
    :meth:`format_markup` renders the instance as a complete HTML document.
    """

    # Seconds to wait for the article page before giving up; without a
    # timeout ``requests`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, title: str, author: str, main_markup: str = ""):
        """Store the title, the author and the (already HTML) body markup."""
        self.title, self.author, self.main_markup = title, author, main_markup

    @classmethod
    def from_bili_cv(cls, id: str) -> "BiliArticle":
        """Download ``https://www.bilibili.com/read/cv<id>`` and parse it.

        The author is read from the ``a.author-name`` element, the title from
        ``h1.title`` and the body from ``div.article-holder``.

        Args:
            id: Article identifier; it is interpolated into the URL as-is.

        Returns:
            A new article holding the extracted title, author and body markup.

        Raises:
            requests.HTTPError: The server answered with an error status.
            ValueError: One of the expected elements is missing from the page.
        """
        url = f"https://www.bilibili.com/read/cv{id}"
        response = requests.get(url, timeout=cls.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = bs4.BeautifulSoup(response.text, "html5lib")

        author_tag = soup.find("a", class_="author-name")
        title_tag = soup.find("h1", class_="title")
        body_tag = soup.find("div", class_="article-holder")
        # ``find`` returns None when nothing matches; fail with a message that
        # names the missing piece instead of an AttributeError on None.
        for label, tag in (("author", author_tag),
                           ("title", title_tag),
                           ("article body", body_tag)):
            if tag is None:
                raise ValueError(f"could not find the {label} in {url}")

        return cls(
            title_tag.get_text(strip=True),
            author_tag.get_text(strip=True),
            body_tag.prettify(),
        )

    @classmethod
    def merge(cls, new_title: str, new_author: str, alist) -> "BiliArticle":
        """Concatenate the bodies of ``alist`` into one article.

        Args:
            new_title: Title of the merged article.
            new_author: Author of the merged article.
            alist: Iterable of articles; their ``main_markup`` values are
                joined in iteration order with no separator.

        Returns:
            A new article; an empty ``alist`` yields an empty body.
        """
        return cls(new_title, new_author,
                   "".join(article.main_markup for article in alist))

    def _build_document(self) -> str:
        """Return the unparsed HTML document string for this article."""
        # ``escape`` only handles &, < and > by default.  The author ends up
        # inside a double-quoted attribute, so quotes must be replaced with
        # character references as well or they would terminate the attribute.
        author = escape(
            self.author, entities={
                "\"": "&quot;",
                "'": "&#39;",
            })
        # ``main_markup`` is inserted verbatim: it is expected to be HTML.
        return (
            f"<html><head><title>{escape(self.title)}</title>"
            f'<meta name="author" content="{author}"></meta></head>'
            f"<body>{self.main_markup}</body></html>"
        )

    def format_markup(self) -> str:
        """Return the article as a complete, pretty-printed HTML document.

        The document is parsed with ``html5lib`` and serialised again with
        ``prettify`` before being returned.
        """
        soup = bs4.BeautifulSoup(self._build_document(), "html5lib")
        return soup.prettify()
def safe_filename(name: str) -> str:
    """Return ``name`` made safe to use as a single path component.

    Titles and list names come from the remote site, so a path separator in
    them would either break the write or place the file outside the intended
    directory.  Separators are replaced with ``_``; the special names ``.``
    and ``..`` are replaced with underscores of the same length.
    """
    cleaned = str(name)
    # "/" is always replaced; os.sep / os.altsep add the platform's own
    # separators (os.altsep is None on platforms without an alternative).
    for separator in ("/", os.sep, os.altsep):
        if separator:
            cleaned = cleaned.replace(separator, "_")
    if cleaned in (".", ".."):
        cleaned = "_" * len(cleaned)
    return cleaned


def main(argv) -> None:
    """Run the command line tool.

    Usage: ``<script> cv <article id>`` saves one article to
    ``result/<title>.html``; ``<script> series <list id>`` saves every
    article of a list to ``result/<list name>/<title>.html`` plus a combined
    ``result/<list name>/merge.html``.

    Args:
        argv: Full argument vector, including the program name at index 0.
    """
    if len(argv) < 3 or argv[1] not in ("cv", "series"):
        sys.exit(f"usage: {argv[0] if argv else 'script'} (cv|series) <id>")
    mode, target_id = argv[1], argv[2]

    if mode == "cv":
        cv = BiliArticle.from_bili_cv(target_id)
        markup = cv.format_markup()
        # The output directory may not exist yet on a first run.
        os.makedirs("result", exist_ok=True)
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent the article text.
        with open(f"result/{safe_filename(cv.title)}.html", "w",
                  encoding="utf-8") as f:
            f.write(markup)
        return

    # mode == "series"
    url = f"https://api.bilibili.com/x/article/list/articles?id={target_id}&jsonp=jsonp"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    res = response.json()
    list_name = res["data"]["list"]["name"]
    list_author = res["data"]["author"]["name"]
    out_dir = f"result/{safe_filename(list_name)}"
    os.makedirs(out_dir, exist_ok=True)

    cvs = []
    for article in res["data"]["articles"]:
        cv_id = article["id"]
        cv_title = article["title"]
        print(cv_id, cv_title)
        cv = BiliArticle.from_bili_cv(cv_id)
        cvs.append(cv)
        # Each file is named after the title reported by the list API.
        with open(f"{out_dir}/{safe_filename(cv_title)}.html", "w",
                  encoding="utf-8") as f:
            f.write(cv.format_markup())

    with open(f"{out_dir}/merge.html", "w", encoding="utf-8") as f:
        f.write(
            BiliArticle.merge(list_name, list_author, cvs).format_markup())


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment