Skip to content

Instantly share code, notes, and snippets.

@YusukeOba
Last active February 3, 2020 13:31
Show Gist options
  • Save YusukeOba/6011ce8aed5a04d1e382f883deae89a0 to your computer and use it in GitHub Desktop.
Save YusukeOba/6011ce8aed5a04d1e382f883deae89a0 to your computer and use it in GitHub Desktop.
カクヨムのランキングからタイトル・★の数・評価者数をcsv出力するpythonスクリプト
# ★の数/評価者数で割って平均値を知りたかった
import requests
from bs4 import BeautifulSoup
import csv
import time
# Number of ranking pages to fetch; the script requests pages
# 1..page_fetch_limit. (Original author's note: up to rank 300 for now.)
page_fetch_limit = 3
# ランキングから小説の固有ID一覧を抽出
def fetch_book_identifiers(page_num: int):
    """Return the work IDs listed on one page of the Kakuyomu overall ranking.

    Args:
        page_num: Page number placed in the ``page`` query parameter of the
            ranking URL.

    Returns:
        A list of strings, one per matching title link, obtained by removing
        ``"/works/"`` from each link's ``href``. Links without an ``href``
        are skipped.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    # Without an explicit timeout requests can wait forever on a stalled
    # connection.
    response = requests.get(
        "https://kakuyomu.jp/rankings/all/entire?page=" + str(page_num),
        timeout=30,
    )
    # Fail loudly instead of parsing an error page and returning no IDs.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Title anchors of the ranked works; their href has the form "/works/<id>".
    title_links = soup.find_all(
        "a", "widget-workCard-titleLabel bookWalker-work-title"
    )
    return [
        link["href"].replace("/works/", "")
        for link in title_links
        if link.get("href")
    ]
# 1本の小説の評価情報を抜き出し
def fetch_book_review_information(id: int):
    """Fetch the title, star total and reviewer count of a single work.

    Args:
        id: Work identifier appended to ``https://kakuyomu.jp/works/``. It is
            converted with ``str()``, so the string IDs extracted from the
            ranking links work as well as integers. (The name shadows the
            ``id`` builtin; it is kept so keyword callers keep working.)

    Returns:
        A three-element list ``[title, points, reviewer_count]`` where
        ``title`` is a string and the other two are integers.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: If the server does not respond in time.
        AttributeError: If an expected element is missing from the page.
        ValueError: If a counter's text is not a number.
    """
    # Without an explicit timeout requests can wait forever on a stalled
    # connection.
    response = requests.get(
        "https://kakuyomu.jp/works/" + str(id) + "#reviews",
        timeout=30,
    )
    # Fail loudly instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    def read_count(css_class):
        # Counters are rendered with thousands separators (e.g. "1,234").
        return int(soup.find("span", css_class).text.replace(",", ""))

    title = soup.find("h1", id="workTitle").a.text
    # Total number of stars awarded to the work.
    points = read_count("js-total-review-point-element")
    # Number of people who rated the work.
    reviewer_count = read_count("js-review-count-element")
    return [title, points, reviewer_count]
# Write one CSV row (title, star total, reviewer count) for every work found
# on the first `page_fetch_limit` ranking pages.
# The context manager guarantees the file is flushed and closed even when a
# request or a parse step raises part-way through; the explicit UTF-8 encoding
# keeps Japanese titles writable regardless of the platform's default locale.
with open('book_reviews.csv', 'w', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator='\n')
    # Ranking pages are requested as 1, 2, ..., page_fetch_limit.
    for page_num in range(1, page_fetch_limit + 1):
        for book_id in fetch_book_identifiers(page_num):
            review_info = fetch_book_review_information(book_id)
            writer.writerow(review_info)
            # Progress output: the title of the work just written.
            print("proceeded: " + review_info[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment