Last active
February 3, 2020 13:31
-
-
Save YusukeOba/6011ce8aed5a04d1e382f883deae89a0 to your computer and use it in GitHub Desktop.
カクヨムのランキングからタイトル・★の数・評価者数をcsv出力するpythonスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Goal: divide the total ★ count by the number of reviewers to get the average stars per reviewer.
import csv
import time

import requests
from bs4 import BeautifulSoup
# Number of ranking pages to fetch. The original note says this covers up to
# rank 300 for now (presumably ~100 works per page -- confirm against the site).
page_fetch_limit = 3
def fetch_book_identifiers(page_num: int, timeout: float = 30.0) -> list:
    """Return the work IDs linked from one page of the Kakuyomu overall ranking.

    Args:
        page_num: Value sent as the ``page`` query parameter of the ranking URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A list of strings, one per title link found on the page: the link's
        ``href`` with the ``/works/`` prefix removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    r = requests.get(
        "https://kakuyomu.jp/rankings/all/entire?page=" + str(page_num),
        timeout=timeout,
    )
    # Fail loudly on an error response instead of parsing an error page and
    # silently returning no IDs.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # Title links of the ranking cards; href=True skips anchors that carry no
    # href at all, which would otherwise fail on None.replace(...).
    links = soup.find_all(
        "a",
        "widget-workCard-titleLabel bookWalker-work-title",
        href=True,
    )

    # A concrete list (rather than a one-shot map iterator) can be iterated
    # more than once and measured with len().
    return [link.get("href").replace("/works/", "") for link in links]
def _parse_count(text: str) -> int:
    """Convert a count rendered with thousands separators (e.g. "1,234") to int."""
    return int(text.replace(",", ""))


def fetch_book_review_information(id: int, timeout: float = 30.0):
    """Fetch the title, total star count and reviewer count of one work.

    Args:
        id: Work identifier appended to ``https://kakuyomu.jp/works/``. It is
            passed through ``str()``, so the string IDs taken from ranking
            links work as well as integers. The name shadows the ``id``
            builtin but is kept so existing keyword callers keep working.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A three-element list ``[title, points, reviewer_count]`` where
        ``title`` is a string and the two counts are ints.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        AttributeError: If the page lacks one of the expected elements.
    """
    r = requests.get(
        "https://kakuyomu.jp/works/" + str(id) + "#reviews",
        timeout=timeout,
    )
    # Surface HTTP errors directly rather than as an AttributeError on a
    # missing element further down.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # Title: text of the link inside the <h1 id="workTitle"> heading.
    title = soup.find("h1", id="workTitle").a.text

    # Total number of stars.
    points = _parse_count(soup.find("span", "js-total-review-point-element").text)

    # Number of people who left a rating.
    reviewer_count = _parse_count(soup.find("span", "js-review-count-element").text)

    return [
        title,
        points,
        reviewer_count,
    ]
# Write one CSV row per ranked work: title, total stars, reviewer count.
# - newline='' lets the csv writer control line endings itself, as the csv
#   module expects for file objects it writes to.
# - An explicit UTF-8 encoding keeps the output independent of the OS locale,
#   which matters because the titles are Japanese text.
# - The `with` block closes the file even if a request or parse step raises.
with open('book_reviews.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator='\n')
    for i in range(page_fetch_limit):
        # Ranking pages are requested with 1-based page numbers.
        ids = fetch_book_identifiers(i + 1)
        # `work_id` rather than `id`, to avoid shadowing the builtin.
        for work_id in ids:
            review_info = fetch_book_review_information(work_id)
            writer.writerow(review_info)
            print("proceeded: " + review_info[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment