Created
July 22, 2021 11:10
-
-
Save kenzo-tanaka/75aca94aa4033331cd5f135f5d467f74 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import json | |
import datetime | |
from itertools import groupby | |
def get_soup(url, timeout=10):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an error status instead of parsing an error page and
    # crashing later on a missing element.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")
def get_amazon_link(book):
    """Return the ``href`` of the first link inside the ``itemInfoElm`` element.

    The page at ``book['url']`` is downloaded through ``get_soup``; the first
    ``<a>`` found under the element with class ``itemInfoElm`` supplies the
    link (presumably the Amazon product page, going by the function name).
    """
    page = get_soup(book['url'])
    info_block = page.find(class_='itemInfoElm')
    first_anchor = info_block.find('a')
    return first_anchor.get('href')
def format_date(date_str):
    """Convert a ``YYYY年MM月DD日`` date string into a ``YYYYMM`` key.

    Args:
        date_str: Date text in the form ``%Y年%m月%d日``. Leading and trailing
            whitespace is ignored.

    Returns:
        The year followed by the zero-padded month, e.g. ``'202107'``.

    Raises:
        ValueError: If the text does not match the expected date format.
    """
    date_format = '%Y年%m月%d日'
    # The value is scraped element text and may carry surrounding whitespace,
    # which strptime would otherwise reject.
    date_dt = datetime.datetime.strptime(date_str.strip(), date_format)
    return f'{date_dt.year}{date_dt.month:02}'
def get_register_date(book):
    """Return the text of the first ``<span>`` in the ``read-day-status-area`` element.

    The page at ``book['url']`` is downloaded through ``get_soup``. The text is
    returned exactly as it appears in the page, without any parsing.
    """
    page = get_soup(book['url'])
    status_area = page.find(class_='read-day-status-area')
    date_span = status_area.find('span')
    return date_span.text
def add_some_values(books):
    """Add ``amazon_link`` and ``register_date`` keys to every book, in place.

    ``register_date`` holds the value returned by ``format_date`` for the text
    that ``get_register_date`` scraped. The same ``books`` object that was
    passed in is returned.

    NOTE: ``get_amazon_link`` and ``get_register_date`` each download the
    book's page, so every book costs two requests for the same URL.
    """
    for entry in books:
        entry['amazon_link'] = get_amazon_link(entry)
        raw_date = get_register_date(entry)
        entry['register_date'] = format_date(raw_date)
    return books
def main():
    """Export the Booklog shelf to ``books.md``, grouped by ``register_date``.

    Downloads the shelf JSON from the Booklog API, enriches every book through
    ``add_some_values``, sorts the books by their ``register_date`` key and
    writes one ``## <key>`` heading per distinct key, each followed by a
    Markdown list of ``[title](amazon_link)`` entries.

    Raises:
        requests.RequestException: If the API request fails, times out, or
            returns an error status.
    """
    api_res = requests.get(
        "http://api.booklog.jp/v2/json/4165b902f43abd44?count=10000",
        timeout=30,
    )
    api_res.raise_for_status()
    books = json.loads(api_res.text)['books']
    books = add_some_values(books)

    # groupby only merges adjacent items, so the list must be sorted first.
    books.sort(key=lambda b: int(b['register_date']))

    # Explicit UTF-8 so non-ASCII titles do not depend on the platform default
    # encoding; the context manager closes the file even if a write fails.
    with open('books.md', 'w', encoding='utf-8') as f:
        for key, group in groupby(books, key=lambda b: b['register_date']):
            f.write(f'\n## {key}\n\n')
            for book in group:
                title = book['title']
                url = book['amazon_link']
                f.write(f'- [{title}]({url})\n')
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment