Created
April 6, 2020 13:00
-
-
Save varundey/022b2c1c4e6c248c1e60d260bb9d75ef to your computer and use it in GitHub Desktop.
Get reviews from Goodreads and export them as an Excel file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import pandas | |
import requests | |
from string import Template | |
# Goodreads developer key. Empty in this file — presumably to be filled in
# before running; it is embedded into API_URL below.
GOODREADS_API_KEY = ""

# Directory whose entries are listed and parsed as ebook file names.
EBOOKS_DIRECTORY = "/Users/varun.dey/Documents/Kindle"

# Output artifacts: the intermediate JSON dump and the spreadsheet built from it.
RATINGS_JSON_FILE = "ratings.json"
RATINGS_EXCEL_FILE = "ratings.xlsx"

# Lookup URL template. $format, $author and $title are filled per request;
# the API key is baked in once when the module is loaded.
API_URL = Template(
    "https://www.goodreads.com/book/title.$format?author=$author"
    + "&key=" + GOODREADS_API_KEY
    + "&title=$title"
)
def parse_author_title(author, title):
    """Normalize the author and title parts of an ebook file name.

    Args:
        author: Author text, optionally in "Last, First" order.
        title: The rest of the file name. It may end in a file extension and
            may contain " - " separated segments.

    Returns:
        An ``(author, title)`` tuple. The author's ", "-separated parts are
        reversed and joined with spaces. The title has its trailing extension
        removed; if it then contains " - ", the second segment is returned,
        otherwise the whole remaining text.
    """
    # "Last, First" -> "First Last"; names without ", " pass through unchanged.
    author = " ".join(reversed(author.split(", ")))

    # Strip only the trailing extension. Cutting at the first "." would
    # truncate titles such as "Dr. Jekyll.epub" down to "Dr".
    stem, dot, _extension = title.rpartition(".")
    if not dot:
        # No period at all: rpartition puts the whole text in the last slot.
        stem = title

    segments = stem.split(" - ")
    title = segments[0] if len(segments) == 1 else segments[1]
    return author, title
def parse_ebook(ebook):
    """Split an ebook file name into a cleaned-up author and title.

    The name is divided at its first " - ": the left part is treated as the
    author and the right part as the title, and both are then handed to
    ``parse_author_title``.

    Args:
        ebook: File name of the form "<author> - <title>".

    Returns:
        The ``(author, title)`` tuple produced by ``parse_author_title``.

    Raises:
        IndexError: If the name contains no " - " separator.
    """
    pieces = ebook.split(" - ", 1)
    raw_author, raw_title = pieces[0], pieces[1]
    return parse_author_title(raw_author, raw_title)
def fetch_details(book_author, book_title):
    """Look up a book's average rating and page count on Goodreads.

    Args:
        book_author: Author name to search for.
        book_title: Title to search for.

    Returns:
        A ``(rating, pages)`` tuple of strings. ``rating`` is the first four
        characters following ``<average_rating>`` in the response; ``pages``
        is the CDATA content of ``<num_pages>``. Each value independently
        falls back to ``"Result not found"`` when its tag is missing.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    from urllib.parse import quote

    # Percent-encode the search terms so characters like "&", "#" or "?" in a
    # name cannot break the query string.
    fetch_url = API_URL.substitute(
        format="xml",
        author=quote(book_author, safe=""),
        title=quote(book_title, safe=""),
    )
    print(f"Fetching response from {fetch_url}\n")
    # A timeout keeps one unresponsive request from stalling the whole run.
    response = requests.get(fetch_url, timeout=30)
    body = response.text

    not_found = "Result not found"

    # The two fields are parsed separately so that a missing page count does
    # not throw away a rating that was found (and vice versa).
    try:
        book_rating = body.split("<average_rating>", 1)[1][0:4]
    except IndexError:
        book_rating = not_found

    try:
        book_pages = (
            body.split("<num_pages>", 1)[1]
            .split("</num_pages>", 1)[0]
            .split("CDATA[", 1)[1]
            .split("]]")[0]
        )
    except IndexError:
        book_pages = not_found

    return book_rating, book_pages
def create_json(book_author, book_title, book_rating, book_pages, filename, list):
    """Append one book's record to *list*.

    Args:
        book_author: Parsed author name.
        book_title: Parsed title.
        book_rating: Rating string for the book.
        book_pages: Page-count string for the book.
        filename: Name of the ebook file the record was derived from.
        list: The list that receives the new record (mutated in place).

    Returns:
        None. The record is added to *list* as a side effect.
    """
    from urllib.parse import quote

    # Render the lookup URL for this book from its own author and title.
    # Storing the Template object itself would not be JSON-serializable, and
    # the file name is a plain string with no ``substitute`` method.
    goodreads_link = API_URL.substitute(
        format="json",
        author=quote(book_author, safe=""),
        title=quote(book_title, safe=""),
    )
    list.append({
        "TITLE": book_title,
        "AUTHOR": book_author,
        "RATING": book_rating,
        "PAGES": book_pages,
        "GOODREADS_LINK": goodreads_link,
        "FILE": filename,
    })
def write_to_file_and_convert_excel(list=None):
    """Dump the collected records to JSON and convert that file to Excel.

    Args:
        list: Records to write. ``None`` (the default) is treated as an empty
            list.

    Side effects:
        Overwrites ``RATINGS_JSON_FILE`` with the JSON dump, then reads that
        file back with pandas and writes it to ``RATINGS_EXCEL_FILE``.
    """
    # None stands in for "no records" so no mutable object is shared as a default.
    records = [] if list is None else list

    # The context manager closes the file even if serialization fails.
    with open(RATINGS_JSON_FILE, "w", encoding="utf-8") as rating_file_obj:
        json.dump(records, rating_file_obj)

    pandas.read_json(RATINGS_JSON_FILE).to_excel(RATINGS_EXCEL_FILE)
if __name__ == "__main__":
    # Script entry point: parse every entry in the ebooks directory, look each
    # book up, collect the records, and write them out as JSON and Excel.
    ebooks_list = os.listdir(EBOOKS_DIRECTORY)
    total_entries = len(ebooks_list)
    parsed_ebooks_list = []

    for position, ebook_string in enumerate(ebooks_list, start=1):
        # Hidden entries (names starting with ".") are not ebooks.
        if ebook_string.startswith("."):
            continue

        try:
            print(f"============================================================================\nParsing {ebook_string}\n")
            author, title = parse_ebook(ebook_string)
            rating, pages = fetch_details(author, title)
            create_json(author, title, rating, pages, ebook_string, parsed_ebooks_list)
            # Progress is the position in the directory listing, so skipped or
            # failed entries still count as handled and the figure can reach 100.
            print(f"Parsed {ebook_string}. {(position / total_entries) * 100}% done!\n")
        except Exception as exception:
            # Best effort: report the failure, checkpoint what has been
            # collected so far, and carry on with the next entry.
            print(f"Caught exception - {exception}")
            write_to_file_and_convert_excel(parsed_ebooks_list)

    write_to_file_and_convert_excel(parsed_ebooks_list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment