Skip to content

Instantly share code, notes, and snippets.

@varundey
Created April 6, 2020 13:00
Show Gist options
  • Save varundey/022b2c1c4e6c248c1e60d260bb9d75ef to your computer and use it in GitHub Desktop.
Save varundey/022b2c1c4e6c248c1e60d260bb9d75ef to your computer and use it in GitHub Desktop.
Get reviews from Goodreads and export them as an Excel file
import json
import os
import pandas
import requests
from string import Template
# --- Script configuration ---------------------------------------------------

# Goodreads developer key; spliced into API_URL below when the module loads.
GOODREADS_API_KEY = ""

# Directory whose entries are listed and parsed as e-book file names.
EBOOKS_DIRECTORY = "/Users/varun.dey/Documents/Kindle"

# Intermediate JSON dump and the final spreadsheet generated from it.
RATINGS_JSON_FILE = "ratings.json"
RATINGS_EXCEL_FILE = "ratings.xlsx"

# Lookup endpoint. The key is baked in immediately through the f-string
# fragment, while $format, $author and $title stay as Template placeholders
# to be filled in with .substitute() for each request.
API_URL = Template(
    "https://www.goodreads.com/book/title.$format"
    "?author=$author"
    f"&key={GOODREADS_API_KEY}"
    "&title=$title"
)
def parse_author_title(author, title):
    """Normalise the author and title fragments of an e-book file name.

    Args:
        author: Author fragment; a "Last, First" form is flipped to
            "First Last" by reversing the ", "-separated pieces.
        title: Title fragment, possibly carrying a file extension and a
            leading "<something> - " prefix (e.g. "Series 01 - Title.epub").

    Returns:
        A ``(author, title)`` tuple of cleaned-up strings.
    """
    author = " ".join(reversed(author.split(", ")))

    # Strip only the trailing extension. Cutting at the first "." would
    # truncate titles that contain a period, such as "Mr. Mercedes.epub".
    stem = os.path.splitext(title)[0]

    # With a " - " separator present, the second piece is taken as the title;
    # otherwise the whole stem is the title.
    pieces = stem.split(" - ")
    title = pieces[0] if len(pieces) == 1 else pieces[1]
    return author, title
def parse_ebook(ebook):
    """Split an e-book file name into a cleaned ``(author, title)`` pair.

    The name is cut at the first " - ": the left part is treated as the
    author and the right part as the title, and both are then handed to
    ``parse_author_title`` for normalisation.

    Raises:
        IndexError: if the name contains no " - " separator.
    """
    pieces = ebook.split(" - ", 1)
    raw_author, raw_title = pieces[0], pieces[1]
    return parse_author_title(raw_author, raw_title)
def fetch_details(book_author, book_title):
    """Look up a book on Goodreads and scrape its rating and page count.

    Args:
        book_author: Author name used for the ``author`` query parameter.
        book_title: Book title used for the ``title`` query parameter.

    Returns:
        A ``(rating, pages)`` tuple of strings. Both elements are
        "Result not found" when either value cannot be located in the
        response body.
    """
    from urllib.parse import quote

    # Percent-encode the free-text values so that characters such as "&",
    # "#" or "?" inside a title cannot break or truncate the query string.
    fetch_url = API_URL.substitute(
        format="xml",
        author=quote(book_author, safe=""),
        title=quote(book_title, safe=""),
    )
    # Log before the blocking call so the URL is visible while waiting.
    print(f"Fetching response from {fetch_url}\n")
    response = requests.get(fetch_url)

    body = response.text
    try:
        # The rating is taken as the first four characters following the
        # opening tag (e.g. "4.12").
        book_rating = body.split("<average_rating>", 1)[1][0:4]
        # The page count is read from inside a CDATA section within the
        # <num_pages> element.
        book_pages = (
            body.split("<num_pages>", 1)[1]
            .split("</num_pages>", 1)[0]
            .split("CDATA[", 1)[1]
            .split("]]")[0]
        )
    except IndexError:
        # A missing marker makes one of the [1] lookups fail; catching only
        # IndexError keeps Ctrl-C and genuine bugs from being swallowed.
        book_rating, book_pages = "Result not found", "Result not found"
    return book_rating, book_pages
def create_json(book_author, book_title, book_rating, book_pages, filename, list):
    """Append one book record to the accumulating results list.

    Args:
        book_author: Parsed author name.
        book_title: Parsed book title.
        book_rating: Rating string obtained for the book.
        book_pages: Page-count string obtained for the book.
        filename: Name of the e-book file the record was derived from.
        list: Mutable list that receives the new record (modified in place).

    Returns:
        None, the result of ``list.append``.
    """
    # The link is rendered from the URL template and the file name is stored
    # as-is. Previously these two values were swapped: the Template object
    # itself was stored (not JSON-serialisable) and ``.substitute`` was called
    # on the plain file-name string, which raised AttributeError.
    goodreads_link = API_URL.substitute(
        format="json", author=book_author, title=book_title
    )
    return list.append({
        "TITLE": book_title,
        "AUTHOR": book_author,
        "RATING": book_rating,
        "PAGES": book_pages,
        "GOODREADS_LINK": goodreads_link,
        "FILE": filename,
    })
def write_to_file_and_convert_excel(list=None):
    """Dump the records to RATINGS_JSON_FILE and convert that file to Excel.

    Args:
        list: Records to write; when omitted, an empty list is written.

    The JSON file is written first and then read back with pandas, which
    writes the resulting frame to RATINGS_EXCEL_FILE.
    """
    # A None sentinel replaces the former mutable ``[]`` default.
    records = [] if list is None else list

    # The context manager closes the file even if json.dump raises.
    with open(RATINGS_JSON_FILE, "w") as rating_file_obj:
        json.dump(records, rating_file_obj)

    pandas.read_json(RATINGS_JSON_FILE).to_excel(RATINGS_EXCEL_FILE)
if __name__ == "__main__":
    # Script entry point: parse every visible file name in EBOOKS_DIRECTORY,
    # look each book up, and export the collected records.

    # Hidden entries (names starting with ".") are filtered out up front so
    # they no longer inflate the denominator of the progress percentage.
    ebooks_list = [
        entry for entry in os.listdir(EBOOKS_DIRECTORY) if not entry.startswith(".")
    ]
    parsed_ebooks_list = []

    for position, ebook_string in enumerate(ebooks_list, start=1):
        try:
            print(f"============================================================================\nParsing {ebook_string}\n")
            author, title = parse_ebook(ebook_string)
            rating, pages = fetch_details(author, title)
            create_json(author, title, rating, pages, ebook_string, parsed_ebooks_list)
            # Progress is based on how many files have been processed, so it
            # reaches 100% on the last file even if earlier ones failed.
            print(f"Parsed {ebook_string}. {(position / len(ebooks_list)) * 100}% done!\n")
        except Exception as exception:
            # Best effort: report the failure and carry on with the next file.
            print(f"Caught exception - {exception}")
            # NOTE(review): the original indentation was lost; this call is
            # assumed to be a save-on-failure checkpoint - confirm.
            write_to_file_and_convert_excel(parsed_ebooks_list)

    # Final export once every file has been attempted.
    write_to_file_and_convert_excel(parsed_ebooks_list)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment