Skip to content

Instantly share code, notes, and snippets.

@l-wagner
Last active October 24, 2020 19:03
Show Gist options
  • Save l-wagner/4fe3b5f8a9bbb459c1258d06bdd56628 to your computer and use it in GitHub Desktop.
Save l-wagner/4fe3b5f8a9bbb459c1258d06bdd56628 to your computer and use it in GitHub Desktop.
Pulls the books on your Goodreads "read" shelf and translates them from XML to JSON. If you have read more than 200 books, it sends the requests in batches, with a 2-second sleep between each GET.
import json
import time
import math
import sys
import xmltodict
import requests
from datetime import date
# Load the Goodreads credentials from config.json (path is relative to the
# current working directory). The file must hold a JSON object with the keys
# "GOODREADS_API_KEY" and "GOODREADS_USER_ID"; a missing key raises KeyError.
# The encoding is pinned to UTF-8 so parsing does not depend on the locale.
with open("config.json", encoding="utf-8") as f:
    config = json.load(f)
API_KEY = config["GOODREADS_API_KEY"]
USER = config["GOODREADS_USER_ID"]
class book:
    """Plain record holding the fields kept for one reviewed book.

    Every constructor argument is stored unchanged on the instance under the
    same name. ``rating`` defaults to an empty string when not supplied.
    """

    def __init__(
        self, title, author, authorLink, year, imageUrl, link, ratingsAvg, ratingsNumber, rating=""
    ):
        # Assignment order is kept stable: it determines the key order of the
        # instance ``__dict__``.
        self.title, self.author, self.authorLink = title, author, authorLink
        self.year, self.imageUrl, self.link = year, imageUrl, link
        self.ratingsAvg, self.ratingsNumber = ratingsAvg, ratingsNumber
        self.rating = rating

    @property
    def serialize(self):
        """Return the book's fields as a plain dict that is easy to serialize."""
        field_names = (
            "title",
            "author",
            "authorLink",
            "year",
            "imageUrl",
            "link",
            "ratingsAvg",
            "ratingsNumber",
            "rating",
        )
        return {name: getattr(self, name) for name in field_names}
# Number of reviews requested per page (the ``per_page`` query parameter).
PER_PAGE = 200
# Seconds to wait on a request before giving up instead of hanging forever.
REQUEST_TIMEOUT = 60


def _fetch_shelf_page(page):
    """Request one page of the user's "read" shelf and return the HTTP response."""
    return requests.get(
        f"https://www.goodreads.com/review/list/{USER}.xml",
        params={
            "key": API_KEY,
            "v": 2,
            "shelf": "read",
            "per_page": PER_PAGE,
            "page": page,
        },
        timeout=REQUEST_TIMEOUT,
    )


def _as_list(value):
    """Return ``value`` as a list, wrapping a lone item.

    xmltodict only produces a list when an element is repeated; a single
    child comes back as a bare dict, which must not be iterated directly.
    """
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


# The first page doubles as the credentials check and tells us the total.
r = _fetch_shelf_page(1)
if "Page not found" in r.text:
    print(f"Goodreads returned an error. Are you sure {USER} is a valid user id.")
    sys.exit(1)
if "Invalid API key" in r.text:
    print("It looks like your API Key is invalid.")
    sys.exit(1)

result = xmltodict.parse(r.text)
totalRead = int(result["GoodreadsResponse"]["reviews"]["@total"])
judgment = "Awesome job." if totalRead > 200 else "Good job."
print(f"Found {totalRead} read books. {judgment}")

pageCount = math.ceil(totalRead / PER_PAGE)
inflectedPage = "page" if pageCount == 1 else "pages"
print(f"Retrieving {pageCount} {inflectedPage} from Goodreads...")

# Page 1 is already in hand, so only the remaining pages are requested.
results = [result]
for i in range(2, pageCount + 1):
    # goodreads allows only 1 request per second; pause before each extra request
    time.sleep(2)
    r = _fetch_shelf_page(i)
    results.append(xmltodict.parse(r.text))

books = []
for count, page in enumerate(results, start=1):
    print(count)
    reviews = page["GoodreadsResponse"]["reviews"]
    if "review" not in reviews:
        continue
    for review in _as_list(reviews["review"]):
        info = review["book"]
        # If several <author> elements are present xmltodict returns a list;
        # the first entry is used in that case.
        author = _as_list(info["authors"]["author"])[0]
        books.append(
            book(
                info["title_without_series"],
                author["name"],
                author["link"],
                info["publication_year"],
                info["image_url"],
                info["link"],
                info["average_rating"],
                info["ratings_count"],
                review.get("rating", ""),
            )
        )

# Write all collected books to a dated JSON file in the working directory.
today = date.today()
filename = f"./books-{today}.json"
with open(filename, "w", encoding="utf-8") as outfile:
    json.dump([ob.serialize for ob in books], outfile)
print(f"Done! {len(books)} books added to {filename}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment