Skip to content

Instantly share code, notes, and snippets.

@diagprov
Last active December 27, 2021 21:31
Show Gist options
  • Save diagprov/27cde8ec94505afc136327da6c9c0372 to your computer and use it in GitHub Desktop.
Save diagprov/27cde8ec94505afc136327da6c9c0372 to your computer and use it in GitHub Desktop.
Extract all data from Goodreads
/**
 * Finds the element that holds a field's value inside one table row.
 *
 * @param {Element} row - A row of the books table.
 * @param {string} fieldClass - Class string of the field cell, e.g. "field title".
 * @returns {Element|null} The first "value" element inside the first matching
 *   cell, or null when either the cell or the value element is absent.
 */
function findFieldValue(row, fieldClass) {
  const cell = row.getElementsByClassName(fieldClass)[0];
  if (!cell) {
    return null;
  }
  return cell.getElementsByClassName("value")[0] || null;
}

/**
 * Returns the raw text of an element, or "" when there is no element.
 *
 * @param {Element|null|undefined} element
 * @returns {string}
 */
function textOf(element) {
  return element ? element.textContent : "";
}

/**
 * Collects title, author and ISBN-13 text from every data row of a table.
 *
 * Row 0 is skipped (treated as the header row). Title and author are read from
 * the first child element of the field's "value" element; the ISBN is read from
 * the "value" element itself. Text is returned untrimmed, exactly as found in
 * the page. A field that cannot be found yields "" instead of throwing.
 *
 * @param {HTMLTableElement} table - Table whose rows describe books.
 * @returns {Array<{Title: string, Author: string, ISBN13: string}>}
 * @throws {Error} When `table` is null or undefined.
 */
function extractBooks(table) {
  if (!table) {
    throw new Error('No element with id "books" found on this page.');
  }
  const books = [];
  for (let i = 1; i < table.rows.length; i++) {
    const row = table.rows[i];
    const titleValue = findFieldValue(row, "field title");
    const authorValue = findFieldValue(row, "field author");
    const isbnValue = findFieldValue(row, "field isbn13");
    books.push({
      // firstElementChild is the standard DOM replacement for the
      // Prototype.js-only childElements()[0].
      Title: textOf(titleValue && titleValue.firstElementChild),
      Author: textOf(authorValue && authorValue.firstElementChild),
      ISBN13: textOf(isbnValue),
    });
  }
  return books;
}

// Run only where a DOM exists, so the helpers above can also be loaded
// outside a browser without throwing.
if (typeof document !== "undefined") {
  // `var` on purpose: keeps `bl` reachable as a global from the console and
  // lets the snippet be pasted more than once without a redeclaration error.
  var bl = extractBooks(document.getElementById("books"));
  console.log(bl);
}
import csv
import json
import yaml
def isbn_filter(obj):
    """Return the record's "ISBN13" value with spaces and newlines removed.

    Only the space and "\\n" characters are dropped; every other character
    (including tabs) is kept as-is.
    """
    isbn = obj.get("ISBN13")
    for unwanted in (" ", "\n"):
        isbn = isbn.replace(unwanted, "")
    return isbn
def author_filter(obj):
    """Split the record's "Author" value ("Last, First") into its two parts.

    The text is split at the first comma only, so a name with no comma
    yields an empty first name and any further commas stay in the first-name
    part. Surrounding whitespace is trimmed from both parts; spaces inside a
    name (e.g. "Le Guin") are preserved.

    Returns:
        A ``(last, first)`` tuple of strings.
    """
    # partition() never raises on a missing or repeated separator, unlike
    # unpacking the result of split(",") into exactly two names.
    last, _, first = obj.get("Author").partition(",")
    return (last.strip(), first.strip())
def _split_exactly_once(text, sep):
    """Return ``(before, after)`` if ``sep`` occurs exactly once in ``text``, else None."""
    parts = text.split(sep)
    if len(parts) == 2:
        return (parts[0], parts[1])
    return None


def title_filter(obj):
    """Extract title, series name and series number from the record's "Title".

    The value is split on newlines. The second line is taken as the title and
    the third as an optional series annotation such as "(Series, #3)",
    "(Series #3)" or "(Series, #3; Other Series, #1)". A line that is absent
    is treated as empty.

    Returns:
        A ``(title, series, num)`` tuple of strings; ``series`` and ``num``
        are "" when no series annotation can be parsed.
    """
    lines = obj.get("Title").split("\n")[1:3]
    # Pad so that a value lacking the title or series line cannot raise
    # IndexError below.
    lines += [""] * (2 - len(lines))

    title = lines[0].strip(" ")
    series_line = lines[1].strip(" ")
    unwrapped = series_line.strip("()")

    # Try "Series, #N" first, then "Series #N".
    parsed = _split_exactly_once(unwrapped, ", #") or _split_exactly_once(unwrapped, " #")
    if parsed is None and ";" in series_line:
        # Several series listed: keep only the first one.
        first_series = series_line.split(";")[0].strip("(")
        parsed = _split_exactly_once(first_series, ", #")

    series, num = parsed or ("", "")
    return (title, series, num)
# Load the exported records: a JSON list of objects whose "Title", "Author"
# and "ISBN13" values are passed to the filter functions. Read-only access is
# enough here; the file is never written back.
with open("allbooks.json", "r", encoding="utf-8") as f:
    bookju = json.load(f)

# Normalise every record into flat, cleaned-up fields.
books = []
for o in bookju:
    ISBN13 = isbn_filter(o)
    AuthorLast, AuthorFirst = author_filter(o)
    Title, Series, Number = title_filter(o)
    books.append({
        "ISBN13": ISBN13,
        "AuthorLast": AuthorLast,
        "AuthorFirst": AuthorFirst,
        "Title": Title,
        "Series": Series,
        "Number": Number,
    })

# YAML output, written as UTF-8 bytes.
books_encoded = yaml.dump(books)
with open("allbooks.yml", "wb") as f:
    f.write(books_encoded.encode("utf-8"))

# Fixed column order (same as the record keys above) so that the header can
# be written even when there are no books at all.
keys = ["ISBN13", "AuthorLast", "AuthorFirst", "Title", "Series", "Number"]

# newline="" is what the csv module requires of the file object; without it
# extra blank rows appear on platforms that translate "\n" to "\r\n".
# UTF-8 is used to match the other two files.
with open("allbooks.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=keys, dialect="excel")
    w.writeheader()
    w.writerows(books)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment