diagprov/goodreads.js

## goodreads.js
/**
 * Goodreads shelf scraper: run in the browser console on a page that
 * contains the `#books` table. Collects one { Title, Author, ISBN13 }
 * record per table row into `bl` and logs the array.
 */

/**
 * Returns the first "value" element inside the first descendant of `row`
 * matching `fieldClass`, or undefined when either lookup finds nothing.
 *
 * @param {Element} row - table row to search.
 * @param {string} fieldClass - class list of the cell, e.g. "field title".
 * @returns {Element|undefined}
 */
function findFieldValue(row, fieldClass) {
  const field = row.getElementsByClassName(fieldClass)[0];
  return field?.getElementsByClassName("value")[0];
}

/**
 * Builds the list of book records from a table-like object.
 *
 * Title and Author are the text of the first child element of their value
 * cell; ISBN13 is the text of the value cell itself. Any piece that is
 * missing yields an empty string instead of aborting the whole export.
 *
 * @param {{rows: ArrayLike<Element>}} table - table holding the book rows.
 * @returns {Array<{Title: string, Author: string, ISBN13: string}>}
 */
function extractBooks(table) {
  const records = [];
  // Row 0 is skipped, exactly like the original loop (presumably the header).
  for (let i = 1; i < table.rows.length; i++) {
    const row = table.rows[i];
    // firstElementChild is standard DOM; the former childElements() only
    // exists when Prototype.js has patched Element.
    const titleLink = findFieldValue(row, "field title")?.firstElementChild;
    const authorLink = findFieldValue(row, "field author")?.firstElementChild;
    const isbnCell = findFieldValue(row, "field isbn13");
    records.push({
      Title: titleLink?.textContent ?? "",
      Author: authorLink?.textContent ?? "",
      ISBN13: isbnCell?.textContent ?? "",
    });
  }
  return records;
}

// `document` only exists in a browser; the guard lets the helpers above be
// loaded in other environments without throwing.
const bl =
  typeof document === "undefined"
    ? []
    : extractBooks(document.getElementById("books"));

console.log(bl);

## procjson.py

import csv
import json
import yaml

def isbn_filter(obj):
    """Return the record's "ISBN13" value with every space and newline removed."""
    # One translate pass deletes both characters, matching the chained replaces.
    drop_space_and_newline = str.maketrans("", "", " \n")
    return obj.get("ISBN13").translate(drop_space_and_newline)

def author_filter(obj):
    """Split the record's "Author" text ("Last, First") into (last, first).

    The text is split at the first comma only, so a name without a comma
    yields (name, "") and additional commas stay in the first-name part
    instead of raising ValueError. Surrounding whitespace is removed and
    inner whitespace runs collapse to one space, so multi-word names such
    as "Le Guin" keep their inner space. A missing or None "Author" yields
    ("", "").

    Args:
        obj: mapping with an "Author" string.

    Returns:
        Tuple (last, first) of cleaned strings.
    """
    raw = obj.get("Author") or ""
    last, _, first = raw.partition(",")
    # split()/join trims the ends and normalises inner whitespace without
    # gluing separate name parts together.
    return (" ".join(last.split()), " ".join(first.split()))

def _parse_series(text):
    """Split "Series, #N" or "Series #N" into (series, num); None if neither fits."""
    for separator in (", #", " #"):
        pieces = text.split(separator)
        # Exactly two pieces means the separator occurred once.
        if len(pieces) == 2:
            return pieces[0], pieces[1]
    return None


def title_filter(obj):
    """Extract (title, series, num) from the record's multi-line "Title" text.

    The text is split on newlines; the second line is the title and the
    third line, when present, is a parenthesised series note such as
    "(Dune, #1)" or "(Dune #1)". If the note lists several series separated
    by ";", only the first one is used. Text without any newline is treated
    as a bare title, and a missing series line no longer raises IndexError.

    Args:
        obj: mapping with a "Title" string.

    Returns:
        Tuple (title, series, num); series and num are "" when no series
        note could be parsed.
    """
    raw = obj.get("Title") or ""
    lines = raw.split("\n")
    if len(lines) < 2:
        # No newline at all: the whole text is the title.
        return (raw.strip(" "), "", "")

    title = lines[1].strip(" ")
    series_line = lines[2].strip(" ") if len(lines) > 2 else ""

    parsed = _parse_series(series_line.strip("()"))
    if parsed is None and ";" in series_line:
        # Several series listed: fall back to the first one only.
        parsed = _parse_series(series_line.split(";")[0].strip("("))

    series, num = parsed if parsed is not None else ("", "")
    return (title, series, num)


# Load the exported records. Plain "rb" suffices: the file is only read, and
# "rb+" needlessly required write permission on it.
with open("allbooks.json", "rb") as f:
    rawbooks = f.read()

bookju = json.loads(rawbooks.decode("utf-8"))

# Normalise every raw record into the flat schema shared by both outputs.
books = []

for o in bookju:
    ISBN13 = isbn_filter(o)
    AuthorLast, AuthorFirst = author_filter(o)
    Title, Series, Number = title_filter(o)

    books.append({
        "ISBN13": ISBN13,
        "AuthorLast": AuthorLast,
        "AuthorFirst": AuthorFirst,
        "Title": Title,
        "Series": Series,
        "Number": Number,
    })

books_encoded = yaml.dump(books)

with open("allbooks.yml", "wb") as f:
    f.write(books_encoded.encode("utf-8"))

# Fixed column order (identical to the record keys) so that an empty input
# still produces a header row instead of an IndexError on books[0].
keys = ["ISBN13", "AuthorLast", "AuthorFirst", "Title", "Series", "Number"]

# newline="" is required by the csv module to avoid blank rows on Windows;
# utf-8 keeps non-ASCII names from failing under a narrower locale encoding.
with open("allbooks.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=keys, dialect="excel")
    w.writeheader()
    w.writerows(books)
	/**
	 * Goodreads shelf scraper: run in the browser console on a page that
	 * contains the `#books` table. Collects one { Title, Author, ISBN13 }
	 * record per table row into `bl` and logs the array.
	 */

	/**
	 * Returns the first "value" element inside the first descendant of `row`
	 * matching `fieldClass`, or undefined when either lookup finds nothing.
	 *
	 * @param {Element} row - table row to search.
	 * @param {string} fieldClass - class list of the cell, e.g. "field title".
	 * @returns {Element|undefined}
	 */
	function findFieldValue(row, fieldClass) {
	  const field = row.getElementsByClassName(fieldClass)[0];
	  return field?.getElementsByClassName("value")[0];
	}

	/**
	 * Builds the list of book records from a table-like object.
	 *
	 * Title and Author are the text of the first child element of their value
	 * cell; ISBN13 is the text of the value cell itself. Any piece that is
	 * missing yields an empty string instead of aborting the whole export.
	 *
	 * @param {{rows: ArrayLike<Element>}} table - table holding the book rows.
	 * @returns {Array<{Title: string, Author: string, ISBN13: string}>}
	 */
	function extractBooks(table) {
	  const records = [];
	  // Row 0 is skipped, exactly like the original loop (presumably the header).
	  for (let i = 1; i < table.rows.length; i++) {
	    const row = table.rows[i];
	    // firstElementChild is standard DOM; the former childElements() only
	    // exists when Prototype.js has patched Element.
	    const titleLink = findFieldValue(row, "field title")?.firstElementChild;
	    const authorLink = findFieldValue(row, "field author")?.firstElementChild;
	    const isbnCell = findFieldValue(row, "field isbn13");
	    records.push({
	      Title: titleLink?.textContent ?? "",
	      Author: authorLink?.textContent ?? "",
	      ISBN13: isbnCell?.textContent ?? "",
	    });
	  }
	  return records;
	}

	// `document` only exists in a browser; the guard lets the helpers above be
	// loaded in other environments without throwing.
	const bl =
	  typeof document === "undefined"
	    ? []
	    : extractBooks(document.getElementById("books"));

	console.log(bl);

	import csv
	import json
	import yaml

	def isbn_filter(obj):
	    """Return the record's "ISBN13" value with every space and newline removed."""
	    return obj.get("ISBN13").replace(" ", "").replace("\n", "")

	def author_filter(obj):
	    """Split the record's "Author" text ("Last, First") into (last, first).

	    The text is split at the first comma only, so a name without a comma
	    yields (name, "") and additional commas stay in the first-name part
	    instead of raising ValueError. Surrounding whitespace is removed and
	    inner whitespace runs collapse to one space, so multi-word names such
	    as "Le Guin" keep their inner space. A missing or None "Author" yields
	    ("", "").

	    Args:
	        obj: mapping with an "Author" string.

	    Returns:
	        Tuple (last, first) of cleaned strings.
	    """
	    raw = obj.get("Author") or ""
	    last, _, first = raw.partition(",")
	    # split()/join trims the ends and normalises inner whitespace without
	    # gluing separate name parts together.
	    return (" ".join(last.split()), " ".join(first.split()))

	def _parse_series(text):
	    """Split "Series, #N" or "Series #N" into (series, num); None if neither fits."""
	    for separator in (", #", " #"):
	        pieces = text.split(separator)
	        # Exactly two pieces means the separator occurred once.
	        if len(pieces) == 2:
	            return pieces[0], pieces[1]
	    return None


	def title_filter(obj):
	    """Extract (title, series, num) from the record's multi-line "Title" text.

	    The text is split on newlines; the second line is the title and the
	    third line, when present, is a parenthesised series note such as
	    "(Dune, #1)" or "(Dune #1)". If the note lists several series separated
	    by ";", only the first one is used. Text without any newline is treated
	    as a bare title, and a missing series line no longer raises IndexError.

	    Args:
	        obj: mapping with a "Title" string.

	    Returns:
	        Tuple (title, series, num); series and num are "" when no series
	        note could be parsed.
	    """
	    raw = obj.get("Title") or ""
	    lines = raw.split("\n")
	    if len(lines) < 2:
	        # No newline at all: the whole text is the title.
	        return (raw.strip(" "), "", "")

	    title = lines[1].strip(" ")
	    series_line = lines[2].strip(" ") if len(lines) > 2 else ""

	    parsed = _parse_series(series_line.strip("()"))
	    if parsed is None and ";" in series_line:
	        # Several series listed: fall back to the first one only.
	        parsed = _parse_series(series_line.split(";")[0].strip("("))

	    series, num = parsed if parsed is not None else ("", "")
	    return (title, series, num)


	# Load the exported records. Plain "rb" suffices: the file is only read, and
	# "rb+" needlessly required write permission on it.
	with open("allbooks.json", "rb") as f:
	    rawbooks = f.read()

	bookju = json.loads(rawbooks.decode("utf-8"))

	# Normalise every raw record into the flat schema shared by both outputs.
	books = []

	for o in bookju:
	    ISBN13 = isbn_filter(o)
	    AuthorLast, AuthorFirst = author_filter(o)
	    Title, Series, Number = title_filter(o)

	    books.append({
	        "ISBN13": ISBN13,
	        "AuthorLast": AuthorLast,
	        "AuthorFirst": AuthorFirst,
	        "Title": Title,
	        "Series": Series,
	        "Number": Number,
	    })

	books_encoded = yaml.dump(books)

	with open("allbooks.yml", "wb") as f:
	    f.write(books_encoded.encode("utf-8"))

	# Fixed column order (identical to the record keys) so that an empty input
	# still produces a header row instead of an IndexError on books[0].
	keys = ["ISBN13", "AuthorLast", "AuthorFirst", "Title", "Series", "Number"]

	# newline="" is required by the csv module to avoid blank rows on Windows;
	# utf-8 keeps non-ASCII names from failing under a narrower locale encoding.
	with open("allbooks.csv", "w", newline="", encoding="utf-8") as f:
	    w = csv.DictWriter(f, fieldnames=keys, dialect="excel")
	    w.writeheader()
	    w.writerows(books)