-
-
Save csandman/dba05dc48f29592d0db535282c00a2af to your computer and use it in GitHub Desktop.
An example of searching/parsing the Goodreads API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable prefer-destructuring */ | |
import { parseString } from "xml2js"; | |
/**
 * Promise wrapper around the callback-based `parseString` from `xml2js`.
 *
 * The result is typed as `any` because its shape depends entirely on the XML
 * document that was parsed; callers are responsible for navigating it safely.
 *
 * @param str - The raw XML text to parse
 * @returns A promise that resolves with the parsed document, or rejects with
 * the error reported by `parseString`
 */
const parseXmlString = (str: string) =>
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  new Promise<any>((resolve, reject) => {
    parseString(str, (err, result) => {
      if (err) {
        reject(err);
        // Stop here so a failed parse never goes on to call `resolve` as well
        return;
      }
      resolve(result);
    });
  });
/**
 * Collapse every run of two or more consecutive whitespace characters into a
 * single space, then strip whitespace from both ends of the result.
 *
 * A lone whitespace character (for example a single tab between two words) is
 * left as it is; only runs of two or more are replaced.
 *
 * @param str - The string to tidy up
 * @returns The string with whitespace runs collapsed and both ends trimmed
 */
export const removeExtraSpaces = (str: string): string => {
  const collapsed = str.replace(/\s{2,}/g, " ");
  return collapsed.trim();
};
/**
 * Strip trailing series and "unabridged" markers from a book title.
 *
 * Two kinds of suffix are removed, case-insensitively:
 *
 * - a series part such as ", Book 1" or ", Book One" (", book " followed by
 *   word characters, whitespace or hyphens up to the end of the title)
 * - the word "unabridged", with or without surrounding parentheses
 *
 * The suffixes are stripped repeatedly until the title stops changing, so they
 * are removed whatever order they appear in, e.g. "Dune, Book 1 (Unabridged)"
 * becomes "Dune". Finally, runs of whitespace are collapsed.
 *
 * @param title - The raw title
 * @returns The cleaned title; a falsy input (such as an empty string) is
 * returned unchanged
 */
export const cleanTitle = (title: string): string => {
  if (!title) {
    return title;
  }

  // Works for "Book 1" and "Book One"
  const SERIES_SUFFIX = /, book [\w\s-]+$/i;
  const UNABRIDGED_SUFFIX = /\(?unabridged\)?$/i;

  let newTitle = title.trim();
  let previous: string;

  // A single pass misses a series part that only reaches the end of the title
  // once a trailing "(Unabridged)" has been removed, so repeat until stable.
  // Each pass that changes the title makes it shorter, so this terminates.
  do {
    previous = newTitle;
    newTitle = newTitle.replace(SERIES_SUFFIX, "").trim();
    newTitle = newTitle.replace(UNABRIDGED_SUFFIX, "").trim();
  } while (newTitle !== previous);

  // If there are 2 or more spaces in a row, replace them with a single space
  return removeExtraSpaces(newTitle);
};
/** Endpoint of the Goodreads book search API; query parameters are appended to it. */
const BASE_SEARCH_URL = "https://www.goodreads.com/search/index.xml";
/**
 * An author attached to a {@link GoodreadsBook} search result.
 */
export interface GoodreadsAuthor {
  /** Numeric author ID taken from the search response */
  id: number;

  /** The author's name, with repeated whitespace collapsed */
  name: string;
}
/** Calendar date on which a work was first published. */
interface GoodreadsPublishDate {
  year: number;
  /** Falls back to `1` when the search response has no usable month */
  month: number;
  /** Falls back to `1` when the search response has no usable day */
  day: number;
}

/** Aggregate rating figures for a work. */
interface GoodreadsRating {
  /** Average rating of the work */
  average: number;
  /** Number of ratings the work has received */
  totalRatings: number;
  /** Number of text reviews the work has received */
  totalReviews: number;
}

/**
 * A single, normalized entry from a Goodreads search response.
 */
export interface GoodreadsBook {
  /** ID of the work entry in the search results */
  id: number;

  /** ID of the work's `best_book` edition */
  editionId: number;

  /** Value of the `type` attribute on the `best_book` element */
  type: "Book";

  /** Cleaned-up title, without any trailing parenthetical */
  title: string;

  /**
   * Contents of the parenthetical at the end of the raw title, or `null` when
   * the raw title does not end with one
   */
  subtitle: string | null;

  /** The title exactly as it appeared in the search response */
  originalTitle: string;

  /** Authors of the book; the search parser only fills in the first one listed */
  authors: GoodreadsAuthor[];

  /** Cover image URL */
  coverUrl: string;

  /** Number of editions reported for the work */
  totalEditions: number;

  /** Original publication date, or `null` when the response has no year */
  originalPublishDate: GoodreadsPublishDate | null;

  /** Rating statistics for the work */
  rating: GoodreadsRating;
}
/** The fields a Goodreads search can be restricted to. */
export type GoodreadsSearchField =
  | "title"
  | "author"
  | "all";
/**
 * Options for the Goodreads "find books by title, author, or ISBN" endpoint.
 *
 * The endpoint answers with an XML document listing the most popular books
 * matching the query. Matching is done against the title, author and ISBN
 * fields, and results are ordered by popularity on Goodreads. Some results
 * visible on the Goodreads site are missing from the API: Amazon-only editions
 * are withheld to honor Amazon's terms of service.
 *
 * - URL: https://www.goodreads.com/search/index.xml
 * - HTTP method: GET
 *
 * @see {@link https://www.goodreads.com/api/index#search.books}
 */
export interface GoodreadsSearchOptions {
  /**
   * Text to look for in the book title, author, and ISBN fields.
   * Boolean operators and phrase searches are supported.
   */
  q: string;

  /**
   * The page of results to fetch
   *
   * @defaultValue `1`
   */
  page?: number;

  /**
   * Restricts the search to one field: `title`, `author`, or `all`.
   *
   * Sent to the API as the `search[field]` query param.
   *
   * @defaultValue `all`
   */
  searchField?: GoodreadsSearchField;
}
// TODO: Find an Audiobook version of the match
/**
 * Search Goodreads for books and return the matches in a normalized shape.
 *
 * The request is a GET against the Goodreads search endpoint, authenticated
 * with the key in the `GOODREADS_API_KEY` environment variable. The XML
 * response is parsed and each `work` entry is mapped to a {@link GoodreadsBook}:
 *
 * - a parenthetical at the end of the raw title is split off into `subtitle`
 * - the remaining title is passed through `cleanTitle`
 * - only the first author listed on the `best_book` element is included
 *
 * @param searchOptions - The query text plus optional page and search field
 * @returns The parsed results; an empty array when the response lists no works
 * @throws Error if `GOODREADS_API_KEY` is not set, if the HTTP response status
 * is not OK, or if the response has no `GoodreadsResponse.search` element
 */
export const searchGoodreadsApi = async (
  searchOptions: GoodreadsSearchOptions
): Promise<GoodreadsBook[]> => {
  const apiKey = process.env.GOODREADS_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Cannot access the Goodreads API without the environment variable `GOODREADS_API_KEY`"
    );
  }

  const params = new URLSearchParams({
    key: apiKey,
    q: searchOptions.q,
    page: String(searchOptions.page ?? 1),
    "search[field]": searchOptions.searchField ?? "all",
  });
  const searchUrl = `${BASE_SEARCH_URL}?${params.toString()}`;

  const searchRes = await fetch(searchUrl);
  if (!searchRes.ok) {
    // Fail with the HTTP status instead of trying to read an error page as
    // if it were a list of search results
    throw new Error(
      `Goodreads search request failed: ${searchRes.status} ${searchRes.statusText}`
    );
  }

  const resXml = await searchRes.text();
  const resData = await parseXmlString(resXml);

  const search = resData?.GoodreadsResponse?.search?.[0];
  if (!search) {
    throw new Error(
      "Unexpected Goodreads response: missing `GoodreadsResponse.search` element"
    );
  }

  // A query without matches has no `work` entries at all, so the key is
  // absent from the parsed document; treat that as an empty result list.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const works: any[] = search.results?.[0]?.work ?? [];

  // Matches from the first "(" that lets the match run to a ")" at the very
  // end of the title; group 1 is the text between those two parentheses.
  const TRAILING_PARENTHETICAL = /\((.*)\)$/;

  return works.map((result): GoodreadsBook => {
    const book = result.best_book[0];

    // Date parts without a value have no text content (`_` is undefined)
    const originalYear = result.original_publication_year[0]._;
    const originalMonth = result.original_publication_month[0]._;
    const originalDay = result.original_publication_day[0]._;
    const originalPublishDate = originalYear
      ? {
          year: Number(originalYear),
          // A missing, empty or zero month/day falls back to 1
          month: Number(originalMonth) || 1,
          day: Number(originalDay) || 1,
        }
      : null;

    const originalTitle: string = book.title[0];
    let title = originalTitle.trim();
    let subtitle: string | null = null;

    const subtitleMatch = title.match(TRAILING_PARENTHETICAL);
    if (subtitleMatch) {
      title = title.replace(TRAILING_PARENTHETICAL, "").trim();
      subtitle = (subtitleMatch[1] ?? "").trim();
    }

    return {
      id: Number(result.id[0]._),
      editionId: Number(book.id[0]._),
      type: book.$.type,
      title: cleanTitle(title),
      subtitle,
      originalTitle,
      authors: [
        {
          id: Number(book.author[0].id[0]._),
          name: removeExtraSpaces(book.author[0].name[0]),
        },
      ],
      // Drop the first `._<UPPERCASE LETTERS/DIGITS>_` segment from the image
      // URL (presumably a size modifier, leaving the full-size cover — verify)
      coverUrl: book.image_url[0].replace(/\._[A-Z0-9]+_/, ""),
      totalEditions: Number(result.books_count[0]._),
      originalPublishDate,
      rating: {
        average: Number(result.average_rating[0]),
        totalRatings: Number(result.ratings_count[0]._),
        totalReviews: Number(result.text_reviews_count[0]._),
      },
    };
  });
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment