Skip to content

Instantly share code, notes, and snippets.

@sirupsen sirupsen/book.rb
Last active Jul 26, 2019

Embed
What would you like to do?
Script to import books from Instapaper to Airtable. Will not work out of the box.
class Book < Airrecord::Table
# Airrecord model for the "Endorser" table. Book's `endorsements` association
# refers to this class as 'Book::Endorser'.
class Endorser < Airrecord::Table
# Base key is an empty string in this copy and has to be filled in.
self.base_key = ""
self.table_name = "Endorser"
end
# Table configuration for Book records. The base key is an empty string in
# this copy and has to be filled in.
self.base_key = ""
self.table_name = "Books"
# Links each book to Book::Endorser records through the 'Endorsements' column.
has_many :endorsements, class: 'Book::Endorser', column: 'Endorsements'
# Goodreads shelf names that are never considered as categories. They are
# compared against the raw (lower-case) shelf name, before capitalisation.
GOODREADS_BLACKLIST = %w[
  to-read favorites currently-reading owned
  series favourites re-read owned-books
  books-i-own wish-list si audiobook
  book-club ebook kindle to-buy
].freeze

# Maps a capitalised Goodreads shelf name onto the canonical category label.
# The table is consulted once per shelf (no chained lookups), so every value
# must itself be an entry of CATEGORIES.
GOODREADS_MERGE = {
  "Non-fiction" => "Nonfiction",
  "Classic" => "Classics",
  "Cookbook" => "Cooking",
  "Cookbooks" => "Cooking",
  "Biography" => "Memoir",
  "Biographies" => "Memoir",
  "Autobiography" => "Memoir",
  "Auto-biography" => "Memoir",
  "Sci-fi" => "Science Fiction",
  "Scifi" => "Science Fiction",
  "Management" => "Leadership",
  "Self-help" => "Personal Development",
  "Selfhelp" => "Personal Development",
  "Personal-development" => "Personal Development",
  "Self-improvement" => "Personal Development",
  "Science-fiction" => "Science Fiction",
  # Previously pointed at "Young-adult", which is not a category, so "ya"
  # shelves were silently discarded.
  "Ya" => "Young Adult",
  "Tech" => "Technology",
  "Young-adult" => "Young Adult",
  "Computer-science" => "Programming",
  "Investing" => "Economics",
  "Fitness" => "Health",
  "Food" => "Cooking",
  "Finance" => "Economics",
  "Software" => "Programming",
  "Literature" => "Classics",
}.freeze

# The only labels that may end up in a record's "Categories" field.
CATEGORIES = [
  "Business", "Psychology", "Science", "Personal Development", "Philosophy",
  "History", "Fiction", "Memoir", "Leadership", "Classics", "Economics",
  "Cooking", "Programming", "Health", "Politics", "Technology", "Science Fiction",
  "Entrepreneurship", "Design", "Writing", "Fantasy", "Young Adult", "Nonfiction",
].freeze
# Searches Goodreads for this record and returns the id of the best match.
#
# The query is the "ISBN" field when present, otherwise the "Title" field
# wrapped in double quotes. When the record has an "Author", the first result
# whose author name passes character_difference? is preferred; otherwise the
# first result is used.
#
# Fields are read with their string column names, matching how the rest of
# this class reads and writes them (symbol keys depend on Airrecord's
# deprecated column mapping).
#
# Returns nil when the search results expose no `work` or the list is empty.
def goodreads_id
  query = self["ISBN"] || %("#{self["Title"]}")
  results = goodreads_client.search_books(query).results
  return unless results.respond_to?(:work)

  # `work` is wrapped and flattened so a single entry and a list are handled alike.
  matches = [results.work].flatten
  author = self["Author"]
  best_match = author && matches.find { |match|
    character_difference?(match["best_book"]["author"]["name"], author)
  }
  best_match ||= matches.first
  return unless best_match

  best_match.best_book.id
end
# Returns the Goodreads book for this record, memoised in @book.
#
# Returns nil, and caches nothing, when goodreads_id finds no match, so a
# later call performs the lookup again.
def goodreads_book
  return @book if @book

  book_id = goodreads_id
  return unless book_id

  @book = goodreads_client.book(book_id)
end
# Derives category labels from the book's popular Goodreads shelves.
#
# Blacklisted shelf names are dropped first. Only the first `n` remaining
# shelves are then examined: each name is capitalised, passed through
# GOODREADS_MERGE and kept if it is listed in CATEGORIES. The result has no
# duplicates and keeps shelf order.
#
# Returns [] when there are no popular shelves or when the first shelf entry
# does not respond to `name`. Calls goodreads_book without a nil check, so the
# caller is expected to have confirmed that a book was found.
def goodreads_categories(n = 5)
  popular = goodreads_book.popular_shelves
  return [] if popular.blank?

  shelves = popular.shelf
  return [] unless shelves.first.respond_to?(:name)

  shelf_names = shelves.map(&:name).reject { |shelf_name| GOODREADS_BLACKLIST.include?(shelf_name) }
  shelf_names.first(n).each_with_object([]) do |shelf_name, categories|
    label = shelf_name.capitalize
    label = GOODREADS_MERGE.fetch(label, label)
    categories << label if CATEGORIES.include?(label) && !categories.include?(label)
  end
end
# Fills this record's fields from Goodreads, prints a coloured diff of the
# changes to stderr, and persists the record unless the match looks wrong.
#
# prevent_duplicates_from - records to compare against; if any has the same
#                           "ISBN" as this record after population, nothing
#                           is persisted.
#
# The record is not persisted when:
# * no Goodreads book is found (message on stderr, returns nil),
# * an existing "Author" value changed and no Goodreads author passes
#   character_difference? against the previous value (reported via Rollbar), or
# * the ISBN duplicates one in `prevent_duplicates_from`.
# Otherwise new records are created and existing records are saved.
def populate_from_goodreads(prevent_duplicates_from: [])
  book = goodreads_book
  unless book
    $stderr.puts "Unable to find book #{self["Title"]}"
    return
  end

  before = serializable_fields
  # `author` is wrapped and flattened so a single author and a list are handled alike.
  authors = [book.authors.author].flatten

  # `to_s` always returns a (truthy) String, so `a.to_s || b.to_s` can never
  # reach its fallback; test for emptiness instead.
  year = book.work.original_publication_year.to_s
  year = book.publication_year.to_s if year.empty?

  self["Title"] = book.title
  self["ISBN"] = book.isbn13 || self["ISBN"]
  self["Publication Year"] = year
  self["Goodreads Rating"] = book.average_rating
  self["Pages"] = book.num_pages
  self["Author"] = authors.first.name
  self["Categories"] = goodreads_categories.sort
  self["Goodreads Ratings"] = book.work.ratings_count

  flagged = false
  author_ok = true
  $stderr.puts "\x1b[35m#{before["Title"]}\x1b[0m"

  HashDiff.diff(before, serializable_fields).each do |(type, key, prev, current)|
    if key == "Author" && type == "~"
      unless authors.any? { |author| character_difference?(author.name, prev) }
        $stderr.puts "Author changed too much"
        flagged = true
        author_ok = false
      end
    end

    if key == "Title" && type == "~"
      # Only distrust a changed title when the author check has already failed.
      unless current.downcase.start_with?(prev.downcase) || author_ok
        $stderr.puts "New title '#{current}' didn't start with old title '#{prev}'"
        flagged = true
      end
    end

    if type == "~"
      $stderr.puts "\x1b[34m#{type} #{key}: \x1b[31m#{prev} => \x1b[32m#{current}\x1b[0m"
    elsif type == "+"
      # For additions the third tuple slot is printed as the added value.
      $stderr.puts "\x1b[34m#{type} #{key}: \x1b[32m#{prev}\x1b[0m"
    end
  end

  if flagged
    Rollbar.warn("Skipping book", title: self["Title"])
  # NOTE(review): two records that both lack an ISBN compare equal here (nil == nil).
  elsif prevent_duplicates_from.any? { |other| other["ISBN"] == self["ISBN"] }
    $stderr.puts "Skipping #{self["Title"]} due to duplicate"
  elsif new_record?
    create
  else
    save
  end
end
private
# Instance-level shortcut to the Goodreads client held by the record's class.
def goodreads_client
  owner = self.class
  owner.goodreads_client
end
# Builds the Goodreads API client on first use and reuses it afterwards.
# Both credentials are empty strings in this copy.
def self.goodreads_client
  @client ||= Goodreads::Client.new(api_key: '', api_secret: '')
end
# Returns true when `a` and `b` are "close": at most `n` characters of `a` are
# absent from `b`, and at most `n` characters of `b` are absent from `a`.
#
# Array difference removes every occurrence of a shared character, so only
# characters missing entirely from the other string are counted (repeats of a
# missing character each count).
def character_difference?(a, b, n = 4)
  left = a.split('')
  right = b.split('')
  [left - right, right - left].all? { |missing| missing.size <= n }
end
end
# Imports books into the Book table from two sources: Instapaper bookmarks
# that point at Amazon or Goodreads pages, and highlights fetched through
# Readwise.
class BookImport
  # Hosts treated as Amazon product pages.
  AMAZON_HOST = /\A(www\.)?amazon\.(com|ca)/
  # Finds the ISBN/ASIN on an Amazon page; capture group 3 is the identifier.
  AMAZON_ISBN = /(ISBN|ASIN)(-13|-10)?:\s*<\/b>\s*(\w{10,13})/
  # URLs treated as Goodreads book pages.
  GOODREADS_URL = /goodreads\.com/
  # Highlight notes that mark a highlight as a book title ("book" / ".book").
  BOOK_NOTE = /\A\.?book/i

  # Scans up to 500 Instapaper bookmarks. For every Amazon or Goodreads
  # bookmark whose page yields an ISBN, a Book is populated from Goodreads and
  # the bookmark is deleted.
  #
  # A bookmark whose page has no recognisable ISBN is reported on stderr and
  # skipped (left in Instapaper) instead of raising and aborting the run.
  #
  # Returns the non-nil results of create_record_from_isbn.
  def instapaper
    InstapaperClient.bookmarks(limit: 500).to_enum(:each).map { |bookmark|
      isbn = isbn_for(bookmark)
      create_record_from_isbn(isbn, bookmark.bookmark_id) if isbn
    }.compact
  end

  # Imports the books flagged in highlights.
  def kindle
    books_from_highlights
  end

  private

  # TODO: Do like what we do with words, where it puts the source multiple times
  # TODO: Refactor to be consistent with Words?
  # It does work though :)
  #
  # Every highlight whose note matches BOOK_NOTE is treated as a book title and
  # populated from Goodreads, skipping ISBNs already present in Book.all.
  def books_from_highlights
    sources = JSON.parse(Readwise.get("/munger").body)["data"]
    existing_books = Book.all
    sources.each do |source|
      source["highlights"].each do |highlight|
        next unless highlight["note"] =~ BOOK_NOTE

        title = highlight["highlight"]
        next if title == "Randomness)." # ugh can't get rid of it

        Book.new("Title" => title).populate_from_goodreads(prevent_duplicates_from: existing_books)
      end
    end
  end

  # Returns the ISBN found on the page behind `bookmark`, or nil when the
  # bookmark is not an Amazon/Goodreads URL or the page exposes no ISBN (the
  # latter is reported on stderr).
  def isbn_for(bookmark)
    uri = URI(bookmark.url)
    if uri.host =~ AMAZON_HOST
      isbn = page_body(uri)[AMAZON_ISBN, 3]
    elsif bookmark.url =~ GOODREADS_URL
      meta = Nokogiri::HTML(page_body(uri)).at('meta[property="books:isbn"]')
      isbn = meta && meta["content"]
    else
      return # not a book page; nothing to report
    end

    $stderr.puts "Unable to find ISBN for #{bookmark.url}" unless isbn
    isbn
  end

  # Fetches the body for `uri` through the per-host client. Only the path is
  # requested; the query string is not sent.
  def page_body(uri)
    client_for("#{uri.scheme}://#{uri.hostname}").get(uri.path).body
  end

  # Populates a Book from Goodreads by ISBN, then deletes the originating
  # bookmark. The deletion happens whether or not the book was persisted.
  def create_record_from_isbn(isbn, bookmark_id)
    Book.new("ISBN" => isbn).populate_from_goodreads
    InstapaperClient.delete_bookmark(bookmark_id)
  end

  # Returns the Faraday connection for `host`, building it on first use.
  def client_for(host)
    @clients ||= {}
    @clients[host] ||= build_client(host)
  end

  # Builds a connection that retries on Semian's default Net::HTTP errors,
  # follows redirects, and sends a desktop-browser User-Agent.
  def build_client(host)
    Faraday.new(:url => host) do |b|
      b.request :retry, max: 10, interval: 1, interval_randomness: 2, backoff_factor: 2, exceptions: Semian::NetHTTP::DEFAULT_ERRORS
      b.use FaradayMiddleware::FollowRedirects
      b.adapter :net_http_persistent
      b.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
    end
  end
end
# Shared Instapaper API client used by BookImport to list and delete
# bookmarks. All four credentials are empty strings in this copy and have to
# be filled in before the script can run.
InstapaperClient = Instapaper::Client.new do |client|
client.consumer_key = ""
client.consumer_secret = ""
client.oauth_token = ''
client.oauth_token_secret = '' # check docs, need to email them for this
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.