Instantly share code, notes, and snippets.

@sirupsen /book.rb
Last active Jan 19, 2019

Embed
What would you like to do?
Script to import books from Instapaper to Airtable. Will not work out of the box.
# Airtable record for one row of the "Books" table, plus helpers that look the
# book up on Goodreads and copy the Goodreads metadata into the record.
class Book < Airrecord::Table
  # Airtable table linked from a book through its "Endorsements" column.
  class Endorser < Airrecord::Table
    self.base_key = ""
    self.table_name = "Endorser"
  end

  self.base_key = ""
  self.table_name = "Books"

  has_many :endorsements, class: 'Book::Endorser', column: 'Endorsements'

  # Goodreads shelf names that are dropped before categories are derived.
  GOODREADS_BLACKLIST = %w(
    to-read favorites currently-reading owned
    series favourites re-read owned-books
    books-i-own wish-list si audiobook
    book-club ebook kindle to-buy
  ).freeze

  # Capitalized Goodreads shelf name => canonical category name. The lookup is
  # applied exactly once per shelf, so every value must already be the final
  # name (i.e. an entry of CATEGORIES) for the shelf to survive.
  GOODREADS_MERGE = {
    "Non-fiction" => "Nonfiction",
    "Classic" => "Classics",
    "Cookbook" => "Cooking",
    "Cookbooks" => "Cooking",
    "Biography" => "Memoir",
    "Biographies" => "Memoir",
    "Autobiography" => "Memoir",
    "Auto-biography" => "Memoir",
    "Sci-fi" => "Science Fiction",
    "Scifi" => "Science Fiction",
    "Management" => "Leadership",
    "Self-help" => "Personal Development",
    "Selfhelp" => "Personal Development",
    "Personal-development" => "Personal Development",
    "Self-improvement" => "Personal Development",
    "Science-fiction" => "Science Fiction",
    # Was "Young-adult", which is not a category and is never merged a second
    # time, so "ya" shelves were silently discarded.
    "Ya" => "Young Adult",
    "Tech" => "Technology",
    "Young-adult" => "Young Adult",
    "Computer-science" => "Programming",
    "Investing" => "Economics",
    "Fitness" => "Health",
    "Food" => "Cooking",
    "Finance" => "Economics",
    "Software" => "Programming",
    "Literature" => "Classics",
  }.freeze

  # The only category names that are ever written to the "Categories" column.
  CATEGORIES = [
    "Business", "Psychology", "Science", "Personal Development", "Philosophy",
    "History", "Fiction", "Memoir", "Leadership", "Classics", "Economics",
    "Cooking", "Programming", "Health", "Politics", "Technology", "Science Fiction",
    "Entrepreneurship", "Design", "Writing", "Fantasy", "Young Adult", "Nonfiction",
  ].freeze

  # Memoized Goodreads API client shared by every Book instance.
  # Defined above `private` because `private` has no effect on `def self.`
  # methods; this method has always been publicly callable.
  def self.goodreads_client
    @client ||= Goodreads::Client.new(api_key: '', api_secret: '')
  end

  # Searches Goodreads by ISBN when the record has one, otherwise by the
  # quoted title, and returns the id of the best matching book.
  #
  # When the record has an author, the first search hit whose author name is
  # close to it (see #character_difference?) wins; otherwise, or when no hit
  # is close, the first hit is used.
  #
  # Returns nil when the search yields no works.
  def goodreads_id
    query = self["ISBN"] || "\"#{self[:title]}\""
    search = goodreads_client.search_books(query)
    return unless search.results.respond_to?(:work)

    # `work` is wrapped and flattened so one hit and many hits look the same.
    matches = [search.results.work].flatten
    author = self[:author]
    best_match = nil
    if author
      best_match = matches.find { |match|
        character_difference?(match["best_book"]["author"]["name"], author)
      }
    end
    best_match ||= matches.first
    return unless best_match

    best_match.best_book.id
  end

  # Returns the Goodreads book for this record, or nil when no id was found.
  # A successful lookup is memoized; a failed one is retried on the next call.
  def goodreads_book
    return @book if @book

    id = goodreads_id
    @book = goodreads_client.book(id) if id
  end

  # Derives category names from the book's popular Goodreads shelves.
  #
  # Blacklisted shelves are removed, the first `n` remaining shelf names are
  # capitalized and run through GOODREADS_MERGE, and only names listed in
  # CATEGORIES are kept (de-duplicated, in shelf order).
  #
  # Returns [] when the book cannot be found or exposes no usable shelves.
  def goodreads_categories(n = 5)
    book = goodreads_book
    return [] unless book # previously raised NoMethodError on nil

    popular = book.popular_shelves
    return [] if popular.blank?

    # Wrapped like `work`/`authors` elsewhere in this class, so a single shelf
    # is handled the same way as a list of shelves.
    shelves = [popular.shelf].flatten
    return [] unless shelves.first.respond_to?(:name)

    candidates = shelves.map(&:name).reject { |name| GOODREADS_BLACKLIST.include?(name) }
    candidates.first(n).map { |name|
      capitalized = name.capitalize
      category = GOODREADS_MERGE.fetch(capitalized, capitalized)
      category if CATEGORIES.include?(category)
    }.compact.uniq
  end

  # Overwrites this record's fields with Goodreads data, prints a colored diff
  # of the changes to stderr, and then creates/saves the record.
  #
  # prevent_duplicates_from - records whose "ISBN" is compared against this
  #                           record's ISBN; on a match nothing is written.
  #
  # The record is not written when the Goodreads author differs too much from
  # the previous one (a Rollbar warning is sent instead), nor when the book
  # cannot be found on Goodreads (a message is printed and nil is returned).
  def populate_from_goodreads(prevent_duplicates_from: [])
    book = goodreads_book
    unless book
      $stderr.puts "Unable to find book #{self["Title"]}"
      return
    end

    before = self.serializable_fields

    self["Title"] = book.title
    self["ISBN"] = book.isbn13 || self["ISBN"]
    # Pick the year first and stringify afterwards: `nil.to_s` is "" and ""
    # is truthy, so `a.to_s || b.to_s` never reached the fallback.
    self["Publication Year"] = (book.work.original_publication_year || book.publication_year).to_s
    self["Goodreads Rating"] = book.average_rating
    self["Pages"] = book.num_pages
    authors = [book.authors.author].flatten
    self["Author"] = authors.first.name
    self["Categories"] = goodreads_categories.sort
    self["Goodreads Ratings"] = book.work.ratings_count

    difference = HashDiff.diff(before, self.serializable_fields)
    flagged = false
    author_ok = true

    $stderr.puts "\x1b[35m#{before["Title"]}\x1b[0m"
    # "~" entries carry (old value, new value); for "+" entries the third
    # element is the value that was added.
    difference.each do |type, key, prev, new|
      changed = type == "~"

      if changed && key == "Author"
        unless authors.any? { |author| character_difference?(author.name, prev) }
          $stderr.puts "Author changed too much"
          flagged = true
          author_ok = false
        end
      end

      if changed && key == "Title"
        # A diverging title is tolerated as long as the author has not been
        # rejected earlier in this loop.
        unless new.downcase.start_with?(prev.downcase) || author_ok
          $stderr.puts "New title '#{new}' didn't start with old title '#{prev}'"
          flagged = true
        end
      end

      if changed
        $stderr.puts "\x1b[34m#{type} #{key}: \x1b[31m#{prev} => \x1b[32m#{new}\x1b[0m"
      elsif type == "+"
        $stderr.puts "\x1b[34m#{type} #{key}: \x1b[32m#{prev}\x1b[0m"
      end
    end

    if flagged
      Rollbar.warn("Skipping book", title: self[:title])
    # NOTE(review): two records that both lack an ISBN also compare equal
    # here, so ISBN-less books are treated as duplicates of each other.
    elsif prevent_duplicates_from.find { |other| other["ISBN"] == self["ISBN"] }
      $stderr.puts "Skipping #{self[:title]} due to duplicate"
    elsif self.new_record?
      self.create
    else
      self.save
    end
  end

  private

  # Instance-level shortcut to the shared class-level client.
  def goodreads_client
    self.class.goodreads_client
  end

  # Despite the name, this answers "are the two strings close enough?".
  #
  # Returns true when, in each direction, at most `n` characters of one string
  # belong to a character that does not occur anywhere in the other string
  # (Array#- removes every occurrence of a shared character).
  def character_difference?(a, b, n = 4)
    a_chars = a.chars
    b_chars = b.chars
    (a_chars - b_chars).size <= n && (b_chars - a_chars).size <= n
  end
end
# Imports books into Airtable from Instapaper bookmarks that point at Amazon
# or Goodreads pages, and from book titles noted in Readwise highlights.
class BookImport
  # Matches the host of an Amazon bookmark.
  AMAZON_HOST = /\A(www\.)?amazon\.(com|ca)/
  # Captures the ISBN/ASIN value (third group) from an Amazon product page.
  AMAZON_ISBN = /(ISBN|ASIN)(-13|-10)?:\s*<\/b>\s*(\w{10,13})/

  # Walks up to 500 Instapaper bookmarks. For every Amazon or Goodreads
  # bookmark the page is fetched, an ISBN is extracted, a Book is populated
  # from Goodreads and the bookmark is deleted.
  #
  # A page without a recognizable ISBN used to abort the whole import with a
  # NoMethodError on nil; it is now reported on stderr and skipped, and its
  # bookmark is left in Instapaper.
  #
  # Returns the non-nil results of #create_record_from_isbn.
  def instapaper
    InstapaperClient.bookmarks(limit: 500).to_enum(:each).map { |bookmark|
      uri = URI(bookmark.url)
      isbn =
        if uri.host =~ AMAZON_HOST
          isbn_from_amazon(fetch_body(uri))
        elsif bookmark.url =~ /goodreads\.com/
          isbn_from_goodreads(fetch_body(uri))
        else
          next # neither an Amazon nor a Goodreads bookmark
        end

      if isbn
        create_record_from_isbn(isbn, bookmark.bookmark_id)
      else
        $stderr.puts "Unable to find ISBN for #{bookmark.url}"
        nil
      end
    }.compact
  end

  # Imports the books referenced in Kindle highlights.
  def kindle
    books_from_highlights
  end

  private

  # TODO: Do like what we do with words, where it puts the source multiple times
  # TODO: Refactor to be consistent with Words?
  # It does work though :)
  #
  # Reads the sources from Readwise; every highlight whose note starts with
  # "book" (optionally preceded by a dot, case-insensitive) is treated as a
  # book title and populated from Goodreads, skipping ISBNs already present
  # among the books loaded before the loop starts.
  def books_from_highlights
    sources = JSON.parse(Readwise.get("/munger").body)["data"]
    existing_books = Book.all

    sources.each do |source|
      book_highlights = source["highlights"].select { |h| h["note"] =~ /\A\.?book/i }
      book_titles = book_highlights.map { |h| h["highlight"] }

      book_titles.each do |title|
        next if title == "Randomness)." # ugh can't get rid of it

        book = Book.new("Title" => title)
        book.populate_from_goodreads(prevent_duplicates_from: existing_books)
      end
    end
  end

  # Populates a new Book for `isbn` from Goodreads, then deletes the
  # Instapaper bookmark it came from. Returns the result of the delete call.
  def create_record_from_isbn(isbn, bookmark_id)
    Book.new("ISBN" => isbn).populate_from_goodreads
    InstapaperClient.delete_bookmark(bookmark_id)
  end

  # Downloads the page at `uri` (path only, no query string) and returns its body.
  def fetch_body(uri)
    client_for("#{uri.scheme}://#{uri.hostname}").get(uri.path).body
  end

  # Returns the ISBN/ASIN found in an Amazon product page, or nil.
  def isbn_from_amazon(html)
    found = html.match(AMAZON_ISBN)
    found && found[3]
  end

  # Returns the content of the "books:isbn" meta tag of a Goodreads page, or
  # nil when the tag is missing or empty.
  def isbn_from_goodreads(html)
    meta = Nokogiri::HTML(html).at('meta[property="books:isbn"]')
    value = meta && meta["content"]
    value unless value.nil? || value.empty?
  end

  # Returns a Faraday connection for `host`, building it on first use and
  # reusing it afterwards.
  def client_for(host)
    @clients ||= {}
    @clients[host] ||= Faraday.new(:url => host) do |b|
      b.request :retry, max: 10, interval: 1, interval_randomness: 2, backoff_factor: 2, exceptions: Semian::NetHTTP::DEFAULT_ERRORS
      b.use FaradayMiddleware::FollowRedirects
      b.adapter :net_http_persistent
      b.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
    end
  end
end
# Instapaper API client stored in a constant. All four credentials are blank
# in this file and have to be filled in before any request can succeed.
InstapaperClient = Instapaper::Client.new do |config|
  config.consumer_key = ""
  config.consumer_secret = ""
  config.oauth_token = ''
  config.oauth_token_secret = '' # check docs, need to email them for this
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment