Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jdhuntington/132411 to your computer and use it in GitHub Desktop.
Save jdhuntington/132411 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Add books from http://sivers.org/book to a couch db
# todo - parse author out of 'title' field
require 'rubygems'
require 'couchrest'
require 'nokogiri'
require 'open-uri'
require 'clip'
# Declare the command-line options through Clip: two optional settings that
# are given defaults here, plus a verbosity flag.
options = Clip do |parser|
  parser.optional('d', 'database', :desc => 'URI of database', :default => 'http://localhost:5984/books')
  parser.optional('s', 'source', :desc => 'Data source', :default => 'http://sivers.org/book')
  parser.flag('v', 'verbose', :desc => 'Make it chatty')
end
# Return the inner HTML of the first element matching +selector+.
#
# noko_source - any object responding to #css(selector) with a collection
#               whose elements respond to #inner_html.
# selector    - CSS selector string.
#
# Raises RuntimeError naming the selector when nothing matches, instead of
# failing later with a NoMethodError on nil.
def get_content(noko_source, selector)
  node = noko_source.css(selector).first
  raise "no element matching #{selector.inspect}" unless node

  node.inner_html
end
# One book entry scraped from the source page, convertible to a plain hash.
class Book
  # noko_source - the node for a single book entry; it is handed to
  #               get_content together with a CSS selector.
  # source      - stored unchanged and emitted under the "source" key.
  def initialize(noko_source, source)
    @title = get_content(noko_source, "h3 a")
    @source = source
    @isbn = retrieve_isbn(get_content(noko_source, "p.small"))
    @derek_sivers_rating = retrieve_rating(get_content(noko_source, "p.small"))
    @derek_sivers_summary = get_content(noko_source, ":nth-child(4)")
  end

  # sample p.small string: ISBN: 1416541993 READ: 2009-04-03 RATING: 10/10

  # Returns the word following "ISBN: "; raises with the inspected input
  # when the marker is absent.
  def retrieve_isbn(source)
    first_capture(source, /ISBN:\ (\w+)/)
  end

  # Returns the word following "RATING: " (for "10/10" that is "10", since
  # \w+ stops at the slash); raises with the inspected input when absent.
  def retrieve_rating(source)
    first_capture(source, /RATING:\ (\w+)/)
  end

  # Returns the collected fields as a Hash with string keys.
  def to_hash
    {
      "title" => @title,
      "source" => @source,
      "isbn" => @isbn,
      "derek_sivers_rating" => @derek_sivers_rating,
      "derek_sivers_summary" => @derek_sivers_summary
    }
  end

  private

  # Matches +source+ against +pattern+ and returns the first capture group,
  # raising a RuntimeError whose message is source.inspect on a miss.
  def first_capture(source, pattern)
    source =~ pattern
    captured = Regexp.last_match(1)
    raise source.inspect if captured.nil?

    captured
  end
end
# Fetch the source page, turn every book entry into a hash, and bulk-save
# the hashes to the database.
#
# URI.open is called explicitly because open-uri no longer patches
# Kernel#open to handle URLs on Ruby 3.0+. The block form closes the
# stream once Nokogiri has finished parsing it.
doc = URI.open(options.source) { |page| Nokogiri::HTML(page) }
db = CouchRest.database!(options.database)
books = doc.css('#content div.book').map do |book|
  Book.new(book, options.source).to_hash
end
db.bulk_save books
STDERR.puts "#{books.length} saved."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment