Skip to content

Instantly share code, notes, and snippets.

@mattwynne
Created October 31, 2011 10:51
Show Gist options
  • Save mattwynne/1327269 to your computer and use it in GitHub Desktop.
Save mattwynne/1327269 to your computer and use it in GitHub Desktop.
require 'pdf-reader'
#doc = PDF::Reader.new(File.expand_path('~/Desktop/note-example.pdf'))
# Path of the PDF to scan: the first command-line argument when one is given,
# otherwise the original default file in the current directory.
pdf_path = File.expand_path(ARGV.fetch(0, './book-mw-read-thru.pdf'))
doc = PDF::Reader.new(pdf_path)
# Global object table of the document; `lookup` reads it to resolve references.
$objects = doc.objects
# Reports whether a resolved PDF object is a text annotation (a note).
#
# @param object [Object] a value obtained from a page's annotation list
# @return [Boolean] true only for a Hash whose :Type is :Annot and whose
#   :Subtype is :Text; false for everything else
def is_note?(object)
  # Non-dictionary values (nil, strings, numbers, ...) cannot be annotations;
  # answer false instead of raising when `[]` is called on them.
  return false unless object.is_a?(Hash)

  object[:Type] == :Annot && object[:Subtype] == :Text
end
# Collects every annotation object attached to a page.
#
# Reads the page's :Annots attribute (treating a missing entry as an empty
# list), resolves it through `lookup_all`, and flattens any nested arrays in
# the result.
#
# @param page [#attributes] a page exposing an attributes hash
# @return [Array] the resolved annotation objects in a single flat array
def annots_on_page(page)
  annot_refs = page.attributes[:Annots] || []
  resolved = lookup_all(annot_refs)
  resolved.flatten
end
# Resolves a collection of references, one `lookup` call per element.
#
# The argument is splatted into an array first, so nil yields an empty result
# and a bare non-array value is handled as a one-element list.
#
# @param refs [Array, Object, nil] references to resolve
# @return [Array] the result of `lookup` for each reference, in order
def lookup_all(refs)
  [*refs].map { |reference| lookup(reference) }
end
# Resolves a single reference through the global `$objects` table.
#
# When the resolved value is an Array, its elements are resolved in turn via
# `lookup_all`; any other value is returned unchanged.
#
# @param ref [Object] key understood by `$objects[]`
# @return [Object, Array] the resolved object, or an array of resolved objects
def lookup(ref)
  resolved = $objects[ref]
  resolved.is_a?(Array) ? lookup_all(resolved) : resolved
end
# Returns only the annotations on a page that `is_note?` accepts.
#
# @param page [Object] page passed through to `annots_on_page`
# @return [Array] the subset of the page's annotations that are notes
def notes_on_page(page)
  annots_on_page(page).select { |annot| is_note?(annot) }
end
# Print a report to stdout: for each page that has at least one note, a
# "# Page N" heading, a "> "-quoted excerpt of the page text, and one
# " * " bullet per note.
doc.pages.each do |page|
  notes = notes_on_page(page)
  next if notes.empty?

  puts "# Page #{page.number}"
  puts
  # Quote the leading slice of the page text (indices 0..200) for context.
  puts page.text[0..200].lines.map { |line| "> #{line}" }
  puts
  notes.each do |note|
    # Interpolate instead of concatenating: when :Contents is absent the value
    # is nil, and `" * " + nil` would raise TypeError and abort the report.
    puts " * #{note[:Contents]}"
  end
  puts
  puts
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment