Skip to content

Instantly share code, notes, and snippets.

@missingno15
Created August 11, 2018 16:35
Show Gist options
  • Save missingno15/761a6c78a25148d1e5cf46f6809f3e46 to your computer and use it in GitHub Desktop.
Save missingno15/761a6c78a25148d1e5cf46f6809f3e46 to your computer and use it in GitHub Desktop.
require "csv"
require "base64"
require "typhoeus" # HTTP toolkit that is backed by CuRL
require "pry" # debugger
require "json" # JSON parser that comes with standard Ruby library
require "oj" # JSON parser but uses C extensions
# Accumulator for keyword records; it is serialized to keywords.json at the
# end of the script.
keywords = []

# Intended shape of one keyword record (values are examples):
# {
#   "Chapter Number"    => 1,
#   "Chapter Title"     => "Basic Theory",
#   "Section Number"    => 1.1,
#   "Section Title"     => "Basic Theory",
#   "Topic"             => "Discrete mathematics",
#   "Importance Rating" => 3,          # scale of 1-3 where 3 is most important
#   "Keyword count"     => 1,          # presumably the size of "Keywords" - TODO confirm
#   "Keywords"          => [keyword]
# }
#
# Column names for the record above.
# NOTE(review): `headers` lists "Section Title" before "Section Number",
# unlike the sketch above, and is not referenced anywhere else in the visible
# script - confirm the intended column order before using it.
headers = [
  "Chapter Number",
  "Chapter Title",
  "Section Title",
  "Section Number",
  "Topic",
  "Importance Rating",
  "Keyword count",
  "Keywords"
]

# Google Cloud Vision annotate endpoint. Frozen so the constant cannot be
# mutated in place by accident.
GOOGLE_VISION = "https://vision.googleapis.com/v1/images:annotate".freeze

# API key sent as the `key` query parameter. It can be supplied through the
# GOOGLE_VISION_API_KEY environment variable so a real key never has to be
# committed; the original placeholder remains the fallback.
API_KEY = ENV.fetch(
  "GOOGLE_VISION_API_KEY",
  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
).freeze
# Collect the absolute paths of the section-keyword photos kept in the
# "images" directory next to this script.
#
# The directory is resolved once, relative to the script's own location, and
# used both for listing and for building the paths. Listing a bare "images"
# would resolve against the current working directory instead, which only
# agrees with the joined paths when the script is launched from its own folder.
image_dir = File.expand_path("images", File.dirname(__FILE__))

# Filter on the entry name (not the full path) so macOS Finder metadata files
# are skipped without a parent directory name affecting the match.
images = Dir.
  children(image_dir).
  reject { |entry| /DS_Store/.match?(entry) }.
  map { |entry| File.join(image_dir, entry) }
# Go through each image, run Google's OCR on it, and stop in a pry session so
# the recognized keywords can be inspected.
images.each do |image|
  # Read the image in binary mode so its bytes are never altered by text-mode
  # translation. strict_encode64 emits one unbroken Base64 string, whereas
  # encode64 inserts a newline every 60 encoded characters.
  encoded_image = Base64.strict_encode64(File.binread(image))

  # POST body for images:annotate. `features` is a list of Feature objects,
  # so the single TEXT_DETECTION feature is wrapped in an array.
  payload = {
    "requests" => [{
      "image" => { "content" => encoded_image },
      "features" => [{ "type" => "TEXT_DETECTION" }]
    }]
  }

  # Send the request and parse the JSON body into a Ruby Hash. The `fields`
  # query parameter restricts the response to responses/fullTextAnnotation/text.
  response = Typhoeus.post(
    "#{GOOGLE_VISION}?fields=responses%2FfullTextAnnotation%2Ftext&key=#{API_KEY}",
    body: JSON.dump(payload),
    headers: { "Content-Type" => "application/json" }
  ).body.yield_self { |body| Oj.load(body) }

  # Pull out the recognized text; nil when the response carries none.
  text = response.dig("responses", 0, "fullTextAnnotation", "text")

  if text
    # One candidate keyword per OCR line: drop everything except letters,
    # parentheses and whitespace, trim, then capitalize.
    words = text.split("\n").map { |keyword| keyword.gsub(/[^A-Za-z()\s]/, "").strip.capitalize }

    # NOTE(review): `words` is only reachable from this pry session; nothing
    # in the script appends to `keywords`, so keywords.json is written as []
    # unless records are added interactively here - confirm this is intended.
    binding.pry
  end

  # Second breakpoint, reached for every image, including ones with no text.
  binding.pry
end

# Persist whatever has been collected in `keywords` as pretty-printed JSON.
File.write("keywords.json", JSON.pretty_generate(keywords))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment