Skip to content

Instantly share code, notes, and snippets.

@stungeye
Last active December 6, 2018 10:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stungeye/9974f404f7e530d4e6afd27218a0a41f to your computer and use it in GitHub Desktop.
Save stungeye/9974f404f7e530d4e6afd27218a0a41f to your computer and use it in GitHub Desktop.
Finding Reading Animals with Machine Learning (Clarifai API)
# Step 1 - Collect reference images of reading cats
#
# This script extracts the URLs of all the cats on meow-reader-blog.tumblr.com
#
# INPUT: Tumblr API
# OUTPUT: JSON file of image URLS (meow-reader-images.json)
require 'net/http'
require 'json'
# Walk the blog's photo posts page by page (offset advanced by 20 each time)
# and collect one image URL per post.
# NOTE(review): the Tumblr API key is embedded in the request URL below —
# consider loading it from the environment rather than keeping it in source.
total_posts = 218
images = []

0.step(total_posts, 20) do |offset|
  url = "https://api.tumblr.com/v2/blog/meow-reader-blog.tumblr.com/posts/photo?offset=#{offset}&api_key=q7faGrbfcKJBFxikysPD4z9DX7gH6SuZchoSvZKdxXXHLbihFq"
  puts "Fetching #{url}..."
  json_response = Net::HTTP.get(URI(url))

  puts "Parsing JSON reponse..."
  response = JSON.parse(json_response)

  # Ignore pages that did not come back OK; the payload is only inspected
  # once the status has been confirmed.
  next unless response['meta']['msg'] == 'OK'

  posts = response['response']['posts']
  next if posts.empty?

  puts "Adding #{posts.size} images..."
  # Each post contributes the URL of the first alt size of its first photo.
  images.concat(posts.map { |post| post['photos'][0]['alt_sizes'][0]['url'] })
end

puts "Writing JSON file..."
File.write("./meow-reader-images.json", images.to_json)
# Step 2 - Discover all the concepts present in the meow reader images.
#
# This script uses the clarifai.com API to identify all concepts present in the meow reader images.
#
# INPUT: JSON file from previous step (meow-reader-images.json)
# OUTPUT: JSON file of concepts and their counts / confidence ratings (meow-reader-concepts.json)
require 'rubygems'
require 'bundler/setup'
require 'yaml'
require 'clarification'
# Point the Clarifai client at the :general public model.
Clarification.configure do |settings|
  settings.api_key = '<API KEY GOES HERE>'
  settings.default_public_models = [:general]
end

# Image URLs produced by the previous step.
images = JSON.parse(File.read("./meow-reader-images.json"))
puts "Loaded #{images.size} images."

client = Clarification::Client.new

# Maps concept name => { count: times the concept was returned,
#                        confidence: one value per time it was returned }.
# Entries are created on first access.
concepts = Hash.new { |tally, name| tally[name] = { count: 0, confidence: [] } }

images.each do |image_url|
  prediction = client.predict.by_url(image_url)
  print '.' # one progress dot per image
  prediction[:general].concepts.each do |concept|
    stats = concepts[concept.name]
    stats[:count] += 1                 # running total of appearances
    stats[:confidence] << concept.value # value reported for this appearance
  end
end

puts "Writing JSON file..."
File.write("./meow-reader-concepts.json", concepts.to_json)
# Step 3 - Sort the concepts found in the meow reader images by count and confidence.
#
# This script sorts the concepts in two ways:
# 1) By how often the concept appeared in the meow reader images (by count).
# 2) By how confident the Clarifai API was in the concept, while ignoring low count concepts (5 or fewer appearances).
#
# INPUT: JSON file from the previous step (meow-reader-concepts.json)
# OUTPUT: Prints sorted concepts to console.
require 'json'
# Adds an arithmetic-mean helper to Array for the confidence lists below.
class Array
  # Arithmetic mean of the elements.
  #
  # Always divides as a Float, so integer arrays are not truncated
  # (e.g. [1, 2].avg is 1.5, not 1).
  #
  # @return [Float] the mean, or 0.0 for an empty array (instead of raising
  #   ZeroDivisionError), which keeps the result comparable when sorting.
  def avg
    return 0.0 if empty?

    sum.to_f / size
  end
end
# Load the per-concept statistics written by the previous step.
# JSON.parse returns string keys, so every lookup below uses 'count' and
# 'confidence' (symbol keys such as :count would return nil).
concepts = JSON.parse(File.read("./meow-reader-concepts.json"))

# Formats one [name, stats] pair as "name (count) [average confidence]".
describe = ->((name, stats)) { "#{name} (#{stats['count']}) [#{stats['confidence'].avg}]" }

# Sorted By Count (highest first).
# Looking the count up by symbol yields nil on both sides of the comparison,
# which leaves the list in its original order; the string key is required.
concepts_sorted_by_count = concepts.sort_by { |_name, stats| -stats['count'] }
puts "Loaded #{concepts_sorted_by_count.size} concepts."
puts "Concepts By Count Showing Average Confidence:"
puts concepts_sorted_by_count.map(&describe).join(', ')

# Sorted By Confidence (highest average first).
# sort_by evaluates the average once per concept rather than on every comparison.
puts "Concepts By Average Confidence (Ignoring Low Count Concepts):"
concepts_sorted_by_confidence = concepts.sort_by { |_name, stats| -stats['confidence'].avg }
# Only concepts that appeared more than 5 times are printed.
puts concepts_sorted_by_confidence.select { |_name, stats| stats['count'] > 5 }
                                  .map(&describe)
                                  .join(', ')
# Step 4 - Upload all the images from animalsthatdopeoplethings.tumblr.com to our Clarifai account.
#
# This script uploads a collection of images from URLs into our Clarifai account.
#
# INPUT: JSON file produced by a modified version of the script from step 1.
# OUTPUT: None
require 'rubygems'
require 'bundler/setup'
require 'json'
require 'clarification'
# Point the Clarifai client at the :general public model.
Clarification.configure do |settings|
  settings.api_key = '<API KEY GOES HERE>'
  settings.default_public_models = [:general]
end

# Image URLs to be indexed in the Clarifai account.
images = JSON.parse(File.read("./animalsthatdopeoplethings.json"))
puts "Loaded #{images.size} images."

client = Clarification::Client.new

# Submit the URLs in batches of 128, printing one dot per batch.
# NOTE(review): 128 is presumably the API's per-request limit — confirm.
images.each_slice(128) do |batch|
  print "."
  client.search.index_images(batch)
end
# Step 5 - Search the uploaded AnimalsThatDoPeopleThings images for reading concepts.
#
# This script searches the corpus of images we uploaded in step 4 for the concepts we identified in step 3.
# It then generates an HTML document of the reading animals discovered within this corpus of images.
#
# INPUT: Array of concepts we selected from the output from step 3.
# OUTPUT: An HTML document of the found images of reading animals. (reading-animals.html)
require 'rubygems'
require 'bundler/setup'
require 'yaml'
require 'set'
require 'clarification'
# Point the Clarifai client at the :general public model.
Clarification.configure do |settings|
  settings.api_key = '<API KEY GOES HERE>'
  settings.default_public_models = [:general]
end

client = Clarification::Client.new

# Hand selected collection of the most promising concepts discovered in step 3.
concepts = [
  'book bindings', 'book series', 'book', 'education',
  'literature', 'newspaper', 'research', 'technology'
]

# Search once per concept and gather the hit URLs into a Set, so an image
# matched by several concepts is only kept once.
result_urls = concepts.each_with_object(Set.new) do |concept, urls|
  urls.merge(client.search.by_concept(concept).hits.map(&:url))
end
# Render the collected image URLs into a standalone HTML gallery page.
#
# Reads `result_urls` (the collection of image URLs built earlier in this
# script) and writes ./reading-animals.html.
require 'cgi' # stdlib; CGI.escapeHTML escapes the attribute values below

# File.open is used instead of Kernel#open: Kernel#open has historically
# treated arguments beginning with "|" as a command to run, so the explicit
# File form is the safer habit.
File.open("./reading-animals.html", "w") do |html|
  html << <<~HEADER
    <!DOCTYPE html>
    <html lang='en'>
    <head>
    <meta charset='utf-8'>
    <title>Animals Reading</title>
    <link rel='stylesheet' href='style.css'>
    </head><body><h1>Animals Reading</h1><div id='images'>
  HEADER

  result_urls.each do |url|
    # The URLs come from an external API; escape them so characters such as
    # ' < > & cannot terminate the src attribute or inject markup.
    html << "<div class='image'><img src='#{CGI.escapeHTML(url.to_s)}'></div>"
  end

  html << "</div></body></html>"
end
@stungeye
Copy link
Author

stungeye commented Jun 1, 2018

The final HTML output can be seen here: animals-reading.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment