Last active
September 26, 2019 21:27
-
-
Save akirchner333/2ac1dafe65beb944fa3ed9dc56c01ee6 to your computer and use it in GitHub Desktop.
Script for downloading all the Met's swords
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'net/http' | |
require 'open-uri' | |
require 'json' | |
# Performs an HTTP GET on +url+ and returns the raw response body.
#
# No status check is made: the body is returned whatever the response code,
# so callers must be prepared for error payloads.
#
# @param url [String] absolute http:// or https:// URL
# @return [String] the response body
def get_page(url)
  uri = URI(url)
  # TLS is switched on only for https URLs; the block form of Net::HTTP.start
  # closes the connection once the block returns.
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(Net::HTTP::Get.new(uri)).body
  end
end
# Downloads the primary image of every public-domain object matching +query+
# into a directory named after the query, one file per object id.
query = "sword"
search_endpoint = "https://collectionapi.metmuseum.org/public/collection/v1/search"
# Escape the term so queries containing spaces or punctuation stay valid URLs.
query_url = "#{search_endpoint}?hasImages=true&q=#{URI.encode_www_form_component(query)}"
search = JSON.parse(get_page(query_url))

# File.open does not create missing directories; without this every download
# would fail and be reported as an error.
Dir.mkdir(query) unless Dir.exist?(query)

# Array() turns a missing/nil "objectIDs" value into an empty list instead of
# raising NoMethodError on nil.
Array(search["objectIDs"]).each do |id|
  individual_query = "https://collectionapi.metmuseum.org/public/collection/v1/objects/#{id}"
  item = JSON.parse(get_page(individual_query))
  image_url = item['primaryImage'].to_s
  # An empty string is truthy in Ruby, so emptiness has to be tested explicitly.
  next unless item['isPublicDomain'] && !image_url.empty?

  begin
    # URI.open (from open-uri) replaces Kernel#open for URLs, and
    # URI::DEFAULT_PARSER.escape replaces URI.encode, which Ruby 3.0 removed.
    URI.open(URI::DEFAULT_PARSER.escape(image_url)) do |remote|
      File.open("#{query}/#{query}_#{item["objectID"]}.jpg", "wb") do |file|
        # write, not puts: puts appends a newline when the data lacks one,
        # which would add a stray byte to the binary image.
        file.write(remote.read)
      end
    end
    p "#{query}_#{item["objectID"]}"
  rescue StandardError => e
    # Best effort: report the failure with its cause and carry on.
    p "Error on id #{item["objectID"]}, url #{item["primaryImage"]}: #{e.class} - #{e.message}"
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'fileutils' | |
require 'json' | |
require 'net/http' | |
# This is the script I used to sort the swords after I downloaded them, filtering out some non-swords.
# This is not the smartest way to do this! You should sort before you download; don't be like me.
# Performs an HTTP GET on +url+ and returns the raw response body.
#
# No status check is made: the body is returned whatever the response code,
# so callers must be prepared for error payloads.
#
# @param url [String] absolute http:// or https:// URL
# @return [String] the response body
def get_page(url)
  uri = URI(url)
  # TLS is switched on only for https URLs; the block form of Net::HTTP.start
  # closes the connection once the block returns.
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(Net::HTTP::Get.new(uri)).body
  end
end
# Moves +file+ from the current working directory into the sibling
# ../not_swords directory, creating that directory if it does not exist yet.
#
# @param file [String] file name relative to the current working directory
def not_sword(file)
  # FileUtils.mv raises Errno::ENOENT when the destination directory is
  # missing, so make sure it is there before the first move.
  FileUtils.mkdir_p('../not_swords')
  FileUtils.mv("./#{file}", "../not_swords/#{file}")
end
# Distinct object names encountered that are NOT on the exclusion list below;
# printed at the end so further non-sword categories can be spotted.
objects = []

# All file names handled from here on are relative to the image directory.
# NOTE(review): the download script saves into a directory named after its
# query ("sword"); presumably it was renamed to "swords" before running this
# script - confirm.
Dir.chdir("./swords")
files = Dir.glob("*.jpg")

# Values of the API's "objectName" field that mark an item as not a sword.
# The empty string entry covers records whose objectName is blank.
not_swords = [
  "Figure",
  "Watercolor",
  "Cup and saucer",
  "Badge",
  "Dish",
  "Medal",
  "Cann",
  "Pitcher",
  "Plaque",
  "Plate",
  "Painting",
  "Sword guard (Tsuba)",
  "Print",
  "Drawing",
  "Tankard",
  "Illustrated manuscript",
  "Medallion",
  "Tobacco Box",
  "Soup plate",
  "Vases",
  "Lamp",
  "Bowl",
  "Coffee cup",
  "Coffeepot",
  "Helmet",
  "Butter box",
  "Teapot",
  "Book",
  "Cup with cover",
  "Close-helmet",
  "Coin",
  "Hot milk pot",
  "Goblet",
  "Suit",
  "Tureen with cover and stand",
  "Tureen with cover",
  "Mug",
  "Box and tray",
  "Vase with cover",
  "Tea Caddy",
  "Cider Cup",
  "Standing cup with cover",
  "Teabowl",
  "Pair of sword-grip ornaments (Menuki)",
  "Incense box",
  "Sugar box",
  "Dishes",
  "Snuff bottle",
  "",
  "Portrait bust of a man",
  "Shadow puppet",
  "Sword-hilt collar (Fuchi)",
  "Cup",
  "Relief fragment from a funerary monument",
  "Orphrey fragment",
  "Neck-amphora",
  "Playing Card",
  "Wine can",
  "Statuette of Bes",
  "Scroll",
  "Chessmen",
  "Tile",
  "Illustrated single work",
  "Sugar caster",
  "Bit boss"
].freeze
# Look up each downloaded image's object record and either move the image to
# the not-swords folder or record its (so far unseen) object name.
files.each do |file|
  # The object id is the first run of digits in the file name.
  id = file[/\d+/]
  unless id
    # Without an id the request URL would point at the object listing rather
    # than a single record, so skip the file instead.
    warn "No object id in file name #{file}, skipping"
    next
  end

  facts = JSON.parse(get_page("https://collectionapi.metmuseum.org/public/collection/v1/objects/#{id}"))
  name = facts['objectName']
  if name.nil?
    # A nil name cannot be classified, and mixing nil with strings would make
    # the final objects.sort raise ArgumentError.
    warn "No objectName returned for id #{id}, skipping"
    next
  end

  if not_swords.include?(name)
    not_sword(file)
  elsif !objects.include?(name)
    objects << name
    p "#{name} - #{id}"
  end
end
p objects.sort
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment