Skip to content

Instantly share code, notes, and snippets.

@askareija
Last active July 3, 2019 09:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save askareija/6c777df9600c7d52ec61bd0f73a31b46 to your computer and use it in GitHub Desktop.
Save askareija/6c777df9600c7d52ec61bd0f73a31b46 to your computer and use it in GitHub Desktop.
Instagram Scraper Metadata Script
require 'json'
require 'net/http'
require 'uri'
require 'fileutils'
# Splits one raw line of comma-separated usernames into a clean list.
#
# @param raw [String, nil] the line as typed; nil when standard input hit EOF
# @return [Array<String>] usernames with all whitespace removed (including a
#   Windows "\r"), blank entries dropped and duplicates removed
def parse_usernames(raw)
  raw.to_s.split(',').map { |name| name.gsub(/\s+/, '') }.reject(&:empty?).uniq
end

# Banner
puts "============= Instagram Scraper ============="
puts "================== v1.0 ==================="
puts "============== Megumi Aliya ==============="
3.times { puts "" }

# Prompt for the accounts to scrape.
puts "input instagram username: "
puts "* separated with comma if more than 1"
# `gets` returns nil at EOF (e.g. piped/closed stdin); parse_usernames copes
# with that and yields an empty list instead of raising NoMethodError.
ig_user = gets
users = parse_usernames(ig_user)
# Value sent as the `query_hash` parameter of every posts-page request.
# NOTE(review): presumably identifies Instagram's timeline-media GraphQL
# query -- confirm it is still accepted by the endpoint.
POSTS_QUERY_HASH = 'f2405b236d85e8296cf30347c9f08c2a'
# Number of posts requested per page (the `first` GraphQL variable).
POSTS_PER_PAGE = 50
# Pause, in seconds, between consecutive page requests and before a retry.
HOLD_SECONDS = 20
# How many consecutive failures of one page are tolerated before giving up.
MAX_PAGE_ATTEMPTS = 5

# Walks a chain of string keys through nested hashes.
#
# @param obj [Object] the parsed JSON value to start from
# @param keys [Array<String>] keys to follow, outermost first
# @return [Object, nil] the nested value, or nil as soon as a level is not a Hash
def dig_hash(obj, *keys)
  keys.reduce(obj) { |node, key| node.is_a?(Hash) ? node[key] : nil }
end

# Parses a JSON document without raising.
#
# @param text [String, nil] response body
# @return [Object, nil] the parsed value, or nil when text is not valid JSON
def parse_json_or_nil(text)
  JSON.parse(text)
rescue JSON::ParserError, TypeError
  nil
end

# Tells whether a username can be used as a single directory/file name
# component, so it cannot escape the working directory.
#
# @param name [String]
# @return [Boolean]
def path_safe_name?(name)
  !name.empty? && !name.include?('/') && !name.include?('\\') && name != '.' && name != '..'
end

# Requests one page of a user's posts from the GraphQL endpoint.
#
# @param user_id [Integer] numeric id taken from the profile response
# @param cursor [String, nil] `end_cursor` of the previous page
# @return [Array(String, Object)] the raw response body and its parsed JSON
# @raise [RuntimeError] when the HTTP status is not 2xx
# @raise [JSON::ParserError] when the body is not valid JSON
def fetch_posts_page(user_id, cursor)
  uri = URI('https://www.instagram.com/graphql/query/')
  variables = { id: user_id, first: POSTS_PER_PAGE, after: cursor }
  uri.query = URI.encode_www_form(query_hash: POSTS_QUERY_HASH, variables: variables.to_json)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  request = Net::HTTP::Get.new(uri.request_uri)
  # Cookie sent by the original script; its purpose is not evident from this file.
  request['Cookie'] = 'ig_pr=1'

  response = http.request(request)
  raise "unexpected HTTP status #{response.code}" unless response.is_a?(Net::HTTPSuccess)

  [response.body, JSON.parse(response.body)]
end

# For every requested account: save the profile JSON, then page through the
# account's posts and save each page as its own JSON file inside a directory
# named after the account.
users.each do |user|
  username = user.delete("\n")

  unless path_safe_name?(username)
    puts "Skipping invalid username: #{username.inspect}"
    next
  end

  puts "Scraping profile metadata : #{username}"
  puts ""

  ################### GENERAL PROFILE #########################
  # The username is URL-encoded so unusual characters cannot break URI parsing.
  profile_uri = URI("https://www.instagram.com/#{URI.encode_www_form_component(username)}/?__a=1")
  begin
    res = Net::HTTP.get_response(profile_uri)
  rescue StandardError => e
    puts "Request for #{username} failed: #{e.message}"
    next
  end

  # Net::HTTP.get_response does not follow redirects, so a redirect also
  # ends up in this branch.
  unless res.is_a?(Net::HTTPSuccess)
    puts "Account not found"
    next
  end

  # Validate the payload before touching the disk: a 2xx answer is not
  # guaranteed to be the expected JSON document.
  profile = parse_json_or_nil(res.body)
  user_data = dig_hash(profile, 'graphql', 'user')
  page_info = dig_hash(user_data, 'edge_owner_to_timeline_media', 'page_info')
  unless page_info
    puts "Unexpected profile response for #{username}, skipping."
    next
  end

  # Create directory by IG username and store the raw profile body
  # (the body itself, not the response object).
  FileUtils.mkdir_p(username)
  File.open(File.join(username, "#{username}_profile.json"), 'w') { |f| f.puts res.body }
  puts "Scraping profile completed."
  ################### GENERAL PROFILE #########################

  puts "Scraping All #{user} posts metadata"
  puts ""

  user_id = user_data['id'].to_i
  cursor_end = page_info['end_cursor']
  has_next_page = page_info['has_next_page']
  page = 1
  attempts = 0
  gave_up = false

  while has_next_page
    puts "Scraping posts page #{page}"
    begin
      body, posts = fetch_posts_page(user_id, cursor_end)
      next_info = dig_hash(posts, 'data', 'user', 'edge_owner_to_timeline_media', 'page_info')
      raise 'response has no page_info' unless next_info
    rescue StandardError => e
      attempts += 1
      if attempts >= MAX_PAGE_ATTEMPTS
        puts "Scraping page #{page} failed #{attempts} times (#{e.message}), giving up."
        gave_up = true
        break
      end
      puts "Scraping page #{page} failed, retrying.."
      # Back off before the next attempt instead of hammering the endpoint.
      sleep(HOLD_SECONDS)
      next
    end

    attempts = 0
    # The file is only created once a valid page is in hand, so a failed
    # attempt leaves nothing behind to clean up.
    File.open(File.join(username, "#{username}_posts_page_#{page}.json"), 'w') { |f| f.puts body }

    cursor_end = next_info['end_cursor']
    has_next_page = next_info['has_next_page']
    page += 1

    # No need to wait after the final page.
    if has_next_page
      puts "Hold request (#{HOLD_SECONDS}s)"
      sleep(HOLD_SECONDS)
    end
  end

  puts "Scraping for user : #{user} has been finished." unless gave_up
  puts ""
  puts ""
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment