Skip to content

Instantly share code, notes, and snippets.

@askareija
Last active July 31, 2019 04:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save askareija/caf13dd5fdeceb84ffeae7e04097ac58 to your computer and use it in GitHub Desktop.
Save askareija/caf13dd5fdeceb84ffeae7e04097ac58 to your computer and use it in GitHub Desktop.
JSON Parser to CSV for Instagram scraper
require 'json'
require 'csv'
# Print the three-line start-up banner (one line per argument).
puts "======= JSON Parser for Instagram =========",
     "================== v1.0 ===================",
     "============== Megumi Aliya ==============="
# Exports every scraped post of one account to "<account>_posts.csv".
#
# Reads all "<account>/<account>_posts_page_<N>.json" files (paths relative to
# the current working directory) in ascending page-number order and writes one
# semicolon-separated row per post, preceded by a header row.
#
# When no page file exists, a notice is printed and no CSV file is created.
#
# @param account_name [String] account (and directory) name; a trailing
#   newline, e.g. from `gets`, is stripped
# @return [void]
def parse_posts(account_name)
  account = account_name.chomp
  # The page number is taken from the file-name suffix only, so digits that
  # are part of the account name cannot influence the ordering.
  page_number = ->(path) { path[/_posts_page_(\d+)\.json\z/, 1].to_i }
  pages = Dir.glob("#{account}/#{account}_posts_page_*.json").sort_by(&page_number)

  if pages.empty?
    puts "Account : #{account} has no posts pages, skipping posts export"
    return
  end

  total_pages = page_number.call(pages.last)
  puts "Account : #{account} with #{total_pages} pages will be parsed and exported to CSV file"

  # Parse everything up front so a malformed page fails before the CSV is touched.
  posts_pages = pages.map do |path|
    JSON.parse(File.read(path))["data"]["user"]["edge_owner_to_timeline_media"]["edges"]
  end

  CSV.open("#{account}_posts.csv", "w",
           write_headers: true,
           col_sep: ";",
           headers: ['id', 'display_url', 'post_caption', 'total_comment', 'total_like', 'taken_at_timestamp']) do |csv|
    posts_pages.each do |posts|
      posts.each do |post|
        node = post["node"]
        # A post without a caption has no caption edge; export it as an empty string.
        post_caption = node.dig("edge_media_to_caption", "edges", 0, "node", "text") || ''
        csv << [
          node["id"],
          node["display_url"],
          post_caption,
          node["edge_media_to_comment"]["count"],
          node["edge_media_preview_like"]["count"],
          node["taken_at_timestamp"]
        ]
      end
    end
  end
end
# Exports one account's profile to "<account>_profile.csv".
#
# Reads "<account>/<account>_profile.json" (relative to the current working
# directory), takes the hash under "graphql" -> "user" and writes a header row
# plus a single semicolon-separated data row.
#
# @param account_name [String] account (and directory) name; a trailing
#   newline is stripped
def parse_profile(account_name)
  account = account_name.chomp
  puts "Account : #{account} profile will be parsed and exported to CSV file"

  user = JSON.parse(File.read("#{account}/#{account}_profile.json"))["graphql"]["user"]
  columns = ['id', 'username', 'biography', 'external_url', 'followed_by', 'follow', 'fullname',
             'is_bussiness_account', 'is_joined_recently', 'is_private', 'is_verified', 'profile_pic_url']

  CSV.open("#{account}_profile.csv", "w", write_headers: true, col_sep: ";", headers: columns) do |out|
    # Values are listed in the same order as the column names above.
    out << [
      user['id'],
      user['username'],
      user['biography'],
      user['external_url'],
      user['edge_followed_by']['count'],
      user['edge_follow']['count'],
      user['full_name'],
      user['is_business_account'],
      user['is_joined_recently'],
      user['is_private'],
      user['is_verified'],
      user['profile_pic_url_hd']
    ]
  end
end
# Entry point: ask for an account name and export its posts and profile.
# Entering "*" processes every directory directly under the current working
# directory, treating each directory name as an account name.
print "Input account name : "
# `gets` returns nil at end-of-input (closed or empty stdin); treat that like a blank answer.
input = gets.to_s.chomp

account_names =
  case input
  when '' then []
  when '*' then Dir.glob("*/").map { |dir| dir.delete("/") }
  else [input]
  end

if account_names.empty?
  warn "No account name given or no account directories found; nothing to parse."
else
  account_names.each do |name|
    parse_posts(name)
    parse_profile(name)
  end
  puts "Parsing posts JSON has been finished."
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment