Last active
December 27, 2015 10:18
-
-
Save nikhaldi/7309719 to your computer and use it in GitHub Desktop.
Parsing Gnip data offline into a CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"id":"tag:gnip.foursquare.com:2013:checkin/5277ee1311d238dfb8e36484", | |
"postedTime":"2013-11-04T18:57:22+00:00", | |
"verb":"checkin", | |
"actor":{ | |
"objectType":"person", | |
"gender":"female"}, | |
"object":{ | |
"id":"tag:gnip.foursquare.com:2013:venue/4b69feebf964a52098c02be3", | |
"displayName":"The Vitamin Shoppe", | |
"objectType":"place", | |
"geo":{ | |
"type":"Point", | |
"coordinates":[-73.98978361572186,40.735953254677234]}, | |
"address":{ | |
"locality":"New York", | |
"region":"NY", | |
"postalCode":"10003", | |
"country":"United States"}, | |
"foursquareCategories":[ | |
{ | |
"id":"tag:gnip.foursquare.com:2013:category/50aa9e744b90af0d42d5de0e", | |
"displayName":"Health Food Store", | |
"image":"https://ss1.4sqi.net/img/categories_v2/shops/food_grocery_88.png"}, | |
{ | |
"id":"tag:gnip.foursquare.com:2013:category/4bf58dd8d48988d10f951735", | |
"displayName":"Drugstore / Pharmacy", | |
"image":"https://ss1.4sqi.net/img/categories_v2/shops/pharmacy_88.png"}]}, | |
"provider":{ | |
"link":"https://foursquare.com", | |
"displayName":"Foursquare", | |
"objectType":"service"}, | |
"foursquareCheckinUtcOffset":-18000, | |
"gnip":{ | |
"matching_rules":[ | |
{ | |
"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]", | |
"tag":null}]}} | |
{ | |
"id":"tag:gnip.foursquare.com:2013:checkin/5277ee1311d238dfb8e36484", | |
"postedTime":"2013-11-04T18:57:22+00:00", | |
"verb":"checkin", | |
"actor":{ | |
"objectType":"person", | |
"gender":"female"}, | |
"object":{ | |
"id":"tag:gnip.foursquare.com:2013:venue/4b69feebf964a52098c02be3", | |
"displayName":"The Vitamin Shoppe", | |
"objectType":"place", | |
"geo":{ | |
"type":"Point", | |
"coordinates":[-73.98978361572186,40.735953254677234]}, | |
"address":{ | |
"locality":"New York", | |
"region":"NY", | |
"postalCode":"10003", | |
"country":"United States"}, | |
"foursquareCategories":[ | |
{ | |
"id":"tag:gnip.foursquare.com:2013:category/50aa9e744b90af0d42d5de0e", | |
"displayName":"Health Food Store", | |
"image":"https://ss1.4sqi.net/img/categories_v2/shops/food_grocery_88.png"}, | |
{ | |
"id":"tag:gnip.foursquare.com:2013:category/4bf58dd8d48988d10f951735", | |
"displayName":"Drugstore / Pharmacy", | |
"image":"https://ss1.4sqi.net/img/categories_v2/shops/pharmacy_88.png"}]}, | |
"provider":{ | |
"link":"https://foursquare.com", | |
"displayName":"Foursquare", | |
"objectType":"service"}, | |
"foursquareCheckinUtcOffset":-18000, | |
"gnip":{ | |
"matching_rules":[ | |
{ | |
"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]", | |
"tag":null}]}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert output from the Gnip streaming API to CSV.
# Works for Twitter, Foursquare (4sq) and Instagram data.
# Requires the yajl-ruby gem, which provides `require 'yajl'`.
# Usage:
#   ruby gnip.rb input.json output.csv
require 'csv' | |
require 'json' | |
require 'stringio' | |
require 'yajl' | |
# Looks up a value through nested Hashes, returning nil as soon as a level is
# missing or is not a Hash, so malformed activities never raise here.
#
# @param data [Object] the parsed JSON value to start from
# @param keys [Array<String>] the keys to follow, outermost first
# @return [Object, nil] the value found, or nil when the path does not exist
def nested_value(data, *keys)
  keys.inject(data) do |node, key|
    return nil unless node.is_a?(Hash)

    node[key]
  end
end

# Returns the first two entries of +pair+ when both are present, nil otherwise.
#
# @param pair [Object] candidate coordinate list
# @return [Array, nil] a two-element array, or nil when the pair is unusable
def usable_pair(pair)
  return nil unless pair.is_a?(Array) && pair.length >= 2
  return nil if pair[0].nil? || pair[1].nil?

  [pair[0], pair[1]]
end

# Picks the coordinate pair for one parsed activity. Sources are tried in
# this order and the first usable one wins:
#   1. "geo" -> "coordinates"             (present in the Twitter sample)
#   2. "object" -> "geo" -> "coordinates" (present in the Foursquare sample)
#   3. "location" -> "latitude" / "longitude"
#
# A "location" that carries no latitude/longitude (for example a Twitter
# place that only has a polygon) yields nil, so the activity is skipped
# instead of producing a row with empty coordinates.
#
# NOTE(review): values are passed through in the order the payload stores
# them. The sample Twitter activity has [40.74, -73.99] under "geo" while the
# sample Foursquare check-in has [-73.98, 40.73] under "object" -> "geo", so
# the two sources appear to use opposite orders -- confirm what downstream
# consumers expect before normalising.
#
# @param activity [Object] one value yielded by the JSON parser
# @return [Array, nil] two coordinate values, or nil when none are available
def extract_coordinates(activity)
  usable_pair(nested_value(activity, "geo", "coordinates")) ||
    usable_pair(nested_value(activity, "object", "geo", "coordinates")) ||
    usable_pair([nested_value(activity, "location", "latitude"),
                 nested_value(activity, "location", "longitude")])
end

# Streams every JSON activity found in +input_path+ and writes one CSV row
# (postedTime, first coordinate, second coordinate) per activity that has
# usable coordinates.
#
# The input is handed to the parser as an open File rather than being read
# into a String first, so large dumps are not held in memory as a whole.
#
# @param input_path [String] file containing the concatenated JSON activities
# @param output_path [String] CSV file to create (overwritten if it exists)
# @return [void]
def convert_gnip_to_csv(input_path, output_path)
  parser = Yajl::Parser.new
  File.open(input_path, "r") do |input|
    CSV.open(output_path, "wb") do |csv|
      parser.parse(input) do |activity|
        # puts JSON.pretty_generate(activity)
        coordinates = extract_coordinates(activity)
        # Sometimes tweets won't have exact coordinates, we skip those
        next unless coordinates

        csv << [activity["postedTime"], coordinates[0], coordinates[1]]
      end
    end
  end
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.length == 2
    convert_gnip_to_csv(ARGV[0], ARGV[1])
  else
    # Print the expected invocation instead of failing inside File.open(nil).
    warn "Usage: ruby #{File.basename($PROGRAM_NAME)} input.json output.csv"
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"id":"tag:search.twitter.com,2005:397434260321468416", | |
"objectType":"activity", | |
"actor": | |
{"objectType":"person", | |
"id":"id:twitter.com:587709250", | |
"link":"http://www.twitter.com/uranikbegu", | |
"displayName":"Uranik Begu", | |
"postedTime":"2012-05-22T19:59:02.000Z", | |
"image":"https://pbs.twimg.com/profile_images/378800000185113188/ba6f6148858a190b1df7c414c4afa7a0_normal.jpeg", | |
"summary":"Executive Director at Innovation Center Kosovo & \r\n Doctorate of Management/c at SMC University", | |
"links":[{"href":"http://al.linkedin.com/in/uranik","rel":"me"}], | |
"friendsCount":576, | |
"followersCount":1536, | |
"listedCount":2, | |
"statusesCount":1753, | |
"twitterTimeZone":"Amsterdam", | |
"verified":false,"utcOffset":"3600", | |
"preferredUsername":"uranikbegu", | |
"languages":["en"], | |
"location": | |
{"objectType":"place", | |
"displayName":"Prishtina, Kosova"}, | |
"favoritesCount":1496}, | |
"verb":"post", | |
"postedTime":"2013-11-04T18:44:46.000Z", | |
"generator": | |
{"displayName":"Twitter for Android", | |
"link":"http://twitter.com/download/android"}, | |
"provider": | |
{"objectType":"service", | |
"displayName":"Twitter", | |
"link":"http://www.twitter.com"}, | |
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416", | |
"body":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc", | |
"object": | |
{"objectType":"note", | |
"id":"object:search.twitter.com,2005:397434260321468416", | |
"summary":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc", | |
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416", | |
"postedTime":"2013-11-04T18:44:46.000Z"}, | |
"favoritesCount":0, | |
"location": | |
{"objectType":"place", | |
"displayName":"Manhattan, NY", | |
"name":"Manhattan", | |
"country_code":"United States", | |
"twitter_country_code":"US", | |
"link":"https://api.twitter.com/1.1/geo/id/086752cb03de1d5d.json", | |
"geo": | |
{"type":"Polygon", | |
"coordinates":[[[-74.047285,40.679548],[-74.047285,40.882214],[-73.907,40.882214],[-73.907,40.679548]]]}}, | |
"geo": | |
{"type":"Point", | |
"coordinates":[40.747138,-73.9920425]}, | |
"twitter_entities": | |
{"hashtags": | |
[{"text":"businessangel","indices":[90,104]}, | |
{"text":"kosova","indices":[105,112]}, | |
{"text":"nyc","indices":[113,117]}], | |
"symbols":[], | |
"urls":[], | |
"user_mentions": | |
[{"screen_name":"gustly", | |
"name":"Gust", | |
"id":14186660, | |
"id_str":"14186660", | |
"indices":[21,28]}, | |
{"screen_name":"ICKosovo", | |
"name":"ICKosovo", | |
"id":432633145, | |
"id_str":"432633145", | |
"indices":[80,89]}]}, | |
"twitter_filter_level":"medium", | |
"twitter_lang":"en", | |
"retweetCount":0, | |
"gnip": | |
{"matching_rules": | |
[{"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]", | |
"tag":null}], | |
"language":{"value":"en"}}} | |
{"id":"tag:search.twitter.com,2005:397434260321468416", | |
"objectType":"activity", | |
"actor": | |
{"objectType":"person", | |
"id":"id:twitter.com:587709250", | |
"link":"http://www.twitter.com/uranikbegu", | |
"displayName":"Uranik Begu", | |
"postedTime":"2012-05-22T19:59:02.000Z", | |
"image":"https://pbs.twimg.com/profile_images/378800000185113188/ba6f6148858a190b1df7c414c4afa7a0_normal.jpeg", | |
"summary":"Executive Director at Innovation Center Kosovo & \r\n Doctorate of Management/c at SMC University", | |
"links":[{"href":"http://al.linkedin.com/in/uranik","rel":"me"}], | |
"friendsCount":576, | |
"followersCount":1536, | |
"listedCount":2, | |
"statusesCount":1753, | |
"twitterTimeZone":"Amsterdam", | |
"verified":false,"utcOffset":"3600", | |
"preferredUsername":"uranikbegu", | |
"languages":["en"], | |
"location": | |
{"objectType":"place", | |
"displayName":"Prishtina, Kosova"}, | |
"favoritesCount":1496}, | |
"verb":"post", | |
"postedTime":"2013-11-04T18:44:46.000Z", | |
"generator": | |
{"displayName":"Twitter for Android", | |
"link":"http://twitter.com/download/android"}, | |
"provider": | |
{"objectType":"service", | |
"displayName":"Twitter", | |
"link":"http://www.twitter.com"}, | |
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416", | |
"body":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc", | |
"object": | |
{"objectType":"note", | |
"id":"object:search.twitter.com,2005:397434260321468416", | |
"summary":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc", | |
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416", | |
"postedTime":"2013-11-04T18:44:46.000Z"}, | |
"favoritesCount":0, | |
"location": | |
{"objectType":"place", | |
"displayName":"Manhattan, NY", | |
"name":"Manhattan", | |
"country_code":"United States", | |
"twitter_country_code":"US", | |
"link":"https://api.twitter.com/1.1/geo/id/086752cb03de1d5d.json", | |
"geo": | |
{"type":"Polygon", | |
"coordinates":[[[-74.047285,40.679548],[-74.047285,40.882214],[-73.907,40.882214],[-73.907,40.679548]]]}}, | |
"geo": | |
{"type":"Point", | |
"coordinates":[40.747138,-73.9920425]}, | |
"twitter_entities": | |
{"hashtags": | |
[{"text":"businessangel","indices":[90,104]}, | |
{"text":"kosova","indices":[105,112]}, | |
{"text":"nyc","indices":[113,117]}], | |
"symbols":[], | |
"urls":[], | |
"user_mentions": | |
[{"screen_name":"gustly", | |
"name":"Gust", | |
"id":14186660, | |
"id_str":"14186660", | |
"indices":[21,28]}, | |
{"screen_name":"ICKosovo", | |
"name":"ICKosovo", | |
"id":432633145, | |
"id_str":"432633145", | |
"indices":[80,89]}]}, | |
"twitter_filter_level":"medium", | |
"twitter_lang":"en", | |
"retweetCount":0, | |
"gnip": | |
{"matching_rules": | |
[{"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]", | |
"tag":null}], | |
"language":{"value":"en"}}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment