Skip to content

Instantly share code, notes, and snippets.

@nikhaldi
Last active December 27, 2015 10:18
Show Gist options
  • Save nikhaldi/7309719 to your computer and use it in GitHub Desktop.
Save nikhaldi/7309719 to your computer and use it in GitHub Desktop.
Parsing Gnip data offline into a CSV
{
"id":"tag:gnip.foursquare.com:2013:checkin/5277ee1311d238dfb8e36484",
"postedTime":"2013-11-04T18:57:22+00:00",
"verb":"checkin",
"actor":{
"objectType":"person",
"gender":"female"},
"object":{
"id":"tag:gnip.foursquare.com:2013:venue/4b69feebf964a52098c02be3",
"displayName":"The Vitamin Shoppe",
"objectType":"place",
"geo":{
"type":"Point",
"coordinates":[-73.98978361572186,40.735953254677234]},
"address":{
"locality":"New York",
"region":"NY",
"postalCode":"10003",
"country":"United States"},
"foursquareCategories":[
{
"id":"tag:gnip.foursquare.com:2013:category/50aa9e744b90af0d42d5de0e",
"displayName":"Health Food Store",
"image":"https://ss1.4sqi.net/img/categories_v2/shops/food_grocery_88.png"},
{
"id":"tag:gnip.foursquare.com:2013:category/4bf58dd8d48988d10f951735",
"displayName":"Drugstore / Pharmacy",
"image":"https://ss1.4sqi.net/img/categories_v2/shops/pharmacy_88.png"}]},
"provider":{
"link":"https://foursquare.com",
"displayName":"Foursquare",
"objectType":"service"},
"foursquareCheckinUtcOffset":-18000,
"gnip":{
"matching_rules":[
{
"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]",
"tag":null}]}}
{
"id":"tag:gnip.foursquare.com:2013:checkin/5277ee1311d238dfb8e36484",
"postedTime":"2013-11-04T18:57:22+00:00",
"verb":"checkin",
"actor":{
"objectType":"person",
"gender":"female"},
"object":{
"id":"tag:gnip.foursquare.com:2013:venue/4b69feebf964a52098c02be3",
"displayName":"The Vitamin Shoppe",
"objectType":"place",
"geo":{
"type":"Point",
"coordinates":[-73.98978361572186,40.735953254677234]},
"address":{
"locality":"New York",
"region":"NY",
"postalCode":"10003",
"country":"United States"},
"foursquareCategories":[
{
"id":"tag:gnip.foursquare.com:2013:category/50aa9e744b90af0d42d5de0e",
"displayName":"Health Food Store",
"image":"https://ss1.4sqi.net/img/categories_v2/shops/food_grocery_88.png"},
{
"id":"tag:gnip.foursquare.com:2013:category/4bf58dd8d48988d10f951735",
"displayName":"Drugstore / Pharmacy",
"image":"https://ss1.4sqi.net/img/categories_v2/shops/pharmacy_88.png"}]},
"provider":{
"link":"https://foursquare.com",
"displayName":"Foursquare",
"objectType":"service"},
"foursquareCheckinUtcOffset":-18000,
"gnip":{
"matching_rules":[
{
"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]",
"tag":null}]}}
# Convert output from Gnip streaming API to CSV
# Works for Twitter, Foursquare (4sq) and Instagram data
# Usage:
# ruby gnip.rb input.json output.csv
require 'csv'
require 'json'
require 'stringio'
require 'yajl'
# Returns the first two entries of geo["coordinates"] when +geo+ is a Hash
# holding at least two numeric values, otherwise nil. Non-point shapes (such
# as the Polygon found under a tweet's "location") nest arrays inside
# "coordinates" and are therefore rejected by the numeric check.
#
# @param geo [Object] the value stored under a "geo" key (may be nil)
# @return [Array<Numeric>, nil] the coordinate pair in its stored order
def gnip_point_pair(geo)
  return unless geo.is_a?(Hash)

  values = geo["coordinates"]
  return unless values.is_a?(Array) && values.size >= 2

  pair = values.first(2)
  pair if pair.all? { |value| value.is_a?(Numeric) }
end

# Extracts a [latitude, longitude] pair from one parsed activity.
#
# Sources are tried in this order:
# 1. top-level "geo" (tweets) - the sample tweet stores [latitude, longitude];
# 2. "object" -> "geo" - when the object is a "place" (the Foursquare venue in
#    the sample check-in) the pair is stored as [longitude, latitude], so it
#    is reversed to keep the CSV columns consistent; other object types are
#    passed through in their stored order;
# 3. "location" -> "latitude"/"longitude" (Instagram-style payloads).
#
# @param activity [Object] one parsed JSON document
# @return [Array, nil] [latitude, longitude], or nil when the activity carries
#   no exact point (for example a tweet tagged only with a place polygon)
def gnip_lat_lon(activity)
  return unless activity.is_a?(Hash)

  pair = gnip_point_pair(activity["geo"])
  return pair if pair

  object = activity["object"]
  if object.is_a?(Hash)
    pair = gnip_point_pair(object["geo"])
    return object["objectType"] == "place" ? pair.reverse : pair if pair
  end

  location = activity["location"]
  return unless location.is_a?(Hash)

  latitude = location["latitude"]
  longitude = location["longitude"]
  # A tweet's "location" is a named place without these keys; skip it rather
  # than emitting a row with empty coordinate cells.
  [latitude, longitude] if latitude && longitude
end

# Streams every JSON document found in +input_path+ through the Yajl parser
# and writes one "postedTime, latitude, longitude" row per activity that has
# an exact point. Activities without one are skipped.
#
# @param input_path [String] file containing concatenated JSON activities
# @param output_path [String] CSV file to create or overwrite
# @return [void]
def convert_gnip_to_csv(input_path, output_path)
  # Hand the File to the parser directly so it is read in chunks instead of
  # being loaded into memory as a single string first.
  File.open(input_path, "r") do |input|
    CSV.open(output_path, "wb") do |csv|
      Yajl::Parser.new.parse(input) do |activity|
        # puts JSON.pretty_generate(activity)
        lat_lon = gnip_lat_lon(activity)
        next unless lat_lon

        csv << [activity["postedTime"], *lat_lon]
      end
    end
  end
end

# Command-line entry point: ruby gnip.rb input.json output.csv
case ARGV.length
when 2
  convert_gnip_to_csv(ARGV[0], ARGV[1])
when 0
  # No arguments: just show how to call the script.
  warn "Usage: ruby gnip.rb input.json output.csv"
else
  abort "Usage: ruby gnip.rb input.json output.csv"
end
{"id":"tag:search.twitter.com,2005:397434260321468416",
"objectType":"activity",
"actor":
{"objectType":"person",
"id":"id:twitter.com:587709250",
"link":"http://www.twitter.com/uranikbegu",
"displayName":"Uranik Begu",
"postedTime":"2012-05-22T19:59:02.000Z",
"image":"https://pbs.twimg.com/profile_images/378800000185113188/ba6f6148858a190b1df7c414c4afa7a0_normal.jpeg",
"summary":"Executive Director at Innovation Center Kosovo & \r\n Doctorate of Management/c at SMC University",
"links":[{"href":"http://al.linkedin.com/in/uranik","rel":"me"}],
"friendsCount":576,
"followersCount":1536,
"listedCount":2,
"statusesCount":1753,
"twitterTimeZone":"Amsterdam",
"verified":false,"utcOffset":"3600",
"preferredUsername":"uranikbegu",
"languages":["en"],
"location":
{"objectType":"place",
"displayName":"Prishtina, Kosova"},
"favoritesCount":1496},
"verb":"post",
"postedTime":"2013-11-04T18:44:46.000Z",
"generator":
{"displayName":"Twitter for Android",
"link":"http://twitter.com/download/android"},
"provider":
{"objectType":"service",
"displayName":"Twitter",
"link":"http://www.twitter.com"},
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416",
"body":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc",
"object":
{"objectType":"note",
"id":"object:search.twitter.com,2005:397434260321468416",
"summary":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc",
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416",
"postedTime":"2013-11-04T18:44:46.000Z"},
"favoritesCount":0,
"location":
{"objectType":"place",
"displayName":"Manhattan, NY",
"name":"Manhattan",
"country_code":"United States",
"twitter_country_code":"US",
"link":"https://api.twitter.com/1.1/geo/id/086752cb03de1d5d.json",
"geo":
{"type":"Polygon",
"coordinates":[[[-74.047285,40.679548],[-74.047285,40.882214],[-73.907,40.882214],[-73.907,40.679548]]]}},
"geo":
{"type":"Point",
"coordinates":[40.747138,-73.9920425]},
"twitter_entities":
{"hashtags":
[{"text":"businessangel","indices":[90,104]},
{"text":"kosova","indices":[105,112]},
{"text":"nyc","indices":[113,117]}],
"symbols":[],
"urls":[],
"user_mentions":
[{"screen_name":"gustly",
"name":"Gust",
"id":14186660,
"id_str":"14186660",
"indices":[21,28]},
{"screen_name":"ICKosovo",
"name":"ICKosovo",
"id":432633145,
"id_str":"432633145",
"indices":[80,89]}]},
"twitter_filter_level":"medium",
"twitter_lang":"en",
"retweetCount":0,
"gnip":
{"matching_rules":
[{"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]",
"tag":null}],
"language":{"value":"en"}}}
{"id":"tag:search.twitter.com,2005:397434260321468416",
"objectType":"activity",
"actor":
{"objectType":"person",
"id":"id:twitter.com:587709250",
"link":"http://www.twitter.com/uranikbegu",
"displayName":"Uranik Begu",
"postedTime":"2012-05-22T19:59:02.000Z",
"image":"https://pbs.twimg.com/profile_images/378800000185113188/ba6f6148858a190b1df7c414c4afa7a0_normal.jpeg",
"summary":"Executive Director at Innovation Center Kosovo & \r\n Doctorate of Management/c at SMC University",
"links":[{"href":"http://al.linkedin.com/in/uranik","rel":"me"}],
"friendsCount":576,
"followersCount":1536,
"listedCount":2,
"statusesCount":1753,
"twitterTimeZone":"Amsterdam",
"verified":false,"utcOffset":"3600",
"preferredUsername":"uranikbegu",
"languages":["en"],
"location":
{"objectType":"place",
"displayName":"Prishtina, Kosova"},
"favoritesCount":1496},
"verb":"post",
"postedTime":"2013-11-04T18:44:46.000Z",
"generator":
{"displayName":"Twitter for Android",
"link":"http://twitter.com/download/android"},
"provider":
{"objectType":"service",
"displayName":"Twitter",
"link":"http://www.twitter.com"},
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416",
"body":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc",
"object":
{"objectType":"note",
"id":"object:search.twitter.com,2005:397434260321468416",
"summary":"Currently at a visit @gustly in Manhattan...looking forward to cooperation with @ICKosovo #businessangel #kosova #nyc",
"link":"http://twitter.com/uranikbegu/statuses/397434260321468416",
"postedTime":"2013-11-04T18:44:46.000Z"},
"favoritesCount":0,
"location":
{"objectType":"place",
"displayName":"Manhattan, NY",
"name":"Manhattan",
"country_code":"United States",
"twitter_country_code":"US",
"link":"https://api.twitter.com/1.1/geo/id/086752cb03de1d5d.json",
"geo":
{"type":"Polygon",
"coordinates":[[[-74.047285,40.679548],[-74.047285,40.882214],[-73.907,40.882214],[-73.907,40.679548]]]}},
"geo":
{"type":"Point",
"coordinates":[40.747138,-73.9920425]},
"twitter_entities":
{"hashtags":
[{"text":"businessangel","indices":[90,104]},
{"text":"kosova","indices":[105,112]},
{"text":"nyc","indices":[113,117]}],
"symbols":[],
"urls":[],
"user_mentions":
[{"screen_name":"gustly",
"name":"Gust",
"id":14186660,
"id_str":"14186660",
"indices":[21,28]},
{"screen_name":"ICKosovo",
"name":"ICKosovo",
"id":432633145,
"id_str":"432633145",
"indices":[80,89]}]},
"twitter_filter_level":"medium",
"twitter_lang":"en",
"retweetCount":0,
"gnip":
{"matching_rules":
[{"value":"bounding_box:[-74.0357 40.6817 -73.9642 40.7556]",
"tag":null}],
"language":{"value":"en"}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment