Skip to content

Instantly share code, notes, and snippets.

@hendra-herviawan
Last active June 22, 2018 04:15
Show Gist options
  • Save hendra-herviawan/56fc2b057da3d72ada80d4c9763b66b9 to your computer and use it in GitHub Desktop.
Save hendra-herviawan/56fc2b057da3d72ada80d4c9763b66b9 to your computer and use it in GitHub Desktop.
{
"created_at" : "Thu Jun 19 04:17:56 +0000 2014",
"id" : 479478159050420200,
"id_str" : "479478159050420224",
"text" : "RT @FIFAWorldCup: A dramatic #WorldCup matchday 7 reviewed @onsoranje @SeFutbol @anfpchile @Socceroos @HNS_CFF - http://t.co/rJqOLG0cvE htt…",
"source" : "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>",
"truncated" : false,
"in_reply_to_status_id" : null,
"in_reply_to_status_id_str" : null,
"in_reply_to_user_id" : null,
"in_reply_to_user_id_str" : null,
"in_reply_to_screen_name" : null,
"user" : {
"id" : 240133940,
"id_str" : "240133940",
"name" : "3 Lions",
"screen_name" : "AdrianHdzHoyos",
"location" : "#Brasil2014",
"url" : "http://es.favstar.fm/users/AdrianHdzHoyos",
"description" : "Straight Edge Till Death - América - Real Madrid - Manchester United - AC Milan - Lakers - Rangers - Colts - Borussia Dortmund - Blackhawks - México - England.",
"protected" : false,
"followers_count" : 1292,
"friends_count" : 434,
"listed_count" : 3,
"created_at" : "Wed Jan 19 06:51:13 +0000 2011",
"favourites_count" : 7389,
"utc_offset" : -18000,
"time_zone" : "Mexico City",
"geo_enabled" : true,
"verified" : false,
"statuses_count" : 139358,
"lang" : "en",
"contributors_enabled" : false,
"is_translator" : false,
"is_translation_enabled" : false,
"profile_background_color" : "352726",
"profile_background_image_url" : "http://pbs.twimg.com/profile_background_images/378800000102054839/8055641aadc73bf1ea87a4f208d4ae15.jpeg",
"profile_background_image_url_https" : "https://pbs.twimg.com/profile_background_images/378800000102054839/8055641aadc73bf1ea87a4f208d4ae15.jpeg",
"profile_background_tile" : true,
"profile_image_url" : "http://pbs.twimg.com/profile_images/477177889566097408/pI-wDmhI_normal.jpeg",
"profile_image_url_https" : "https://pbs.twimg.com/profile_images/477177889566097408/pI-wDmhI_normal.jpeg",
"profile_banner_url" : "https://pbs.twimg.com/profile_banners/240133940/1402488692",
"profile_link_color" : "372CCF",
"profile_sidebar_border_color" : "FFFFFF",
"profile_sidebar_fill_color" : "99CC33",
"profile_text_color" : "3E4415",
"profile_use_background_image" : true,
"default_profile" : false,
"default_profile_image" : false,
"following" : null,
"follow_request_sent" : null,
"notifications" : null
},
"geo" : null,
"coordinates" : null,
"place" : null,
"contributors" : null,
"retweeted_status" : {
"created_at" : "Thu Jun 19 02:55:28 +0000 2014",
"id" : 479457406842204160,
"id_str" : "479457406842204160",
"text" : "A dramatic #WorldCup matchday 7 reviewed @onsoranje @SeFutbol @anfpchile @Socceroos @HNS_CFF - http://t.co/rJqOLG0cvE http://t.co/oiIvASKSnE",
"source" : "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",
"truncated" : false,
"in_reply_to_status_id" : null,
"in_reply_to_status_id_str" : null,
"in_reply_to_user_id" : null,
"in_reply_to_user_id_str" : null,
"in_reply_to_screen_name" : null,
"user" : {
"id" : 138372303,
"id_str" : "138372303",
"name" : "FIFAWorldCup",
"screen_name" : "FIFAWorldCup",
"location" : "",
"url" : "http://www.fifa.com/worldcup",
"description" : "Updates from Brazil provided by the Official Site of the 2014 FIFA World Cup.",
"protected" : false,
"followers_count" : 1972406,
"friends_count" : 374,
"listed_count" : 6341,
"created_at" : "Thu Apr 29 10:58:07 +0000 2010",
"favourites_count" : 0,
"utc_offset" : 10800,
"time_zone" : "Baghdad",
"geo_enabled" : false,
"verified" : true,
"statuses_count" : 3008,
"lang" : "en",
"contributors_enabled" : false,
"is_translator" : false,
"is_translation_enabled" : false,
"profile_background_color" : "C6E2EE",
"profile_background_image_url" : "http://pbs.twimg.com/profile_background_images/455790780603330560/FkqLroxe.png",
"profile_background_image_url_https" : "https://pbs.twimg.com/profile_background_images/455790780603330560/FkqLroxe.png",
"profile_background_tile" : false,
"profile_image_url" : "http://pbs.twimg.com/profile_images/459425641092247552/DrNgVrKG_normal.png",
"profile_image_url_https" : "https://pbs.twimg.com/profile_images/459425641092247552/DrNgVrKG_normal.png",
"profile_banner_url" : "https://pbs.twimg.com/profile_banners/138372303/1398433392",
"profile_link_color" : "1F98C7",
"profile_sidebar_border_color" : "FFFFFF",
"profile_sidebar_fill_color" : "DAECF4",
"profile_text_color" : "663B12",
"profile_use_background_image" : true,
"default_profile" : false,
"default_profile_image" : false,
"following" : null,
"follow_request_sent" : null,
"notifications" : null
},
"geo" : null,
"coordinates" : null,
"place" : null,
"contributors" : null,
"retweet_count" : 692,
"favorite_count" : 639,
"entities" : {
"hashtags" : [
{
"text" : "WorldCup",
"indices" : [
11,
20
]
}
],
"symbols" : [ ],
"urls" : [
{
"url" : "http://t.co/rJqOLG0cvE",
"expanded_url" : "http://fifa.to/1vVvlCQ",
"display_url" : "fifa.to/1vVvlCQ",
"indices" : [
95,
117
]
}
],
"user_mentions" : [
{
"screen_name" : "OnsOranje",
"name" : "OnsOranje",
"id" : 143003085,
"id_str" : "143003085",
"indices" : [
41,
51
]
},
{
"screen_name" : "SeFutbol",
"name" : "Selección Española",
"id" : 1139001474,
"id_str" : "1139001474",
"indices" : [
52,
61
]
},
{
"screen_name" : "ANFPChile",
"name" : "ANFP Chile",
"id" : 442209771,
"id_str" : "442209771",
"indices" : [
62,
72
]
},
{
"screen_name" : "Socceroos",
"name" : "#GoSocceroos",
"id" : 110855776,
"id_str" : "110855776",
"indices" : [
73,
83
]
},
{
"screen_name" : "HNS_CFF",
"name" : "HNS | CFF",
"id" : 790921536,
"id_str" : "790921536",
"indices" : [
84,
92
]
}
],
"media" : [
{
"id" : 479457401486053400,
"id_str" : "479457401486053377",
"indices" : [
118,
140
],
"media_url" : "http://pbs.twimg.com/media/Bqdf92SCEAEi41t.jpg",
"media_url_https" : "https://pbs.twimg.com/media/Bqdf92SCEAEi41t.jpg",
"url" : "http://t.co/oiIvASKSnE",
"display_url" : "pic.twitter.com/oiIvASKSnE",
"expanded_url" : "http://twitter.com/FIFAWorldCup/status/479457406842204160/photo/1",
"type" : "photo",
"sizes" : {
"thumb" : {
"w" : 150,
"h" : 150,
"resize" : "crop"
},
"small" : {
"w" : 339,
"h" : 191,
"resize" : "fit"
},
"medium" : {
"w" : 599,
"h" : 337,
"resize" : "fit"
},
"large" : {
"w" : 960,
"h" : 540,
"resize" : "fit"
}
}
}
]
},
"favorited" : false,
"retweeted" : false,
"possibly_sensitive" : false,
"lang" : "en"
},
"retweet_count" : 0,
"favorite_count" : 0,
"entities" : {
"hashtags" : [
{
"text" : "WorldCup",
"indices" : [
29,
38
]
}
],
"symbols" : [ ],
"urls" : [
{
"url" : "http://t.co/rJqOLG0cvE",
"expanded_url" : "http://fifa.to/1vVvlCQ",
"display_url" : "fifa.to/1vVvlCQ",
"indices" : [
113,
135
]
}
],
"user_mentions" : [
{
"screen_name" : "FIFAWorldCup",
"name" : "FIFAWorldCup",
"id" : 138372303,
"id_str" : "138372303",
"indices" : [
3,
16
]
},
{
"screen_name" : "OnsOranje",
"name" : "OnsOranje",
"id" : 143003085,
"id_str" : "143003085",
"indices" : [
59,
69
]
},
{
"screen_name" : "SeFutbol",
"name" : "Selección Española",
"id" : 1139001474,
"id_str" : "1139001474",
"indices" : [
70,
79
]
},
{
"screen_name" : "ANFPChile",
"name" : "ANFP Chile",
"id" : 442209771,
"id_str" : "442209771",
"indices" : [
80,
90
]
},
{
"screen_name" : "Socceroos",
"name" : "#GoSocceroos",
"id" : 110855776,
"id_str" : "110855776",
"indices" : [
91,
101
]
},
{
"screen_name" : "HNS_CFF",
"name" : "HNS | CFF",
"id" : 790921536,
"id_str" : "790921536",
"indices" : [
102,
110
]
}
],
"media" : [
{
"id" : 479457401486053400,
"id_str" : "479457401486053377",
"indices" : [
139,
140
],
"media_url" : "http://pbs.twimg.com/media/Bqdf92SCEAEi41t.jpg",
"media_url_https" : "https://pbs.twimg.com/media/Bqdf92SCEAEi41t.jpg",
"url" : "http://t.co/oiIvASKSnE",
"display_url" : "pic.twitter.com/oiIvASKSnE",
"expanded_url" : "http://twitter.com/FIFAWorldCup/status/479457406842204160/photo/1",
"type" : "photo",
"sizes" : {
"thumb" : {
"w" : 150,
"h" : 150,
"resize" : "crop"
},
"small" : {
"w" : 339,
"h" : 191,
"resize" : "fit"
},
"medium" : {
"w" : 599,
"h" : 337,
"resize" : "fit"
},
"large" : {
"w" : 960,
"h" : 540,
"resize" : "fit"
}
},
"source_status_id" : 479457406842204160,
"source_status_id_str" : "479457406842204160"
}
]
},
"favorited" : false,
"retweeted" : false,
"possibly_sensitive" : false,
"filter_level" : "medium",
"lang" : "en"
}
#Top five hashtags used in tweets about World Cup games:
// Top five hashtags: emit one document per hashtag occurrence, count the
// occurrences of each hashtag text, then keep the five largest counts.
// Grouping is on the exact text, so "WorldCup" and "worldcup" are counted
// as separate tags.
db.allTweets.aggregate([
  // one output document per element of the entities.hashtags array
  { $unwind: '$entities.hashtags' },
  // one group per distinct hashtag text; tagCount = number of occurrences
  { $group: {
    _id: '$entities.hashtags.text',
    tagCount: { $sum: 1 }
  }},
  // most frequent hashtag first
  { $sort: {
    tagCount: -1
  }},
  { $limit: 5 }
]);
#The result:
{ "_id" : "WorldCup", "tagCount" : 399139 }
{ "_id" : "Brasil2014", "tagCount" : 172419 }
{ "_id" : "worldcup", "tagCount" : 98049 }
{ "_id" : "CRC", "tagCount" : 70970 }
{ "_id" : "FRA", "tagCount" : 67226 }
#Top five languages used to tweet about World Cup games? Use another aggregation pipeline, grouping on the lang field:
// Top five tweet languages: count the tweets for each value of the
// top-level "lang" field, order by descending count and keep five.
db.allTweets.aggregate([
  { $group: { _id: "$lang", count: { $sum: 1 } } },  // one group per language code
  { $sort: { count: -1 } },                          // largest count first
  { $limit: 5 }
]);
#The result:
{ "_id" : "en", "count" : 516745 }
{ "_id" : "es", "count" : 262056 }
{ "_id" : "pt", "count" : 55117 }
{ "_id" : "ar", "count" : 36122 }
{ "_id" : "fr", "count" : 30003 }
#https://gist.github.com/javikalsan/bf892f213d6e4f2ae442
# general #
use database; db.createCollection( 'collection' ); # create database with name database with empty collection named collection
use database; db.runCommand( { dropDatabase: 1 } ); or use database; db.dropDatabase(); # remove database
db.twits.remove(); # empty collection
db.twits.find().count(); # count all twits in the collection
mongodump -d database -c collection -o collection.dump # dump the entire collection into the collection.dump directory
mongorestore -d database -c collection collection.dump/database/collection.bson # restore the collection from the dump written by the previous command
mongoexport --db database --collection collection -q '{}' --type=csv -f "date,lon,lat" --out results_query.csv # export the documents matching the -q query to CSV; -f lists the exported fields (columns)
# basic search and display #
db.twits.find( { "coordinates" : { $ne: null } } ).count(); # count all twits with coordinates
db.twits.find( { "coordinates" : null } ).count(); # count all twits with coordinates null
db.twits.find( {},{ "coordinates":1, "geo":1 } ); # show all twits but only display coordinates and geo fields
# geo search #
db.twits.find( { "coordinates" : { $ne: null } }, { "_id":0, "coordinates.coordinates": 1, "text": 1, "entities.hashtags.text" : 1 } ).pretty(); # search twits with coords and display them, the text and hashtags used
https://www.javaworld.com/article/2369159/big-data/do-twitter-analysis-the-easy-way-with-mongodb.html
https://ianlondon.github.io/blog/mongodb-auth/
use twitter-analyst
db.createUser({ user: 'ian', pwd: 'secretPassword', roles: [{ role: 'readWrite', db:'twitter-analyst'}] })
sudo vim /etc/mongod.conf # edit the bindIp and authorization settings (presumably net.bindIp and security.authorization: enabled — confirm against your mongod version)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment