Last active
August 29, 2015 14:17
-
-
Save amirziai/40bded657a6731681234 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:3788d1437e77b7d2fa2a78e7dca1bdda202f15914571d134164b46047b4601db" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"MIDS W205 Assignment 3" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Storing, Retrieving, and Analyzing Social Media Data Using MongoDB" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 4, | |
"metadata": {}, | |
"source": [ | |
"Amir Ziai @amirziai" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Dependencies: bson, boto, bson, nltk, pymongo, tweepy, pandas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%matplotlib inline\n", | |
"import json\n", | |
"import string\n", | |
"import time\n", | |
"import pymongo\n", | |
"import tweepy\n", | |
"import datetime\n", | |
"import urllib\n", | |
"import boto\n", | |
"import re\n", | |
"import pandas as pd\n", | |
"from nltk import word_tokenize\n", | |
"import nltk\n", | |
"from boto.s3.key import Key\n", | |
"from bson.json_util import dumps\n", | |
"from bson import json_util\n", | |
"\n", | |
"xsdDateFormat = \"%Y-%m-%d\"\n", | |
"conn = pymongo.MongoClient()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 49 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"aws_key = \"\"\n", | |
"aws_secret = \"\"\n", | |
"consumer_key = \"\"\n", | |
"consumer_secret = \"\"\n", | |
"access_token = \"\"\n", | |
"access_token_secret = \"\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n", | |
"auth.set_access_token(access_token, access_token_secret)\n", | |
"api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Ancillary functions for text cleanup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def remove_urls_usernames(txt):\n", | |
" # escape unicode characters\n", | |
" txt = txt.encode('utf-8').decode('unicode_escape').encode('ascii', 'ignore')\n", | |
"\n", | |
" regex_url = r'(http|https)://\\S+'\n", | |
" regex_username = r'@\\S+'\n", | |
" txt_re = re.sub(regex_url, '', txt)\n", | |
" txt_re = re.sub(regex_username, '', txt_re)\n", | |
"\n", | |
" return [x.lower() for x in word_tokenize(txt_re)]\n", | |
"\n", | |
"\n", | |
"def remove_punctuation(tokenized_docs):\n", | |
" regex = re.compile('[%s]' % re.escape(string.punctuation))\n", | |
"\n", | |
" tokenized_docs_no_punctuation = []\n", | |
"\n", | |
" for token in tokenized_docs:\n", | |
" new_token = regex.sub(u'', token)\n", | |
" if not new_token == u'':\n", | |
" tokenized_docs_no_punctuation.append(new_token)\n", | |
"\n", | |
" return tokenized_docs_no_punctuation\n", | |
"\n", | |
"\n", | |
"def remove_stopwords(tokenized_docs_no_punctuation):\n", | |
" from nltk.corpus import stopwords\n", | |
" tokenized_docs_no_stopwords = []\n", | |
"\n", | |
" for word in tokenized_docs_no_punctuation:\n", | |
" if not word in stopwords.words('english'):\n", | |
" tokenized_docs_no_stopwords.append(word)\n", | |
"\n", | |
" return tokenized_docs_no_stopwords" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"1- Storing task" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"1.1- db_streamT" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Note: looking for all tweets begining Jan 2015" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"since = '2015-1-1'\n", | |
"start = datetime.datetime.strptime(since, xsdDateFormat).date()\n", | |
"q='#microsoft #mojang'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db = conn['db_streamT']\n", | |
"db.tweets.drop()\n", | |
"tweets = tweepy.Cursor(api.search, q=q, since=start).items(100)\n", | |
"for tweet in tweets:\n", | |
" tweet_id = tweet._json['id']\n", | |
" print tweet.text + '\\n'\n", | |
" if db.tweets.find({'id':tweet_id}).count() == 0:\n", | |
" db.tweets.insert(tweet._json)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"#Turki Akan Larang Peredaran Minecraft - #Re http://t.co/pAqBqvvKHy\n", | |
" #gamingnews #microsoft #mojang\n", | |
"\n", | |
"When someone asks me to play Minecraft XD #minecraft #Mojang #microsoft https://t.co/yUHu5UJnGt\n", | |
"\n", | |
"#Turki Akan Larang Peredaran Minecraft - #Re http://t.co/pAqBqvvKHy\n", | |
" #gamingnews #microsoft #mojang\n", | |
"\n", | |
"The Maya's Pyramid ... #minecraft #minecraftpe #mojang #microsoft #maya #pyramid #android #gamer @Mojang @Microsoft http://t.co/W6WB4gbQqn\n", | |
"\n", | |
"#Turki Akan Larang Peredaran Minecraft - #Re http://t.co/pAqBqvvKHy\n", | |
" #gamingnews #microsoft #mojang\n", | |
"\n", | |
"New post: #Microsoft #Minecraft #Mojang #News #PC Mojang Responds to Potential \u2018Minecraft\u2019 Ban in Turkey http://t.co/k4rRmZpfFz\n", | |
"\n", | |
"RT @SAU_Pallab: Minecraft's new launcher remove the need for standalone Java to play http://t.co/CoAutNwt7I #Microsoft #Mojang http://t.co/\u2026\n", | |
"\n", | |
"RT @WinBetaDotOrg: Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.\u2026\n", | |
"\n", | |
"Minecraft's new launcher remove the need for standalone Java to play http://t.co/CoAutNwt7I #Microsoft #Mojang http://t.co/JfsWyzIRuD\n", | |
"\n", | |
"RT @WinBetaDotOrg: Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.\u2026\n", | |
"\n", | |
"RT @WinBetaDotOrg: Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.\u2026\n", | |
"\n", | |
"Minecraft's new launcher remove the need for standalone Java to play http://t.co/h1lNtJ2V8P #Microsoft #Mojang http://t.co/BxgZTfb7kp\n", | |
"\n", | |
"Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.co/vIQnK31D5O\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"Mojang' dan Minecraft A\u00e7\u0131klamas\u0131 #minecraft #mojang #microsoft\n", | |
"http://t.co/SdeM1kPe0L http://t.co/0Uq7upsMwa\n", | |
"\n", | |
"http://t.co/ZzSEK3UhQL\n", | |
"For sale\n", | |
"https://t.co/yS52UKlYqX\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"RT @Designretrovint: http://t.co/YFo8wxhmGt\n", | |
"For sale\n", | |
"https://t.co/WPmD0AqH6W\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor\u2026\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"http://t.co/YFo8wxhmGt\n", | |
"For sale\n", | |
"https://t.co/WPmD0AqH6W\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/o4aON6Fm4f\n", | |
"For sale\n", | |
"https://t.co/ile5EpYMfk\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"RT @domaintoyou: http://t.co/aNXkh0Y4aC\n", | |
"For sale\n", | |
"https://t.co/0aeNmg5qRW\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK\u2026\n", | |
"\n", | |
"RT @domainfuture: http://t.co/kGcTqz3LLA\n", | |
"For sale\n", | |
"https://t.co/cS6vBWQj6V\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #U\u2026\n", | |
"\n", | |
"http://t.co/d2PbMNlBSH\n", | |
"For sale\n", | |
"https://t.co/TBcjcSipcb\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/V45faz5DEd\n", | |
"For sale\n", | |
"https://t.co/FvNlWUU5SD\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/i9oj9auu1R\n", | |
"For sale\n", | |
"https://t.co/FqGByZ3HPR\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/kGcTqz3LLA\n", | |
"For sale\n", | |
"https://t.co/cS6vBWQj6V\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/aNXkh0Y4aC\n", | |
"For sale\n", | |
"https://t.co/0aeNmg5qRW\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/h660oD1L8J\n", | |
"For sale\n", | |
"https://t.co/TN0PtPnBLN\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/FctpyWnF1D\n", | |
"For sale\n", | |
"https://t.co/zT0GXuS8Kl\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/9zC1bB4iZr\n", | |
"For sale\n", | |
"https://t.co/w8WGZSoWew\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/LuQnMbMRsN\n", | |
"For sale\n", | |
"https://t.co/g4kV5xLhkg\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"http://t.co/IT7mooEZec\n", | |
"For sale\n", | |
"https://t.co/k85Nq1ed8U\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n", | |
"\n", | |
"http://t.co/H05gO5AK4Q\n", | |
"For sale\n", | |
"https://t.co/ORbvyqKebk\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK #Amazon #Mojang\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"WTF Turkey?\n", | |
"http://t.co/antqspUPud\n", | |
"#Minecraft #Turchia #Mojang #Microsoft\n", | |
"\n", | |
"New post: #Microsoft #Minecraft #Mojang #News \u2018Minecraft\u2019 May Get Banned in Turkey http://t.co/VDgXvOSvaR\n", | |
"\n", | |
"#Notch \u0e1c\u0e39\u0e49\u0e2a\u0e23\u0e49\u0e32\u0e07 #\u0e40\u0e01\u0e21 #Minecraft \u0e40\u0e1b\u0e34\u0e14\u0e43\u0e08 \u0e2d\u0e30\u0e44\u0e23\u0e04\u0e37\u0e2d #\u0e40\u0e2b\u0e15\u0e38\u0e1c\u0e25 \u0e17\u0e35\u0e48\u0e15\u0e49\u0e2d\u0e07\u0e02\u0e32\u0e22\u0e1a\u0e23\u0e34\u0e29\u0e31\u0e17 #Mojang \u0e43\u0e2b\u0e49 #Microsoft http://t.co/P5l9jJIDMN #pantip #Lumia #Game\n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db.tweets.count()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
"34" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"1.2- db_tweets" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Read from S3 bucket and insert into MongoDB (db:db_tweets, collection: tweets)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db = conn['db_tweets']\n", | |
"db.tweets.drop()\n", | |
"c = boto.connect_s3(aws_key, aws_secret)\n", | |
"b = c.get_bucket('amirziai-mids-w205-assignment2')\n", | |
"\n", | |
"for k in b.list():\n", | |
" key = Key(b)\n", | |
" for tweet in json.loads(k.get_contents_as_string()):\n", | |
" db.tweets.insert(tweet)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 53 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db.tweets.count()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"23" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"2- Retrieving and analyzing task" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"2.1- Top 30 retweets and associated username and locations of users" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def top_retweets(db, limit = 30):\n", | |
" try:\n", | |
" db=conn[db]\n", | |
" return list(db.tweets.find({'retweeted_status':{ '$exists': True}},{'user.location':1, 'user.screen_name':1, 'text':1}).limit(limit))\n", | |
" except:\n", | |
" return []\n", | |
"\n", | |
"tweets = top_retweets('db_tweets')\n", | |
"i = 1\n", | |
"for tweet in tweets:\n", | |
" print str(i) + '-', tweet['text'],'\\n',tweet['user']['screen_name'],'from',tweet['user']['location'],'\\n'\n", | |
" i += 1" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1- RT @Unganked: Scrolls #2 - Buying 100 Scrolls! - YouTube http://t.co/sdFrYe78Yw #scrolls #mojang #microsoft #games #gaming #letsplay \n", | |
"JonathanMH_com from Odense, Denmark \n", | |
"\n", | |
"2- RT @bondmoran1: http://t.co/LuQnMbMRsN\n", | |
"For sale\n", | |
"https://t.co/g4kV5xLhkg\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK \u2026 \n", | |
"NoSQLDigest from \n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 70 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"2.2- Lexical diversity of db_streamT tweets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db = conn['db_streamT']\n", | |
"\n", | |
"db.lexical_diversity.drop()\n", | |
"for tweet in list(db.tweets.find()):\n", | |
" words = remove_stopwords(remove_punctuation(remove_urls_usernames(tweet['text']))) \n", | |
" ld = float(len(set(words)))/len(words)\n", | |
" db.lexical_diversity.insert({'tweet_id': tweet['id'], 'lexical_diversity':ld})" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 34 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 4, | |
"metadata": {}, | |
"source": [ | |
"Plot data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"c = 0\n", | |
"rows = []\n", | |
"for a in list(db.lexical_diversity.find()):\n", | |
" print 'tweet id:',a['tweet_id'],', lexical diversity:',a['lexical_diversity']\n", | |
" c += 1\n", | |
" rows.append({'tweet':str(c), 'ld':float(a['lexical_diversity'])})\n", | |
" \n", | |
"ld = pd.DataFrame(rows).set_index('tweet')\n", | |
"ld.plot(kind='bar', figsize=(10,6))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"tweet id: 577054587829530624 , lexical diversity: 1.0\n", | |
"tweet id: 576799469901111296 , lexical diversity: 0.875\n", | |
"tweet id: 576513550958616577 , lexical diversity: 1.0\n", | |
"tweet id: 576271407320948736 , lexical diversity: 0.8\n", | |
"tweet id: 576215147187978240 , lexical diversity: 1.0\n", | |
"tweet id: 576181101590265856 , lexical diversity: 0.846153846154\n", | |
"tweet id: 576062093235003393 , lexical diversity: 1.0\n", | |
"tweet id: 576055225292701697 , lexical diversity: 1.0\n", | |
"tweet id: 576049705903247360 , lexical diversity: 1.0\n", | |
"tweet id: 576048855457792001 , lexical diversity: 1.0\n", | |
"tweet id: 576046157127946240 , lexical diversity: 1.0\n", | |
"tweet id: 576046032041066496 , lexical diversity: 1.0\n", | |
"tweet id: 576046024516501504 , lexical diversity: 1.0\n", | |
"tweet id: 576028588878782464 , lexical diversity: 0.714285714286\n", | |
"tweet id: 575912994988802048 , lexical diversity: 1.0\n", | |
"tweet id: 575912946137718784 , lexical diversity: 1.0\n", | |
"tweet id: 575912945361752065 , lexical diversity: 1.0\n", | |
"tweet id: 575912882224939008 , lexical diversity: 1.0\n", | |
"tweet id: 575912844434276352 , lexical diversity: 1.0\n", | |
"tweet id: 575912842811084801 , lexical diversity: 1.0\n", | |
"tweet id: 575912831016636416 , lexical diversity: 1.0\n", | |
"tweet id: 575912792777232384 , lexical diversity: 1.0\n", | |
"tweet id: 575912749907247104 , lexical diversity: 1.0\n", | |
"tweet id: 575912708383637504 , lexical diversity: 1.0\n", | |
"tweet id: 575912656940494848 , lexical diversity: 1.0\n", | |
"tweet id: 575912572806938624 , lexical diversity: 1.0\n", | |
"tweet id: 575912521275699200 , lexical diversity: 1.0\n", | |
"tweet id: 575912479836008448 , lexical diversity: 1.0\n", | |
"tweet id: 575912441084780544 , lexical diversity: 1.0\n", | |
"tweet id: 575912398684553216 , lexical diversity: 1.0\n", | |
"tweet id: 575911711234002944 , lexical diversity: 1.0\n", | |
"tweet id: 575607566031589376 , lexical diversity: 1.0\n", | |
"tweet id: 575374079059828737 , lexical diversity: 0.909090909091\n", | |
"tweet id: 575279880209760256 , lexical diversity: 1.0\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 47, | |
"text": [ | |
"<matplotlib.axes._subplots.AxesSubplot at 0x1934bdd8>" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "display_data", | |
"png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAGACAYAAABiNFz3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXuUZVdd578/0oDh2TbhZdLQWRAxMGIjDxlfVAgjTZaK\nb1aUR8MszegAQUUCogbHB4FxjcqAGk3IijgSRR0Ms+QRXDm+ICHBJCQQmAQS81IUDY84WU4y7Pnj\nnk7f3HTVPbd/55793ZXPZ61aqVt1P7s+p/ap7t21d6qilCIAAAAAGM59agcAAAAAtAYLKAAAAIAV\nYQEFAAAAsCIsoAAAAABWhAUUAAAAwIqwgAIAAABYkaULqIh4e0R8NiKu3OI5b4mIayLiioh4yriJ\nAAAAAF4M+Q7UOZL2bfbOiDhJ0uNLKcdJ+hFJvzlSGwAAAIAlSxdQpZS/knTrFk/5Tknn9s+9WNLO\niHjkOHkAAAAAfoxxBupoSTfOPb5J0jEjjAsAAABgyViHyGPhMb8fBgAAALYtO0YY42ZJu+ceH9O/\n7W5EBIsqAAAAaIZSyuI3iO5ijO9AnS/pxZIUEc+U9PlSymc3CRn80htzLy9ZeLzaeKUUveQlL6na\nUPvj12gY4xpqN6zfn+bzWPte9J8H/3ms7Ts0jO87NDCPrn+/bcXS70BFxDslPUvSURFxo6TTJd23\nv7gzSyl/FhEnRcS1kv5V0kuXflQAAACAhlm6gCqlnDzgOS8fJ2cr9uRH2JMdI+fX/vgeDVnfoaG2\nnx+j/n0wxhit+w4NWd+hIes7NNT2HRqy/vR/rjX0k8g38iNsZMfI+bU/vkdD1ndoqO3nx6h/H4wx\nRuu+Q0PWd2jI+g4NtX2Hhqw//Z9rDS2gAAAAADwY4//CSxGx6QF33fOnI6zijsXqDQcPpwEAAMB2\nJKb6yz4iyqE+VkRsqwXHmNczW5xtNdb6P3fZhjGuoXbD+v31N2RhHj3msbbv0MA8Mo9jMbBhrT/G\nAAAAAOBeBQuoCem6LjvCNmjI+g4Ntf38GPXvgzHGaN13aMj6Dg1Z36Ghtu/QkPWn/3ONBRQAAADA\ninAGahP27Nmjs88+WyeeeOLd3t51nV70ohfpxhtvPKTHGahxfYeG7XDmIgvz6DGPtX2HBuaReRyL\n7Bmo6v8X3qGY4v+uG3JzTfN/+QEAAEBrGG/hlTW+1MHh3En9hqzv0FDbz49R/z4YY4zWfYeGrO/Q\nkPUdGmr7Dg1ZnzNQdtx+++3av3+/du3apSc96Um65JJLaicBAABAZSzPQA3bo07VLN3CO/bYY3XW\nWWfpggsu0Ic//GH96Z/+qW677Tbt27dPX/ziF3XDDTccemTOQI3qOzRshzMXWZhHj3ms7Ts0MI/M\n41jwc6DWzLve9S69/vWv186dO3XMMcfo1FNPtTz0DgAAANPBAmoJt9xyi3bv3n3X48c85jGHPZbD\nuZP6DVnfoaG2nx+j/n0wxhit+w4NWd+hIes7NNT2HRqyPmeg7Hj0ox99t+26zbbuAAAA4N4DZ6A2\nYf4M1EUXXaR3v/vduu2223TSSSfp1ltv5edADWxgr97jzEUW5tFjHmv7Dg3MI/M4Ftv4DFSs8WVg\nQYROP/10Pfaxj9Wxxx6rffv26cUvfjE/HwoAAOBejuUCqpSy9pdlXHfddXr2s5+tI488Uueee65u\nvfVWXXXVVXr1q1992Nt4DudO6jdkfYeG2n5+jPr3wRhjtO47NGR9h4as79BQ23doyPqcgQIAAACw\nx/IMVMtwBmpc36FhO5y5yMI8esxjbd+hgXlkHsdiG5+BAgAAAPCEBdSEOJw7qd+Q9R0aavv5Merf\nB2OM0brv0JD1HRqyvkNDbd+hIetzBgoAAADAHs5AjQxnoMb1HRq2w5mLLMyjxzzW9h0amEfmcSyy\nZ6B2jJ+0OvxcJQAAAGiJ6lt4W/+cpjL3cuHC49V/XtSFF154GD8ravWGzXA4d1K/Ies7NNT282PU\nvw/GGKN136Eh6zs0ZH2Hhtq+Q0PWn/7PNYvvQAEAAACswpDdq7Uebah9BmqL56uR/dFt+/HHaGCv\n3uPMRRbm0WMea/sODcwj8zhxAz8HCgAAAGAsGlpAdfkRKp/7qP3xPRqyvkNDbT8/Rv37YIwxWvcd\nGrK+Q0PWd2io7Ts0ZP3pGxpaQAEAAAB4wBko44baH3+Mhob2uSv662/Iwjx6zGNt36GBeWQeJ27g\nDBQAAADAWDS0gOryI3AGyqAh6zs01PbzY9S/D8YYo3XfoSHrOzRkfYeG2r5DQ9afvqGhBRQAAACA\nB5yBMm6o/fHHaGhon7uiv/6GLMyjxzzW9h0amEfmceIGzkABAAAAjEVDC6hupWdHxKCXdTbcwzY4\nd1K/Ies7NNT282PUvw/GGKN136Eh6zs0ZH2Hhtq+Q0PWn76hoQXU4VAWXi5ceAwAAACwOtv2DJTD\nPneW2h9/jIaG9rkr+utvyMI8esxjbd+hgXlkHidu4AwUAAAAwFg0tIDqDMbI+Q7nTuo3ZH2Hhtp+\nfoz698EYY7TuOzRkfYeGrO/QUNt3aMj60zc0tIACAAAA8IAzUNv83Enthob2uSv662/Iwjx6zGNt\n36GBeWQeJ27gDBQAAADAWDS0gOoMxsj5DudO6jdkfYeG2n5+jPr3wRhjtO47NGR9h4as79BQ23do\nyPrTNzS0gAIAAADwgDNQ94pzJ1vDPndtf/0NWZhHj3ms7Ts0MI/M48QNm/4lumPLOtgmbH2DAAAA\nwGo0tIXXGYyR8zl3Mobv0FDbz4/BvejgOzRkfYeGrO/QUNt3aMj60zc0tIACAAAA8IAzUJw7YZ+7\nur/+hizMo8c81vYdGphH5nHiBn4OFAAAAMBYNLSA6gzGyPmcOxnDd2io7efH4F508B0asr5DQ9Z3\naKjtOzRk/ekbGlpAAQAAAHjAGSjOnbDPXd1ff0MW5tFjHmv7Dg3MI/M4cQNnoAAAAADGoqEFVGcw\nRs7n3MkYvkNDbT8/Bveig+/QkPUdGrK+Q0Nt36Eh60/fsHQBFRH7IuKTEXFNRJx2iPcfFRHvi4jL\nI+KqiNi/UgEAAABAY2x5BioijpD0KUnPkXSzpEsknVxKuXruOW+QdP9Syusi4qj++Y8spdy5MBZn\noFakoT3itfkODdyLzOMw36GBr0fmcbnv0NDQNRz2GahnSLq2lHJ9KeUOSedJev7Cc/5e0kP61x8i\n6Z8XF08AAAAA24llC6ijJd049/im/m3z/I6kJ0XELZKukHTqeHnzdAZj5HzOnYzhOzTU9vNjcC86\n+A4NWd+hIes7NNT2HRqy/vQNyxZQQ/YEflrS5aWUr5K0V9LbIuLBK1UAAAAANMSOJe+/WdLuuce7\nNfsu1DzfKOmXJKmU8umIuE7SEyRdujjY/v37tWfPHknSzp07tXfvXm1sbEg6+C/iA49ndJI2f9x1\n3Qp+t1Bz98eL/j3/hb66f8IJJ2gZB/Zn1/HxD/38jab8g/M572/cY7zW/QPOur4eDjXeqs8f4+Pf\n3d388b3H32jKb+XraTVfc287+HidX4+1fd+vh6y/McLHf4Ok6zWEZYfId2h2KPxESbdI+ojueYj8\nv0n6Qinl5yPikZI+KunJpZR/WRjrXneIvLbv0MA1eNyLWZhHj3ms7Ts0MI/M48QNh3eIvD8M/nJJ\n75f0CUl/UEq5OiJOiYhT+qf9sqSnRcQVkj4o6TWLi6dx6AzGaN13aMj6Dg21/fwY9/wX2LQff5wx\nWvcdGrK+Q0PWd2io7Ts0ZP3pG5Zt4amU8l5J711425lzr39O0nes9FEBAAAAGobfhWf8LcqGvsW5\nNt+hYTvci1mYR495rO07NDCPzOPEDfwuPAAAAICxaGgB1RmM0brv0JD1HRpq+/kxOAPl4Ds0ZH2H\nhqzv0FDbd2jI+tM3NLSAAgAAAPCAM1DGe7wN7RGvzXdo2A73Yhbm0WMea/sODcwj8zhxA2egAAAA\nAMaioQVUZzBG675DQ9Z3aKjt58fgDJSD79CQ9R0asr5DQ23foSHrT9/Q0AIKAAAAwAPOQBnv8Ta0\nR7w236FhO9yLWZhHj3ms7Ts0MI/M48QNnIECAAAAGIuGFlCdwRit+w4NWd+hobafH4MzUA6+Q0PW\nd2jI+g4NtX2Hhqw/fUNDCygAAAAADzgDZbzH29Ae8dp8h4btcC9mYR495rG279DAPDKPEzdwBgoA\nAABgLBpaQHUGY7TuOzRkfYeG2n5+DM5AOfgODVnfoSHrOzTU9h0asv70DQ0toAAAAAA84AyU8R5v\nQ3vEa/MdGrbDvZiFefSYx9q+QwPzyDxO3MAZKAAAAICxaGgB1RmM0brv0JD1HRpq+/kxOAPl4Ds0\nZH2Hhqzv0FDbd2jI+tM3NLSAAgAAAPCAM1DGe7wN7RGvzXdo2A73Yhbm0WMea/sODcwj8zhxA2eg\nAAAAAMaioQVUZzBG675DQ9Z3aKjt58fgDJSD79CQ9R0asr5DQ23foSHrT9/Q0AIKAAAAwAPOQBnv\n8Ta0R7w236Fh+9yLW8M8tjKPfD0yj8zjhA2b/uG5Y8s6ANhGbP0HBQAADKehLbzOYIzWfYeGrO/Q\nUNt3aMj6Dg21fYeGrO/QkPUdGmr7Dg1Zf/qGhhZQAAAAAB5wBsp4j7ehPeK1+Q4N3IvM4zDfoYF5\nZB6X+w4NDV0DPwcKAAAAYCwaWkB1BmO07js0ZH2Hhtq+Q0PWd2io7Ts0ZH2Hhqzv0FDbd2jI+tM3\nNLSAAgAAAPCAM1DGe7wN7RGvzXdo4F5kHof5Dg3MI/O43HdoaOgaOAMFAAAAMBYNLaA6gzFa9x0a\nsr5DQ23foSHrOzTU9h0asr5DQ9Z3aKjtOzRk/ekbGlpAAQAAAHjAGSjjPd6G9ojX5js0cC8yj8N8\nhwbmkXlc7js0NHQNnIECAAAAGIuGFlCdwRit+w4NWd+hobbv0JD1HRpq+w4NWd+hIes7NNT2HRqy\n/vQNDS2gAAAAADzgDJTxHm9De8Rr8x0auBeZx2G+QwPzyDwu9x0aGroGzkABAAAAjEVDC6jOYIzW\nfYeGrO/QUNt3aMj6Dg21fYeGrO/QkPUdGmr7Dg1Zf/qGhhZQAAAAAB5wBsp4j7ehPeK1+Q4N3IvM\n4zDfoYF5ZB6X+w4NDV0DZ6AAAAAAxqKhBVRnMEbrvkND1ndoqO07NGR9h4bavkND1ndoyPoODbV9\nh4asP31DQwsoAAAAAA84A2W8x9vQHvHafIcG7kXmcZjv0MA8Mo/LfYeGhq6BM1AAAAAAY9HQAqoz\nGKN136Eh6zs01PYdGrK+Q0Nt36Eh6zs0ZH2Hhtq+Q0PWn76hoQUUAAAAgAecgTLe421oj3htvkMD\n9yLzOMx3aGAemcflvkNDQ9fAGSgAAACAsWhoAdUZjNG679CQ9R0aavsODVnfoaG279CQ9R0asr5D\nQ23foSHrT9/Q0AIKAAAAwAPOQBnv8Ta0R7w236GBe5F5HOY7NDCPzONy36GhoWvgDBQAAADAWCxd\nQEXEvoj4ZERcExGnbfKcjYi4LCKuiohu9EpJLe6P+vkODVnfoaG279CQ9R0aavsODVnfoSHrOzTU\n9h0asv70DTu2emdEHCHprZKeI+lmSZdExPmllKvnnrNT0tskPbeUclNEHLVqMgAAAEBLbHkGKiL+\nvaTTSyn7+sevlaRSyhlzz/kxSY8qpfzclh+IM1CT+w4NXAP3oksD88g8DvMdGphHo2s47DNQR0u6\nce7xTf3b5jlO0q6IuDAiLo2IFy0ZEwAAAKBpli2ghnzL6L6Svl7SSZKeK+lnI+K4bNg96QzGaN13\naMj6Dg21fYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb9jyDJRm5552zz3erdl3oea5UdLnSim3S7o9\nIv5S0tdJumZxsP3792vPnj2SpJ07d2rv3r3a2NiYZXez8AOPZ3SSDjy+vP/vwfd3XbeC3/VjzD+e\ne+aCf+Dx3cdq3d/8sat/cD4PPP/Qj1v3DzjTfT1M63t+PdT2N3/s6rfy9bS639bXU9b3/Hpw+Hq6\nvH/9eg1h2RmoHZI+JelESbdI+oikkxcOkX+NZgfNnyvp/pIulvSCUsonFsbiDNTEvkMD18C96NLA\nPDKPw3yHBubR6Bo2PQO15XegSil3RsTLJb1f0hGSzi6lXB0Rp/TvP7OU8smIeJ+kj0n6sqTfWVw8\nAQAAAGwnlv4cqFLKe0spTyilPL6U8sb+bWeWUs6ce86vlFKeVEr52lLKW9aT2hmM0brv0JD1HRpq\n+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP38BPIgcAAABYEX4XnvEeb0N7xGvzHRq4F5nHYb5DA/PI\nPC73HRoaugZ+Fx4AAADAWDS0gOoMxmjdd2jI+g4NtX2Hhqzv0FDbd2jI+g4NWd+hobbv0JD1p29o\naAEFAAAA4AFnoIz3eBvaI16b79DAvcg8DvMdGphH5nG579DQ0DVwBgoAAABgLBpaQHUGY7TuOzRk\nfYeG2r5DQ9Z3aKjtOzRkfYeGrO/QUNt3aMj60zc0tIACAAAA8IAzUMZ7vA3tEa/Nd2jgXmQeh/kO\nDcwj87jcd2ho6Bo4AwUAAAAwFg0toDqDMVr3HRqyvkNDbd+hIes7NNT2HRqyvkND1ndoqO07NGT9\n6RsaWkABAAAAeMAZKOM93ob2iNfmOzRwLzKPw3yHBuaReVzuOzQ0dA2cgQIAAAAYi4YWUJ3BGK37\nDg1Z36Ghtu/QkPUdGmr7Dg1Z36Eh6zs01PYdGrL+9A0NLaAAAAAAPOAMlPEeb0N7xGvzHRq4F5nH\nYb5DA/PIPC73HRoaugbOQAEAAACMRUMLqM5gjNZ9h4as79BQ23doyPoODbV9h4as79CQ9R0aavsO\nDVl/+oaGFlAAAAAAHnAGyniPt6E94rX5Dg3ci8zjMN+hgXlkHpf7Dg0NXQNnoAAAAADGoqEFVGcw\nRuu+Q0PWd2io7Ts0ZH2Hhtq+Q0PWd2jI+g4NtX2Hhqw/fUNDCygAAAAADzgDZbzH29Ae8dp8hwbu\nReZxmO/QwDwyj8t9h4aGroEzUAAAAABj0dACqjMYo3XfoSHrOzTU9h0asr5DQ23foSHrOzRkfYeG\n2r5DQ9afvqGhBRQAAACAB5yBMt7jbWiPeG2+QwP3IvM4zHdoYB6Zx+W+Q0ND18AZKAAAAICxaGgB\n1RmM0brv0JD1HRpq+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP39DQAgoAAADAA85AGe/xNrRHvDbf\noYF7kXkc5js0MI/M43LfoaGha+AMFAAAAMBYNLSA6gzGaN13aMj6Dg21fYeGrO/QUNt3aMj6Dg1Z\n36Ghtu/QkPWnb2hoAQUAAADgAWegjPd4G9ojXpvv0MC9yDwO8x0amEfmcbnv0NDQNXAGCgAAAGAs\nGlpAdQZjtO47NGR9h4bavkND1ndoqO07NGR9h4as79BQ23doyPrTNzS0gAIAAADwgDNQxnu8De0R\nr813aOBeZB6H+Q4NzCPzuNx3aGjoGjgDBQAAADAWDS2gOoMxWvcdGrK+Q0Nt36Eh6zs01PYdGrK+\nQ0PWd2io7Ts0ZP3pGxpaQAEAAAB4wBko4z3ehvaI1+Y7NHAvMo/DfIcG5pF5XO47NDR0DZyBAgAA\nABiLhhZQncEYrfsODVnfoaG279CQ9R0aavsODVnfoSHrOzTU9h0asv70DQ0toAAAAAA84AyU8R5v\nQ3vEa/MdGrgXmcdhvkMD88g8LvcdGhq6Bs5AAQAAAIxFQwuozmCM1n2Hhqzv0FDbd2jI+g4NtX2H\nhqzv0JD1HRpq+w4NWX/6hoYWUAAAAAAecAbKeI+3oT3itfkODdyLzOMw36GBeWQel/sODQ1dA2eg\nAAAAAMaioQVUZzBG675DQ9Z3aKjtOzRkfYeG2r5DQ9Z3aMj6Dg21fYeGrD99Q0MLKAAAAAAPOANl\nvMfb0B7x2nyHBu5F5nGY79DAPDKPy32Hhoau4fDPQEXEvoj4ZERcExGnbfG8p0fEnRHxPcvGBAAA\nAGiZLRdQEXGEpLdK2ifpiZJOjojjN3nemyS9T9Kmq7UcncEYrfsODVnfoaG279CQ9R0aavsODVnf\noSHrOzTU9h0asv70Dcu+A/UMSdeWUq4vpdwh6TxJzz/E814h6Y8k/dNKHx0AAACgQbY8AxUR3yfp\nuaWUH+4fv1DSN5RSXjH3nKMl/Z6kZ0t6u6T3lFL+5BBjcQZqYt+hgWvgXnRpYB6Zx2G+QwPzaHQN\nh30GasiK59ckvbZfHYXWtoUHAAAA4MGOJe+/WdLuuce7Jd208JynSjpvtpLTUZKeFxF3lFLOXxxs\n//792rNnjyRp586d2rt3rzY2NiRJXddJ0l2PZ3SSDjz+NUl75x7PnOF+J+lySa+aezz3zAX/wOO7\nj9W6f4CNZvyD83ng+YvuoZ/fmn/Ame7rYVrf8+uhtn+AjWb8Vr6eVvOl1r6esr7n14PD19Plkj4v\n6XoNopSy6YtmC6xPS9oj6X796Mdv8fxzJH3PJu8rqyCpSGXu5cKFx1uPd09/jDHa8h0auIYhvkMD\n88g83lvn0aGBeTS+Bm32svTnQEXE8zRb3h4h6exSyhsj4pR+RXTmwnPPEWegbHyHBq6Be9GlgXlk\nHof5Dg3Mo9E1bHosiR+kaXyDNHSDrc13aOBeZB6H+Q4NzCPzuNx3aGjoGg7/B2n60BmM0brv0JD1\nHRpq+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP39DQAgoAAADAA7bwjL9F2dC3ONfmOzRwLzKPw3yH\nBuaReVzuOzQ0dA3bYQsPAAAAwIOGFlCdwRit+w4NWd+hobbv0JD1HRpq+w4NWd+hIes7NNT2HRqy\n/vQNDS2gAAAAADzgDJTxHm9De8Rr8x0auBeZx2G+QwPzyDwu9x0aGroGzkABAAAAjEVDC6jOYIzW\nfYeGrO/QUNt3aMj6Dg21fYeGrO/QkPUdGmr7Dg1Zf/qGhhZQAAAAAB5wBsp4j7ehPeK1+Q4N3IvM\n4zDfoYF5ZB6X+w4NDV0DZ6AAAAAAxqKhBVRnMEbrvkND1ndoqO07NGR9h4bavkND1ndoyPoODbV9\nh4asP31DQwsoAAAAAA84A2W8x9vQHvHafIcG7kXmcZjv0MA8Mo/LfYeGhq6BM1AAAAAAY9HQAqoz\nGKN136Eh6zs01PYdGrK+Q0Nt36Eh6zs0ZH2Hhtq+Q0PWn76hoQUUAAAAgAecgTLe421oj3htvkMD\n9yLzOMx3aGAemcflvkNDQ9fAGSgAAACAsWhoAdUZjNG679CQ9R0aavsODVnfoaG279CQ9R0asr5D\nQ23foSHrT9/Q0AIKAAAAwAPOQBnv8Ta0R7w236GBe5F5HOY7NDCPzONy36GhoWvgDBQAAADAWDS0\ngOoMxmjdd2jI+g4NtX2Hhqzv0FDbd2jI+g4NWd+hobbv0JD1p29oaAEFAAAA4AFnoIz3eBvaI16b\n79DAvcg8DvMdGphH5nG579DQ0DVwBgoAAABgLBpaQHUGY7TuOzRkfYeG2r5DQ9Z3aKjtOzRkfYeG\nrO/QUNt3aMj60zc0tIACAAAA8IAzUMZ7vA3tEa/Nd2jgXmQeh/kODcwj87jcd2ho6Bo4AwUAAAAw\nFg0toDqDMVr3HRqyvkNDbd+hIes7NNT2HRqyvkND1ndoqO07NGT96RsaWkABAAAAeMAZKOM93ob2\niNfmOzRwLzKPw3yHBuaReVzuOzQ0dA2cgQIAAAAYi4YWUJ3BGK37Dg1Z36Ghtu/QkPUdGmr7Dg1Z\n36Eh6zs01PYdGrL+9A0NLaAAAAAAPOAMlPEeb0N7xGvzHRq4F5nHYb5DA/PIPC73HRoaugbOQAEA\nAACMRUMLqM5gjNZ9h4as79BQ23doyPoODbV9h4as79CQ9R0aavsODVl/+oaGFlAAAAAAHnAGyniP\nt6E94rX5Dg3ci8zjMN+hgXlkHpf7Dg0NXQNnoAAAAADGoqEFVGcwRuu+Q0PWd2io7Ts0ZH2Hhtq+\nQ0PWd2jI+g4NtX2Hhqw/fUNDCygAAAAADzgDZbzH29Ae8dp8hwbuReZxmO/QwDwyj8t9h4aGroEz\nUAAAAABj0dACqjMYo3XfoSHrOzTU9h0asr5DQ23foSHrOzRkfYeG2r5DQ9afvqGhBRQAAACAB5yB\nMt7jbWiPeG2+QwP3IvM4zHdoYB6Zx+W+Q0ND18AZKAAAAICxaGgB1RmM0brv0JD1HRpq+w4NWd+h\nobbv0JD1HRqyvkNDbd+hIetP39DQAgoAAADAA85AGe/xNrRHvDbfoYF7kXkc5js0MI/M43LfoaGh\na+AMFAAAAMBYNLSA6gzGaN13aMj6Dg21fYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb2hoAQUAAADg\nwaAzUBGxT9KvSTpC0lmllDctvP+HJL1GUkj6kqQfLaV8bOE5nIGa2Hdo4Bq4F10amEfmcZjv0MA8\nGl3D4Z+BiogjJL1V0j5JT5R0ckQcv/C0z0j61lLKkyX9gqTfXjYuAAAAQKsM2cJ7hqRrSynXl1Lu\nkHSepOfPP6GU8uFSyhf6hxdLOmbcTKnF/VE/36Eh6zs01PYdGrK+Q0Nt36Eh6zs0ZH2Hhtq+Q0PW\nn75hyALqaEk3zj2+qX/bZvxHSX+2UgUAAABAQ+wY8JzBB5ci4gRJL5P0TYd6//79+7Vnzx5J0s6d\nO7V3715tbGxIkrquk6S7Hs/oJG3+uOu6Ffxuoebujxf9A483e367/kZT/sH5nPc37jFe6/4BZ7qv\nh2l936+H2v5GU34rX0+r+Zp728HHzl9PWd/36yHrb4zw8d8g6XoNYekh8oh4pqQ3lFL29Y9fJ+nL\nhzhI/mRJfyJpXynl2kOMwyHyiX2HBq6Be9GlgXlkHof5Dg3Mo9E1pH6Q5qWSjouIPRFxP0kvkHT+\nQsRjNFs8vfBQi6dx6AzGaN13aMj6Dg21fYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb1i6hVdKuTMi\nXi7p/Zr9GIOzSylXR8Qp/fvPlPRzkr5S0m/OVnS6o5TyjNXCAQAAANqA34Vn/C3Khr7FuTbfoYF7\nkXkc5jszpr7kAAARJElEQVQ0MI/M43LfoaGha+B34QEAAACMRUMLqM5gjNZ9h4as79BQ23doyPoO\nDbV9h4as79CQ9R0aavsODVl/+oaGFlAAAAAAHnAGyniPt6E94rX5Dg3ci8zjMN+hgXlkHpf7Dg0N\nXQNnoAAAAADGoqEFVGcwRuu+Q0PWd2io7Ts0ZH2Hhtq+Q0PWd2jI+g4NtX2Hhqw/fUNDCygAAAAA\nDzgDZbzH29Ae8dp8hwbuReZxmO/QwDwyj8t9h4aGroEzUAAAAABj0dACqjMYo3XfoSHrOzTU9h0a\nsr5DQ23foSHrOzRkfYeG2r5DQ9afvqGhBRQAAACAB5yBMt7jbWiPeG2+QwP3IvM4zHdoYB6Zx+W+\nQ0ND18AZKAAAAICxaGgB1RmM0brv0JD1HRpq+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP39DQAgoA\nAADAA85AGe/xNrRHvDbfoYF7kXkc5js0MI/M43LfoaGha+AMFAAAAMBYNLSA6gzGaN13aMj6Dg21\nfYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb2hoAQUAAADgAWegjPd4G9ojXpvv0MC9yDwO8x0amEfm\ncbnv0NDQNXAGCgAAAGAsGlpAdQZjtO47NGR9h4bavkND1ndoqO07NGR9h4as79BQ23doyPrTNzS0\ngAIAAADwgDNQxnu8De0Rr813aOBeZB6H+Q4NzCPzuNx3aGjoGjgDBQAAADAWDS2gOoMxWvcdGrK+\nQ0Nt36Eh6zs01PYdGrK+Q0PWd2io7Ts0ZP3pGxpaQAEAAAB4wBko4z3ehvaI1+Y7NHAvMo/DfIcG\n5pF5XO47NDR0DZyBAgAAABiLhhZQncEYrfsODVnfoaG279CQ9R0aavsODVnfoSHrOzTU9h0asv70\nDQ0toAAAAAA84AyU8R5vQ3vEa/MdGrgXmcdhvkMD88g8LvcdGhq6Bs5AAQAAAIxFQwuozmCM1n2H\nhqzv0FDbd2jI+g4NtX2Hhqzv0JD1HRpq+w4NWX/6hoYWUAAAAAAecAbKeI+3oT3itfkODdyLzOMw\n36GBeWQel/sODQ1dA2egAAAAAMaioQVUZzBG675DQ9Z3aKjtOzRkfYeG2r5DQ9Z3aMj6Dg21fYeG\nrD99Q0MLKAAAAAAPOANlvMfb0B7x2nyHBu5F5nGY79DAPDKPy32HhoaugTNQAAAAAGPR0AKqMxij\ndd+hIes7NNT2HRqyvkNDbd+hIes7NGR9h4bavkND1p++oaEFFAAAAIAHnIEy3uNtaI94bb5DA/ci\n8zjMd2hgHpnH5b5DQ0PXwBkoAAAAgLFoaAHVGYzRuu/QkPUdGmr7Dg1Z36Ghtu/QkPUdGrK+Q0Nt\n36Eh60/f0NACCgAAAMADzkAZ7/E2tEe8Nt+hgXuReRzmOzQwj8zjct+hoaFr4AwUAAAAwFg0tIDq\nDMZo3XdoyPoODbV9h4as79BQ23doyPoODVnfoaG279CQ9advaGgBBQAAAOABZ6CM93gb2iNem+/Q\nwL3IPA7zHRqYR+Zxue/Q0NA1cAYKAAAAYCwaWkB1BmO07js0ZH2Hhtq+Q0PWd2io7Ts0ZH2Hhqzv\n0FDbd2jI+tM3NLSAAgAAAPCAM1DGe7wN7RGvzXdo4F5kHof5Dg3MI/O43HdoaOgaOAMFAAAAMBYN\nLaA6gzFa9x0asr5DQ23foSHrOzTU9h0asr5DQ9Z3aKjtOzRk/ekbli6gImJfRHwyIq6JiNM2ec5b\n+vdfERFPWalgMJcbjNG679DANfA5cGmo7Ts0cA0eDbV9h4b2rmHLBVREHCHprZL2SXqipJMj4viF\n55wk6fGllOMk/Yik31ypYDCfNxijdd+hgWvgc+DSUNt3aOAaPBpq+w4N7V3Dsu9APUPStaWU60sp\nd0g6T9LzF57znZLOlaRSysWSdkbEI1eqAAAAAGiIZQuooyXdOPf4pv5ty55zTD5tkesNxmjdd2jI\n+g4NtX2Hhqzv0FDbd2jI+g4NWd+hobbv0JD1p2/Y8scYRMT3StpXSvnh/vELJX1DKeUVc895j6Qz\nSil/0z/+oKTXlFL+dmGsaX5eAgAAAMAIbPVjDHYscW+WtHvu8W7NvsO01XOO6d82OAIAAACgJZZt\n4V0q6biI2BMR95P0AknnLzznfEkvlqSIeKakz5dSPjt6KQAAAIAJW34HqpRyZ0S8XNL7JR0h6exS\nytURcUr//jNLKX8WESdFxLWS/lXSS9deDQAAAFCRyX6VCwAAAMB2oaGfRC5FxODvbkXE8RFxYkQ8\naOHt+wb63xwRT+xf34iIV0fEiasV322830243xIRPxkR37aC88yIeGj/+gMi4r9ExP+KiDcdePsS\n/5URsXvZ87bw7x8RL4mI5/SPfygi3hYR/zki7rvCOI+LiJ+KiF+PiF+NiP8UEQ853C4A8CEiHmHQ\n8LDaDVCfw7kXm1pASfovQ54UEa+U9G5Jr5D08Yj4rrl3v3GA/0ZJvyLp3Ih4s6QzJB0p6fSI+KkB\n/nsi4vz+v+/p/0/F7z3w9gH+R+Ze/2FJ/13Sg/qP/7plfs/bNdtSlaRfl/SQ/jpul3TOAP8XJH0k\nIv46In4sIh4+8OMe4BxJJ0k6NSLeIen7JF2k2c8WO2vIABFxqqTfknT/3ru/pMdIujgiTlixB3r4\nS2t6ImJnRJzR/1aHWyPiX/rXz4iIncmx3zvgOQ/tP9bvRcQPLrzvNwZ+nN0RcdaB5og4JyKuioh3\nDLmnImLXwsvDNPszZldE7Brg75t7fWdEnB0RV0bE7w/92YP9PyAf3r/+tIj4jGZ/ntwQERsD/Msi\n4mci4nFDPt4h/KdHxIX9POyOiAsi4gsRcUkM+C0eEfHg/h/DH4+IL0bE5yLi4ojYv0ID92LyXryL\nUorVi6Qrt3j5t4FjXCXpQf3rezQ7DP+q/vFlA/xPaHY+7AGSviTpof3bj5T0sQH+ZZL+h6QTJD1L\n0oakv+9ff9YQf+71SyU9vH/9gZKuGvg5uHru9b9deN8VA6/hPpK+TbPF2D9Jep+kl0h68JB57P+7\nQ9I/StrRP44D7xs4j0f0rz9A0l/0rz9G0uUD/J2aLRo/KelWSf/Sv36GpJ3J+/S9A57z0P5j/Z6k\nH1x4328M/Di7NVtwntFfzzn95+Udkh4xwN+18PIwzX7YyS5Juwb4+xY+n2f3X4u/L+mRA6/hTXP3\n8NMkfUbStZJukLQx8F78GUmPO8y5erqkC/t52C3pAklfkHSJpKcM8B+s2T/ePi7pi5I+J+liSftX\naPiApNMkPUoHj048WtJrJX1ggP/1m7w8VdI/DPD/pL+HvlvSeyT9saSvOPD5HXgNf67ZP0pf138d\nvbb/WnyFpD8e4H9Z0nULL3f0//3MkPtg7vWzJf2iZn++/7ikdw+8hqvmXu8kPb1//aslfXSAf51m\n/7i+ob9/flzSV61wH1wi6XmSTtbs/2j/fs3+TDxR0ocH+Odrds54t6SfkPRzffvvSvpl7sVp7sW7\nxhn6xKleJH1W0lP6L4zFl1sGjvHxhccP0uwg/K9q2F+8lx/q9UM93sQ/or+5P6j+D2hJ163wOfiY\nDv5ld9mqH79/3h9Jeln/+jkLf1BcMsBf/Lj30+yn0J8n6XND5kCz7xh9pWaL0If1bz9ycX62GOPK\nuS+sXZIu3WyON/H5g4K/tCSPv7T+9+G8b+45/0+zReChXm4f4F+x8Pj1kv5G0lEr3Ivzfy7esNn7\ntvB/UrN/hD15fm5XmMf5e/GKA1/Th7q+Lca4WtJ9+9cvWnjf0n/YHWjo759v1exXl/1DPw8/suI1\nHM7n8GMLjy/t/3sfSZ/iXpzmXrzLWVVY94tm3+34lk3e986BY1woae/C2+7b/4H35QH+xZIe0L9+\nn7m379TCd3OWjHOMpHdJepukG1fwrtfBv+w+I+nR/dsfPOTmmGs9t/cv1sG/NP9S0tcN8De9kSU9\ncID/uv5jf0qz35H4Cc2+k3KVZj9odcg1nKrZIuqsfpwDC8JHSPrLAT5/UPCX1uI11PpL6wJJr9Hc\nd+00W9ifJumDA/yPS/rqTd639M+Wfg7us/C2/f24fzfwGq6Ye/2XVp3H/nm7Nfsz8Vc1O1awyr14\nk2YL2J/U7M/I+Xtx6c5A/7xX9HPxbElv0Ox4w7Mk/bykd6xyL829bYdmvy/2nAH+RyQ9V9IPaPYb\nPL67f/uzJF08wP+w+r8fNfsH7fvn3se9ONG9eNcYqwotvPSfmEcd4u0h6ZsH+F+xyduPkvS1h9Hz\n7Rr4L9Ul4zxA0rErOg+VtFezrZN7fE628J4wQu8e9dtEkh6n2c8RW7p4Wxjj32l2fuprDuPj8wdF\n4S8tefyltUvSm3VwO/nW/vU3a9hW6vdv9jUg6bsG+P9V0n84xNv3Sbpm4DX8gg6xfS/pOEl/NPSe\nmvs8Xizpsys4b5B0+tzLI/q3P1rS764wzgmS/kCzreErJb1X0inqF/lL3D9Y5ToP4T9Ds+/CvlPS\nYzXbpfiipL+V9LQB/tdp9h3Vz2v2j7En9G9/uKRXGtyL3135Xnz8Yd6LF61yL97lZm4GXnhxfuEv\nrXs4tf/S+sPD/EvrvOR9UP0vrf75x0t6zuJ8au6c2QD/RPXnO+fe/rwp/LEbNPsH4dcehv+cEa5h\nHfOwin/Y17BF/yqfg2+W9KT+9RMkvVrSiSv6T+xf35jaX0PDSZqds1ytYZUn88LLdnmR9NKk/7Ka\n/uGOsfCXVtXPwUgNtf1BnwNJr9RsG/rdkv5OcwtwDfsfW6r6Dg1cw2ifgzdq9h2XSzT7x+RFkn5W\ns+MdP+XuuzSUwgKKl3vpi1Y4k+boOzRwDcN93fP/DP6oVvs/g7P/Z3HKd2jgGkb7HGT/L/OqvktD\nKWXpLxMGaJaIuHKLdy/9uTG1/Qkahvy8lHVfQ7Zh7dcwxudAs7Njt0lSKeX6iHiWpD+OiMdqdjZz\nVX9jYt+hgWsY53Pwf0spd0q6MyI+XUr5Qj/e7RHx5QZ8lwYWULCteYRm541uPcT7PtSA79DANYzz\nOfjHiNhbSrlckkopt0XEt2v2oyGe3IDv0MA1jPM5+LeIeEAp5f9o9mNZJM1+wKZmP/bE3XdpYAuP\nl+37ouSPxKjtOzRwDaN9DrL/Z3BV36GBaxjtc5D6v8xr+y4NpRR+mTAAAADAqrT2u/AAAAAAqsMC\nCgAAAGBFWEABAAAArAgLKACwICIeGhE/usbxXxURR65rfAC4d8ECCgBc+EpJP7bG8U/V7AfnAQCk\nYQEFAC6cIelxEXFZRLw9Ir5DkiLif0bE2f3rL4uIX+xff2FEXNw//7ci4j79278tIj4UER+NiD+M\niAdGxCslfZWkCyPizytdHwBsI1hAAYALp0n6dCnlKZLeL+lb+rcfrdkvUFX/tr+IiOMl/YCkb+yf\n/2VJPxQRR0l6vWa/FPSpmv3KlJ8opbxF0i2SNkopJ052RQCwbeEnkQOAC/O/iuKvJb2qXyh9XNLO\niHiUpGdKermkl0p6qqRLI0KSvkLSP0j6BklPlPSh/u330/CfFg4AMBgWUABgRynl5v7XKuzT7Dek\n75L0AklfKqX8a784OreU8tPzXv9rLS4opfzg1M0AcO+CLTwAcOFLkh489/giSa+S9BeS/krSq/v/\nStKfS/q+iHi4JEXEroh4TO98U0Q8rn/7AyPiuLnxH7L2qwCAewUsoADAglLKP0v6m4i4MiLerNli\n6YhSymckXabZ/6X3V/1zr5b0M5I+EBFXSPqAZr8j7HOS9kt6Z//2D0l6Qv8hflvS+zhEDgBjwO/C\nAwAAAFgRvgMFAAAAsCIsoAAAAABWhAUUAAAAwIqwgAIAAABYERZQAAAAACvCAgoAAABgRVhAAQAA\nAKwICygAAACAFfn/HDVvDqlgnjIAAAAASUVORK5CYII=\n", | |
"text": [ | |
"<matplotlib.figure.Figure at 0x19344d68>" | |
] | |
} | |
], | |
"prompt_number": 47 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"2.3- db_followers, find un-followed friends for the top 10 users" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def find_unfollowed(old, new):\n", | |
" unfollowed = []\n", | |
" for o in old:\n", | |
" if o not in new:\n", | |
" unfollowed.append(o)\n", | |
" \n", | |
" return unfollowed" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def get_username_from_id(id = [1654270760]):\n", | |
" usernames = []\n", | |
" try:\n", | |
" users = api.lookup_users(user_ids=id)\n", | |
" for u in users:\n", | |
" usernames.append(u.screen_name)\n", | |
" except:\n", | |
" usernames.append('ERROR')\n", | |
" \n", | |
" return usernames" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 31 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def get_followers(user_screen_name):\n", | |
" followers = []\n", | |
" \n", | |
" limit = api.rate_limit_status()['resources']['followers']['/followers/ids']\n", | |
" if limit['remaining'] == 0:\n", | |
" now = datetime.datetime.now()\n", | |
" delta = datetime.datetime.fromtimestamp(limit['reset']) - now\n", | |
" if delta.total_seconds()>0:\n", | |
" print 'sleeping for', delta.total_seconds(),'seconds'\n", | |
" time.sleep(delta.total_seconds())\n", | |
" elif limit['remaining']<5:\n", | |
" print 'remaining <5, sleeping for 20s...'\n", | |
" time.sleep(20)\n", | |
" \n", | |
" c = tweepy.Cursor(api.followers_ids, screen_name = user_screen_name).items()\n", | |
" \n", | |
" while True:\n", | |
" try:\n", | |
" f = c.next()\n", | |
" followers.append(f)\n", | |
" except tweepy.TweepError:\n", | |
" print 'sleeping for 10s...'\n", | |
" time.sleep(10)\n", | |
" continue\n", | |
" except StopIteration:\n", | |
" break\n", | |
" \n", | |
" return followers" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Find and store follwers in db_followers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db = conn['db_streamT']\n", | |
"dbf = conn['db_followers']\n", | |
"dbf.followers.drop()\n", | |
"pipe = [{'$group': {'_id':'$user.screen_name'}}]\n", | |
"for user in db.tweets.aggregate(pipe)['result']:\n", | |
" user_screen_name = user['_id']\n", | |
" print user_screen_name\n", | |
" followers = get_followers(user_screen_name)\n", | |
" print len(followers),'followers'\n", | |
" dbf.followers.insert({'user':user_screen_name,'followers':followers})" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"vinod272\n", | |
"26" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"Alibastrrr\n", | |
"74" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"Goldenrushmc\n", | |
"372" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"andreapurnama\n", | |
"10" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"devlato\n", | |
"58" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"kwareptar\n", | |
"37" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"IniciativaNerd\n", | |
"6418" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"GabStez\n", | |
"2833" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"infocrowler\n", | |
"386" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"sakurasong\n", | |
"19" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n", | |
"CH_MarcelloMark\n", | |
"remaining <5, sleeping for 20s..." | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"131" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" followers\n" | |
] | |
} | |
], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Find unfollowed friends for top 10 user (based on number of followers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"dbf = conn['db_followers']\n", | |
"fs = dbf.followers.aggregate([{'$project':{'user':1,'count':{'$size':'$followers'} }}, {'$sort':{'count':-1}}, {'$limit':10}])['result']\n", | |
"\n", | |
"# check for unfollowers\n", | |
"for f in fs:\n", | |
" print f['user'],f['count'],'followers'\n", | |
" followers_old = dbf.followers.find_one({'user':f['user']})['followers']\n", | |
" followers_new = get_followers(f['user'])\n", | |
" if len(followers_old) != len(followers_new):\n", | |
" diff = find_unfollowed(followers_old, followers_new)\n", | |
" if len(diff)>0:\n", | |
" unfollowed = get_username_from_id(diff)\n", | |
" if len(unfollowed)>0:\n", | |
" print 'Unfollowed friends: '\n", | |
" for u in unfollowed:\n", | |
" print u\n", | |
" \n", | |
" print '\\n'\n", | |
" else:\n", | |
" print 'no change in followers\\n'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"IniciativaNerd 6418 followers\n", | |
"remaining <5, sleeping for 20s..." | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"GabStez 2833 followers\n", | |
"sleeping for" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" 636.67 seconds\n", | |
"Unfollowed friends: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"waseemnadaf\n", | |
"\n", | |
"\n", | |
"infocrowler 386 followers\n", | |
"Unfollowed friends: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ossia\n", | |
"\n", | |
"\n", | |
"Goldenrushmc 372 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"CH_MarcelloMark 131 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"Alibastrrr 74 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"devlato 58 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"kwareptar 37 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"vinod272 26 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"sakurasong 19 followers\n", | |
"no change in followers\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 47 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"2.4- Sentiment analysis" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Create a classifier and train it using a sample of positive and negative tweets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# code from:\n", | |
"# http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/\n", | |
"\n", | |
"def get_words_in_tweets(tweets):\n", | |
" all_words = []\n", | |
" for (words, sentiment) in tweets:\n", | |
" all_words.extend(words)\n", | |
" return all_words\n", | |
"\n", | |
"def get_word_features(wordlist):\n", | |
" wordlist = nltk.FreqDist(wordlist)\n", | |
" word_features = wordlist.keys()\n", | |
" return word_features\n", | |
"\n", | |
"pos_tweets = [('Minecraft new launcher remove the need for standalone Java to play', 'positive'),\n", | |
" ('Minecraft PS4 Survival Lets Play playlist check it out #minecraft #minecrfatps4 #minecon #mojang #minecraft','positive'),\n", | |
" ('Standing room only for #Mojang #Minecraft session #SXSWGaming','positive'),\n", | |
" ('for sale apple startup vega bigdata microsoft', 'positive')]\n", | |
"neg_tweets = [('WTF Turkey?', 'negative'),\n", | |
" ('lawsuit against #Mojang in Turkey because #Minecraft is too violent!? WTF ! #gamedev #indiegamedev #indiegamelover', 'negative')]\n", | |
"tweets = []\n", | |
"\n", | |
"for (words, sentiment) in pos_tweets + neg_tweets:\n", | |
" words_filtered = remove_stopwords(remove_punctuation(remove_urls_usernames(words)))\n", | |
" # words_filtered = [e.lower() for e in words.split() if len(e) >= 3]\n", | |
" tweets.append((words_filtered, sentiment))\n", | |
" \n", | |
"word_features = get_word_features(get_words_in_tweets(tweets))\n", | |
"\n", | |
"def extract_features(document):\n", | |
" document_words = set(document)\n", | |
" features = {}\n", | |
" for word in word_features:\n", | |
" features['contains(%s)' % word] = (word in document_words)\n", | |
" return features\n", | |
"\n", | |
"training_set = nltk.classify.apply_features(extract_features, tweets)\n", | |
"classifier = nltk.NaiveBayesClassifier.train(training_set)\n", | |
"\n", | |
"# details of classifier\n", | |
"print classifier.show_most_informative_features(32)\n", | |
"\n", | |
"def classify_tweet(tweet):\n", | |
" tweet = remove_stopwords(remove_punctuation(remove_urls_usernames(tweet)))\n", | |
" print classifier.classify(extract_features(tweet))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Most Informative Features\n", | |
" contains(indiegamedev) = False positi : negati = 1.8 : 1.0\n", | |
" contains(gamedev) = False positi : negati = 1.8 : 1.0\n", | |
" contains(violent) = False positi : negati = 1.8 : 1.0\n", | |
" contains(lawsuit) = False positi : negati = 1.8 : 1.0\n", | |
"contains(indiegamelover) = False positi : negati = 1.8 : 1.0\n", | |
" contains(play) = False negati : positi = 1.7 : 1.0\n", | |
" contains(minecraft) = False negati : positi = 1.7 : 1.0\n", | |
" contains(minecraft) = True positi : negati = 1.4 : 1.0\n", | |
" contains(microsoft) = False negati : positi = 1.2 : 1.0\n", | |
" contains(standing) = False negati : positi = 1.2 : 1.0\n", | |
" contains(bigdata) = False negati : positi = 1.2 : 1.0\n", | |
" contains(room) = False negati : positi = 1.2 : 1.0\n", | |
" contains(need) = False negati : positi = 1.2 : 1.0\n", | |
" contains(apple) = False negati : positi = 1.2 : 1.0\n", | |
" contains(check) = False negati : positi = 1.2 : 1.0\n", | |
" contains(ps4) = False negati : positi = 1.2 : 1.0\n", | |
" contains(new) = False negati : positi = 1.2 : 1.0\n", | |
" contains(minecrfatps4) = False negati : positi = 1.2 : 1.0\n", | |
" contains(playlist) = False negati : positi = 1.2 : 1.0\n", | |
" contains(session) = False negati : positi = 1.2 : 1.0\n", | |
" contains(minecon) = False negati : positi = 1.2 : 1.0\n", | |
" contains(startup) = False negati : positi = 1.2 : 1.0\n", | |
" contains(sale) = False negati : positi = 1.2 : 1.0\n", | |
" contains(vega) = False negati : positi = 1.2 : 1.0\n", | |
" contains(sxswgaming) = False negati : positi = 1.2 : 1.0\n", | |
" contains(lets) = False negati : positi = 1.2 : 1.0\n", | |
" contains(java) = False negati : positi = 1.2 : 1.0\n", | |
" contains(standalone) = False negati : positi = 1.2 : 1.0\n", | |
" contains(remove) = False negati : positi = 1.2 : 1.0\n", | |
" contains(launcher) = False negati : positi = 1.2 : 1.0\n", | |
" contains(survival) = False negati : positi = 1.2 : 1.0\n", | |
"None\n" | |
] | |
} | |
], | |
"prompt_number": 95 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Classify tweets as positive or negative" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"tweets = top_retweets('db_tweets')\n", | |
"for tweet in tweets:\n", | |
" print tweet['text']\n", | |
" print classify_tweet(tweet['text'])\n", | |
" print '\\n'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"RT @Unganked: Scrolls #2 - Buying 100 Scrolls! - YouTube http://t.co/sdFrYe78Yw #scrolls #mojang #microsoft #games #gaming #letsplay\n", | |
"positive\n", | |
"None\n", | |
"\n", | |
"\n", | |
"RT @bondmoran1: http://t.co/LuQnMbMRsN\n", | |
"For sale\n", | |
"https://t.co/g4kV5xLhkg\n", | |
"#Apple #Startup #Vegas #bigdata #Microsoft \n", | |
"#google #investor #UK \u2026\n", | |
"positive\n", | |
"None\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 96 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"3- Storing and retrieving task" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"db1 = conn['db_tweets']\n", | |
"db2 = conn['db_streamT']\n", | |
"db1_key = 'db_tweets.json'\n", | |
"db2_key = 'db_streamT.json'\n", | |
"\n", | |
"c = boto.connect_s3(aws_key, aws_secret)\n", | |
"b = c.get_bucket('amirziai-mids-w205-assignment3')\n", | |
"k = Key(b)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 73 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"3.1- create and store backups of db_tweets and db_stremT to S3 + restore backups" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"I'm going to store the collection inside a JSON file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"k.key = db1_key\n", | |
"k.set_contents_from_string(dumps(db1.tweets.find()))\n", | |
"k.key = db2_key\n", | |
"k.set_contents_from_string(dumps(db2.tweets.find()))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 74, | |
"text": [ | |
"15214" | |
] | |
} | |
], | |
"prompt_number": 74 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Now I'll read both collections from S3 and restore them into their corresponding MongoDB db" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"k.key = db1_key\n", | |
"j = json.loads(k.get_contents_as_string(), object_hook=json_util.object_hook)\n", | |
"db1.tweets.drop()\n", | |
"for tweet in j:\n", | |
" db1.tweets.insert(tweet)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 95 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"k.key = db2_key\n", | |
"j = json.loads(k.get_contents_as_string(), object_hook=json_util.object_hook)\n", | |
"db2.tweets.drop()\n", | |
"for tweet in j:\n", | |
" db2.tweets.insert(tweet)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 94 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment