Skip to content

Instantly share code, notes, and snippets.

@amirziai
Last active August 29, 2015 14:17
Show Gist options
  • Save amirziai/40bded657a6731681234 to your computer and use it in GitHub Desktop.
Save amirziai/40bded657a6731681234 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "",
"signature": "sha256:3788d1437e77b7d2fa2a78e7dca1bdda202f15914571d134164b46047b4601db"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"MIDS W205 Assignment 3"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Storing, Retrieving, and Analyzing Social Media Data Using MongoDB"
]
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": [
"Amir Ziai @amirziai"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dependencies: boto, bson, matplotlib, nltk, pandas, pymongo, tweepy"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%matplotlib inline\n",
"import datetime\n",
"import json\n",
"import os\n",
"import re\n",
"import string\n",
"import time\n",
"import urllib\n",
"\n",
"import boto\n",
"import nltk\n",
"import pandas as pd\n",
"import pymongo\n",
"import tweepy\n",
"from boto.s3.key import Key\n",
"from bson import json_util\n",
"from bson.json_util import dumps\n",
"from nltk import word_tokenize\n",
"\n",
"# strptime pattern used to parse the `since` date of the Twitter search\n",
"xsdDateFormat = \"%Y-%m-%d\"\n",
"# MongoClient() with no arguments connects using pymongo's default host and port\n",
"conn = pymongo.MongoClient()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 49
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Credentials are read from environment variables so secrets never have to be\n",
"# saved in the notebook; each one falls back to '' (the previous placeholder).\n",
"aws_key = os.environ.get('AWS_ACCESS_KEY_ID', '')\n",
"aws_secret = os.environ.get('AWS_SECRET_ACCESS_KEY', '')\n",
"consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')\n",
"consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')\n",
"access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')\n",
"access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build an authenticated Twitter API client from the OAuth credentials defined above.\n",
"auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
"auth.set_access_token(access_token, access_token_secret)\n",
"# NOTE(review): per the tweepy docs the two wait_on_rate_limit flags make the client\n",
"# wait out rate limits (and report it) instead of raising -- confirm for the installed version.\n",
"api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Ancillary functions for text cleanup"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def remove_urls_usernames(txt):\n",
"    \"\"\"Strip URLs and @usernames from a tweet and return lowercase tokens.\n",
"\n",
"    Non-ASCII characters are dropped before the text is tokenized.\n",
"\n",
"    txt -- the tweet text (a unicode string)\n",
"\n",
"    Returns the list of lowercased tokens produced by nltk's word_tokenize.\n",
"    \"\"\"\n",
"    # Drop non-ASCII characters. The previous utf-8 -> 'unicode_escape' round\n",
"    # trip also interpreted backslash sequences found in the tweet, so a tweet\n",
"    # containing a stray backslash could raise UnicodeDecodeError.\n",
"    txt = txt.encode('ascii', 'ignore').decode('ascii')\n",
"\n",
"    regex_url = r'(http|https)://\\S+'\n",
"    regex_username = r'@\\S+'\n",
"    txt_re = re.sub(regex_url, '', txt)\n",
"    txt_re = re.sub(regex_username, '', txt_re)\n",
"\n",
"    return [x.lower() for x in word_tokenize(txt_re)]\n",
"\n",
"\n",
"def remove_punctuation(tokenized_docs):\n",
"    \"\"\"Remove punctuation characters from every token.\n",
"\n",
"    tokenized_docs -- iterable of string tokens\n",
"\n",
"    Returns a list of the tokens with every string.punctuation character\n",
"    removed; tokens that end up empty are dropped.\n",
"    \"\"\"\n",
"    regex = re.compile('[%s]' % re.escape(string.punctuation))\n",
"\n",
"    stripped = (regex.sub(u'', token) for token in tokenized_docs)\n",
"    return [token for token in stripped if token != u'']\n",
"\n",
"\n",
"def remove_stopwords(tokenized_docs_no_punctuation):\n",
"    \"\"\"Drop English stopwords from a sequence of tokens.\n",
"\n",
"    tokenized_docs_no_punctuation -- iterable of string tokens\n",
"\n",
"    Returns the tokens, in their original order, that are not in nltk's\n",
"    English stopword list.\n",
"    \"\"\"\n",
"    from nltk.corpus import stopwords\n",
"\n",
"    # Load the stopword list once; it used to be re-read for every token.\n",
"    english_stopwords = set(stopwords.words('english'))\n",
"\n",
"    return [word for word in tokenized_docs_no_punctuation\n",
"            if word not in english_stopwords]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"1- Storing task"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"1.1- db_streamT"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the search covers all tweets posted since January 1, 2015"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Start date of the search, parsed into a datetime.date using xsdDateFormat.\n",
"since = '2015-1-1'\n",
"start = datetime.datetime.strptime(since, xsdDateFormat).date()\n",
"# Query string handed to the Twitter search API in the next cell.\n",
"q='#microsoft #mojang'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fetch up to 100 matching tweets and store each tweet id once in db_streamT.tweets.\n",
"db = conn['db_streamT']\n",
"db.tweets.drop()  # start from an empty collection on every run\n",
"tweets = tweepy.Cursor(api.search, q=q, since=start).items(100)\n",
"for tweet in tweets:\n",
"    tweet_id = tweet._json['id']\n",
"    print tweet.text + '\\n'\n",
"    # guard against storing the same tweet id twice\n",
"    if db.tweets.find({'id':tweet_id}).count() == 0:\n",
"        db.tweets.insert(tweet._json)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"#Turki Akan Larang Peredaran Minecraft - #Re http://t.co/pAqBqvvKHy\n",
" #gamingnews #microsoft #mojang\n",
"\n",
"When someone asks me to play Minecraft XD #minecraft #Mojang #microsoft https://t.co/yUHu5UJnGt\n",
"\n",
"#Turki Akan Larang Peredaran Minecraft - #Re http://t.co/pAqBqvvKHy\n",
" #gamingnews #microsoft #mojang\n",
"\n",
"The Maya's Pyramid ... #minecraft #minecraftpe #mojang #microsoft #maya #pyramid #android #gamer @Mojang @Microsoft http://t.co/W6WB4gbQqn\n",
"\n",
"#Turki Akan Larang Peredaran Minecraft - #Re http://t.co/pAqBqvvKHy\n",
" #gamingnews #microsoft #mojang\n",
"\n",
"New post: #Microsoft #Minecraft #Mojang #News #PC Mojang Responds to Potential \u2018Minecraft\u2019 Ban in Turkey http://t.co/k4rRmZpfFz\n",
"\n",
"RT @SAU_Pallab: Minecraft's new launcher remove the need for standalone Java to play http://t.co/CoAutNwt7I #Microsoft #Mojang http://t.co/\u2026\n",
"\n",
"RT @WinBetaDotOrg: Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.\u2026\n",
"\n",
"Minecraft's new launcher remove the need for standalone Java to play http://t.co/CoAutNwt7I #Microsoft #Mojang http://t.co/JfsWyzIRuD\n",
"\n",
"RT @WinBetaDotOrg: Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.\u2026\n",
"\n",
"RT @WinBetaDotOrg: Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.\u2026\n",
"\n",
"Minecraft's new launcher remove the need for standalone Java to play http://t.co/h1lNtJ2V8P #Microsoft #Mojang http://t.co/BxgZTfb7kp\n",
"\n",
"Minecraft's new launcher remove the need for standalone Java to play http://t.co/FohB186I9G #Microsoft #Mojang http://t.co/vIQnK31D5O\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Mojang' dan Minecraft A\u00e7\u0131klamas\u0131 #minecraft #mojang #microsoft\n",
"http://t.co/SdeM1kPe0L http://t.co/0Uq7upsMwa\n",
"\n",
"http://t.co/ZzSEK3UhQL\n",
"For sale\n",
"https://t.co/yS52UKlYqX\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"RT @Designretrovint: http://t.co/YFo8wxhmGt\n",
"For sale\n",
"https://t.co/WPmD0AqH6W\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor\u2026\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"http://t.co/YFo8wxhmGt\n",
"For sale\n",
"https://t.co/WPmD0AqH6W\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/o4aON6Fm4f\n",
"For sale\n",
"https://t.co/ile5EpYMfk\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"RT @domaintoyou: http://t.co/aNXkh0Y4aC\n",
"For sale\n",
"https://t.co/0aeNmg5qRW\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK\u2026\n",
"\n",
"RT @domainfuture: http://t.co/kGcTqz3LLA\n",
"For sale\n",
"https://t.co/cS6vBWQj6V\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #U\u2026\n",
"\n",
"http://t.co/d2PbMNlBSH\n",
"For sale\n",
"https://t.co/TBcjcSipcb\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/V45faz5DEd\n",
"For sale\n",
"https://t.co/FvNlWUU5SD\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/i9oj9auu1R\n",
"For sale\n",
"https://t.co/FqGByZ3HPR\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/kGcTqz3LLA\n",
"For sale\n",
"https://t.co/cS6vBWQj6V\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/aNXkh0Y4aC\n",
"For sale\n",
"https://t.co/0aeNmg5qRW\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/h660oD1L8J\n",
"For sale\n",
"https://t.co/TN0PtPnBLN\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/FctpyWnF1D\n",
"For sale\n",
"https://t.co/zT0GXuS8Kl\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/9zC1bB4iZr\n",
"For sale\n",
"https://t.co/w8WGZSoWew\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/LuQnMbMRsN\n",
"For sale\n",
"https://t.co/g4kV5xLhkg\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"http://t.co/IT7mooEZec\n",
"For sale\n",
"https://t.co/k85Nq1ed8U\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n",
"\n",
"http://t.co/H05gO5AK4Q\n",
"For sale\n",
"https://t.co/ORbvyqKebk\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK #Amazon #Mojang\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"WTF Turkey?\n",
"http://t.co/antqspUPud\n",
"#Minecraft #Turchia #Mojang #Microsoft\n",
"\n",
"New post: #Microsoft #Minecraft #Mojang #News \u2018Minecraft\u2019 May Get Banned in Turkey http://t.co/VDgXvOSvaR\n",
"\n",
"#Notch \u0e1c\u0e39\u0e49\u0e2a\u0e23\u0e49\u0e32\u0e07 #\u0e40\u0e01\u0e21 #Minecraft \u0e40\u0e1b\u0e34\u0e14\u0e43\u0e08 \u0e2d\u0e30\u0e44\u0e23\u0e04\u0e37\u0e2d #\u0e40\u0e2b\u0e15\u0e38\u0e1c\u0e25 \u0e17\u0e35\u0e48\u0e15\u0e49\u0e2d\u0e07\u0e02\u0e32\u0e22\u0e1a\u0e23\u0e34\u0e29\u0e31\u0e17 #Mojang \u0e43\u0e2b\u0e49 #Microsoft http://t.co/P5l9jJIDMN #pantip #Lumia #Game\n",
"\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"db.tweets.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"34"
]
}
],
"prompt_number": 8
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"1.2- db_tweets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read from S3 bucket and insert into MongoDB (db:db_tweets, collection: tweets)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load every object of the assignment-2 S3 bucket into db_tweets.tweets.\n",
"db = conn['db_tweets']\n",
"db.tweets.drop()  # start from an empty collection on every run\n",
"c = boto.connect_s3(aws_key, aws_secret)\n",
"b = c.get_bucket('amirziai-mids-w205-assignment2')\n",
"\n",
"for k in b.list():\n",
"    # each object is expected to hold a JSON array of tweet documents\n",
"    for tweet in json.loads(k.get_contents_as_string()):\n",
"        db.tweets.insert(tweet)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 53
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"db.tweets.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"23"
]
}
],
"prompt_number": 6
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"2- Retrieving and analyzing task"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"2.1- Top 30 retweets and associated username and locations of users"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def top_retweets(db, limit = 30):\n",
"    \"\"\"Return the most-retweeted retweets stored in a database's tweets collection.\n",
"\n",
"    db -- name of the MongoDB database to query (looked up on the global conn)\n",
"    limit -- maximum number of tweets to return\n",
"\n",
"    Returns a list of documents restricted to text, user.screen_name and\n",
"    user.location (plus MongoDB's _id), ordered by the retweet count of the\n",
"    original tweet, highest first. Returns [] if the query fails with a\n",
"    MongoDB error.\n",
"    \"\"\"\n",
"    projection = {'user.location':1, 'user.screen_name':1, 'text':1}\n",
"    try:\n",
"        cursor = conn[db].tweets.find({'retweeted_status':{ '$exists': True}}, projection)\n",
"        # Rank by popularity; without a sort the query only returned the first\n",
"        # `limit` retweets in storage order. Assumes the stored documents are\n",
"        # Twitter API tweet objects carrying retweeted_status.retweet_count.\n",
"        cursor = cursor.sort('retweeted_status.retweet_count', pymongo.DESCENDING)\n",
"        return list(cursor.limit(limit))\n",
"    except pymongo.errors.PyMongoError:\n",
"        # Best effort: a database failure yields no results, but programming\n",
"        # errors are no longer hidden by a bare except.\n",
"        return []\n",
"\n",
"# Print a numbered list of the retweets with each author's screen name and location.\n",
"tweets = top_retweets('db_tweets')\n",
"for i, tweet in enumerate(tweets, 1):\n",
"    print str(i) + '-', tweet['text'],'\\n',tweet['user']['screen_name'],'from',tweet['user']['location'],'\\n'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1- RT @Unganked: Scrolls #2 - Buying 100 Scrolls! - YouTube http://t.co/sdFrYe78Yw #scrolls #mojang #microsoft #games #gaming #letsplay \n",
"JonathanMH_com from Odense, Denmark \n",
"\n",
"2- RT @bondmoran1: http://t.co/LuQnMbMRsN\n",
"For sale\n",
"https://t.co/g4kV5xLhkg\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK \u2026 \n",
"NoSQLDigest from \n",
"\n"
]
}
],
"prompt_number": 70
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"2.2- Lexical diversity of db_streamT tweets"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"db = conn['db_streamT']\n",
"\n",
"# Recompute the lexical diversity (unique words / total words) of every stored tweet.\n",
"db.lexical_diversity.drop()\n",
"for tweet in list(db.tweets.find()):\n",
"    words = remove_stopwords(remove_punctuation(remove_urls_usernames(tweet['text'])))\n",
"    # A tweet made only of URLs, mentions, punctuation and stopwords leaves no\n",
"    # words; record 0.0 instead of dividing by zero.\n",
"    ld = float(len(set(words)))/len(words) if words else 0.0\n",
"    db.lexical_diversity.insert({'tweet_id': tweet['id'], 'lexical_diversity':ld})"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 34
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": [
"Plot data"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Print each tweet's lexical diversity and collect the values for a bar chart.\n",
"rows = []\n",
"for c, a in enumerate(db.lexical_diversity.find(), 1):\n",
"    print 'tweet id:',a['tweet_id'],', lexical diversity:',a['lexical_diversity']\n",
"    # label the bars 1..n rather than with the long tweet ids\n",
"    rows.append({'tweet':str(c), 'ld':float(a['lexical_diversity'])})\n",
"\n",
"ld = pd.DataFrame(rows).set_index('tweet')\n",
"ld.plot(kind='bar', figsize=(10,6))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"tweet id: 577054587829530624 , lexical diversity: 1.0\n",
"tweet id: 576799469901111296 , lexical diversity: 0.875\n",
"tweet id: 576513550958616577 , lexical diversity: 1.0\n",
"tweet id: 576271407320948736 , lexical diversity: 0.8\n",
"tweet id: 576215147187978240 , lexical diversity: 1.0\n",
"tweet id: 576181101590265856 , lexical diversity: 0.846153846154\n",
"tweet id: 576062093235003393 , lexical diversity: 1.0\n",
"tweet id: 576055225292701697 , lexical diversity: 1.0\n",
"tweet id: 576049705903247360 , lexical diversity: 1.0\n",
"tweet id: 576048855457792001 , lexical diversity: 1.0\n",
"tweet id: 576046157127946240 , lexical diversity: 1.0\n",
"tweet id: 576046032041066496 , lexical diversity: 1.0\n",
"tweet id: 576046024516501504 , lexical diversity: 1.0\n",
"tweet id: 576028588878782464 , lexical diversity: 0.714285714286\n",
"tweet id: 575912994988802048 , lexical diversity: 1.0\n",
"tweet id: 575912946137718784 , lexical diversity: 1.0\n",
"tweet id: 575912945361752065 , lexical diversity: 1.0\n",
"tweet id: 575912882224939008 , lexical diversity: 1.0\n",
"tweet id: 575912844434276352 , lexical diversity: 1.0\n",
"tweet id: 575912842811084801 , lexical diversity: 1.0\n",
"tweet id: 575912831016636416 , lexical diversity: 1.0\n",
"tweet id: 575912792777232384 , lexical diversity: 1.0\n",
"tweet id: 575912749907247104 , lexical diversity: 1.0\n",
"tweet id: 575912708383637504 , lexical diversity: 1.0\n",
"tweet id: 575912656940494848 , lexical diversity: 1.0\n",
"tweet id: 575912572806938624 , lexical diversity: 1.0\n",
"tweet id: 575912521275699200 , lexical diversity: 1.0\n",
"tweet id: 575912479836008448 , lexical diversity: 1.0\n",
"tweet id: 575912441084780544 , lexical diversity: 1.0\n",
"tweet id: 575912398684553216 , lexical diversity: 1.0\n",
"tweet id: 575911711234002944 , lexical diversity: 1.0\n",
"tweet id: 575607566031589376 , lexical diversity: 1.0\n",
"tweet id: 575374079059828737 , lexical diversity: 0.909090909091\n",
"tweet id: 575279880209760256 , lexical diversity: 1.0\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 47,
"text": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1934bdd8>"
]
},
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAGACAYAAABiNFz3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXuUZVdd578/0oDh2TbhZdLQWRAxMGIjDxlfVAgjTZaK\nb1aUR8MszegAQUUCogbHB4FxjcqAGk3IijgSRR0Ms+QRXDm+ICHBJCQQmAQS81IUDY84WU4y7Pnj\nnk7f3HTVPbd/55793ZXPZ61aqVt1P7s+p/ap7t21d6qilCIAAAAAGM59agcAAAAAtAYLKAAAAIAV\nYQEFAAAAsCIsoAAAAABWhAUUAAAAwIqwgAIAAABYkaULqIh4e0R8NiKu3OI5b4mIayLiioh4yriJ\nAAAAAF4M+Q7UOZL2bfbOiDhJ0uNLKcdJ+hFJvzlSGwAAAIAlSxdQpZS/knTrFk/5Tknn9s+9WNLO\niHjkOHkAAAAAfoxxBupoSTfOPb5J0jEjjAsAAABgyViHyGPhMb8fBgAAALYtO0YY42ZJu+ceH9O/\n7W5EBIsqAAAAaIZSyuI3iO5ijO9AnS/pxZIUEc+U9PlSymc3CRn80htzLy9ZeLzaeKUUveQlL6na\nUPvj12gY4xpqN6zfn+bzWPte9J8H/3ms7Ts0jO87NDCPrn+/bcXS70BFxDslPUvSURFxo6TTJd23\nv7gzSyl/FhEnRcS1kv5V0kuXflQAAACAhlm6gCqlnDzgOS8fJ2cr9uRH2JMdI+fX/vgeDVnfoaG2\nnx+j/n0wxhit+w4NWd+hIes7NNT2HRqy/vR/rjX0k8g38iNsZMfI+bU/vkdD1ndoqO3nx6h/H4wx\nRuu+Q0PWd2jI+g4NtX2Hhqw//Z9rDS2gAAAAADwY4//CSxGx6QF33fOnI6zijsXqDQcPpwEAAMB2\nJKb6yz4iyqE+VkRsqwXHmNczW5xtNdb6P3fZhjGuoXbD+v31N2RhHj3msbbv0MA8Mo9jMbBhrT/G\nAAAAAOBeBQuoCem6LjvCNmjI+g4Ntf38GPXvgzHGaN13aMj6Dg1Z36Ghtu/QkPWn/3ONBRQAAADA\ninAGahP27Nmjs88+WyeeeOLd3t51nV70ohfpxhtvPKTHGahxfYeG7XDmIgvz6DGPtX2HBuaReRyL\n7Bmo6v8X3qGY4v+uG3JzTfN/+QEAAEBrGG/hlTW+1MHh3En9hqzv0FDbz49R/z4YY4zWfYeGrO/Q\nkPUdGmr7Dg1ZnzNQdtx+++3av3+/du3apSc96Um65JJLaicBAABAZSzPQA3bo07VLN3CO/bYY3XW\nWWfpggsu0Ic//GH96Z/+qW677Tbt27dPX/ziF3XDDTccemTOQI3qOzRshzMXWZhHj3ms7Ts0MI/M\n41jwc6DWzLve9S69/vWv186dO3XMMcfo1FNPtTz0DgAAANPBAmoJt9xyi3bv3n3X48c85jGHPZbD\nuZP6DVnfoaG2nx+j/n0wxhit+w4NWd+hIes7NNT2HRqyPmeg7Hj0ox99t+26zbbuAAAA4N4DZ6A2\nYf4M1EUXXaR3v/vduu2223TSSSfp1ltv5edADWxgr97jzEUW5tFjHmv7Dg3MI/M4Ftv4DFSs8WVg\nQYROP/10Pfaxj9Wxxx6rffv26cUvfjE/HwoAAOBejuUCqpSy9pdlXHfddXr2s5+tI488Uueee65u\nvfVWXXXVVXr1q1992Nt4DudO6jdkfYeG2n5+jPr3wRhjtO47NGR9h4as79BQ23doyPqcgQIAAACw\nx/IMVMtwBmpc36FhO5y5yMI8esxjbd+hgXlkHsdiG5+BAgAAAPCEBdSEOJw7qd+Q9R0aavv5Merf\nB2OM0brv0JD1HRqyvkNDbd+hIetzBgoAAADAHs5AjQxnoMb1HRq2w5mLLMyjxzzW9h0amEfmcSyy\nZ6B2jJ+0OvxcJQAAAGiJ6lt4W/+cpjL3cuHC49V/Xt
SFF154GD8ravWGzXA4d1K/Ies7NNT282PU\nvw/GGKN136Eh6zs0ZH2Hhtq+Q0PWn/7PNYvvQAEAAACswpDdq7Uebah9BmqL56uR/dFt+/HHaGCv\n3uPMRRbm0WMea/sODcwj8zhxAz8HCgAAAGAsGlpAdfkRKp/7qP3xPRqyvkNDbT8/Rv37YIwxWvcd\nGrK+Q0PWd2io7Ts0ZP3pGxpaQAEAAAB4wBko44baH3+Mhob2uSv662/Iwjx6zGNt36GBeWQeJ27g\nDBQAAADAWDS0gOryI3AGyqAh6zs01PbzY9S/D8YYo3XfoSHrOzRkfYeG2r5DQ9afvqGhBRQAAACA\nB5yBMm6o/fHHaGhon7uiv/6GLMyjxzzW9h0amEfmceIGzkABAAAAjEVDC6hupWdHxKCXdTbcwzY4\nd1K/Ies7NNT282PUvw/GGKN136Eh6zs0ZH2Hhtq+Q0PWn76hoQXU4VAWXi5ceAwAAACwOtv2DJTD\nPneW2h9/jIaG9rkr+utvyMI8esxjbd+hgXlkHidu4AwUAAAAwFg0tIDqDMbI+Q7nTuo3ZH2Hhtp+\nfoz698EYY7TuOzRkfYeGrO/QUNt3aMj60zc0tIACAAAA8IAzUNv83Enthob2uSv662/Iwjx6zGNt\n36GBeWQeJ27gDBQAAADAWDS0gOoMxsj5DudO6jdkfYeG2n5+jPr3wRhjtO47NGR9h4as79BQ23do\nyPrTNzS0gAIAAADwgDNQ94pzJ1vDPndtf/0NWZhHj3ms7Ts0MI/M48QNm/4lumPLOtgmbH2DAAAA\nwGo0tIXXGYyR8zl3Mobv0FDbz4/BvejgOzRkfYeGrO/QUNt3aMj60zc0tIACAAAA8IAzUJw7YZ+7\nur/+hizMo8c81vYdGphH5nHiBn4OFAAAAMBYNLSA6gzGyPmcOxnDd2io7efH4F508B0asr5DQ9Z3\naKjtOzRk/ekbGlpAAQAAAHjAGSjOnbDPXd1ff0MW5tFjHmv7Dg3MI/M4cQNnoAAAAADGoqEFVGcw\nRs7n3MkYvkNDbT8/Bveig+/QkPUdGrK+Q0Nt36Eh60/fsHQBFRH7IuKTEXFNRJx2iPcfFRHvi4jL\nI+KqiNi/UgEAAABAY2x5BioijpD0KUnPkXSzpEsknVxKuXruOW+QdP9Syusi4qj++Y8spdy5MBZn\noFakoT3itfkODdyLzOMw36GBr0fmcbnv0NDQNRz2GahnSLq2lHJ9KeUOSedJev7Cc/5e0kP61x8i\n6Z8XF08AAAAA24llC6ijJd049/im/m3z/I6kJ0XELZKukHTqeHnzdAZj5HzOnYzhOzTU9vNjcC86\n+A4NWd+hIes7NNT2HRqy/vQNyxZQQ/YEflrS5aWUr5K0V9LbIuLBK1UAAAAANMSOJe+/WdLuuce7\nNfsu1DzfKOmXJKmU8umIuE7SEyRdujjY/v37tWfPHknSzp07tXfvXm1sbEg6+C/iA49ndJI2f9x1\n3Qp+t1Bz98eL/j3/hb66f8IJJ2gZB/Zn1/HxD/38jab8g/M572/cY7zW/QPOur4eDjXeqs8f4+Pf\n3d388b3H32jKb+XraTVfc287+HidX4+1fd+vh6y/McLHf4Ok6zWEZYfId2h2KPxESbdI+ojueYj8\nv0n6Qinl5yPikZI+KunJpZR/WRjrXneIvLbv0MA1eNyLWZhHj3ms7Ts0MI/M48QNh3eIvD8M/nJJ\n75f0CUl/UEq5OiJOiYhT+qf9sqSnRcQVkj4o6TWLi6dx6AzGaN13aMj6Dg21/fwY9/wX2LQff5wx\nWvcdGrK+Q0PWd2io7Ts0ZP3pG5Zt4amU8l5J711425lzr39O0nes9FEBAAAAGobfhWf8LcqGvsW5\nNt+hYTvci1mYR495rO07NDCPzOPEDfwuPAAAAICxaGgB1RmM0brv0JD1HRpq+/kxOAPl4Ds0ZH2H\nhqzv0FDbd2jI+t
M3NLSAAgAAAPCAM1DGe7wN7RGvzXdo2A73Yhbm0WMea/sODcwj8zhxA2egAAAA\nAMaioQVUZzBG675DQ9Z3aKjt58fgDJSD79CQ9R0asr5DQ23foSHrT9/Q0AIKAAAAwAPOQBnv8Ta0\nR7w236FhO9yLWZhHj3ms7Ts0MI/M48QNnIECAAAAGIuGFlCdwRit+w4NWd+hobafH4MzUA6+Q0PW\nd2jI+g4NtX2Hhqw/fUNDCygAAAAADzgDZbzH29Ae8dp8h4btcC9mYR495rG279DAPDKPEzdwBgoA\nAABgLBpaQHUGY7TuOzRkfYeG2n5+DM5AOfgODVnfoSHrOzTU9h0asv70DQ0toAAAAAA84AyU8R5v\nQ3vEa/MdGrbDvZiFefSYx9q+QwPzyDxO3MAZKAAAAICxaGgB1RmM0brv0JD1HRpq+/kxOAPl4Ds0\nZH2Hhqzv0FDbd2jI+tM3NLSAAgAAAPCAM1DGe7wN7RGvzXdo2A73Yhbm0WMea/sODcwj8zhxA2eg\nAAAAAMaioQVUZzBG675DQ9Z3aKjt58fgDJSD79CQ9R0asr5DQ23foSHrT9/Q0AIKAAAAwAPOQBnv\n8Ta0R7w236Fh+9yLW8M8tjKPfD0yj8zjhA2b/uG5Y8s6ANhGbP0HBQAADKehLbzOYIzWfYeGrO/Q\nUNt3aMj6Dg21fYeGrO/QkPUdGmr7Dg1Zf/qGhhZQAAAAAB5wBsp4j7ehPeK1+Q4N3IvM4zDfoYF5\nZB6X+w4NDV0DPwcKAAAAYCwaWkB1BmO07js0ZH2Hhtq+Q0PWd2io7Ts0ZH2Hhqzv0FDbd2jI+tM3\nNLSAAgAAAPCAM1DGe7wN7RGvzXdo4F5kHof5Dg3MI/O43HdoaOgaOAMFAAAAMBYNLaA6gzFa9x0a\nsr5DQ23foSHrOzTU9h0asr5DQ9Z3aKjtOzRk/ekbGlpAAQAAAHjAGSjjPd6G9ojX5js0cC8yj8N8\nhwbmkXlc7js0NHQNnIECAAAAGIuGFlCdwRit+w4NWd+hobbv0JD1HRpq+w4NWd+hIes7NNT2HRqy\n/vQNDS2gAAAAADzgDJTxHm9De8Rr8x0auBeZx2G+QwPzyDwu9x0aGroGzkABAAAAjEVDC6jOYIzW\nfYeGrO/QUNt3aMj6Dg21fYeGrO/QkPUdGmr7Dg1Zf/qGhhZQAAAAAB5wBsp4j7ehPeK1+Q4N3IvM\n4zDfoYF5ZB6X+w4NDV0DZ6AAAAAAxqKhBVRnMEbrvkND1ndoqO07NGR9h4bavkND1ndoyPoODbV9\nh4asP31DQwsoAAAAAA84A2W8x9vQHvHafIcG7kXmcZjv0MA8Mo/LfYeGhq6BM1AAAAAAY9HQAqoz\nGKN136Eh6zs01PYdGrK+Q0Nt36Eh6zs0ZH2Hhtq+Q0PWn76hoQUUAAAAgAecgTLe421oj3htvkMD\n9yLzOMx3aGAemcflvkNDQ9fAGSgAAACAsWhoAdUZjNG679CQ9R0aavsODVnfoaG279CQ9R0asr5D\nQ23foSHrT9/Q0AIKAAAAwAPOQBnv8Ta0R7w236GBe5F5HOY7NDCPzONy36GhoWvgDBQAAADAWCxd\nQEXEvoj4ZERcExGnbfKcjYi4LCKuiohu9EpJLe6P+vkODVnfoaG279CQ9R0aavsODVnfoSHrOzTU\n9h0asv70DTu2emdEHCHprZKeI+lmSZdExPmllKvnnrNT0tskPbeUclNEHLVqMgAAAEBLbHkGKiL+\nvaTTSyn7+sevlaRSyhlzz/kxSY8qpfzclh+IM1CT+w4NXAP3oksD88g8DvMdGphHo2s47DNQR0u6\nce7xTf3b5jlO0q6IuDAiLo2IFy0ZEwAAAKBpli2ghnzL6L6Svl7SSZKeK+lnI+K4bNg96QzGaN13\naMj6Dg21fYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb9jyDJRm5552zz3erdl3oea5
UdLnSim3S7o9\nIv5S0tdJumZxsP3792vPnj2SpJ07d2rv3r3a2NiYZXez8AOPZ3SSDjy+vP/vwfd3XbeC3/VjzD+e\ne+aCf+Dx3cdq3d/8sat/cD4PPP/Qj1v3DzjTfT1M63t+PdT2N3/s6rfy9bS639bXU9b3/Hpw+Hq6\nvH/9eg1h2RmoHZI+JelESbdI+oikkxcOkX+NZgfNnyvp/pIulvSCUsonFsbiDNTEvkMD18C96NLA\nPDKPw3yHBubR6Bo2PQO15XegSil3RsTLJb1f0hGSzi6lXB0Rp/TvP7OU8smIeJ+kj0n6sqTfWVw8\nAQAAAGwnlv4cqFLKe0spTyilPL6U8sb+bWeWUs6ce86vlFKeVEr52lLKW9aT2hmM0brv0JD1HRpq\n+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP38BPIgcAAABYEX4XnvEeb0N7xGvzHRq4F5nHYb5DA/PI\nPC73HRoaugZ+Fx4AAADAWDS0gOoMxmjdd2jI+g4NtX2Hhqzv0FDbd2jI+g4NWd+hobbv0JD1p29o\naAEFAAAA4AFnoIz3eBvaI16b79DAvcg8DvMdGphH5nG579DQ0DVwBgoAAABgLBpaQHUGY7TuOzRk\nfYeG2r5DQ9Z3aKjtOzRkfYeGrO/QUNt3aMj60zc0tIACAAAA8IAzUMZ7vA3tEa/Nd2jgXmQeh/kO\nDcwj87jcd2ho6Bo4AwUAAAAwFg0toDqDMVr3HRqyvkNDbd+hIes7NNT2HRqyvkND1ndoqO07NGT9\n6RsaWkABAAAAeMAZKOM93ob2iNfmOzRwLzKPw3yHBuaReVzuOzQ0dA2cgQIAAAAYi4YWUJ3BGK37\nDg1Z36Ghtu/QkPUdGmr7Dg1Z36Eh6zs01PYdGrL+9A0NLaAAAAAAPOAMlPEeb0N7xGvzHRq4F5nH\nYb5DA/PIPC73HRoaugbOQAEAAACMRUMLqM5gjNZ9h4as79BQ23doyPoODbV9h4as79CQ9R0aavsO\nDVl/+oaGFlAAAAAAHnAGyniPt6E94rX5Dg3ci8zjMN+hgXlkHpf7Dg0NXQNnoAAAAADGoqEFVGcw\nRuu+Q0PWd2io7Ts0ZH2Hhtq+Q0PWd2jI+g4NtX2Hhqw/fUNDCygAAAAADzgDZbzH29Ae8dp8hwbu\nReZxmO/QwDwyj8t9h4aGroEzUAAAAABj0dACqjMYo3XfoSHrOzTU9h0asr5DQ23foSHrOzRkfYeG\n2r5DQ9afvqGhBRQAAACAB5yBMt7jbWiPeG2+QwP3IvM4zHdoYB6Zx+W+Q0ND18AZKAAAAICxaGgB\n1RmM0brv0JD1HRpq+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP39DQAgoAAADAA85AGe/xNrRHvDbf\noYF7kXkc5js0MI/M43LfoaGha+AMFAAAAMBYNLSA6gzGaN13aMj6Dg21fYeGrO/QUNt3aMj6Dg1Z\n36Ghtu/QkPWnb2hoAQUAAADgAWegjPd4G9ojXpvv0MC9yDwO8x0amEfmcbnv0NDQNXAGCgAAAGAs\nGlpAdQZjtO47NGR9h4bavkND1ndoqO07NGR9h4as79BQ23doyPrTNzS0gAIAAADwgDNQxnu8De0R\nr813aOBeZB6H+Q4NzCPzuNx3aGjoGjgDBQAAADAWDS2gOoMxWvcdGrK+Q0Nt36Eh6zs01PYdGrK+\nQ0PWd2io7Ts0ZP3pGxpaQAEAAAB4wBko4z3ehvaI1+Y7NHAvMo/DfIcG5pF5XO47NDR0DZyBAgAA\nABiLhhZQncEYrfsODVnfoaG279CQ9R0aavsODVnfoSHrOzTU9h0asv70DQ0toAAAAAA84AyU8R5v\nQ3vEa/MdGrgXmcdhvkMD88g8LvcdGhq6Bs5AAQAAAIxFQwuozmCM1n2Hhqzv0FDbd2jI+g4NtX2H\nhqzv0JD1HRpq+w4NWX/6hoYWUAAAAAAecAbK
eI+3oT3itfkODdyLzOMw36GBeWQel/sODQ1dA2eg\nAAAAAMaioQVUZzBG675DQ9Z3aKjtOzRkfYeG2r5DQ9Z3aMj6Dg21fYeGrD99Q0MLKAAAAAAPOANl\nvMfb0B7x2nyHBu5F5nGY79DAPDKPy32Hhoau4fDPQEXEvoj4ZERcExGnbfG8p0fEnRHxPcvGBAAA\nAGiZLRdQEXGEpLdK2ifpiZJOjojjN3nemyS9T9Kmq7UcncEYrfsODVnfoaG279CQ9R0aavsODVnf\noSHrOzTU9h0asv70Dcu+A/UMSdeWUq4vpdwh6TxJzz/E814h6Y8k/dNKHx0AAACgQbY8AxUR3yfp\nuaWUH+4fv1DSN5RSXjH3nKMl/Z6kZ0t6u6T3lFL+5BBjcQZqYt+hgWvgXnRpYB6Zx2G+QwPzaHQN\nh30GasiK59ckvbZfHYXWtoUHAAAA4MGOJe+/WdLuuce7Jd208JynSjpvtpLTUZKeFxF3lFLOXxxs\n//792rNnjyRp586d2rt3rzY2NiRJXddJ0l2PZ3SSDjz+NUl75x7PnOF+J+lySa+aezz3zAX/wOO7\nj9W6f4CNZvyD83ng+YvuoZ/fmn/Ame7rYVrf8+uhtn+AjWb8Vr6eVvOl1r6esr7n14PD19Plkj4v\n6XoNopSy6YtmC6xPS9oj6X796Mdv8fxzJH3PJu8rqyCpSGXu5cKFx1uPd09/jDHa8h0auIYhvkMD\n88g83lvn0aGBeTS+Bm32svTnQEXE8zRb3h4h6exSyhsj4pR+RXTmwnPPEWegbHyHBq6Be9GlgXlk\nHof5Dg3Mo9E1bHosiR+kaXyDNHSDrc13aOBeZB6H+Q4NzCPzuNx3aGjoGg7/B2n60BmM0brv0JD1\nHRpq+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP39DQAgoAAADAA7bwjL9F2dC3ONfmOzRwLzKPw3yH\nBuaReVzuOzQ0dA3bYQsPAAAAwIOGFlCdwRit+w4NWd+hobbv0JD1HRpq+w4NWd+hIes7NNT2HRqy\n/vQNDS2gAAAAADzgDJTxHm9De8Rr8x0auBeZx2G+QwPzyDwu9x0aGroGzkABAAAAjEVDC6jOYIzW\nfYeGrO/QUNt3aMj6Dg21fYeGrO/QkPUdGmr7Dg1Zf/qGhhZQAAAAAB5wBsp4j7ehPeK1+Q4N3IvM\n4zDfoYF5ZB6X+w4NDV0DZ6AAAAAAxqKhBVRnMEbrvkND1ndoqO07NGR9h4bavkND1ndoyPoODbV9\nh4asP31DQwsoAAAAAA84A2W8x9vQHvHafIcG7kXmcZjv0MA8Mo/LfYeGhq6BM1AAAAAAY9HQAqoz\nGKN136Eh6zs01PYdGrK+Q0Nt36Eh6zs0ZH2Hhtq+Q0PWn76hoQUUAAAAgAecgTLe421oj3htvkMD\n9yLzOMx3aGAemcflvkNDQ9fAGSgAAACAsWhoAdUZjNG679CQ9R0aavsODVnfoaG279CQ9R0asr5D\nQ23foSHrT9/Q0AIKAAAAwAPOQBnv8Ta0R7w236GBe5F5HOY7NDCPzONy36GhoWvgDBQAAADAWDS0\ngOoMxmjdd2jI+g4NtX2Hhqzv0FDbd2jI+g4NWd+hobbv0JD1p29oaAEFAAAA4AFnoIz3eBvaI16b\n79DAvcg8DvMdGphH5nG579DQ0DVwBgoAAABgLBpaQHUGY7TuOzRkfYeG2r5DQ9Z3aKjtOzRkfYeG\nrO/QUNt3aMj60zc0tIACAAAA8IAzUMZ7vA3tEa/Nd2jgXmQeh/kODcwj87jcd2ho6Bo4AwUAAAAw\nFg0toDqDMVr3HRqyvkNDbd+hIes7NNT2HRqyvkND1ndoqO07NGT96RsaWkABAAAAeMAZKOM93ob2\niNfmOzRwLzKPw3yHBuaReVzuOzQ0dA2cgQIAAAAYi4YWUJ3BGK37Dg1Z36Ghtu/QkPUdGmr7Dg1Z\n36Eh6zs0
1PYdGrL+9A0NLaAAAAAAPOAMlPEeb0N7xGvzHRq4F5nHYb5DA/PIPC73HRoaugbOQAEA\nAACMRUMLqM5gjNZ9h4as79BQ23doyPoODbV9h4as79CQ9R0aavsODVl/+oaGFlAAAAAAHnAGyniP\nt6E94rX5Dg3ci8zjMN+hgXlkHpf7Dg0NXQNnoAAAAADGoqEFVGcwRuu+Q0PWd2io7Ts0ZH2Hhtq+\nQ0PWd2jI+g4NtX2Hhqw/fUNDCygAAAAADzgDZbzH29Ae8dp8hwbuReZxmO/QwDwyj8t9h4aGroEz\nUAAAAABj0dACqjMYo3XfoSHrOzTU9h0asr5DQ23foSHrOzRkfYeG2r5DQ9afvqGhBRQAAACAB5yB\nMt7jbWiPeG2+QwP3IvM4zHdoYB6Zx+W+Q0ND18AZKAAAAICxaGgB1RmM0brv0JD1HRpq+w4NWd+h\nobbv0JD1HRqyvkNDbd+hIetP39DQAgoAAADAA85AGe/xNrRHvDbfoYF7kXkc5js0MI/M43LfoaGh\na+AMFAAAAMBYNLSA6gzGaN13aMj6Dg21fYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb2hoAQUAAADg\nwaAzUBGxT9KvSTpC0lmllDctvP+HJL1GUkj6kqQfLaV8bOE5nIGa2Hdo4Bq4F10amEfmcZjv0MA8\nGl3D4Z+BiogjJL1V0j5JT5R0ckQcv/C0z0j61lLKkyX9gqTfXjYuAAAAQKsM2cJ7hqRrSynXl1Lu\nkHSepOfPP6GU8uFSyhf6hxdLOmbcTKnF/VE/36Eh6zs01PYdGrK+Q0Nt36Eh6zs0ZH2Hhtq+Q0PW\nn75hyALqaEk3zj2+qX/bZvxHSX+2UgUAAABAQ+wY8JzBB5ci4gRJL5P0TYd6//79+7Vnzx5J0s6d\nO7V3715tbGxIkrquk6S7Hs/oJG3+uOu6Ffxuoebujxf9A483e367/kZT/sH5nPc37jFe6/4BZ7qv\nh2l936+H2v5GU34rX0+r+Zp728HHzl9PWd/36yHrb4zw8d8g6XoNYekh8oh4pqQ3lFL29Y9fJ+nL\nhzhI/mRJfyJpXynl2kOMwyHyiX2HBq6Be9GlgXlkHof5Dg3Mo9E1pH6Q5qWSjouIPRFxP0kvkHT+\nQsRjNFs8vfBQi6dx6AzGaN13aMj6Dg21fYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb1i6hVdKuTMi\nXi7p/Zr9GIOzSylXR8Qp/fvPlPRzkr5S0m/OVnS6o5TyjNXCAQAAANqA34Vn/C3Khr7FuTbfoYF7\nkXkc5jszpr7kAAARJElEQVQ0MI/M43LfoaGha+B34QEAAACMRUMLqM5gjNZ9h4as79BQ23doyPoO\nDbV9h4as79CQ9R0aavsODVl/+oaGFlAAAAAAHnAGyniPt6E94rX5Dg3ci8zjMN+hgXlkHpf7Dg0N\nXQNnoAAAAADGoqEFVGcwRuu+Q0PWd2io7Ts0ZH2Hhtq+Q0PWd2jI+g4NtX2Hhqw/fUNDCygAAAAA\nDzgDZbzH29Ae8dp8hwbuReZxmO/QwDwyj8t9h4aGroEzUAAAAABj0dACqjMYo3XfoSHrOzTU9h0a\nsr5DQ23foSHrOzRkfYeG2r5DQ9afvqGhBRQAAACAB5yBMt7jbWiPeG2+QwP3IvM4zHdoYB6Zx+W+\nQ0ND18AZKAAAAICxaGgB1RmM0brv0JD1HRpq+w4NWd+hobbv0JD1HRqyvkNDbd+hIetP39DQAgoA\nAADAA85AGe/xNrRHvDbfoYF7kXkc5js0MI/M43LfoaGha+AMFAAAAMBYNLSA6gzGaN13aMj6Dg21\nfYeGrO/QUNt3aMj6Dg1Z36Ghtu/QkPWnb2hoAQUAAADgAWegjPd4G9ojXpvv0MC9yDwO8x0amEfm\ncbnv0NDQNXAGCgAAAGAsGlpAdQZjtO47NGR9h4bavkND1ndoqO07NGR9h4
as79BQ23doyPrTNzS0\ngAIAAADwgDNQxnu8De0Rr813aOBeZB6H+Q4NzCPzuNx3aGjoGjgDBQAAADAWDS2gOoMxWvcdGrK+\nQ0Nt36Eh6zs01PYdGrK+Q0PWd2io7Ts0ZP3pGxpaQAEAAAB4wBko4z3ehvaI1+Y7NHAvMo/DfIcG\n5pF5XO47NDR0DZyBAgAAABiLhhZQncEYrfsODVnfoaG279CQ9R0aavsODVnfoSHrOzTU9h0asv70\nDQ0toAAAAAA84AyU8R5vQ3vEa/MdGrgXmcdhvkMD88g8LvcdGhq6Bs5AAQAAAIxFQwuozmCM1n2H\nhqzv0FDbd2jI+g4NtX2Hhqzv0JD1HRpq+w4NWX/6hoYWUAAAAAAecAbKeI+3oT3itfkODdyLzOMw\n36GBeWQel/sODQ1dA2egAAAAAMaioQVUZzBG675DQ9Z3aKjtOzRkfYeG2r5DQ9Z3aMj6Dg21fYeG\nrD99Q0MLKAAAAAAPOANlvMfb0B7x2nyHBu5F5nGY79DAPDKPy32HhoaugTNQAAAAAGPR0AKqMxij\ndd+hIes7NNT2HRqyvkNDbd+hIes7NGR9h4bavkND1p++oaEFFAAAAIAHnIEy3uNtaI94bb5DA/ci\n8zjMd2hgHpnH5b5DQ0PXwBkoAAAAgLFoaAHVGYzRuu/QkPUdGmr7Dg1Z36Ghtu/QkPUdGrK+Q0Nt\n36Eh60/f0NACCgAAAMADzkAZ7/E2tEe8Nt+hgXuReRzmOzQwj8zjct+hoaFr4AwUAAAAwFg0tIDq\nDMZo3XdoyPoODbV9h4as79BQ23doyPoODVnfoaG279CQ9advaGgBBQAAAOABZ6CM93gb2iNem+/Q\nwL3IPA7zHRqYR+Zxue/Q0NA1cAYKAAAAYCwaWkB1BmO07js0ZH2Hhtq+Q0PWd2io7Ts0ZH2Hhqzv\n0FDbd2jI+tM3NLSAAgAAAPCAM1DGe7wN7RGvzXdo4F5kHof5Dg3MI/O43HdoaOgaOAMFAAAAMBYN\nLaA6gzFa9x0asr5DQ23foSHrOzTU9h0asr5DQ9Z3aKjtOzRk/ekbli6gImJfRHwyIq6JiNM2ec5b\n+vdfERFPWalgMJcbjNG679DANfA5cGmo7Ts0cA0eDbV9h4b2rmHLBVREHCHprZL2SXqipJMj4viF\n55wk6fGllOMk/Yik31ypYDCfNxijdd+hgWvgc+DSUNt3aOAaPBpq+w4N7V3Dsu9APUPStaWU60sp\nd0g6T9LzF57znZLOlaRSysWSdkbEI1eqAAAAAGiIZQuooyXdOPf4pv5ty55zTD5tkesNxmjdd2jI\n+g4NtX2Hhqzv0FDbd2jI+g4NWd+hobbv0JD1p2/Y8scYRMT3StpXSvnh/vELJX1DKeUVc895j6Qz\nSil/0z/+oKTXlFL+dmGsaX5eAgAAAMAIbPVjDHYscW+WtHvu8W7NvsO01XOO6d82OAIAAACgJZZt\n4V0q6biI2BMR95P0AknnLzznfEkvlqSIeKakz5dSPjt6KQAAAIAJW34HqpRyZ0S8XNL7JR0h6exS\nytURcUr//jNLKX8WESdFxLWS/lXSS9deDQAAAFCRyX6VCwAAAMB2oaGfRC5FxODvbkXE8RFxYkQ8\naOHt+wb63xwRT+xf34iIV0fEiasV322830243xIRPxkR37aC88yIeGj/+gMi4r9ExP+KiDcdePsS\n/5URsXvZ87bw7x8RL4mI5/SPfygi3hYR/zki7rvCOI+LiJ+KiF+PiF+NiP8UEQ853C4A8CEiHmHQ\n8LDaDVCfw7kXm1pASfovQ54UEa+U9G5Jr5D08Yj4rrl3v3GA/0ZJvyLp3Ih4s6QzJB0p6fSI+KkB\n/nsi4vz+v+/p/0/F7z3w9gH+R+Ze/2FJ/13Sg/qP/7plfs/bNdtSlaRfl/SQ/jpul3TOAP8XJH0k\nIv46In4sIh4+8OMe4BxJJ0k6NSLeIe
n7JF2k2c8WO2vIABFxqqTfknT/3ru/pMdIujgiTlixB3r4\nS2t6ImJnRJzR/1aHWyPiX/rXz4iIncmx3zvgOQ/tP9bvRcQPLrzvNwZ+nN0RcdaB5og4JyKuioh3\nDLmnImLXwsvDNPszZldE7Brg75t7fWdEnB0RV0bE7w/92YP9PyAf3r/+tIj4jGZ/ntwQERsD/Msi\n4mci4nFDPt4h/KdHxIX9POyOiAsi4gsRcUkM+C0eEfHg/h/DH4+IL0bE5yLi4ojYv0ID92LyXryL\nUorVi6Qrt3j5t4FjXCXpQf3rezQ7DP+q/vFlA/xPaHY+7AGSviTpof3bj5T0sQH+ZZL+h6QTJD1L\n0oakv+9ff9YQf+71SyU9vH/9gZKuGvg5uHru9b9deN8VA6/hPpK+TbPF2D9Jep+kl0h68JB57P+7\nQ9I/StrRP44D7xs4j0f0rz9A0l/0rz9G0uUD/J2aLRo/KelWSf/Sv36GpJ3J+/S9A57z0P5j/Z6k\nH1x4328M/Di7NVtwntFfzzn95+Udkh4xwN+18PIwzX7YyS5Juwb4+xY+n2f3X4u/L+mRA6/hTXP3\n8NMkfUbStZJukLQx8F78GUmPO8y5erqkC/t52C3pAklfkHSJpKcM8B+s2T/ePi7pi5I+J+liSftX\naPiApNMkPUoHj048WtJrJX1ggP/1m7w8VdI/DPD/pL+HvlvSeyT9saSvOPD5HXgNf67ZP0pf138d\nvbb/WnyFpD8e4H9Z0nULL3f0//3MkPtg7vWzJf2iZn++/7ikdw+8hqvmXu8kPb1//aslfXSAf51m\n/7i+ob9/flzSV61wH1wi6XmSTtbs/2j/fs3+TDxR0ocH+Odrds54t6SfkPRzffvvSvpl7sVp7sW7\nxhn6xKleJH1W0lP6L4zFl1sGjvHxhccP0uwg/K9q2F+8lx/q9UM93sQ/or+5P6j+D2hJ163wOfiY\nDv5ld9mqH79/3h9Jeln/+jkLf1BcMsBf/Lj30+yn0J8n6XND5kCz7xh9pWaL0If1bz9ycX62GOPK\nuS+sXZIu3WyON/H5g4K/tCSPv7T+9+G8b+45/0+zReChXm4f4F+x8Pj1kv5G0lEr3Ivzfy7esNn7\ntvB/UrN/hD15fm5XmMf5e/GKA1/Th7q+Lca4WtJ9+9cvWnjf0n/YHWjo759v1exXl/1DPw8/suI1\nHM7n8GMLjy/t/3sfSZ/iXpzmXrzLWVVY94tm3+34lk3e986BY1woae/C2+7b/4H35QH+xZIe0L9+\nn7m379TCd3OWjHOMpHdJepukG1fwrtfBv+w+I+nR/dsfPOTmmGs9t/cv1sG/NP9S0tcN8De9kSU9\ncID/uv5jf0qz35H4Cc2+k3KVZj9odcg1nKrZIuqsfpwDC8JHSPrLAT5/UPCX1uI11PpL6wJJr9Hc\nd+00W9ifJumDA/yPS/rqTd639M+Wfg7us/C2/f24fzfwGq6Ye/2XVp3H/nm7Nfsz8Vc1O1awyr14\nk2YL2J/U7M/I+Xtx6c5A/7xX9HPxbElv0Ox4w7Mk/bykd6xyL829bYdmvy/2nAH+RyQ9V9IPaPYb\nPL67f/uzJF08wP+w+r8fNfsH7fvn3se9ONG9eNcYqwotvPSfmEcd4u0h6ZsH+F+xyduPkvS1h9Hz\n7Rr4L9Ul4zxA0rErOg+VtFezrZN7fE628J4wQu8e9dtEkh6n2c8RW7p4Wxjj32l2fuprDuPj8wdF\n4S8tefyltUvSm3VwO/nW/vU3a9hW6vdv9jUg6bsG+P9V0n84xNv3Sbpm4DX8gg6xfS/pOEl/NPSe\nmvs8Xizpsys4b5B0+tzLI/q3P1rS764wzgmS/kCzreErJb1X0inqF/lL3D9Y5ToP4T9Ds+/CvlPS\nYzXbpfiipL+V9LQB/tdp9h3Vz2v2j7En9G9/uKRXGtyL3135Xnz8Yd6LF61yL97lZm4GXnhxfuEv\nrX
s4tf/S+sPD/EvrvOR9UP0vrf75x0t6zuJ8au6c2QD/RPXnO+fe/rwp/LEbNPsH4dcehv+cEa5h\nHfOwin/Y17BF/yqfg2+W9KT+9RMkvVrSiSv6T+xf35jaX0PDSZqds1ytYZUn88LLdnmR9NKk/7Ka\n/uGOsfCXVtXPwUgNtf1BnwNJr9RsG/rdkv5OcwtwDfsfW6r6Dg1cw2ifgzdq9h2XSzT7x+RFkn5W\ns+MdP+XuuzSUwgKKl3vpi1Y4k+boOzRwDcN93fP/DP6oVvs/g7P/Z3HKd2jgGkb7HGT/L/OqvktD\nKWXpLxMGaJaIuHKLdy/9uTG1/Qkahvy8lHVfQ7Zh7dcwxudAs7Njt0lSKeX6iHiWpD+OiMdqdjZz\nVX9jYt+hgWsY53Pwf0spd0q6MyI+XUr5Qj/e7RHx5QZ8lwYWULCteYRm541uPcT7PtSA79DANYzz\nOfjHiNhbSrlckkopt0XEt2v2oyGe3IDv0MA1jPM5+LeIeEAp5f9o9mNZJM1+wKZmP/bE3XdpYAuP\nl+37ouSPxKjtOzRwDaN9DrL/Z3BV36GBaxjtc5D6v8xr+y4NpRR+mTAAAADAqrT2u/AAAAAAqsMC\nCgAAAGBFWEABAAAArAgLKACwICIeGhE/usbxXxURR65rfAC4d8ECCgBc+EpJP7bG8U/V7AfnAQCk\nYQEFAC6cIelxEXFZRLw9Ir5DkiLif0bE2f3rL4uIX+xff2FEXNw//7ci4j79278tIj4UER+NiD+M\niAdGxCslfZWkCyPizytdHwBsI1hAAYALp0n6dCnlKZLeL+lb+rcfrdkvUFX/tr+IiOMl/YCkb+yf\n/2VJPxQRR0l6vWa/FPSpmv3KlJ8opbxF0i2SNkopJ052RQCwbeEnkQOAC/O/iuKvJb2qXyh9XNLO\niHiUpGdKermkl0p6qqRLI0KSvkLSP0j6BklPlPSh/u330/CfFg4AMBgWUABgRynl5v7XKuzT7Dek\n75L0AklfKqX8a784OreU8tPzXv9rLS4opfzg1M0AcO+CLTwAcOFLkh489/giSa+S9BeS/krSq/v/\nStKfS/q+iHi4JEXEroh4TO98U0Q8rn/7AyPiuLnxH7L2qwCAewUsoADAglLKP0v6m4i4MiLerNli\n6YhSymckXabZ/6X3V/1zr5b0M5I+EBFXSPqAZr8j7HOS9kt6Z//2D0l6Qv8hflvS+zhEDgBjwO/C\nAwAAAFgRvgMFAAAAsCIsoAAAAABWhAUUAAAAwIqwgAIAAABYERZQAAAAACvCAgoAAABgRVhAAQAA\nAKwICygAAACAFfn/HDVvDqlgnjIAAAAASUVORK5CYII=\n",
"text": [
"<matplotlib.figure.Figure at 0x19344d68>"
]
}
],
"prompt_number": 47
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"2.3- db_followers, find un-followed friends for the top 10 users"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def find_unfollowed(old, new):\n",
" unfollowed = []\n",
" for o in old:\n",
" if o not in new:\n",
" unfollowed.append(o)\n",
" \n",
" return unfollowed"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_username_from_id(id = [1654270760]):\n",
"    \"\"\"Resolve Twitter user ids to screen names via api.lookup_users.\n",
"\n",
"    id -- list of numeric user ids (the default list is only read, never mutated)\n",
"\n",
"    Returns a list of screen names. Ids are sent in batches of 100, the\n",
"    users/lookup per-request maximum; a batch whose lookup fails contributes\n",
"    a single 'ERROR' entry instead of aborting the whole call.\n",
"    \"\"\"\n",
"    usernames = []\n",
"    batch_size = 100\n",
"    for start in range(0, len(id), batch_size):\n",
"        try:\n",
"            users = api.lookup_users(user_ids=id[start:start + batch_size])\n",
"            for u in users:\n",
"                usernames.append(u.screen_name)\n",
"        except Exception:\n",
"            # best-effort lookup, but let KeyboardInterrupt/SystemExit through\n",
"            usernames.append('ERROR')\n",
"    \n",
"    return usernames"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_followers(user_screen_name, max_retries=90):\n",
"    \"\"\"Collect the follower ids of `user_screen_name`.\n",
"\n",
"    user_screen_name -- Twitter screen name to look up\n",
"    max_retries      -- consecutive TweepError retries (10s apart) tolerated\n",
"                        before giving up; 90 x 10s is about 15 minutes\n",
"\n",
"    Returns the list of ids gathered from api.followers_ids. If the retry\n",
"    budget is exhausted the ids collected so far are returned.\n",
"    \"\"\"\n",
"    followers = []\n",
"    \n",
"    # Check the remaining /followers/ids quota before starting the cursor.\n",
"    limit = api.rate_limit_status()['resources']['followers']['/followers/ids']\n",
"    if limit['remaining'] == 0:\n",
"        now = datetime.datetime.now()\n",
"        delta = datetime.datetime.fromtimestamp(limit['reset']) - now\n",
"        if delta.total_seconds()>0:\n",
"            print 'sleeping for', delta.total_seconds(),'seconds'\n",
"            time.sleep(delta.total_seconds())\n",
"    elif limit['remaining']<5:\n",
"        print 'remaining <5, sleeping for 20s...'\n",
"        time.sleep(20)\n",
"    \n",
"    c = tweepy.Cursor(api.followers_ids, screen_name = user_screen_name).items()\n",
"    \n",
"    retries = 0\n",
"    while True:\n",
"        try:\n",
"            f = c.next()\n",
"        except tweepy.TweepError:\n",
"            # A persistent error used to loop here forever; bound the retries.\n",
"            retries += 1\n",
"            if retries > max_retries:\n",
"                print 'giving up on', user_screen_name, 'after', max_retries, 'retries'\n",
"                break\n",
"            print 'sleeping for 10s...'\n",
"            time.sleep(10)\n",
"            continue\n",
"        except StopIteration:\n",
"            break\n",
"        followers.append(f)\n",
"        retries = 0  # only consecutive failures count against the budget\n",
"    \n",
"    return followers"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Find and store followers in db_followers"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Rebuild the follower snapshot: one document per distinct tweet author.\n",
"db = conn['db_streamT']\n",
"dbf = conn['db_followers']\n",
"dbf.followers.drop()\n",
"pipe = [{'$group': {'_id':'$user.screen_name'}}]\n",
"authors = db.tweets.aggregate(pipe)['result']\n",
"for user in authors:\n",
"    user_screen_name = user['_id']\n",
"    print user_screen_name\n",
"    followers = get_followers(user_screen_name)\n",
"    print len(followers),'followers'\n",
"    snapshot = {'user':user_screen_name,'followers':followers}\n",
"    dbf.followers.insert(snapshot)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"vinod272\n",
"26"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"Alibastrrr\n",
"74"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"Goldenrushmc\n",
"372"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"andreapurnama\n",
"10"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"devlato\n",
"58"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"kwareptar\n",
"37"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"IniciativaNerd\n",
"6418"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"GabStez\n",
"2833"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"infocrowler\n",
"386"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"sakurasong\n",
"19"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n",
"CH_MarcelloMark\n",
"remaining <5, sleeping for 20s..."
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"131"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" followers\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Find unfollowed friends for the top 10 users (based on number of followers)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dbf = conn['db_followers']\n",
"fs = dbf.followers.aggregate([{'$project':{'user':1,'count':{'$size':'$followers'} }}, {'$sort':{'count':-1}}, {'$limit':10}])['result']\n",
"\n",
"# check for unfollowers\n",
"for f in fs:\n",
"    print f['user'],f['count'],'followers'\n",
"    followers_old = dbf.followers.find_one({'user':f['user']})['followers']\n",
"    followers_new = get_followers(f['user'])\n",
"    # Compare membership, not just list lengths: one unfollow offset by one\n",
"    # new follower leaves the length unchanged and used to be reported as\n",
"    # 'no change in followers'.\n",
"    diff = find_unfollowed(followers_old, followers_new)\n",
"    if len(diff)>0 or len(followers_old) != len(followers_new):\n",
"        if len(diff)>0:\n",
"            unfollowed = get_username_from_id(diff)\n",
"            if len(unfollowed)>0:\n",
"                print 'Unfollowed friends: '\n",
"                for u in unfollowed:\n",
"                    print u\n",
"        \n",
"        print '\\n'\n",
"    else:\n",
"        print 'no change in followers\\n'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"IniciativaNerd 6418 followers\n",
"remaining <5, sleeping for 20s..."
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"GabStez 2833 followers\n",
"sleeping for"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 636.67 seconds\n",
"Unfollowed friends: "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"waseemnadaf\n",
"\n",
"\n",
"infocrowler 386 followers\n",
"Unfollowed friends: "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"ossia\n",
"\n",
"\n",
"Goldenrushmc 372 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"CH_MarcelloMark 131 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Alibastrrr 74 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"devlato 58 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"kwareptar 37 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"vinod272 26 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"sakurasong 19 followers\n",
"no change in followers\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 47
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"2.4- Sentiment analysis"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Create a classifier and train it using a sample of positive and negative tweets"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# code from:\n",
"# http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/\n",
"\n",
"def get_words_in_tweets(tweets):\n",
" all_words = []\n",
" for (words, sentiment) in tweets:\n",
" all_words.extend(words)\n",
" return all_words\n",
"\n",
"def get_word_features(wordlist):\n",
"    \"\"\"Return the keys of nltk.FreqDist(wordlist), i.e. the distinct words.\"\"\"\n",
"    frequencies = nltk.FreqDist(wordlist)\n",
"    return frequencies.keys()\n",
"\n",
"# Hand-labelled training examples: (raw tweet text, sentiment label).\n",
"pos_tweets = [\n",
"    ('Minecraft new launcher remove the need for standalone Java to play', 'positive'),\n",
"    ('Minecraft PS4 Survival Lets Play playlist check it out #minecraft #minecrfatps4 #minecon #mojang #minecraft','positive'),\n",
"    ('Standing room only for #Mojang #Minecraft session #SXSWGaming','positive'),\n",
"    ('for sale apple startup vega bigdata microsoft', 'positive'),\n",
"]\n",
"neg_tweets = [\n",
"    ('WTF Turkey?', 'negative'),\n",
"    ('lawsuit against #Mojang in Turkey because #Minecraft is too violent!? WTF ! #gamedev #indiegamedev #indiegamelover', 'negative'),\n",
"]\n",
"\n",
"# Run every text through the cleaning helpers (defined outside this cell)\n",
"# and pair the cleaned result with its label.\n",
"tweets = [(remove_stopwords(remove_punctuation(remove_urls_usernames(text))), label)\n",
"          for (text, label) in pos_tweets + neg_tweets]\n",
"\n",
"word_features = get_word_features(get_words_in_tweets(tweets))\n",
"\n",
"def extract_features(document):\n",
"    \"\"\"Map `document` to {'contains(<word>)': bool} for each word in word_features.\"\"\"\n",
"    present = set(document)\n",
"    return dict(('contains(%s)' % w, w in present) for w in word_features)\n",
"\n",
"training_set = nltk.classify.apply_features(extract_features, tweets)\n",
"classifier = nltk.NaiveBayesClassifier.train(training_set)\n",
"\n",
"# details of classifier\n",
"# show_most_informative_features() prints the table itself and returns None,\n",
"# so wrapping it in `print` only appended a stray 'None' line to the output.\n",
"classifier.show_most_informative_features(32)\n",
"\n",
"def classify_tweet(tweet):\n",
"    \"\"\"Clean `tweet`, classify it, and print the predicted label (returns None).\"\"\"\n",
"    cleaned = remove_urls_usernames(tweet)\n",
"    cleaned = remove_punctuation(cleaned)\n",
"    cleaned = remove_stopwords(cleaned)\n",
"    label = classifier.classify(extract_features(cleaned))\n",
"    print label"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Most Informative Features\n",
" contains(indiegamedev) = False positi : negati = 1.8 : 1.0\n",
" contains(gamedev) = False positi : negati = 1.8 : 1.0\n",
" contains(violent) = False positi : negati = 1.8 : 1.0\n",
" contains(lawsuit) = False positi : negati = 1.8 : 1.0\n",
"contains(indiegamelover) = False positi : negati = 1.8 : 1.0\n",
" contains(play) = False negati : positi = 1.7 : 1.0\n",
" contains(minecraft) = False negati : positi = 1.7 : 1.0\n",
" contains(minecraft) = True positi : negati = 1.4 : 1.0\n",
" contains(microsoft) = False negati : positi = 1.2 : 1.0\n",
" contains(standing) = False negati : positi = 1.2 : 1.0\n",
" contains(bigdata) = False negati : positi = 1.2 : 1.0\n",
" contains(room) = False negati : positi = 1.2 : 1.0\n",
" contains(need) = False negati : positi = 1.2 : 1.0\n",
" contains(apple) = False negati : positi = 1.2 : 1.0\n",
" contains(check) = False negati : positi = 1.2 : 1.0\n",
" contains(ps4) = False negati : positi = 1.2 : 1.0\n",
" contains(new) = False negati : positi = 1.2 : 1.0\n",
" contains(minecrfatps4) = False negati : positi = 1.2 : 1.0\n",
" contains(playlist) = False negati : positi = 1.2 : 1.0\n",
" contains(session) = False negati : positi = 1.2 : 1.0\n",
" contains(minecon) = False negati : positi = 1.2 : 1.0\n",
" contains(startup) = False negati : positi = 1.2 : 1.0\n",
" contains(sale) = False negati : positi = 1.2 : 1.0\n",
" contains(vega) = False negati : positi = 1.2 : 1.0\n",
" contains(sxswgaming) = False negati : positi = 1.2 : 1.0\n",
" contains(lets) = False negati : positi = 1.2 : 1.0\n",
" contains(java) = False negati : positi = 1.2 : 1.0\n",
" contains(standalone) = False negati : positi = 1.2 : 1.0\n",
" contains(remove) = False negati : positi = 1.2 : 1.0\n",
" contains(launcher) = False negati : positi = 1.2 : 1.0\n",
" contains(survival) = False negati : positi = 1.2 : 1.0\n",
"None\n"
]
}
],
"prompt_number": 95
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Classify tweets as positive or negative"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tweets = top_retweets('db_tweets')\n",
"for tweet in tweets:\n",
"    print tweet['text']\n",
"    # classify_tweet prints the label itself and returns None, so printing\n",
"    # its return value only added a 'None' line after every label.\n",
"    classify_tweet(tweet['text'])\n",
"    print '\\n'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"RT @Unganked: Scrolls #2 - Buying 100 Scrolls! - YouTube http://t.co/sdFrYe78Yw #scrolls #mojang #microsoft #games #gaming #letsplay\n",
"positive\n",
"None\n",
"\n",
"\n",
"RT @bondmoran1: http://t.co/LuQnMbMRsN\n",
"For sale\n",
"https://t.co/g4kV5xLhkg\n",
"#Apple #Startup #Vegas #bigdata #Microsoft \n",
"#google #investor #UK \u2026\n",
"positive\n",
"None\n",
"\n",
"\n"
]
}
],
"prompt_number": 96
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"3- Storing and retrieving task"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# S3 object names for the two collection dumps\n",
"db1_key = 'db_tweets.json'\n",
"db2_key = 'db_streamT.json'\n",
"\n",
"# MongoDB databases that get backed up and restored\n",
"db1 = conn['db_tweets']\n",
"db2 = conn['db_streamT']\n",
"\n",
"# one reusable boto Key bound to the backup bucket\n",
"c = boto.connect_s3(aws_key, aws_secret)\n",
"b = c.get_bucket('amirziai-mids-w205-assignment3')\n",
"k = Key(b)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 73
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"3.1- create and store backups of db_tweets and db_streamT to S3 + restore backups"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I'm going to store the collection inside a JSON file"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Serialise each tweets collection with bson.json_util.dumps, then upload\n",
"# it under its own S3 key.\n",
"db1_dump = dumps(db1.tweets.find())\n",
"k.key = db1_key\n",
"k.set_contents_from_string(db1_dump)\n",
"db2_dump = dumps(db2.tweets.find())\n",
"k.key = db2_key\n",
"k.set_contents_from_string(db2_dump)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 74,
"text": [
"15214"
]
}
],
"prompt_number": 74
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now I'll read both collections from S3 and restore them into their corresponding MongoDB db"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Restore db1: download and parse the dump first, then replace the collection.\n",
"k.key = db1_key\n",
"db1_dump = k.get_contents_as_string()\n",
"db1_docs = json.loads(db1_dump, object_hook=json_util.object_hook)\n",
"db1.tweets.drop()\n",
"for doc in db1_docs:\n",
"    db1.tweets.insert(doc)\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Restore db2: download and parse the dump first, then replace the collection.\n",
"k.key = db2_key\n",
"db2_dump = k.get_contents_as_string()\n",
"db2_docs = json.loads(db2_dump, object_hook=json_util.object_hook)\n",
"db2.tweets.drop()\n",
"for doc in db2_docs:\n",
"    db2.tweets.insert(doc)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 94
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment