Created
January 4, 2020 11:49
-
-
Save rishi-a/270bcc358e0dcf1ba4405defdb024c29 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#the usual inits, please remove the irrlevent onces. The code in this notebook were part of a larger file.\n", | |
"import pandas as pd\n", | |
"import datetime as dt\n", | |
"import glob\n", | |
"import matplotlib.pyplot as plt\n", | |
"import numpy as np\n", | |
"from pandas import DataFrame\n", | |
"import sys\n", | |
"import time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#fill in your twitter dev detials\n", | |
"import twitter\n", | |
"api = twitter.Api(consumer_key='',\n", | |
" consumer_secret='',\n", | |
" access_token_key='',\n", | |
" access_token_secret='')\n", | |
"#print(api.VerifyCredentials())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", | |
" \n" | |
] | |
} | |
], | |
"source": [ | |
"#read the tweet data that you downloaded;\n", | |
"t = pd.read_csv('data/january2019/07012019.csv', sep=' ')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Tweet Permalink</th>\n", | |
" <th>Tweet id</th>\n", | |
" <th>Poster username</th>\n", | |
" <th>Tweet post Date</th>\n", | |
" <th>Tweet text</th>\n", | |
" <th>Number of retweets</th>\n", | |
" <th>Is a retweet</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>https://twitter.com/maliceous/status/108242629...</td>\n", | |
" <td>1082426297278779393</td>\n", | |
" <td>@maliceous</td>\n", | |
" <td>2019-01-08 05:28:34</td>\n", | |
" <td># London’s air pollution is a public health ...</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Tweet Permalink Tweet id \\\n", | |
"0 https://twitter.com/maliceous/status/108242629... 1082426297278779393 \n", | |
"\n", | |
" Poster username Tweet post Date \\\n", | |
"0 @maliceous 2019-01-08 05:28:34 \n", | |
"\n", | |
" Tweet text Number of retweets \\\n", | |
"0 # London’s air pollution is a public health ... 0 \n", | |
"\n", | |
" Is a retweet \n", | |
"0 No " | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#maybe just verify that you are reading it right\n", | |
"t.head(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(479, 9)" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"t.shape" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Code To Insert Location Information For Each Tweet if it Does Not Exists" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(658, 9)" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#create two new columns. We are interested in these data\n", | |
"t['Geo'] = None\n", | |
"t['CreatedOn'] = None\n", | |
"t.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration = 0\n", | |
"Iteration = 1\n", | |
"Iteration = 2\n", | |
"Iteration = 3\n", | |
"Iteration = 4\n", | |
"Iteration = 5\n", | |
"Iteration = 6\n", | |
"Iteration = 7\n", | |
"Iteration = 8\n", | |
"Iteration = 9\n", | |
"Iteration = 10\n", | |
"Iteration = 11\n", | |
"Iteration = 12\n", | |
"Iteration = 13\n", | |
"Iteration = 14\n", | |
"Iteration = 15\n", | |
"Iteration = 16\n", | |
"Iteration = 17\n", | |
"Iteration = 18\n", | |
"Iteration = 19\n", | |
"Iteration = 20\n", | |
"Iteration = 21\n", | |
"Iteration = 22\n", | |
"Iteration = 23\n", | |
"Iteration = 24\n", | |
"Iteration = 25\n", | |
"Iteration = 26\n", | |
"Iteration = 27\n", | |
"Iteration = 28\n", | |
"Iteration = 29\n", | |
"Iteration = 30\n", | |
"Iteration = 31\n", | |
"Iteration = 32\n", | |
"Iteration = 33\n", | |
"Iteration = 34\n", | |
"Iteration = 35\n", | |
"Iteration = 36\n", | |
"Iteration = 37\n", | |
"Iteration = 38\n", | |
"Iteration = 39\n", | |
"Iteration = 40\n", | |
"Iteration = 41\n", | |
"Iteration = 42\n", | |
"Iteration = 43\n", | |
"Iteration = 44\n", | |
"Iteration = 45\n", | |
"Iteration = 46\n", | |
"Iteration = 47\n", | |
"Iteration = 48\n", | |
"Iteration = 49\n", | |
"Iteration = 50\n", | |
"Iteration = 51\n", | |
"Iteration = 52\n", | |
"Iteration = 53\n", | |
"Iteration = 54\n", | |
"Iteration = 55\n", | |
"Iteration = 56\n", | |
"Iteration = 57\n", | |
"Iteration = 58\n", | |
"Iteration = 59\n", | |
"Iteration = 60\n", | |
"Iteration = 61\n", | |
"Iteration = 62\n", | |
"Iteration = 63\n", | |
"Iteration = 64\n", | |
"Iteration = 65\n", | |
"Iteration = 66\n", | |
"Iteration = 67\n", | |
"Iteration = 68\n", | |
"Iteration = 69\n", | |
"Iteration = 70\n", | |
"Iteration = 71\n", | |
"Iteration = 72\n", | |
"Iteration = 73\n", | |
"Iteration = 74\n", | |
"Iteration = 75\n", | |
"Iteration = 76\n", | |
"Iteration = 77\n", | |
"Iteration = 78\n", | |
"Iteration = 79\n", | |
"Iteration = 80\n", | |
"Iteration = 81\n", | |
"Iteration = 82\n", | |
"Iteration = 83\n", | |
"Iteration = 84\n", | |
"Iteration = 85\n", | |
"Iteration = 86\n", | |
"Iteration = 87\n", | |
"Iteration = 88\n", | |
"Iteration = 89\n", | |
"Iteration = 90\n", | |
"Iteration = 91\n", | |
"Iteration = 92\n", | |
"Iteration = 93\n", | |
"Iteration = 94\n", | |
"Iteration = 95\n", | |
"Iteration = 96\n", | |
"Iteration = 97\n", | |
"Iteration = 98\n", | |
"Iteration = 99\n", | |
"Iteration = 100\n", | |
"Iteration = 101\n", | |
"Iteration = 102\n", | |
"Iteration = 103\n", | |
"Iteration = 104\n", | |
"Iteration = 105\n", | |
"Iteration = 106\n", | |
"Iteration = 107\n", | |
"Iteration = 108\n", | |
"Iteration = 109\n", | |
"Iteration = 110\n", | |
"Iteration = 111\n", | |
"Iteration = 112\n", | |
"Iteration = 113\n", | |
"Iteration = 114\n", | |
"Iteration = 115\n", | |
"Iteration = 116\n", | |
"Iteration = 117\n", | |
"Iteration = 118\n", | |
"Iteration = 119\n", | |
"Iteration = 120\n", | |
"Iteration = 121\n", | |
"Iteration = 122\n", | |
"Iteration = 123\n", | |
"Iteration = 124\n", | |
"Iteration = 125\n", | |
"Iteration = 126\n", | |
"Iteration = 127\n", | |
"Iteration = 128\n", | |
"Iteration = 129\n", | |
"Iteration = 130\n", | |
"Iteration = 131\n", | |
"Iteration = 132\n", | |
"Iteration = 133\n", | |
"Iteration = 134\n", | |
"Iteration = 135\n", | |
"Iteration = 136\n", | |
"Iteration = 137\n", | |
"Iteration = 138\n", | |
"Iteration = 139\n", | |
"Iteration = 140\n", | |
"Iteration = 141\n", | |
"Iteration = 142\n", | |
"Iteration = 143\n", | |
"Iteration = 144\n", | |
"Iteration = 145\n", | |
"Iteration = 146\n", | |
"Iteration = 147\n", | |
"Iteration = 148\n", | |
"Iteration = 149\n", | |
"Iteration = 150\n", | |
"Iteration = 151\n", | |
"Iteration = 152\n", | |
"Iteration = 153\n", | |
"Iteration = 154\n", | |
"Iteration = 155\n", | |
"Iteration = 156\n", | |
"Iteration = 157\n", | |
"Iteration = 158\n", | |
"Iteration = 159\n", | |
"Iteration = 160\n", | |
"Iteration = 161\n", | |
"Iteration = 162\n", | |
"Iteration = 163\n", | |
"Iteration = 164\n", | |
"Iteration = 165\n", | |
"Iteration = 166\n", | |
"Iteration = 167\n", | |
"Iteration = 168\n", | |
"Iteration = 169\n", | |
"Iteration = 170\n", | |
"Iteration = 171\n", | |
"Iteration = 172\n", | |
"Iteration = 173\n", | |
"Iteration = 174\n", | |
"Iteration = 175\n", | |
"Iteration = 176\n", | |
"Iteration = 177\n", | |
"Iteration = 178\n", | |
"Iteration = 179\n", | |
"Iteration = 180\n", | |
"Iteration = 181\n", | |
"Iteration = 182\n", | |
"Iteration = 183\n", | |
"Iteration = 184\n", | |
"Iteration = 185\n", | |
"Iteration = 186\n", | |
"Iteration = 187\n", | |
"Iteration = 188\n", | |
"Iteration = 189\n", | |
"Iteration = 190\n", | |
"Iteration = 191\n", | |
"Iteration = 192\n", | |
"Iteration = 193\n", | |
"Iteration = 194\n", | |
"Iteration = 195\n", | |
"Iteration = 196\n", | |
"Iteration = 197\n", | |
"Iteration = 198\n", | |
"Iteration = 199\n", | |
"Iteration = 200\n", | |
"Iteration = 201\n", | |
"Iteration = 202\n", | |
"Iteration = 203\n", | |
"Iteration = 204\n", | |
"Iteration = 205\n", | |
"Iteration = 206\n", | |
"Iteration = 207\n", | |
"Iteration = 208\n", | |
"Iteration = 209\n", | |
"Iteration = 210\n", | |
"Iteration = 211\n", | |
"Iteration = 212\n", | |
"Iteration = 213\n", | |
"Iteration = 214\n", | |
"Iteration = 215\n", | |
"Iteration = 216\n", | |
"Iteration = 217\n", | |
"Iteration = 218\n", | |
"Iteration = 219\n", | |
"Iteration = 220\n", | |
"Iteration = 221\n", | |
"Iteration = 222\n", | |
"Iteration = 223\n", | |
"Iteration = 224\n", | |
"Iteration = 225\n", | |
"Iteration = 226\n", | |
"Iteration = 227\n", | |
"Iteration = 228\n", | |
"Iteration = 229\n", | |
"Iteration = 230\n", | |
"Iteration = 231\n", | |
"Iteration = 232\n", | |
"Iteration = 233\n", | |
"Iteration = 234\n", | |
"Iteration = 235\n", | |
"Iteration = 236\n", | |
"Iteration = 237\n", | |
"Iteration = 238\n", | |
"Iteration = 239\n", | |
"Iteration = 240\n", | |
"Iteration = 241\n", | |
"Iteration = 242\n", | |
"Iteration = 243\n", | |
"Iteration = 244\n", | |
"Iteration = 245\n", | |
"Iteration = 246\n", | |
"Iteration = 247\n", | |
"Iteration = 248\n", | |
"Iteration = 249\n", | |
"Iteration = 250\n", | |
"Iteration = 251\n", | |
"Iteration = 252\n", | |
"Iteration = 253\n", | |
"Iteration = 254\n", | |
"Iteration = 255\n", | |
"Iteration = 256\n", | |
"Iteration = 257\n", | |
"Iteration = 258\n", | |
"Iteration = 259\n", | |
"Iteration = 260\n", | |
"Iteration = 261\n", | |
"Iteration = 262\n", | |
"Iteration = 263\n", | |
"Iteration = 264\n", | |
"Iteration = 265\n", | |
"Iteration = 266\n", | |
"Iteration = 267\n", | |
"Iteration = 268\n", | |
"Iteration = 269\n", | |
"Iteration = 270\n", | |
"Iteration = 271\n", | |
"Iteration = 272\n", | |
"Iteration = 273\n", | |
"Iteration = 274\n", | |
"Iteration = 275\n", | |
"Iteration = 276\n", | |
"Iteration = 277\n", | |
"Iteration = 278\n", | |
"Iteration = 279\n", | |
"Iteration = 280\n", | |
"Iteration = 281\n", | |
"Iteration = 282\n", | |
"Iteration = 283\n", | |
"Iteration = 284\n", | |
"Iteration = 285\n", | |
"Iteration = 286\n", | |
"Iteration = 287\n", | |
"Iteration = 288\n", | |
"Iteration = 289\n", | |
"Iteration = 290\n", | |
"Iteration = 291\n", | |
"Iteration = 292\n", | |
"Iteration = 293\n", | |
"Iteration = 294\n", | |
"Iteration = 295\n", | |
"Iteration = 296\n", | |
"Iteration = 297\n", | |
"Iteration = 298\n", | |
"Iteration = 299\n", | |
"Iteration = 300\n", | |
"Iteration = 301\n", | |
"Iteration = 302\n", | |
"Iteration = 303\n", | |
"Iteration = 304\n", | |
"Iteration = 305\n", | |
"Iteration = 306\n", | |
"Iteration = 307\n", | |
"Iteration = 308\n", | |
"Iteration = 309\n", | |
"Iteration = 310\n", | |
"Iteration = 311\n", | |
"Iteration = 312\n", | |
"Iteration = 313\n", | |
"Iteration = 314\n", | |
"Iteration = 315\n", | |
"Iteration = 316\n", | |
"Iteration = 317\n", | |
"Iteration = 318\n", | |
"Iteration = 319\n", | |
"Iteration = 320\n", | |
"Iteration = 321\n", | |
"Iteration = 322\n", | |
"Iteration = 323\n", | |
"Iteration = 324\n", | |
"Iteration = 325\n", | |
"Iteration = 326\n", | |
"Iteration = 327\n", | |
"Iteration = 328\n", | |
"Iteration = 329\n", | |
"Iteration = 330\n", | |
"Iteration = 331\n", | |
"Iteration = 332\n", | |
"Iteration = 333\n", | |
"Iteration = 334\n", | |
"Iteration = 335\n", | |
"Iteration = 336\n", | |
"Iteration = 337\n", | |
"Iteration = 338\n", | |
"Iteration = 339\n", | |
"Iteration = 340\n", | |
"Iteration = 341\n", | |
"Iteration = 342\n", | |
"Iteration = 343\n", | |
"Iteration = 344\n", | |
"Iteration = 345\n", | |
"Iteration = 346\n", | |
"Iteration = 347\n", | |
"Iteration = 348\n", | |
"Iteration = 349\n", | |
"Iteration = 350\n", | |
"Iteration = 351\n", | |
"Iteration = 352\n", | |
"Iteration = 353\n", | |
"Iteration = 354\n", | |
"Iteration = 355\n", | |
"Iteration = 356\n", | |
"Iteration = 357\n", | |
"Iteration = 358\n", | |
"Iteration = 359\n", | |
"Iteration = 360\n", | |
"Iteration = 361\n", | |
"Iteration = 362\n", | |
"Iteration = 363\n", | |
"Iteration = 364\n", | |
"Iteration = 365\n", | |
"Iteration = 366\n", | |
"Iteration = 367\n", | |
"Iteration = 368\n", | |
"Iteration = 369\n", | |
"Iteration = 370\n", | |
"Iteration = 371\n", | |
"Iteration = 372\n", | |
"Iteration = 373\n", | |
"Iteration = 374\n", | |
"Iteration = 375\n", | |
"Iteration = 376\n", | |
"Iteration = 377\n", | |
"Iteration = 378\n", | |
"Iteration = 379\n", | |
"Iteration = 380\n", | |
"Iteration = 381\n", | |
"Iteration = 382\n", | |
"Iteration = 383\n", | |
"Iteration = 384\n", | |
"Iteration = 385\n", | |
"Iteration = 386\n", | |
"Iteration = 387\n", | |
"Iteration = 388\n", | |
"Iteration = 389\n", | |
"Iteration = 390\n", | |
"Iteration = 391\n", | |
"Iteration = 392\n", | |
"Iteration = 393\n", | |
"Iteration = 394\n", | |
"Iteration = 395\n", | |
"Iteration = 396\n", | |
"Iteration = 397\n", | |
"Iteration = 398\n", | |
"Iteration = 399\n", | |
"Iteration = 400\n", | |
"Iteration = 401\n", | |
"Iteration = 402\n", | |
"Iteration = 403\n", | |
"Iteration = 404\n", | |
"Iteration = 405\n", | |
"Iteration = 406\n", | |
"Iteration = 407\n", | |
"Iteration = 408\n", | |
"Iteration = 409\n", | |
"Iteration = 410\n", | |
"Iteration = 411\n", | |
"Iteration = 412\n", | |
"Iteration = 413\n", | |
"Iteration = 414\n", | |
"Iteration = 415\n", | |
"Iteration = 416\n", | |
"Iteration = 417\n", | |
"Iteration = 418\n", | |
"Iteration = 419\n", | |
"Iteration = 420\n", | |
"Iteration = 421\n", | |
"Iteration = 422\n", | |
"Iteration = 423\n", | |
"Iteration = 424\n", | |
"Iteration = 425\n", | |
"Iteration = 426\n", | |
"Iteration = 427\n", | |
"Iteration = 428\n", | |
"Iteration = 429\n", | |
"Iteration = 430\n", | |
"Iteration = 431\n", | |
"Iteration = 432\n", | |
"Iteration = 433\n", | |
"Iteration = 434\n", | |
"Iteration = 435\n", | |
"Iteration = 436\n", | |
"Iteration = 437\n", | |
"Iteration = 438\n", | |
"Iteration = 439\n", | |
"Iteration = 440\n", | |
"Iteration = 441\n", | |
"Iteration = 442\n", | |
"Iteration = 443\n", | |
"Iteration = 444\n", | |
"Iteration = 445\n", | |
"Iteration = 446\n", | |
"Iteration = 447\n", | |
"Iteration = 448\n", | |
"Iteration = 449\n", | |
"Iteration = 450\n", | |
"Iteration = 451\n", | |
"Iteration = 452\n", | |
"Iteration = 453\n", | |
"Iteration = 454\n", | |
"Iteration = 455\n", | |
"Iteration = 456\n", | |
"Iteration = 457\n", | |
"Iteration = 458\n", | |
"Iteration = 459\n", | |
"Iteration = 460\n", | |
"Iteration = 461\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n", | |
"Iteration = 462\n", | |
"Iteration = 463\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n", | |
"Iteration = 464\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n", | |
"Iteration = 465\n", | |
"Iteration = 466\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n", | |
"Iteration = 467\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n", | |
"Iteration = 468\n", | |
"Iteration = 469\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration = 470\n", | |
"Iteration = 471\n", | |
"{'message': 'Rate limit exceeded', 'code': 88}\n", | |
"Sleeping . .\n", | |
"Iteration = 472\n", | |
"Iteration = 473\n", | |
"Iteration = 474\n", | |
"Iteration = 475\n", | |
"Iteration = 476\n", | |
"Iteration = 477\n", | |
"Iteration = 478\n", | |
"Iteration = 479\n", | |
"Iteration = 480\n", | |
"Iteration = 481\n", | |
"Iteration = 482\n", | |
"Iteration = 483\n", | |
"Iteration = 484\n", | |
"Iteration = 485\n", | |
"Iteration = 486\n", | |
"Iteration = 487\n", | |
"Iteration = 488\n", | |
"Iteration = 489\n", | |
"Iteration = 490\n", | |
"Iteration = 491\n", | |
"Iteration = 492\n", | |
"Iteration = 493\n", | |
"Iteration = 494\n", | |
"Iteration = 495\n", | |
"Iteration = 496\n", | |
"Iteration = 497\n", | |
"Iteration = 498\n", | |
"Iteration = 499\n", | |
"Iteration = 500\n", | |
"Iteration = 501\n", | |
"Iteration = 502\n", | |
"Iteration = 503\n", | |
"Iteration = 504\n", | |
"Iteration = 505\n", | |
"Iteration = 506\n", | |
"Iteration = 507\n", | |
"Iteration = 508\n", | |
"Iteration = 509\n", | |
"Iteration = 510\n", | |
"Iteration = 511\n", | |
"Iteration = 512\n", | |
"Iteration = 513\n", | |
"Iteration = 514\n", | |
"Iteration = 515\n", | |
"Iteration = 516\n", | |
"Iteration = 517\n", | |
"Iteration = 518\n", | |
"Iteration = 519\n", | |
"Iteration = 520\n", | |
"Iteration = 521\n", | |
"Iteration = 522\n", | |
"Iteration = 523\n", | |
"Iteration = 524\n", | |
"Iteration = 525\n", | |
"Iteration = 526\n", | |
"Iteration = 527\n", | |
"Iteration = 528\n", | |
"Iteration = 529\n", | |
"Iteration = 530\n", | |
"Iteration = 531\n", | |
"Iteration = 532\n", | |
"Iteration = 533\n", | |
"Iteration = 534\n", | |
"Iteration = 535\n", | |
"Iteration = 536\n", | |
"Iteration = 537\n", | |
"Iteration = 538\n", | |
"Iteration = 539\n", | |
"Iteration = 540\n", | |
"Iteration = 541\n", | |
"Iteration = 542\n", | |
"Iteration = 543\n", | |
"Iteration = 544\n", | |
"Iteration = 545\n", | |
"Iteration = 546\n", | |
"Iteration = 547\n", | |
"Iteration = 548\n", | |
"Iteration = 549\n", | |
"Iteration = 550\n", | |
"Iteration = 551\n", | |
"Iteration = 552\n", | |
"Iteration = 553\n", | |
"Iteration = 554\n", | |
"Iteration = 555\n", | |
"Iteration = 556\n", | |
"Iteration = 557\n", | |
"Iteration = 558\n", | |
"Iteration = 559\n", | |
"Iteration = 560\n", | |
"Iteration = 561\n", | |
"Iteration = 562\n", | |
"Iteration = 563\n", | |
"Iteration = 564\n", | |
"Iteration = 565\n", | |
"Iteration = 566\n", | |
"Iteration = 567\n", | |
"Iteration = 568\n", | |
"Iteration = 569\n", | |
"Iteration = 570\n", | |
"Iteration = 571\n", | |
"Iteration = 572\n", | |
"Iteration = 573\n", | |
"Iteration = 574\n", | |
"Iteration = 575\n", | |
"Iteration = 576\n", | |
"Iteration = 577\n", | |
"Iteration = 578\n", | |
"Iteration = 579\n", | |
"Iteration = 580\n", | |
"Iteration = 581\n", | |
"Iteration = 582\n", | |
"Iteration = 583\n", | |
"Iteration = 584\n", | |
"Iteration = 585\n", | |
"Iteration = 586\n", | |
"Iteration = 587\n", | |
"Iteration = 588\n", | |
"Iteration = 589\n", | |
"Iteration = 590\n", | |
"Iteration = 591\n", | |
"Iteration = 592\n", | |
"Iteration = 593\n", | |
"Iteration = 594\n", | |
"Iteration = 595\n", | |
"Iteration = 596\n", | |
"Iteration = 597\n", | |
"Iteration = 598\n", | |
"Iteration = 599\n", | |
"Iteration = 600\n", | |
"Iteration = 601\n", | |
"Iteration = 602\n", | |
"Iteration = 603\n", | |
"Iteration = 604\n", | |
"Iteration = 605\n", | |
"Iteration = 606\n", | |
"Iteration = 607\n", | |
"Iteration = 608\n", | |
"Iteration = 609\n", | |
"Iteration = 610\n", | |
"Iteration = 611\n", | |
"Iteration = 612\n", | |
"Iteration = 613\n", | |
"Iteration = 614\n", | |
"Iteration = 615\n", | |
"Iteration = 616\n", | |
"Iteration = 617\n", | |
"Iteration = 618\n", | |
"Iteration = 619\n", | |
"Iteration = 620\n", | |
"Iteration = 621\n", | |
"Iteration = 622\n", | |
"Iteration = 623\n", | |
"Iteration = 624\n", | |
"Iteration = 625\n", | |
"Iteration = 626\n", | |
"Iteration = 627\n", | |
"Iteration = 628\n", | |
"Iteration = 629\n", | |
"Iteration = 630\n", | |
"Iteration = 631\n", | |
"Iteration = 632\n", | |
"Iteration = 633\n", | |
"Iteration = 634\n", | |
"Iteration = 635\n", | |
"Iteration = 636\n", | |
"Iteration = 637\n", | |
"Iteration = 638\n", | |
"Iteration = 639\n", | |
"Iteration = 640\n", | |
"Iteration = 641\n", | |
"Iteration = 642\n", | |
"Iteration = 643\n", | |
"Iteration = 644\n", | |
"Iteration = 645\n", | |
"Iteration = 646\n", | |
"Iteration = 647\n", | |
"Iteration = 648\n", | |
"Iteration = 649\n", | |
"Iteration = 650\n", | |
"Iteration = 651\n", | |
"Iteration = 652\n", | |
"Iteration = 653\n", | |
"Iteration = 654\n", | |
"Iteration = 655\n", | |
"Iteration = 656\n", | |
"Iteration = 657\n" | |
] | |
} | |
], | |
"source": [ | |
"#CAUTION: Use this code wisely. We do not want to waste request limit set by Twitter\n", | |
"\n", | |
"#keep track of users so that we do not end up calling multiple request for same user\n", | |
"requestNo = 0\n", | |
"users = []\n", | |
"\n", | |
"for i in range(len(t)):\n", | |
" print(\"Iteration = \", i)\n", | |
" if t.loc[i, \"Poster username\"][1:] not in users: \n", | |
" if t.loc[i, \"Geo\"] == None:\n", | |
" try:\n", | |
" u = api.GetUser(screen_name=t.loc[i, \"Poster username\"][1:])\n", | |
" t.loc[i, \"Geo\"] = u.location\n", | |
" t.loc[i,\"CreatedOn\"] = u.created_at\n", | |
" requestNo = requestNo + 1\n", | |
" users.append(t.loc[i, \"Poster username\"][1:])\n", | |
"\n", | |
" except Exception as e:\n", | |
" if e.message[0]['code'] != 50:\n", | |
" print(e.message[0])\n", | |
" print(\"Sleeping . .\")\n", | |
" time.sleep(30)\n", | |
" continue\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Tweet Permalink</th>\n", | |
" <th>Tweet id</th>\n", | |
" <th>Poster username</th>\n", | |
" <th>Tweet post Date</th>\n", | |
" <th>Tweet text</th>\n", | |
" <th>Number of retweets</th>\n", | |
" <th>Is a retweet</th>\n", | |
" <th>Geo</th>\n", | |
" <th>CreatedOn</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>653</td>\n", | |
" <td>https://twitter.com/DOUBLE00JK/status/10820690...</td>\n", | |
" <td>1082069077739405312</td>\n", | |
" <td>@DOUBLE00JK</td>\n", | |
" <td>2019-01-07 05:49:06</td>\n", | |
" <td>I liked a @ YouTube video http://youtu.be/tVKm...</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>654</td>\n", | |
" <td>https://twitter.com/ClassicloveRei/status/1082...</td>\n", | |
" <td>1082068823149137920</td>\n", | |
" <td>@ClassicloveRei</td>\n", | |
" <td>2019-01-07 05:48:05</td>\n", | |
" <td>Xi’an, the core city of China, is now suffer...</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td></td>\n", | |
" <td>Sat Jan 23 16:58:53 +0000 2016</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>655</td>\n", | |
" <td>https://twitter.com/nick_cayman/status/1082068...</td>\n", | |
" <td>1082068214727802880</td>\n", | |
" <td>@nick_cayman</td>\n", | |
" <td>2019-01-07 05:45:40</td>\n", | |
" <td>RT COP23: Air pollution can make climate chang...</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>Cayman Islands</td>\n", | |
" <td>Sun Jul 06 14:27:58 +0000 2008</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>656</td>\n", | |
" <td>https://twitter.com/misskatsuragi/status/10820...</td>\n", | |
" <td>1082067786124460033</td>\n", | |
" <td>@misskatsuragi</td>\n", | |
" <td>2019-01-07 05:43:58</td>\n", | |
" <td>Didn’t miss the air pollution in the my home...</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>Edmonton, Alberta</td>\n", | |
" <td>Mon Feb 22 18:26:24 +0000 2010</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>657</td>\n", | |
" <td>https://twitter.com/The_PlugSeeker/status/1082...</td>\n", | |
" <td>1082067130768650241</td>\n", | |
" <td>@The_PlugSeeker</td>\n", | |
" <td>2019-01-07 05:41:22</td>\n", | |
" <td>Do you Really think it’s going to be decades...</td>\n", | |
" <td>1</td>\n", | |
" <td>No</td>\n", | |
" <td>Worldwide</td>\n", | |
" <td>Tue Aug 25 13:00:39 +0000 2015</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Tweet Permalink Tweet id \\\n", | |
"653 https://twitter.com/DOUBLE00JK/status/10820690... 1082069077739405312 \n", | |
"654 https://twitter.com/ClassicloveRei/status/1082... 1082068823149137920 \n", | |
"655 https://twitter.com/nick_cayman/status/1082068... 1082068214727802880 \n", | |
"656 https://twitter.com/misskatsuragi/status/10820... 1082067786124460033 \n", | |
"657 https://twitter.com/The_PlugSeeker/status/1082... 1082067130768650241 \n", | |
"\n", | |
" Poster username Tweet post Date \\\n", | |
"653 @DOUBLE00JK 2019-01-07 05:49:06 \n", | |
"654 @ClassicloveRei 2019-01-07 05:48:05 \n", | |
"655 @nick_cayman 2019-01-07 05:45:40 \n", | |
"656 @misskatsuragi 2019-01-07 05:43:58 \n", | |
"657 @The_PlugSeeker 2019-01-07 05:41:22 \n", | |
"\n", | |
" Tweet text Number of retweets \\\n", | |
"653 I liked a @ YouTube video http://youtu.be/tVKm... 0 \n", | |
"654 Xi’an, the core city of China, is now suffer... 0 \n", | |
"655 RT COP23: Air pollution can make climate chang... 0 \n", | |
"656 Didn’t miss the air pollution in the my home... 0 \n", | |
"657 Do you Really think it’s going to be decades... 1 \n", | |
"\n", | |
" Is a retweet Geo CreatedOn \n", | |
"653 No None None \n", | |
"654 No Sat Jan 23 16:58:53 +0000 2016 \n", | |
"655 No Cayman Islands Sun Jul 06 14:27:58 +0000 2008 \n", | |
"656 No Edmonton, Alberta Mon Feb 22 18:26:24 +0000 2010 \n", | |
"657 No Worldwide Tue Aug 25 13:00:39 +0000 2015 " | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#check if the 'Geo' column and 'CreatedOn' columns are filled\n", | |
"t.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# we are done. save the new df\n", | |
"t.to_csv('data/january2019/07012019-L.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"397" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#check how many request you sent and compare it with number of columns in the df. Ideally both should match.\n", | |
"#they wont match in case you ran out of request limit.\n", | |
"requestNo" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment