Skip to content

Instantly share code, notes, and snippets.

@rishi-a
Created January 4, 2020 11:49
Show Gist options
  • Save rishi-a/270bcc358e0dcf1ba4405defdb024c29 to your computer and use it in GitHub Desktop.
Save rishi-a/270bcc358e0dcf1ba4405defdb024c29 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#the usual inits, please remove the irrlevent onces. The code in this notebook were part of a larger file.\n",
"import pandas as pd\n",
"import datetime as dt\n",
"import glob\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from pandas import DataFrame\n",
"import sys\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#fill in your twitter dev detials\n",
"import twitter\n",
"api = twitter.Api(consumer_key='',\n",
" consumer_secret='',\n",
" access_token_key='',\n",
" access_token_secret='')\n",
"#print(api.VerifyCredentials())"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
" \n"
]
}
],
"source": [
"#read the tweet data that you downloaded;\n",
"t = pd.read_csv('data/january2019/07012019.csv', sep=' ')"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Tweet Permalink</th>\n",
" <th>Tweet id</th>\n",
" <th>Poster username</th>\n",
" <th>Tweet post Date</th>\n",
" <th>Tweet text</th>\n",
" <th>Number of retweets</th>\n",
" <th>Is a retweet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>https://twitter.com/maliceous/status/108242629...</td>\n",
" <td>1082426297278779393</td>\n",
" <td>@maliceous</td>\n",
" <td>2019-01-08 05:28:34</td>\n",
" <td># London’s air pollution is a public health ...</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Tweet Permalink Tweet id \\\n",
"0 https://twitter.com/maliceous/status/108242629... 1082426297278779393 \n",
"\n",
" Poster username Tweet post Date \\\n",
"0 @maliceous 2019-01-08 05:28:34 \n",
"\n",
" Tweet text Number of retweets \\\n",
"0 # London’s air pollution is a public health ... 0 \n",
"\n",
" Is a retweet \n",
"0 No "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#maybe just verify that you are reading it right\n",
"t.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(479, 9)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Code To Insert Location Information For Each Tweet if it Does Not Exists"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(658, 9)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#create two new columns. We are interested in these data\n",
"t['Geo'] = None\n",
"t['CreatedOn'] = None\n",
"t.shape"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration = 0\n",
"Iteration = 1\n",
"Iteration = 2\n",
"Iteration = 3\n",
"Iteration = 4\n",
"Iteration = 5\n",
"Iteration = 6\n",
"Iteration = 7\n",
"Iteration = 8\n",
"Iteration = 9\n",
"Iteration = 10\n",
"Iteration = 11\n",
"Iteration = 12\n",
"Iteration = 13\n",
"Iteration = 14\n",
"Iteration = 15\n",
"Iteration = 16\n",
"Iteration = 17\n",
"Iteration = 18\n",
"Iteration = 19\n",
"Iteration = 20\n",
"Iteration = 21\n",
"Iteration = 22\n",
"Iteration = 23\n",
"Iteration = 24\n",
"Iteration = 25\n",
"Iteration = 26\n",
"Iteration = 27\n",
"Iteration = 28\n",
"Iteration = 29\n",
"Iteration = 30\n",
"Iteration = 31\n",
"Iteration = 32\n",
"Iteration = 33\n",
"Iteration = 34\n",
"Iteration = 35\n",
"Iteration = 36\n",
"Iteration = 37\n",
"Iteration = 38\n",
"Iteration = 39\n",
"Iteration = 40\n",
"Iteration = 41\n",
"Iteration = 42\n",
"Iteration = 43\n",
"Iteration = 44\n",
"Iteration = 45\n",
"Iteration = 46\n",
"Iteration = 47\n",
"Iteration = 48\n",
"Iteration = 49\n",
"Iteration = 50\n",
"Iteration = 51\n",
"Iteration = 52\n",
"Iteration = 53\n",
"Iteration = 54\n",
"Iteration = 55\n",
"Iteration = 56\n",
"Iteration = 57\n",
"Iteration = 58\n",
"Iteration = 59\n",
"Iteration = 60\n",
"Iteration = 61\n",
"Iteration = 62\n",
"Iteration = 63\n",
"Iteration = 64\n",
"Iteration = 65\n",
"Iteration = 66\n",
"Iteration = 67\n",
"Iteration = 68\n",
"Iteration = 69\n",
"Iteration = 70\n",
"Iteration = 71\n",
"Iteration = 72\n",
"Iteration = 73\n",
"Iteration = 74\n",
"Iteration = 75\n",
"Iteration = 76\n",
"Iteration = 77\n",
"Iteration = 78\n",
"Iteration = 79\n",
"Iteration = 80\n",
"Iteration = 81\n",
"Iteration = 82\n",
"Iteration = 83\n",
"Iteration = 84\n",
"Iteration = 85\n",
"Iteration = 86\n",
"Iteration = 87\n",
"Iteration = 88\n",
"Iteration = 89\n",
"Iteration = 90\n",
"Iteration = 91\n",
"Iteration = 92\n",
"Iteration = 93\n",
"Iteration = 94\n",
"Iteration = 95\n",
"Iteration = 96\n",
"Iteration = 97\n",
"Iteration = 98\n",
"Iteration = 99\n",
"Iteration = 100\n",
"Iteration = 101\n",
"Iteration = 102\n",
"Iteration = 103\n",
"Iteration = 104\n",
"Iteration = 105\n",
"Iteration = 106\n",
"Iteration = 107\n",
"Iteration = 108\n",
"Iteration = 109\n",
"Iteration = 110\n",
"Iteration = 111\n",
"Iteration = 112\n",
"Iteration = 113\n",
"Iteration = 114\n",
"Iteration = 115\n",
"Iteration = 116\n",
"Iteration = 117\n",
"Iteration = 118\n",
"Iteration = 119\n",
"Iteration = 120\n",
"Iteration = 121\n",
"Iteration = 122\n",
"Iteration = 123\n",
"Iteration = 124\n",
"Iteration = 125\n",
"Iteration = 126\n",
"Iteration = 127\n",
"Iteration = 128\n",
"Iteration = 129\n",
"Iteration = 130\n",
"Iteration = 131\n",
"Iteration = 132\n",
"Iteration = 133\n",
"Iteration = 134\n",
"Iteration = 135\n",
"Iteration = 136\n",
"Iteration = 137\n",
"Iteration = 138\n",
"Iteration = 139\n",
"Iteration = 140\n",
"Iteration = 141\n",
"Iteration = 142\n",
"Iteration = 143\n",
"Iteration = 144\n",
"Iteration = 145\n",
"Iteration = 146\n",
"Iteration = 147\n",
"Iteration = 148\n",
"Iteration = 149\n",
"Iteration = 150\n",
"Iteration = 151\n",
"Iteration = 152\n",
"Iteration = 153\n",
"Iteration = 154\n",
"Iteration = 155\n",
"Iteration = 156\n",
"Iteration = 157\n",
"Iteration = 158\n",
"Iteration = 159\n",
"Iteration = 160\n",
"Iteration = 161\n",
"Iteration = 162\n",
"Iteration = 163\n",
"Iteration = 164\n",
"Iteration = 165\n",
"Iteration = 166\n",
"Iteration = 167\n",
"Iteration = 168\n",
"Iteration = 169\n",
"Iteration = 170\n",
"Iteration = 171\n",
"Iteration = 172\n",
"Iteration = 173\n",
"Iteration = 174\n",
"Iteration = 175\n",
"Iteration = 176\n",
"Iteration = 177\n",
"Iteration = 178\n",
"Iteration = 179\n",
"Iteration = 180\n",
"Iteration = 181\n",
"Iteration = 182\n",
"Iteration = 183\n",
"Iteration = 184\n",
"Iteration = 185\n",
"Iteration = 186\n",
"Iteration = 187\n",
"Iteration = 188\n",
"Iteration = 189\n",
"Iteration = 190\n",
"Iteration = 191\n",
"Iteration = 192\n",
"Iteration = 193\n",
"Iteration = 194\n",
"Iteration = 195\n",
"Iteration = 196\n",
"Iteration = 197\n",
"Iteration = 198\n",
"Iteration = 199\n",
"Iteration = 200\n",
"Iteration = 201\n",
"Iteration = 202\n",
"Iteration = 203\n",
"Iteration = 204\n",
"Iteration = 205\n",
"Iteration = 206\n",
"Iteration = 207\n",
"Iteration = 208\n",
"Iteration = 209\n",
"Iteration = 210\n",
"Iteration = 211\n",
"Iteration = 212\n",
"Iteration = 213\n",
"Iteration = 214\n",
"Iteration = 215\n",
"Iteration = 216\n",
"Iteration = 217\n",
"Iteration = 218\n",
"Iteration = 219\n",
"Iteration = 220\n",
"Iteration = 221\n",
"Iteration = 222\n",
"Iteration = 223\n",
"Iteration = 224\n",
"Iteration = 225\n",
"Iteration = 226\n",
"Iteration = 227\n",
"Iteration = 228\n",
"Iteration = 229\n",
"Iteration = 230\n",
"Iteration = 231\n",
"Iteration = 232\n",
"Iteration = 233\n",
"Iteration = 234\n",
"Iteration = 235\n",
"Iteration = 236\n",
"Iteration = 237\n",
"Iteration = 238\n",
"Iteration = 239\n",
"Iteration = 240\n",
"Iteration = 241\n",
"Iteration = 242\n",
"Iteration = 243\n",
"Iteration = 244\n",
"Iteration = 245\n",
"Iteration = 246\n",
"Iteration = 247\n",
"Iteration = 248\n",
"Iteration = 249\n",
"Iteration = 250\n",
"Iteration = 251\n",
"Iteration = 252\n",
"Iteration = 253\n",
"Iteration = 254\n",
"Iteration = 255\n",
"Iteration = 256\n",
"Iteration = 257\n",
"Iteration = 258\n",
"Iteration = 259\n",
"Iteration = 260\n",
"Iteration = 261\n",
"Iteration = 262\n",
"Iteration = 263\n",
"Iteration = 264\n",
"Iteration = 265\n",
"Iteration = 266\n",
"Iteration = 267\n",
"Iteration = 268\n",
"Iteration = 269\n",
"Iteration = 270\n",
"Iteration = 271\n",
"Iteration = 272\n",
"Iteration = 273\n",
"Iteration = 274\n",
"Iteration = 275\n",
"Iteration = 276\n",
"Iteration = 277\n",
"Iteration = 278\n",
"Iteration = 279\n",
"Iteration = 280\n",
"Iteration = 281\n",
"Iteration = 282\n",
"Iteration = 283\n",
"Iteration = 284\n",
"Iteration = 285\n",
"Iteration = 286\n",
"Iteration = 287\n",
"Iteration = 288\n",
"Iteration = 289\n",
"Iteration = 290\n",
"Iteration = 291\n",
"Iteration = 292\n",
"Iteration = 293\n",
"Iteration = 294\n",
"Iteration = 295\n",
"Iteration = 296\n",
"Iteration = 297\n",
"Iteration = 298\n",
"Iteration = 299\n",
"Iteration = 300\n",
"Iteration = 301\n",
"Iteration = 302\n",
"Iteration = 303\n",
"Iteration = 304\n",
"Iteration = 305\n",
"Iteration = 306\n",
"Iteration = 307\n",
"Iteration = 308\n",
"Iteration = 309\n",
"Iteration = 310\n",
"Iteration = 311\n",
"Iteration = 312\n",
"Iteration = 313\n",
"Iteration = 314\n",
"Iteration = 315\n",
"Iteration = 316\n",
"Iteration = 317\n",
"Iteration = 318\n",
"Iteration = 319\n",
"Iteration = 320\n",
"Iteration = 321\n",
"Iteration = 322\n",
"Iteration = 323\n",
"Iteration = 324\n",
"Iteration = 325\n",
"Iteration = 326\n",
"Iteration = 327\n",
"Iteration = 328\n",
"Iteration = 329\n",
"Iteration = 330\n",
"Iteration = 331\n",
"Iteration = 332\n",
"Iteration = 333\n",
"Iteration = 334\n",
"Iteration = 335\n",
"Iteration = 336\n",
"Iteration = 337\n",
"Iteration = 338\n",
"Iteration = 339\n",
"Iteration = 340\n",
"Iteration = 341\n",
"Iteration = 342\n",
"Iteration = 343\n",
"Iteration = 344\n",
"Iteration = 345\n",
"Iteration = 346\n",
"Iteration = 347\n",
"Iteration = 348\n",
"Iteration = 349\n",
"Iteration = 350\n",
"Iteration = 351\n",
"Iteration = 352\n",
"Iteration = 353\n",
"Iteration = 354\n",
"Iteration = 355\n",
"Iteration = 356\n",
"Iteration = 357\n",
"Iteration = 358\n",
"Iteration = 359\n",
"Iteration = 360\n",
"Iteration = 361\n",
"Iteration = 362\n",
"Iteration = 363\n",
"Iteration = 364\n",
"Iteration = 365\n",
"Iteration = 366\n",
"Iteration = 367\n",
"Iteration = 368\n",
"Iteration = 369\n",
"Iteration = 370\n",
"Iteration = 371\n",
"Iteration = 372\n",
"Iteration = 373\n",
"Iteration = 374\n",
"Iteration = 375\n",
"Iteration = 376\n",
"Iteration = 377\n",
"Iteration = 378\n",
"Iteration = 379\n",
"Iteration = 380\n",
"Iteration = 381\n",
"Iteration = 382\n",
"Iteration = 383\n",
"Iteration = 384\n",
"Iteration = 385\n",
"Iteration = 386\n",
"Iteration = 387\n",
"Iteration = 388\n",
"Iteration = 389\n",
"Iteration = 390\n",
"Iteration = 391\n",
"Iteration = 392\n",
"Iteration = 393\n",
"Iteration = 394\n",
"Iteration = 395\n",
"Iteration = 396\n",
"Iteration = 397\n",
"Iteration = 398\n",
"Iteration = 399\n",
"Iteration = 400\n",
"Iteration = 401\n",
"Iteration = 402\n",
"Iteration = 403\n",
"Iteration = 404\n",
"Iteration = 405\n",
"Iteration = 406\n",
"Iteration = 407\n",
"Iteration = 408\n",
"Iteration = 409\n",
"Iteration = 410\n",
"Iteration = 411\n",
"Iteration = 412\n",
"Iteration = 413\n",
"Iteration = 414\n",
"Iteration = 415\n",
"Iteration = 416\n",
"Iteration = 417\n",
"Iteration = 418\n",
"Iteration = 419\n",
"Iteration = 420\n",
"Iteration = 421\n",
"Iteration = 422\n",
"Iteration = 423\n",
"Iteration = 424\n",
"Iteration = 425\n",
"Iteration = 426\n",
"Iteration = 427\n",
"Iteration = 428\n",
"Iteration = 429\n",
"Iteration = 430\n",
"Iteration = 431\n",
"Iteration = 432\n",
"Iteration = 433\n",
"Iteration = 434\n",
"Iteration = 435\n",
"Iteration = 436\n",
"Iteration = 437\n",
"Iteration = 438\n",
"Iteration = 439\n",
"Iteration = 440\n",
"Iteration = 441\n",
"Iteration = 442\n",
"Iteration = 443\n",
"Iteration = 444\n",
"Iteration = 445\n",
"Iteration = 446\n",
"Iteration = 447\n",
"Iteration = 448\n",
"Iteration = 449\n",
"Iteration = 450\n",
"Iteration = 451\n",
"Iteration = 452\n",
"Iteration = 453\n",
"Iteration = 454\n",
"Iteration = 455\n",
"Iteration = 456\n",
"Iteration = 457\n",
"Iteration = 458\n",
"Iteration = 459\n",
"Iteration = 460\n",
"Iteration = 461\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n",
"Iteration = 462\n",
"Iteration = 463\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n",
"Iteration = 464\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n",
"Iteration = 465\n",
"Iteration = 466\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n",
"Iteration = 467\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n",
"Iteration = 468\n",
"Iteration = 469\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration = 470\n",
"Iteration = 471\n",
"{'message': 'Rate limit exceeded', 'code': 88}\n",
"Sleeping . .\n",
"Iteration = 472\n",
"Iteration = 473\n",
"Iteration = 474\n",
"Iteration = 475\n",
"Iteration = 476\n",
"Iteration = 477\n",
"Iteration = 478\n",
"Iteration = 479\n",
"Iteration = 480\n",
"Iteration = 481\n",
"Iteration = 482\n",
"Iteration = 483\n",
"Iteration = 484\n",
"Iteration = 485\n",
"Iteration = 486\n",
"Iteration = 487\n",
"Iteration = 488\n",
"Iteration = 489\n",
"Iteration = 490\n",
"Iteration = 491\n",
"Iteration = 492\n",
"Iteration = 493\n",
"Iteration = 494\n",
"Iteration = 495\n",
"Iteration = 496\n",
"Iteration = 497\n",
"Iteration = 498\n",
"Iteration = 499\n",
"Iteration = 500\n",
"Iteration = 501\n",
"Iteration = 502\n",
"Iteration = 503\n",
"Iteration = 504\n",
"Iteration = 505\n",
"Iteration = 506\n",
"Iteration = 507\n",
"Iteration = 508\n",
"Iteration = 509\n",
"Iteration = 510\n",
"Iteration = 511\n",
"Iteration = 512\n",
"Iteration = 513\n",
"Iteration = 514\n",
"Iteration = 515\n",
"Iteration = 516\n",
"Iteration = 517\n",
"Iteration = 518\n",
"Iteration = 519\n",
"Iteration = 520\n",
"Iteration = 521\n",
"Iteration = 522\n",
"Iteration = 523\n",
"Iteration = 524\n",
"Iteration = 525\n",
"Iteration = 526\n",
"Iteration = 527\n",
"Iteration = 528\n",
"Iteration = 529\n",
"Iteration = 530\n",
"Iteration = 531\n",
"Iteration = 532\n",
"Iteration = 533\n",
"Iteration = 534\n",
"Iteration = 535\n",
"Iteration = 536\n",
"Iteration = 537\n",
"Iteration = 538\n",
"Iteration = 539\n",
"Iteration = 540\n",
"Iteration = 541\n",
"Iteration = 542\n",
"Iteration = 543\n",
"Iteration = 544\n",
"Iteration = 545\n",
"Iteration = 546\n",
"Iteration = 547\n",
"Iteration = 548\n",
"Iteration = 549\n",
"Iteration = 550\n",
"Iteration = 551\n",
"Iteration = 552\n",
"Iteration = 553\n",
"Iteration = 554\n",
"Iteration = 555\n",
"Iteration = 556\n",
"Iteration = 557\n",
"Iteration = 558\n",
"Iteration = 559\n",
"Iteration = 560\n",
"Iteration = 561\n",
"Iteration = 562\n",
"Iteration = 563\n",
"Iteration = 564\n",
"Iteration = 565\n",
"Iteration = 566\n",
"Iteration = 567\n",
"Iteration = 568\n",
"Iteration = 569\n",
"Iteration = 570\n",
"Iteration = 571\n",
"Iteration = 572\n",
"Iteration = 573\n",
"Iteration = 574\n",
"Iteration = 575\n",
"Iteration = 576\n",
"Iteration = 577\n",
"Iteration = 578\n",
"Iteration = 579\n",
"Iteration = 580\n",
"Iteration = 581\n",
"Iteration = 582\n",
"Iteration = 583\n",
"Iteration = 584\n",
"Iteration = 585\n",
"Iteration = 586\n",
"Iteration = 587\n",
"Iteration = 588\n",
"Iteration = 589\n",
"Iteration = 590\n",
"Iteration = 591\n",
"Iteration = 592\n",
"Iteration = 593\n",
"Iteration = 594\n",
"Iteration = 595\n",
"Iteration = 596\n",
"Iteration = 597\n",
"Iteration = 598\n",
"Iteration = 599\n",
"Iteration = 600\n",
"Iteration = 601\n",
"Iteration = 602\n",
"Iteration = 603\n",
"Iteration = 604\n",
"Iteration = 605\n",
"Iteration = 606\n",
"Iteration = 607\n",
"Iteration = 608\n",
"Iteration = 609\n",
"Iteration = 610\n",
"Iteration = 611\n",
"Iteration = 612\n",
"Iteration = 613\n",
"Iteration = 614\n",
"Iteration = 615\n",
"Iteration = 616\n",
"Iteration = 617\n",
"Iteration = 618\n",
"Iteration = 619\n",
"Iteration = 620\n",
"Iteration = 621\n",
"Iteration = 622\n",
"Iteration = 623\n",
"Iteration = 624\n",
"Iteration = 625\n",
"Iteration = 626\n",
"Iteration = 627\n",
"Iteration = 628\n",
"Iteration = 629\n",
"Iteration = 630\n",
"Iteration = 631\n",
"Iteration = 632\n",
"Iteration = 633\n",
"Iteration = 634\n",
"Iteration = 635\n",
"Iteration = 636\n",
"Iteration = 637\n",
"Iteration = 638\n",
"Iteration = 639\n",
"Iteration = 640\n",
"Iteration = 641\n",
"Iteration = 642\n",
"Iteration = 643\n",
"Iteration = 644\n",
"Iteration = 645\n",
"Iteration = 646\n",
"Iteration = 647\n",
"Iteration = 648\n",
"Iteration = 649\n",
"Iteration = 650\n",
"Iteration = 651\n",
"Iteration = 652\n",
"Iteration = 653\n",
"Iteration = 654\n",
"Iteration = 655\n",
"Iteration = 656\n",
"Iteration = 657\n"
]
}
],
"source": [
"#CAUTION: Use this code wisely. We do not want to waste request limit set by Twitter\n",
"\n",
"#keep track of users so that we do not end up calling multiple request for same user\n",
"requestNo = 0\n",
"users = []\n",
"\n",
"for i in range(len(t)):\n",
" print(\"Iteration = \", i)\n",
" if t.loc[i, \"Poster username\"][1:] not in users: \n",
" if t.loc[i, \"Geo\"] == None:\n",
" try:\n",
" u = api.GetUser(screen_name=t.loc[i, \"Poster username\"][1:])\n",
" t.loc[i, \"Geo\"] = u.location\n",
" t.loc[i,\"CreatedOn\"] = u.created_at\n",
" requestNo = requestNo + 1\n",
" users.append(t.loc[i, \"Poster username\"][1:])\n",
"\n",
" except Exception as e:\n",
" if e.message[0]['code'] != 50:\n",
" print(e.message[0])\n",
" print(\"Sleeping . .\")\n",
" time.sleep(30)\n",
" continue\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Tweet Permalink</th>\n",
" <th>Tweet id</th>\n",
" <th>Poster username</th>\n",
" <th>Tweet post Date</th>\n",
" <th>Tweet text</th>\n",
" <th>Number of retweets</th>\n",
" <th>Is a retweet</th>\n",
" <th>Geo</th>\n",
" <th>CreatedOn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>653</td>\n",
" <td>https://twitter.com/DOUBLE00JK/status/10820690...</td>\n",
" <td>1082069077739405312</td>\n",
" <td>@DOUBLE00JK</td>\n",
" <td>2019-01-07 05:49:06</td>\n",
" <td>I liked a @ YouTube video http://youtu.be/tVKm...</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <td>654</td>\n",
" <td>https://twitter.com/ClassicloveRei/status/1082...</td>\n",
" <td>1082068823149137920</td>\n",
" <td>@ClassicloveRei</td>\n",
" <td>2019-01-07 05:48:05</td>\n",
" <td>Xi’an, the core city of China, is now suffer...</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td></td>\n",
" <td>Sat Jan 23 16:58:53 +0000 2016</td>\n",
" </tr>\n",
" <tr>\n",
" <td>655</td>\n",
" <td>https://twitter.com/nick_cayman/status/1082068...</td>\n",
" <td>1082068214727802880</td>\n",
" <td>@nick_cayman</td>\n",
" <td>2019-01-07 05:45:40</td>\n",
" <td>RT COP23: Air pollution can make climate chang...</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>Cayman Islands</td>\n",
" <td>Sun Jul 06 14:27:58 +0000 2008</td>\n",
" </tr>\n",
" <tr>\n",
" <td>656</td>\n",
" <td>https://twitter.com/misskatsuragi/status/10820...</td>\n",
" <td>1082067786124460033</td>\n",
" <td>@misskatsuragi</td>\n",
" <td>2019-01-07 05:43:58</td>\n",
" <td>Didn’t miss the air pollution in the my home...</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>Edmonton, Alberta</td>\n",
" <td>Mon Feb 22 18:26:24 +0000 2010</td>\n",
" </tr>\n",
" <tr>\n",
" <td>657</td>\n",
" <td>https://twitter.com/The_PlugSeeker/status/1082...</td>\n",
" <td>1082067130768650241</td>\n",
" <td>@The_PlugSeeker</td>\n",
" <td>2019-01-07 05:41:22</td>\n",
" <td>Do you Really think it’s going to be decades...</td>\n",
" <td>1</td>\n",
" <td>No</td>\n",
" <td>Worldwide</td>\n",
" <td>Tue Aug 25 13:00:39 +0000 2015</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Tweet Permalink Tweet id \\\n",
"653 https://twitter.com/DOUBLE00JK/status/10820690... 1082069077739405312 \n",
"654 https://twitter.com/ClassicloveRei/status/1082... 1082068823149137920 \n",
"655 https://twitter.com/nick_cayman/status/1082068... 1082068214727802880 \n",
"656 https://twitter.com/misskatsuragi/status/10820... 1082067786124460033 \n",
"657 https://twitter.com/The_PlugSeeker/status/1082... 1082067130768650241 \n",
"\n",
" Poster username Tweet post Date \\\n",
"653 @DOUBLE00JK 2019-01-07 05:49:06 \n",
"654 @ClassicloveRei 2019-01-07 05:48:05 \n",
"655 @nick_cayman 2019-01-07 05:45:40 \n",
"656 @misskatsuragi 2019-01-07 05:43:58 \n",
"657 @The_PlugSeeker 2019-01-07 05:41:22 \n",
"\n",
" Tweet text Number of retweets \\\n",
"653 I liked a @ YouTube video http://youtu.be/tVKm... 0 \n",
"654 Xi’an, the core city of China, is now suffer... 0 \n",
"655 RT COP23: Air pollution can make climate chang... 0 \n",
"656 Didn’t miss the air pollution in the my home... 0 \n",
"657 Do you Really think it’s going to be decades... 1 \n",
"\n",
" Is a retweet Geo CreatedOn \n",
"653 No None None \n",
"654 No Sat Jan 23 16:58:53 +0000 2016 \n",
"655 No Cayman Islands Sun Jul 06 14:27:58 +0000 2008 \n",
"656 No Edmonton, Alberta Mon Feb 22 18:26:24 +0000 2010 \n",
"657 No Worldwide Tue Aug 25 13:00:39 +0000 2015 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check if the 'Geo' column and 'CreatedOn' columns are filled\n",
"t.tail()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"# we are done. save the new df\n",
"t.to_csv('data/january2019/07012019-L.csv')"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"397"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check how many request you sent and compare it with number of columns in the df. Ideally both should match.\n",
"#they wont match in case you ran out of request limit.\n",
"requestNo"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment