Created
October 5, 2019 12:52
-
-
Save NikosKoufos/664e6d5b467f6c602f87678062b78520 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import time\n", | |
"from selenium import webdriver\n", | |
"from selenium.webdriver.common.keys import Keys" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_tweets(browser):\n", | |
" body = browser.find_element_by_tag_name('body')\n", | |
" tweets = []\n", | |
" while len(tweets) < num_of_tweets:\n", | |
" for _ in range(5):\n", | |
" body.send_keys(Keys.PAGE_DOWN)\n", | |
" time.sleep(1)\n", | |
" tweets = browser.find_elements_by_class_name('content')\n", | |
"\n", | |
" return tweets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def parse_and_save(tweets, num_of_tweets, query):\n", | |
" counter = 0\n", | |
" column_separator = \",\"\n", | |
" tags_separator = '|'\n", | |
" \n", | |
" with open(query+\"_twitter_data.csv\", 'w', errors='ignore') as f:\n", | |
" f.write(\"Username\" + column_separator + \"Text\" + column_separator + \"Reply\"\\\n", | |
" + column_separator + \"Retweet\" + column_separator + \"Favourite\"\\\n", | |
" + column_separator + \"RefUsers\" + column_separator + \"Hashtags\\n\")\n", | |
"\n", | |
" for tweet in tweets:\n", | |
" if counter == num_of_tweets:\n", | |
" break\n", | |
" user = tweet.find_element_by_class_name('username').text\n", | |
" # replace newlines and commas with spaces so the tweet text stays in a single CSV field\n", | |
" text = tweet.find_element_by_class_name('tweet-text').text.replace('\\n', ' ').replace(',', ' ')\n", | |
" stats = tweet.find_elements_by_class_name('ProfileTweet-actionCountForPresentation')\n", | |
"\n", | |
" reply = stats[0].text\n", | |
" if not len(reply):\n", | |
" reply = '0'\n", | |
"\n", | |
" retweet = stats[1].text\n", | |
" if not len(retweet):\n", | |
" retweet = '0'\n", | |
"\n", | |
" favourite = stats[3].text\n", | |
" if not len(favourite):\n", | |
" favourite = '0'\n", | |
"\n", | |
" ref_users = ''\n", | |
" for user_ref in tweet.find_elements_by_class_name('twitter-atreply'):\n", | |
" ref_users += user_ref.text + tags_separator\n", | |
" ref_users = ref_users[:-len(tags_separator)]\n", | |
" if not len(ref_users):\n", | |
" ref_users = '-'\n", | |
"\n", | |
" hashtags = ''\n", | |
" for hashtag in tweet.find_elements_by_class_name('twitter-hashtag'):\n", | |
" hashtags += hashtag.text + tags_separator\n", | |
" hashtags = hashtags[:-len(tags_separator)]\n", | |
" if not len(hashtags):\n", | |
" hashtags = '-'\n", | |
"\n", | |
" line = user + column_separator + text + column_separator + reply \\\n", | |
" + column_separator + retweet + column_separator + favourite \\\n", | |
" + column_separator + ref_users + column_separator + hashtags + '\\n'\n", | |
" f.write(line)\n", | |
" counter += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"browser = webdriver.Chrome()\n", | |
"base_url = \"https://twitter.com/search?q=\"\n", | |
"query = \"Trump\"\n", | |
"url = base_url + query\n", | |
"num_of_tweets = 100\n", | |
"\n", | |
"browser.get(url)\n", | |
"time.sleep(1)\n", | |
"tweets = get_tweets(browser)\n", | |
"parse_and_save(tweets, num_of_tweets, query)\n", | |
"browser.close()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment