Last active
October 26, 2020 10:55
-
-
Save ikagios/81577d0be47033659e5a5cf0197a305a to your computer and use it in GitHub Desktop.
Application for detecting fake news in Twitter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import necessary libraries | |
from __future__ import print_function | |
import numpy as np | |
import pandas as pd | |
import tweepy | |
import json | |
import time | |
import datetime | |
from tweepy import Stream, StreamListener, OAuthHandler | |
# pip install mysql-connector-python | |
import mysql.connector | |
# pip install --only-binary :all: mysqlclient | |
# for Python Shell instead run in cmd: pip install mysqlclient-1.4.4-cp37-cp37m-win32.whl | |
import MySQLdb | |
from dateutil import parser | |
from http.client import IncompleteRead as http_incompleteRead | |
from urllib3.exceptions import IncompleteRead as urllib3_incompleteRead | |
from urllib3.exceptions import ProtocolError | |
from ssl import SSLError | |
from requests.exceptions import Timeout, ConnectionError | |
from urllib3.exceptions import ReadTimeoutError | |
# Enter hashtags or keywords you wish to find tweets
input_key_hash = input("Enter hashtags or keywords separated by commas, e.g. #bigdata, #AI, #datascience: ")
# The suggested input format puts a space after every comma, so each term is
# stripped; empty entries (e.g. from a trailing comma) are dropped so that
# WORDS only ever contains real search terms.
WORDS = [word.strip() for word in input_key_hash.split(",") if word.strip()]
# Enter time to stop collecting tweets
date_entry = input('Enter date in YYYY-MM-DD format, to stop collecting tweets: ')
time_entry = input('Enter time of day in HH:MM:SS format, to stop collecting tweets: ')
# A value that does not match the requested format raises ValueError here,
# before any connection to Twitter or MySQL is attempted.
year, month, day = map(int, date_entry.split('-'))
hour, minute, second = map(int, time_entry.split(':'))
# Naive (timezone-less) timestamp; the listener compares it against
# datetime.datetime.now(), i.e. the local clock of this machine.
STOPDATE = datetime.datetime(year, month, day, hour, minute, second)
# Enter Twitter Developer Credentials
# SECURITY NOTE(review): secrets that are committed to source control should be
# treated as compromised and rotated. Every value below can be supplied through
# the environment variable named in the call; the literal is kept only as a
# backward-compatible fallback for existing set-ups.
import os

CONSUMER_KEY = os.environ.get('FAKENEWS_CONSUMER_KEY', 'P6n9SmQL9xTdjVA6pSlz7SAIj')
CONSUMER_SECRET = os.environ.get('FAKENEWS_CONSUMER_SECRET', 'iXKdbG4BthZQNi8spy2LJx2Wz2RdO1KL5byKwg4n2UCJeJrXkm')
ACCESS_TOKEN = os.environ.get('FAKENEWS_ACCESS_TOKEN', '997022971620347904-Dm5CDg8e7wPxbob1ssikc6Z9CSVM168')
ACCESS_TOKEN_SECRET = os.environ.get('FAKENEWS_ACCESS_TOKEN_SECRET', 'UikHbT02qzhJJEpukKB5eUNhrpeF3tE9GZrb1MO6JQq0X')
# Enter MySQL Database Name and Credentials
HOST = os.environ.get('FAKENEWS_MYSQL_HOST', 'localhost')
USER = os.environ.get('FAKENEWS_MYSQL_USER', 'FakeNews')
PASSWD = os.environ.get('FAKENEWS_MYSQL_PASSWORD', 'dynatothtes23ZX')
DATABASE = os.environ.get('FAKENEWS_MYSQL_DATABASE', 'fake')
# This function takes the 'WORDS', 'tweet_id', 'screen_name', 'user_id_str', 'created_at', 'text', 'reaction_user',
# 'react_screen_name', 'retweet_OR_reply' and stores them into MySQL database "fake", table "greek_test2"
def store_data1(WORDS, tweet_id, screen_name, user_id_str, created_at, text, reaction_user, react_screen_name, retweet_OR_reply):
    """Insert one collected tweet as a new row of table "greek_test2".

    Args:
        WORDS: the tracked search terms, either a string or a list/tuple of
            strings (a sequence is stored as one comma-separated string).
        tweet_id: id of the tweet.
        screen_name: screen name of the tweet's author.
        user_id_str: user id of the tweet's author.
        created_at: creation time of the tweet.
        text: text of the tweet.
        reaction_user: user id of the account that was replied to / retweeted.
        react_screen_name: screen name of that account.
        retweet_OR_reply: "reply", "retweet" or "" for an original tweet.

    Returns:
        None.

    Raises:
        MySQLdb.Error: if connecting or inserting fails; the connection is
            closed before the error propagates.
    """
    # MySQLdb renders a list/tuple parameter as a parenthesised SQL list, which
    # is not a valid value for a single column once it holds several items.
    if isinstance(WORDS, (list, tuple)):
        WORDS = ",".join(str(word) for word in WORDS)
    insert_query = (
        "INSERT INTO greek_test2 (WORDS, tweet_id, screen_name, user_id_str, created_at, text, "
        "reaction_user, react_screen_name, retweet_OR_reply) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    values = (WORDS, tweet_id, screen_name, user_id_str, created_at, text,
              reaction_user, react_screen_name, retweet_OR_reply)
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8mb4")
    try:
        cursor = db.cursor()
        try:
            cursor.execute(insert_query, values)
            db.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, also when the insert fails.
        db.close()
    return
# This function inserts the columns of one tweet into table "dataforgraph_greek3". These are the
# columns we are going to use later on to plot our network graph
def store_data2(WORDS, tweet_id, created_at, user_id_str, screen_name, reaction_user, react_screen_name, retweet_OR_reply):
    """Insert the graph-relevant columns of one tweet into "dataforgraph_greek3".

    The row is built from the arguments. Previously every call copied the
    complete contents of table "greek_test2" again, and because this function
    is called once per collected tweet, all earlier tweets were duplicated on
    each call.

    Args:
        WORDS: the tracked search terms, either a string or a list/tuple of
            strings (a sequence is stored as one comma-separated string).
        tweet_id: id of the tweet.
        created_at: creation time of the tweet.
        user_id_str: user id of the tweet's author.
        screen_name: screen name of the tweet's author.
        reaction_user: user id of the account that was replied to / retweeted.
        react_screen_name: screen name of that account.
        retweet_OR_reply: "reply", "retweet" or "" for an original tweet.

    Returns:
        None.

    Raises:
        MySQLdb.Error: if connecting or inserting fails; the connection is
            closed before the error propagates.
    """
    # MySQLdb renders a list/tuple parameter as a parenthesised SQL list, which
    # is not a valid value for a single column once it holds several items.
    if isinstance(WORDS, (list, tuple)):
        WORDS = ",".join(str(word) for word in WORDS)
    insert_query = (
        "INSERT INTO dataforgraph_greek3 (WORDS, tweet_id, created_at, user_id_str, screen_name, "
        "reaction_user, react_screen_name, retweet_OR_reply) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    )
    values = (WORDS, tweet_id, created_at, user_id_str, screen_name,
              reaction_user, react_screen_name, retweet_OR_reply)
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8mb4")
    try:
        cursor = db.cursor()
        try:
            cursor.execute(insert_query, values)
            db.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, also when the insert fails.
        db.close()
    return
# Create a class inheriting from tweepy.StreamListener
class StreamListener(tweepy.StreamListener):
    """Listener that stores every streamed tweet in MySQL until STOPDATE.

    tweepy.StreamListener is the class provided by tweepy to access the
    Twitter Streaming API. NOTE: this subclass deliberately keeps its original
    name, which shadows the ``StreamListener`` imported from tweepy at the top
    of the file; the base class is therefore referenced as
    ``tweepy.StreamListener``.

    Every callback returns True to ask tweepy to keep the stream running;
    only ``on_data`` returns False, once STOPDATE has been reached.
    """

    def on_connect(self):
        # Called initially to connect to the Streaming API
        print("You are now connected to the streaming API.")

    def on_error(self, status_code):
        # On error - if an error occurs, display the error / status code
        print('An Error has occured: ' + repr(status_code))
        return True

    def on_disconnect(self, notice):
        # Called when twitter sends a disconnect notice
        return True

    def on_timeout(self):
        print('Timeout...')
        return True

    def on_exception(self, exception):
        print('Exception...')
        return True

    @staticmethod
    def _extract_fields(datajson):
        """Pick the values to store out of one decoded stream message.

        Returns:
            A tuple (tweet_id, screen_name, user_id_str, created_at, text,
            reaction_user, react_screen_name, retweet_OR_reply), or None when
            the message is not a tweet with an identifiable author.
        """
        # The stream also delivers messages that are not tweets; they carry
        # no 'text' / 'user' and there is nothing to store for them.
        if not isinstance(datajson, dict) or 'text' not in datajson:
            return None
        user = datajson.get('user')
        if not user or not user.get('id_str'):
            return None

        # grab the wanted data from the Tweet
        text = datajson['text']
        screen_name = user['screen_name']
        user_id_str = user['id_str']
        tweet_id = datajson['id']
        created_at = parser.parse(datajson['created_at'])

        retweet_OR_reply = ""
        reaction_user = ""
        react_screen_name = ""
        if datajson.get('in_reply_to_status_id'):
            reaction_user = datajson['in_reply_to_user_id_str']
            react_screen_name = datajson['in_reply_to_screen_name']
            retweet_OR_reply = "reply"

        try:
            retweeted_user = datajson['retweeted_status']['user']
            retweet_user_id = retweeted_user['id_str']
            retweet_screen_name = retweeted_user['screen_name']
        except (KeyError, TypeError):
            # No usable 'retweeted_status': the tweet is not a retweet.
            pass
        else:
            # Appended to (not replacing) the reply values, as before.
            reaction_user = (reaction_user or "") + retweet_user_id
            react_screen_name = (react_screen_name or "") + retweet_screen_name
            retweet_OR_reply = "retweet"

        return (tweet_id, screen_name, user_id_str, created_at, text,
                reaction_user, react_screen_name, retweet_OR_reply)

    def on_data(self, data):
        """Decode one stream message and store it in the MySQL database.

        Returns:
            False once datetime.datetime.now() >= STOPDATE, which stops the
            collection of tweets and disconnects the stream; True otherwise,
            also after an error, so that one bad message does not end the
            collection.
        """
        # Tweets are collected until given time is reached
        if datetime.datetime.now() >= STOPDATE:
            print('Timed out!')
            return False

        # Specific handlers come before the generic one so they are reachable.
        # Exception (not BaseException) is caught last so that Ctrl-C and
        # SystemExit can still stop the program.
        try:
            # Decode the JSON from Twitter
            fields = self._extract_fields(json.loads(data))
            if fields is None:
                return True
            (tweet_id, screen_name, user_id_str, created_at, text,
             reaction_user, react_screen_name, retweet_OR_reply) = fields
            # print out a message that we have collected a tweet at a specific time and Authors' user_id
            print("Tweet collected at " + str(created_at))
            print("authored by user with user_id: " + str(user_id_str))
            # insert the data into MySQL database "fake", table "greek_test2"
            store_data1(WORDS, tweet_id, screen_name, user_id_str, created_at, text, reaction_user, react_screen_name, retweet_OR_reply)
            # insert the graph columns of the tweet into table "dataforgraph_greek3"
            store_data2(WORDS, tweet_id, created_at, user_id_str, screen_name, reaction_user, react_screen_name, retweet_OR_reply)
        except (http_incompleteRead, urllib3_incompleteRead, ProtocolError) as e:
            print("Incomplete Read error: %s" % str(e))
            print("~~~ Restarting stream search in 5 seconds... ~~~")
            time.sleep(5)
        except (Timeout, SSLError, ReadTimeoutError, ConnectionError) as e:
            print("Network error occurred: %s" % str(e))
            time.sleep(5)
        except Exception as e:
            print("Error on_data: %s, Pausing..." % str(e))
            time.sleep(5)
        return True
# Authentication Procedure
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
# Set up the listener. The 'wait_on_rate_limit=True' is needed to help with Twitter API rate limiting.
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
# Establish a streaming session and route messages to StreamListener
streamer = tweepy.Stream(auth=auth, listener=listener)
# Track the terms the user entered (and that are stored with every tweet)
# instead of an unrelated hard-coded list. Terms are stripped and empty
# entries dropped, because the prompt suggests "a, b, c" style input.
track_terms = [word.strip() for word in WORDS if word.strip()]
if not track_terms:
    raise SystemExit("No hashtags or keywords were entered; nothing to track.")
print("Tracking: " + str(track_terms))
# We use filter to stream all tweets containing the given words. The track parameter is an array of search terms to stream.
streamer.filter(track=track_terms)
print("\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment