Skip to content

Instantly share code, notes, and snippets.

@DFoly
DFoly / part2_Twitter.py
Created September 25, 2018 16:05
connect method
def connect(username, created_at, tweet, retweet_count, place , location):
"""
connect to MySQL database and insert twitter data
"""
try:
con = mysql.connector.connect(host = 'localhost',
database='twitterdb', user='root', password = password, charset = 'utf8')
if con.is_connected():
@DFoly
DFoly / part3_Twitter.py
Created September 25, 2018 16:06
stream listener class
class Streamlistener(tweepy.StreamListener):
    """Stream listener that reports connection and error events on stdout."""

    def on_connect(self):
        """Print a confirmation message when this hook is invoked."""
        print("You are connected to the Twitter API")

    def on_error(self, status_code):
        """Print a notice when the reported status code is not 200.

        Args:
            status_code: Status code passed to the hook by the caller
                (tweepy invokes ``on_error`` with the response status).
        """
        # The original signature omitted ``status_code``, so the comparison
        # below referenced an unbound name and the hook could not accept the
        # argument the streaming client passes in.
        if status_code != 200:
            print("error found")
@DFoly
DFoly / part4_Twitter.py
Created September 25, 2018 16:07
main method
if __name__ == '__main__':
    # Build the OAuth handler from the consumer key pair, then attach the
    # access token pair to it.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # Wrap the API client in a Streamlistener and hand it to a Stream.
    listener = Streamlistener(api=api)
    stream = tweepy.Stream(auth, listener=listener)
if __name__ == '__main__':
    # Helper object configured with the local database settings.
    t = TweetObject(host='localhost', database='twitterdb', user='root')

    # Run the query, then pass the result through the cleaning step.
    data = t.MySQLConnect("SELECT created_at, tweet FROM `TwitterDB`.`Golf`;")
    data = t.clean_tweets(data)

    # Score every entry of 'clean_tweets' and store the scores under
    # 'Sentiment'.
    data['Sentiment'] = np.array(
        [t.sentiment(cleaned) for cleaned in data['clean_tweets']]
    )

    # Hand the result to the word-cloud and CSV helpers.
    t.word_cloud(data)
    t.save_to_csv(data)
def sentiment(self, tweet):
"""
This function calculates sentiment
based on our cleaned tweets.
Uses textblob to calculate polarity.
Parameters:
----------------
arg1: takes in a tweet (row of dataframe)
----------------
Returns:
@DFoly
DFoly / gist:0e8630af4c443422b845110bf24a6790
Last active October 25, 2018 01:29
Twitter Streaming file
#!/usr/bin/python
import mysql.connector
from mysql.connector import Error
import tweepy
import json
from dateutil import parser
import time
import os
import subprocess
def clean_tweets(self, df):
"""
Takes raw tweets and cleans them
so we can carry out analysis
remove stopwords, punctuation,
lower case, html, emoticons.
This will be done using Regex
? means optional, so colou?r matches
both color and colour.
@DFoly
DFoly / part1_Twitter.py
Last active October 31, 2018 10:00
Part 1 Twitter Blog Post
#!/usr/bin/python
import mysql.connector
from mysql.connector import Error
import tweepy
import json
from dateutil import parser
import time
import os
import subprocess
from sklearn.feature_extraction.text import TfidfVectorizer
# Text to vectorise: the 'body_new' entry of df.
data = df['body_new']

# TF-IDF vectoriser using the built-in English stop-word list, capped at
# 20000 features. The tokenizer=tokenize_and_stem option is left disabled.
tf_idf_vectorizor = TfidfVectorizer(
    stop_words='english',
    max_features=20000,
)
tf_idf = tf_idf_vectorizor.fit_transform(data)

# Normalise the fitted matrix, then convert it to an array.
tf_idf_norm = normalize(tf_idf)
tf_idf_array = tf_idf_norm.toarray()
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.metrics import pairwise_distances
import nltk
import string