Skip to content

Instantly share code, notes, and snippets.

View kiddojazz's full-sized avatar
🎯
Focusing

Temidayo Omoniyi kiddojazz

🎯
Focusing
View GitHub Profile
@kiddojazz
kiddojazz / config.py
Created July 18, 2022 09:18
Configparser
import tweepy # Python Library for scrapping Twitter Data.
import configparser #ConfigParser is a Python class which implements a basic configuration language for Python programs.
import pandas as pd #Data Manipulation & Transformation
from datetime import date
import re #RegEx : Regular expression
# Load the Twitter API credentials from the [twitter] section of config.ini.
config = configparser.ConfigParser()
config.read("config.ini")  # The config.ini file created earlier.
twitter_section = config["twitter"]
api_key = twitter_section["api_key"]
api_key_secret = twitter_section["api_key_secret"]
access_token = twitter_section["access_token"]
access_token_secret = twitter_section["access_token_secret"]
# Authenticate this account against the Twitter app using the four
# credentials read from the config file.
auth = tweepy.OAuth1UserHandler(
    consumer_key=api_key,
    consumer_secret=api_key_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)
# API instance through which the requests below are made.
api = tweepy.API(auth)
# Search for English-language tweets matching `search_query`; items(1000)
# caps the iteration at 1000 tweets.
# NOTE(review): `search_query` is not assigned anywhere in this excerpt;
# confirm it is defined before this line runs.
tweets = tweepy.Cursor(api.search_tweets, q=search_query, lang="en").items(1000)

# Column headers, listed in the same order as the fields of each row below.
columns = [
    "User",
    "Time",
    "Tweet",
    "Location",
    "Verified",
    "Tweet_Source",
    "Followers",
    "Retweet_Count",
    "Tweet ID",
]

# Collect one row per tweet. Rows are appended one at a time so that whatever
# was gathered so far stays in `data` if the iteration stops early.
data = []
for tweet in tweets:
    author = tweet.user
    data.append(
        [
            author.screen_name,
            tweet.created_at,
            tweet.text,
            author.location,
            author.verified,
            tweet.source,
            author.followers_count,
            tweet.retweet_count,
            tweet.id,
        ]
    )
import os
import glob
import pandas as pd
# Merge every CSV file in the working directory into a single "Append.csv".
os.chdir("/mydir")  # Change "/mydir" to your desired working directory.
extension = 'csv'  # Only files with this extension are combined.
output_name = "Append.csv"

# Sort for a reproducible row order, and skip the output of a previous run:
# otherwise re-running this script would fold the old "Append.csv" back into
# the result and duplicate every row.
all_filenames = sorted(
    name for name in glob.glob('*.{}'.format(extension)) if name != output_name
)
if not all_filenames:
    # pd.concat() on an empty list raises an opaque "No objects to
    # concatenate" ValueError; fail with a message that names the cause.
    raise ValueError(
        "No .{} files to combine in {}".format(extension, os.getcwd())
    )

# Combine all files in the list, in filename order.
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])

# Export to CSV without the index column. The 'utf-8-sig' codec prefixes the
# file with a UTF-8 byte-order mark.
combined_csv.to_csv(output_name, index=False, encoding='utf-8-sig')
# Clean-up and first-look inspection of Burna_Data_Damini.
# NOTE(review): Burna_Data_Damini is not created in this excerpt; it is
# presumably a pandas DataFrame loaded earlier. Confirm before running.

# Remove the unwanted "Unnamed: 0" column. The axis is selected by keyword
# (`columns=`) because pandas 2.0 no longer accepts it as a positional
# argument; the old `drop([...], 1, inplace=True)` form raises TypeError there.
Burna_Data_Damini.drop(columns=['Unnamed: 0'], inplace=True)
Burna_Data_Damini.head()
Burna_Data_Damini.shape  # Number of rows and columns.
Burna_Data_Damini.info()  # Summary information about the data.
Burna_Data_Damini.isnull().sum()  # Number of empty cells per column.
# inplace=True removes the duplicate rows from the original DataFrame
# instead of returning a de-duplicated copy.
Burna_Data_Damini.drop_duplicates(inplace=True)
#Next we have to remove emoji & Unicode from the Tweet data.
def remove_emoji(string):
emoji_pattern = re.compile("["
u"\U0001F600-\U0001F64F" # emoticons
u"\U0001F300-\U0001F5FF" # symbols & pictographs
u"\U0001F680-\U0001F6FF" # transport & map symbols
u"\U0001F1E0-\U0001F1FF" # flags (iOS)
u"\U00002500-\U00002BEF" # chinese char
u"\U00002702-\U000027B0"
u"\U00002702-\U000027B0"