Temidayo Omoniyi kiddojazz

## config.py
import tweepy # Python Library for scrapping Twitter Data.
import configparser #ConfigParser is a Python class which implements a basic configuration language for Python programs.
import pandas as pd #Data Manipulation & Transformation
from datetime import date
import re #RegEx : Regular expression

## read.py
#read configs
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check it here, otherwise a missing config.ini
# only surfaces further down as a confusing KeyError: 'twitter'.
if not config.read("config.ini"):  # The config.ini file we just created
    raise FileNotFoundError(
        "config.ini was not found (or is unreadable) in the current working directory"
    )

# API key pair from the [twitter] section.
api_key = config["twitter"]["api_key"]
api_key_secret = config["twitter"]["api_key_secret"]

# Access token pair from the same section.
access_token = config["twitter"]["access_token"]
access_token_secret = config["twitter"]["access_token_secret"]

## Auth.py
#Authenticate the account with the Twitter app
#API Instance
# Build the OAuth1 user handler from the four credentials read from the
# config file, then wrap it in an API client.
auth = tweepy.OAuth1UserHandler(
    consumer_key=api_key,
    consumer_secret=api_key_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

api = tweepy.API(auth)

## search.py
# get tweets from the API
# Iterate over search results for `search_query` (English only), capped at
# 1000 tweets. NOTE(review): `search_query` must be defined before this runs.
tweets = tweepy.Cursor(api.search_tweets, q=search_query, lang="en").items(1000)

# Column headers, listed in the same order as the fields collected per tweet.
columns = [
    "User",
    "Time",
    "Tweet",
    "Location",
    "Verified",
    "Tweet_Source",
    "Followers",
    "Retweet_Count",
    "Tweet ID",
]

# One row per tweet, field order matching `columns`.
data = [
    [
        tweet.user.screen_name,
        tweet.created_at,
        tweet.text,
        tweet.user.location,
        tweet.user.verified,
        tweet.source,
        tweet.user.followers_count,
        tweet.retweet_count,
        tweet.id,
    ]
    for tweet in tweets
]


## Append.py
import os
import glob
import pandas as pd
os.chdir("/mydir")  # Change "/mydir" to your desired working directory.
extension = 'csv'  # Only files with this extension are combined.
output_filename = "Append.csv"

# Collect every CSV in the working directory except the output file itself:
# it is written to this same directory and matches the pattern, so a re-run
# would otherwise merge the previous result back into the new one.
all_filenames = [
    name for name in glob.glob('*.{}'.format(extension)) if name != output_filename
]

# Combine all files in the list (pd.concat raises ValueError if none were found).
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])

# Export to CSV. 'utf-8-sig' writes a UTF-8 byte-order mark at the start of
# the file -- presumably so spreadsheet tools detect the encoding; confirm.
combined_csv.to_csv(output_filename, index=False, encoding='utf-8-sig')

## dropcol.py
# Remove the unwanted "Unnamed: 0" column in place. The `columns=` keyword is
# used because passing the axis positionally (`drop(labels, 1)`) is rejected
# by pandas >= 2.0.
Burna_Data_Damini.drop(columns=['Unnamed: 0'], inplace=True)
Burna_Data_Damini.head()  # Preview the first rows (shown only when run interactively)

## EDA.py
# Quick exploratory checks. The bare expressions only display a result when
# run interactively (e.g. as the last line of a notebook cell).
Burna_Data_Damini.shape #(rows, columns) of the DataFrame
Burna_Data_Damini.info() # Print a summary of the DataFrame
Burna_Data_Damini.isnull().sum() # Number of missing (null) values in each column

## removeduplicate.py
Burna_Data_Damini.drop_duplicates(inplace=True) # inplace=True removes the duplicate rows
#from the original DataFrame itself instead of returning a new one

## hyperlink.py
#Clean the text

#Create a function to clean the tweets
def cleanTxt(text):
    """Strip Twitter-specific noise from a tweet and return the cleaned text.

    Removes, in this order: @mentions, '#' symbols (the tag word itself is
    kept), the retweet marker "RT", http/https links, and colons.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned string. Leading/trailing whitespace is left untouched.
    """
    # Twitter handles may contain underscores, so include '_' in the class.
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'#', '', text)  # Drop only the '#' symbol
    # \b keeps words that merely end in "RT" (e.g. "START now") intact.
    text = re.sub(r'\bRT[\s]+', '', text)
    # Links must go before the colon removal below, which would break "http://".
    text = re.sub(r'https?:\/\/\S+', '', text)
    text = re.sub(r':', '', text)
    return text


## unicode.py
#Next we have to remove emoji & Unicode from the Tweet data.
def remove_emoji(string):
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002500-\U00002BEF"  # chinese char
                               u"\U00002702-\U000027B0"
                               u"\U00002702-\U000027B0"
	import tweepy # Python Library for scrapping Twitter Data.
	import configparser #ConfigParser is a Python class which implements a basic configuration language for Python programs.
	import pandas as pd #Data Manipulation & Transformation
	from datetime import date
	import re #RegEx : Regular expression
	#read configs
	config = configparser.ConfigParser()
	# ConfigParser.read() silently skips files it cannot open and returns the list
	# of files it actually parsed. Check it here, otherwise a missing config.ini
	# only surfaces further down as a confusing KeyError: 'twitter'.
	if not config.read("config.ini"):  # The config.ini file we just created
		raise FileNotFoundError(
			"config.ini was not found (or is unreadable) in the current working directory"
		)

	# API key pair from the [twitter] section.
	api_key = config["twitter"]["api_key"]
	api_key_secret = config["twitter"]["api_key_secret"]

	# Access token pair from the same section.
	access_token = config["twitter"]["access_token"]
	access_token_secret = config["twitter"]["access_token_secret"]
	#Authenticate the account with the Twitter app
	#API Instance
	# Build the OAuth1 user handler from the four credentials read from the
	# config file, then wrap it in an API client.
	auth = tweepy.OAuth1UserHandler(
		consumer_key=api_key,
		consumer_secret=api_key_secret,
		access_token=access_token,
		access_token_secret=access_token_secret,
	)

	api = tweepy.API(auth)
	# get tweets from the API
	# Iterate over search results for `search_query` (English only), capped at
	# 1000 tweets. NOTE(review): `search_query` must be defined before this runs.
	tweets = tweepy.Cursor(api.search_tweets, q=search_query, lang="en").items(1000)

	# Column headers, listed in the same order as the fields collected per tweet.
	columns = ["User", "Time", "Tweet", "Location", "Verified", "Tweet_Source", "Followers", "Retweet_Count", "Tweet ID"]
	data = []
	for tweet in tweets:
		# The loop body must be indented under the `for`; one row per tweet.
		data.append([tweet.user.screen_name, tweet.created_at, tweet.text, tweet.user.location, tweet.user.verified,
			tweet.source, tweet.user.followers_count, tweet.retweet_count, tweet.id])
	import os
	import glob
	import pandas as pd
	os.chdir("/mydir")  # Change "/mydir" to your desired working directory.
	extension = 'csv'  # Only files with this extension are combined.
	output_filename = "Append.csv"

	# Collect every CSV in the working directory except the output file itself:
	# it is written to this same directory and matches the pattern, so a re-run
	# would otherwise merge the previous result back into the new one.
	all_filenames = [
		name for name in glob.glob('*.{}'.format(extension)) if name != output_filename
	]

	# Combine all files in the list (pd.concat raises ValueError if none were found).
	combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])

	# Export to CSV. 'utf-8-sig' writes a UTF-8 byte-order mark at the start of
	# the file -- presumably so spreadsheet tools detect the encoding; confirm.
	combined_csv.to_csv(output_filename, index=False, encoding='utf-8-sig')
	# Remove the unwanted "Unnamed: 0" column in place. The `columns=` keyword is
	# used because passing the axis positionally (`drop(labels, 1)`) is rejected
	# by pandas >= 2.0.
	Burna_Data_Damini.drop(columns=['Unnamed: 0'], inplace=True)
	Burna_Data_Damini.head()  # Preview the first rows (shown only when run interactively)
	# Quick exploratory checks. The bare expressions only display a result when
	# run interactively (e.g. as the last line of a notebook cell).
	Burna_Data_Damini.shape #(rows, columns) of the DataFrame
	Burna_Data_Damini.info() # Print a summary of the DataFrame
	Burna_Data_Damini.isnull().sum() # Number of missing (null) values in each column
	Burna_Data_Damini.drop_duplicates(inplace=True) # inplace=True removes the duplicate rows
	#from the original DataFrame itself instead of returning a new one
	#Clean the text

	#Create a function to clean the tweets
	def cleanTxt(text):
		"""Strip Twitter-specific noise from a tweet and return the cleaned text.

		Removes, in this order: @mentions, '#' symbols (the tag word itself is
		kept), the retweet marker "RT", http/https links, and colons.

		Args:
			text: Raw tweet text.

		Returns:
			The cleaned string. Leading/trailing whitespace is left untouched.
		"""
		# Twitter handles may contain underscores, so include '_' in the class.
		text = re.sub(r'@[A-Za-z0-9_]+', '', text)
		text = re.sub(r'#', '', text)  # Drop only the '#' symbol
		# \b keeps words that merely end in "RT" (e.g. "START now") intact.
		text = re.sub(r'\bRT[\s]+', '', text)
		# Links must go before the colon removal below, which would break "http://".
		text = re.sub(r'https?:\/\/\S+', '', text)
		text = re.sub(r':', '', text)
		return text
	#Next we have to remove emoji & Unicode from the Tweet data.
	def remove_emoji(string):
	emoji_pattern = re.compile("["
	u"\U0001F600-\U0001F64F" # emoticons
	u"\U0001F300-\U0001F5FF" # symbols & pictographs
	u"\U0001F680-\U0001F6FF" # transport & map symbols
	u"\U0001F1E0-\U0001F1FF" # flags (iOS)
	u"\U00002500-\U00002BEF" # chinese char
	u"\U00002702-\U000027B0"
	u"\U00002702-\U000027B0"