Skip to content

Instantly share code, notes, and snippets.

View leks39's full-sized avatar

Olalekan Fagbuyi leks39

View GitHub Profile
# Load the picture whose outline shapes the word cloud and preview it.
# NOTE(review): the path is an absolute, machine-specific Windows path.
mask_image = Image.open("C:\\Users\\ofagb\\Downloads\\GOT.jpg")
mask = np.array(mask_image)

plt.figure(figsize=(18, 12))
plt.imshow(mask, interpolation='bilinear')
plt.axis('off')
plt.show()

# Configure the word cloud: white background, black contour around the mask,
# at most 300 words and no font smaller than 8.
wordcloud_options = dict(
    mask=mask,
    background_color='white',
    contour_color='black',
    contour_width=3,
    min_font_size=8,
    max_words=300,
)
masked_wordcloud = WordCloud(**wordcloud_options)
@leks39
leks39 / HOTD 6
Last active November 5, 2022 06:58
# Calculating text polarity using TextBlob
def polarity(Processed_Text):
    """Return the TextBlob sentiment polarity score of ``Processed_Text``."""
    sentiment = TextBlob(Processed_Text).sentiment
    return sentiment.polarity
# Score every processed tweet and store the result in a new 'Polarity' column
processed_text = hotd_df2['Processed_Text']
hotd_df2['Polarity'] = processed_text.apply(polarity)
hotd_df2.head()
#Creating polarity sentiment column
def p_sentiment(label):
# First step in text processing: take stop words out of the text, starting
# from NLTK's English stop-word list.
# Stop words are common words in English that add little or no value in NLP.
stop_words = [*stopwords.words('english')]

# User-defined common words that add no value in the context of the show;
# these will also be removed.
hotd_stop_words = [
    'house', 'GOT', 'thrones', 'lady', 'lord', 'ser', 'gameofthrones', 'hotd',
    'premiere', 'watching', 'got', 'episode', 'serie', 'houseofthedragon',
    'fan', 'hbo', 'hbomax', 'finale', 'end', 'getting', 'today', 'week', 'watch',
    'stream', 'houseofdragon', 'houseofthedragonhbo', 'houseofthedragonep',
]

# Main characters from the HOTD show
hotd_chars = [
    'rhaenyra', 'daemon', 'alicent', 'otto', 'viserys',
    'rhaenys', 'vaemond', 'lucerys', 'jacaerys', 'lyonel',
    'corlys', 'aegon', 'aemond', 'larys', 'harwin',
    'laenor', 'laena', 'criston', 'mysaria', 'helaena',
]
# Replacing misspellings and nicknames of character names with the actual names.
#
# Each character gets ONE pattern that covers all of its variants, so the
# result no longer depends on the order of the replacements:
#   * 'rhaneyra', 'rhaneyras' and 'rhaneyratargaryen' all become 'rhaenyra'
#     (previously the short form was rewritten first, so the two longer
#     variants could never match and kept their suffix).
#   * 'visery' and 'viseryss' become 'viserys', while an already correct
#     'viserys' is left untouched (the negative lookahead stops 'visery'
#     from matching when an 's' follows).
# regex=True is passed explicitly because the default of Series.str.replace
# differs between pandas versions.
name_corrections = (
    (r'rhaneyra(?:targaryen|s)?', 'rhaenyra'),
    (r'visery(?:ss|(?!s))', 'viserys'),
)
for misspelling_pattern, canonical_name in name_corrections:
    hotd_df['Text'] = hotd_df['Text'].str.replace(
        misspelling_pattern, canonical_name, regex=True
    )
##GETTING TOP TWEET LOCATIONS
# Replace empty or whitespace-only string values with NaN so they count as
# missing - *this affects the location column
hotd = hotd.replace(r'^\s*$', np.nan, regex=True)
hotd.head()
# Fill missing values with the next valid value below them in the same column.
# DataFrame.bfill() is the direct equivalent of fillna(method='bfill'), whose
# `method` argument is deprecated in recent pandas releases.
# NOTE(review): this back-fills every column of `hotd`, not only the location
# column, and trailing NaNs with no later value remain NaN.
hotd_df = hotd.bfill()
hotd_df.head()
#visualization for top 10 tweet locations
#hotd1_df - October 2022 tweets - Episode 7 to 10
maxTweets = 200000
# Creating list to append tweet data to
tweets_list = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for i,tweet in enumerate(sntwitter.TwitterSearchScraper
('#HouseoftheDragon since:2022-08-21 until:2022-10-24 lang:"en"').get_items()):
if i>maxTweets:
break
#import libraries
#core python packages for data analysis and arithmetics
import pandas as pd
import numpy as np
pd.set_option('max_colwidth', None)
#visualization packages
import seaborn as sns
import matplotlib.pyplot as plt
@leks39
leks39 / Naija_Genre5.py
Created October 19, 2022 07:08
Naija_Genre5
# Genre distribution: how many rows of `result` fall under each genre name
Genre_Distr = result['Genre_Name'].value_counts()
Genre_Distr
# Visualising the genre distribution as a horizontal count plot, with the bars
# ordered the same way as the value counts above
genre_plot = sns.countplot(
    y='Genre_Name',
    data=result,
    palette='viridis',
    order=Genre_Distr.index,
)
genre_plot.set(title='Genre Distribution')
@leks39
leks39 / Naija_Genre4.py
Created October 19, 2022 06:37
Naija_Genre4
# Splitting songs into 5 genres by clustering the PCA scores
# NOTE(review): no random_state is passed to KMeans, so the cluster numbering
# may differ from run to run - confirm whether that matters downstream.
kmeans = KMeans(n_clusters=5)
label = kmeans.fit_predict(pca_scores)
unique_labels = np.unique(label)
# Plotting clusters: one scatter series per cluster label, using the first two
# columns of pca_scores as the x and y coordinates
for cluster_id in unique_labels:
    in_cluster = label == cluster_id
    plt.scatter(
        pca_scores[in_cluster, 0],
        pca_scores[in_cluster, 1],
        label=cluster_id,
        s=80,
    )
plt.legend()
@leks39
leks39 / Naija_Genre3.py
Created October 19, 2022 06:12
Naija_Genre3
# Standardise the audio features before PCA
scaler = StandardScaler()
audio_features2_std = scaler.fit_transform(audio_features2)
# Fit a PCA with default settings on the standardised data
# (fit() returns the fitted estimator itself)
pca = PCA().fit(audio_features2_std)
# Share of the total variance explained by each principal component
pca.explained_variance_ratio_