Skip to content

Instantly share code, notes, and snippets.

View leks39's full-sized avatar

Olalekan Fagbuyi leks39

View GitHub Profile
https://github.com/leks39/Naija-Songs-into-Genres/blob/main/Naija%20Song%20Genres.ipynb
@leks39
leks39 / Naija_Genre.py
Last active October 18, 2022 17:25
Naija Genre gist
#import relevant libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
#import dataset
@leks39
leks39 / Naija_Genre2.py
Created October 18, 2022 17:35
Naija Genre 2
#feature selection - checking for correlation among the remaining features
#'name' and 'artist' are dropped before the correlations are computed;
#`columns=` already identifies the axis, so no separate `axis` argument is needed
audio_features = naija_songs2.drop(columns=['name', 'artist'])
#compute the correlation matrix once and reuse it for display and plotting
audio_corr = audio_features.corr()
audio_corr
#visualizing correlations as an annotated heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(audio_corr, annot=True, cmap=plt.cm.CMRmap_r)
@leks39
leks39 / Naija_Genre3.py
Created October 19, 2022 06:12
Naija_Genre3
#standardization: fit the scaler on the features, then apply it to the same data
scaler = StandardScaler()
scaler.fit(audio_features2)
audio_features2_std = scaler.transform(audio_features2)

#fitting standardised data using PCA (fit() returns the fitted estimator)
pca = PCA().fit(audio_features2_std)

#share of the total variance explained by each principal component
pca.explained_variance_ratio_
@leks39
leks39 / Naija_Genre4.py
Created October 19, 2022 06:37
Naija_Genre4
#Splitting songs into 5 genres
#a fixed random_state keeps the cluster numbering identical between runs,
#so anything keyed on the integer labels stays valid when the code is re-run
kmeans = KMeans(n_clusters=5, random_state=42)
label = kmeans.fit_predict(pca_scores)
unique_labels = np.unique(label)

#Plotting clusters: one scatter series per cluster, using the first two
#columns of pca_scores as the x and y coordinates
for i in unique_labels:
    in_cluster = label == i
    plt.scatter(pca_scores[in_cluster, 0], pca_scores[in_cluster, 1], label=i, s=80)
plt.legend()
@leks39
leks39 / Naija_Genre5.py
Created October 19, 2022 07:08
Naija_Genre5
#Genre Distribution: number of rows per Genre_Name value
Genre_Distr = result['Genre_Name'].value_counts()
Genre_Distr
#Visualising genre distribution as horizontal bars, ordered by frequency
genre_plot = sns.countplot(
    data=result,
    y='Genre_Name',
    order=result['Genre_Name'].value_counts().index,
    palette='viridis',
)
genre_plot.set(title='Genre Distribution')
#import libraries
#core python packages for data analysis and arithmetic
import pandas as pd
import numpy as np
#show full column contents; the fully-qualified option key is used because
#abbreviated keys are resolved by pattern matching and can become ambiguous
pd.set_option('display.max_colwidth', None)
#visualization packages
import seaborn as sns
import matplotlib.pyplot as plt
#hotd1_df - October 2022 tweets - Episode 7 to 10
# NOTE(review): the query below starts at 2022-08-21, not October - confirm the intended range
maxTweets = 200000

# Creating list to append tweet data to
tweets_list = []

# Using TwitterSearchScraper to scrape data and append tweets to list
# NOTE(review): `sntwitter` must be imported elsewhere; the import is not visible here
for i, tweet in enumerate(
    sntwitter.TwitterSearchScraper(
        '#HouseoftheDragon since:2022-08-21 until:2022-10-24 lang:"en"'
    ).get_items()
):
    # enumerate() is zero-based: when i == maxTweets, maxTweets items have
    # already been processed, so stop here rather than one item later
    if i >= maxTweets:
        break
##GETTING TOP TWEET LOCATIONS
#replace empty or whitespace-only strings with NaN in every column
#(original note: this affects the location column)
hotd = hotd.replace(r'^\s*$', np.nan, regex=True)
hotd.head()
#backward fill: each NaN takes the next valid value below it in the same column
#(DataFrame.bfill() replaces the deprecated fillna(method='bfill') spelling)
hotd_df = hotd.bfill()
hotd_df.head()
#visualization for top 10 tweet locations
#main characters of the HOTD show, as lowercase first names
hotd_chars = [
    'rhaenyra', 'daemon', 'alicent', 'otto',
    'viserys', 'rhaenys', 'vaemond', 'lucerys',
    'jacaerys', 'lyonel', 'corlys', 'aegon',
    'aemond', 'larys', 'harwin', 'laenor',
    'laena', 'criston', 'mysaria', 'helaena',
]
#replacing misspellings and nicknames of character names with actual names
#Longer variants are listed before the shorter ones they contain: replacing
#'rhaneyra' first would rewrite the start of 'rhaneyras' / 'rhaneyratargaryen'
#and the longer patterns would then never match.
name_fixes = [
    ('rhaneyratargaryen', 'rhaenyra'),
    ('rhaneyras', 'rhaenyra'),
    ('rhaneyra', 'rhaenyra'),
    ('viseryss', 'viserys'),
]
for wrong_name, right_name in name_fixes:
    hotd_df['Text'] = hotd_df['Text'].str.replace(wrong_name, right_name, regex=False)
#add the missing final 's' only where it is not already there, so correctly
#spelled 'viserys' is left untouched instead of becoming 'viseryss'
hotd_df['Text'] = hotd_df['Text'].str.replace(r'visery(?!s)', 'viserys', regex=True)