This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| https://github.com/leks39/Naija-Songs-into-Genres/blob/main/Naija%20Song%20Genres.ipynb |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #import relevant libraries | |
| import pandas as pd | |
| import numpy as np | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.cluster import KMeans | |
| from sklearn.decomposition import PCA | |
| #import dataset |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feature selection: drop the 'name' and 'artist' columns and check how the
# remaining columns correlate with each other.
audio_features = naija_songs2.drop(columns=['name', 'artist'])
feature_corr = audio_features.corr()
feature_corr

# Visualise the correlation matrix as an annotated heatmap.
plt.figure(figsize=(10, 6))
sns.heatmap(feature_corr, annot=True, cmap=plt.cm.CMRmap_r)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standardisation: fit a StandardScaler on audio_features2 and transform the
# same data in one step.
scaler = StandardScaler()
audio_features2_std = scaler.fit_transform(audio_features2)

# Fit PCA (default settings) on the standardised data; fit() returns the
# estimator itself, so construction and fitting can be chained.
pca = PCA().fit(audio_features2_std)

# Share of the total variance explained by each principal component.
pca.explained_variance_ratio_
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the songs into 5 clusters ("genres") with k-means on pca_scores.
# random_state pins the centroid initialisation so the cluster ids are the
# same on every run; n_init is set explicitly because its default differs
# between scikit-learn releases.
# NOTE(review): anything that maps cluster ids to genre names elsewhere in the
# notebook presumably depends on these ids being stable - confirm.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
label = kmeans.fit_predict(pca_scores)
unique_labels = np.unique(label)

# Plot the clusters using the first two columns of pca_scores, one scatter
# series (and legend entry) per cluster id.
for i in unique_labels:
    in_cluster = label == i  # boolean mask, computed once per cluster
    plt.scatter(pca_scores[in_cluster, 0], pca_scores[in_cluster, 1], label=i, s=80)
plt.legend()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Genre distribution: number of rows per Genre_Name value, most frequent first.
Genre_Distr = result['Genre_Name'].value_counts()
Genre_Distr

# Visualise the genre distribution as a horizontal count plot, with the bars
# in the same (descending-frequency) order as Genre_Distr.
sns.countplot(
    data=result,
    y='Genre_Name',
    order=Genre_Distr.index,
    palette='viridis',
).set(title='Genre Distribution')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #import libraries | |
| #core python packages for data analysis and arithmetics | |
| import pandas as pd | |
| import numpy as np | |
| pd.set_option('max_colwidth', None) | |
| #visualization packages | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#hotd1_df - October 2022 tweets - Episode 7 to 10
# NOTE(review): the query below spans 2022-08-21 to 2022-10-24, which is wider
# than October - confirm which range is intended.
maxTweets = 200000

# List that the scraped tweet data is appended to
tweets_list = []

# Use TwitterSearchScraper to iterate over the tweets matching the query
for i, tweet in enumerate(
    sntwitter.TwitterSearchScraper(
        '#HouseoftheDragon since:2022-08-21 until:2022-10-24 lang:"en"'
    ).get_items()
):
    # enumerate() starts at 0, so `>=` (not `>`) is needed to stop after
    # exactly maxTweets items; `>` would let maxTweets + 1 through.
    if i >= maxTweets:
        break
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## GETTING TOP TWEET LOCATIONS
# Replace empty / whitespace-only strings with NaN (this affects the location column)
hotd = hotd.replace(r'^\s*$', np.nan, regex=True)
hotd.head()

# Back-fill: each NaN takes the next valid value below it in the same column.
# DataFrame.bfill() is used instead of fillna(method='bfill'), which is
# deprecated in pandas >= 2.1.
hotd_df = hotd.bfill()
hotd_df.head()

# visualization for top 10 tweet locations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Main characters from the HOTD show (lower-case names).
hotd_chars = ['rhaenyra', 'daemon', 'alicent', 'otto', 'viserys', 'rhaenys', 'vaemond', 'lucerys', 'jacaerys', 'lyonel',
              'corlys', 'aegon', 'aemond', 'larys', 'harwin', 'laenor', 'laena', 'criston', 'mysaria', 'helaena']

# Replace misspellings and nicknames of character names with the actual names.
# Each character is normalised with ONE regex so the result does not depend on
# the order of the replacements:
#   - 'rhaneyra', 'rhaneyras' and 'rhaneyratargaryen' all become 'rhaenyra'
#     (with chained plain replaces, the first one rewrites the common prefix
#     and the longer variants never match afterwards);
#   - 'visery', 'viserys' and 'viseryss' all become 'viserys' (a plain
#     'visery' -> 'viserys' replace would also turn a correct 'viserys' into
#     'viseryss').
# regex=True is explicit because the default of Series.str.replace differs
# between pandas versions.
# NOTE(review): assumes 'Text' has already been lower-cased - confirm upstream.
hotd_df['Text'] = (
    hotd_df['Text']
    .str.replace(r'rhaneyra(?:targaryen|s)?', 'rhaenyra', regex=True)
    .str.replace(r'viserys*', 'viserys', regex=True)
)
OlderNewer