This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the audio features of every track in each curated playlist,
# keyed by its Spotify playlist ID.
# (Cleaned: trailing " | |" scraping artifacts made these lines invalid Python.)
df_dinner = fetch_audio_features(sp, username, '37SqXO5bm81JmGCiuhin0L')
df_party = fetch_audio_features(sp, username, '2m75Xwwn4YqhwsxHH7Qc9W')
df_lounge = fetch_audio_features(sp, username, '6Jbi3Y7ZNNgSrPaZF4DpUp')
df_pop = fetch_audio_features(sp, username, '3u2nUYNuI08yUg877JE5FI')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fetch_audio_features(sp, username, playlist_id): | |
""" | |
Returns the selected audio features of every track, | |
for the given playlist. | |
""" | |
# Use the fetch_playlist_tracks function to fetch all of the tracks | |
playlist = fetch_playlist_tracks(sp, username, playlist_id) | |
index = 0 | |
audio_features = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a sample from the Pop playlist: keep only the Pop tracks whose
# 'score' exceeds the Party playlist's mean 'score'.
# (NOTE(review): semantics of the 'score' column are defined elsewhere — confirm.)
df_pop_sample_III = df_pop[df_pop['score'] > df_party['score'].mean()].copy()

# Concatenate the original Party playlist with the sample and inspect
# the summary statistics of the expanded playlist.
df_party_exp_III = pd.concat([df_party, df_pop_sample_III])
df_party_exp_III.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a sample from the Pop playlist: keep only the Pop tracks that are
# both danceable (danceability > 69.55) and upbeat (valence > 51.89).
# (NOTE(review): thresholds look like percentile cutoffs computed elsewhere — confirm.)
df_pop_sample_II = df_pop[(df_pop['danceability'] > 69.55) & (df_pop['valence'] > 51.89)].copy()

# Concatenate the original Party playlist with the sample and inspect
# the summary statistics of the expanded playlist.
df_party_exp_II = pd.concat([df_party, df_pop_sample_II])
df_party_exp_II.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a weighted random sample of 40 tracks from the Pop playlist,
# favouring tracks with higher danceability; random_state pins the
# seed so the sample is reproducible.
df_pop_sample_I = df_pop.sample(n=40, weights='danceability', random_state=1)
df_pop_sample_I.describe()

# Concatenate the original Party playlist with the sample and inspect
# the summary statistics of the expanded playlist.
df_party_exp_I = pd.concat([df_party, df_pop_sample_I])
df_party_exp_I.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make a temporary list of tracks to add — the DataFrame index is used
# as the track identifiers (presumably Spotify track IDs set by
# fetch_audio_features — TODO confirm against that helper).
list_track = df_party_exp_III.index

# Push the expanded track list into the target playlist.
enrich_playlist(sp, username, '779Uv1K6LcYiiWxblSDjx7', list_track)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read in the SMS spam data set: tab-separated, no header row, so we
# name the two columns explicitly.
spam_collection = pd.read_csv('SMSSpamCollection', sep='\t', header=None, names=['Label', 'SMS'])

# Shuffle the whole data set (frac=1 keeps every row); random_state pins
# the seed so the shuffle is reproducible.
randomized_collection = spam_collection.sample(frac=1, random_state=3)

# Calculate the row index for an 80/20 training/test split.
training_test_index = round(len(randomized_collection) * 0.8)

# Training/Test split-up
# NOTE(review): the statements performing the actual split appear to be
# truncated in this excerpt — later snippets reference `training_set_final`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### While in Python environment ###
import nltk

# Download stopwords
# (fixed: the original used typographic quotes ‘…’, which are a SyntaxError in Python)
nltk.download('stopwords')
# Download punkt sentence tokenizer
nltk.download('punkt')
# Download wordnet (statement restored — the original excerpt was truncated
# after this comment; the download call is what the comment announces)
nltk.download('wordnet')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the training set into separate spam and ham frames; .copy() avoids
# chained-assignment warnings on later mutation.
spam_df = training_set_final[training_set_final['Label'] == 'spam'].copy()
ham_df = training_set_final[training_set_final['Label'] == 'ham'].copy()

# Calculate the class priors P(Spam) and P(Ham) as the fraction of
# training messages in each class.
p_spam = spam_df.shape[0] / training_set_final.shape[0]
p_ham = ham_df.shape[0] / training_set_final.shape[0]

# Calculate N_spam, N_ham and N_vocabulary.
# Assumes the 'SMS' column holds tokenized word lists, so len() is the
# per-message word count — TODO confirm against the preprocessing step.
spam_words_per_message = spam_df['SMS'].apply(len)
# NOTE(review): the summation into n_spam / n_ham / n_vocabulary appears
# to be truncated in this excerpt.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create two dictionaries that map each unique word to its conditional
# probability P(wi|Spam) / P(wi|Ham), initialized to 0.
parameters_spam = {unique_word: 0 for unique_word in vocabulary}
parameters_ham = {unique_word: 0 for unique_word in vocabulary}

# Iterate over the vocabulary and, for each word, calculate P(wi|Spam) and
# P(wi|Ham) with additive (Laplace) smoothing controlled by `alpha`.
for unique_word in vocabulary:
    p_unique_word_spam = (spam_df[unique_word].sum() + alpha) / (n_spam + alpha * n_vocabulary)
    p_unique_word_ham = (ham_df[unique_word].sum() + alpha) / (n_ham + alpha * n_vocabulary)
    # Update the calculated probabilities to the dictionaries.
    # (Restored: the original excerpt was truncated after the comment above,
    # leaving the computed values unused.)
    parameters_spam[unique_word] = p_unique_word_spam
    parameters_ham[unique_word] = p_unique_word_ham