### Spotify playlist enrichment ###
import pandas as pd

def fetch_audio_features(sp, username, playlist_id):
    """
    Returns the selected audio features of every track,
    for the given playlist.
    """
    # Use the fetch_playlist_tracks function to fetch all of the tracks
    playlist = fetch_playlist_tracks(sp, username, playlist_id)
    index = 0
    audio_features = []
    # The rest of the body was truncated in this snippet; a plausible
    # completion (assumption): batch the requests, since sp.audio_features
    # accepts at most 100 track ids per call ('track_id' is a hypothetical
    # column name for the frame returned by fetch_playlist_tracks)
    while index < playlist.shape[0]:
        audio_features += sp.audio_features(playlist.iloc[index:index + 100]['track_id'])
        index += 100
    return pd.DataFrame(audio_features)

df_dinner = fetch_audio_features(sp, username, '37SqXO5bm81JmGCiuhin0L')
df_party = fetch_audio_features(sp, username, '2m75Xwwn4YqhwsxHH7Qc9W')
df_lounge = fetch_audio_features(sp, username, '6Jbi3Y7ZNNgSrPaZF4DpUp')
df_pop = fetch_audio_features(sp, username, '3u2nUYNuI08yUg877JE5FI')
# Experiment I: take a weighted random sample from the Pop playlist
df_pop_sample_I = df_pop.sample(n=40, weights='danceability', random_state=1)
df_pop_sample_I.describe()
# Concatenate the original playlist with the sample
df_party_exp_I = pd.concat([df_party, df_pop_sample_I])
df_party_exp_I.describe()

# Experiment II: take the Pop tracks that clear fixed danceability/valence cut-offs
df_pop_sample_II = df_pop[(df_pop['danceability'] > 69.55) & (df_pop['valence'] > 51.89)].copy()
# Concatenate the original playlist with the sample
df_party_exp_II = pd.concat([df_party, df_pop_sample_II])
df_party_exp_II.describe()

# Experiment III: take the Pop tracks that score above the Party playlist's mean
df_pop_sample_III = df_pop[df_pop['score'] > df_party['score'].mean()].copy()
# Concatenate the original playlist with the sample
df_party_exp_III = pd.concat([df_party, df_pop_sample_III])
df_party_exp_III.describe()
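# Before committing to one experiment, it helps to line the three candidates
# up side by side. A minimal sketch of that comparison, assuming the
# df_party_exp_* frames built above and their danceability/valence columns:
comparison = pd.DataFrame({
    'exp_I': df_party_exp_I[['danceability', 'valence']].mean(),
    'exp_II': df_party_exp_II[['danceability', 'valence']].mean(),
    'exp_III': df_party_exp_III[['danceability', 'valence']].mean(),
})
print(comparison)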
# Make a temporary list of tracks
list_track = df_party_exp_III.index
# Create the playlist
enrich_playlist(sp, username, '779Uv1K6LcYiiWxblSDjx7', list_track)
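# The enrich_playlist helper used above is not part of this snippet. For
# reference, a minimal sketch of how it could look with spotipy
# (assumptions: the frame's index holds the Spotify track ids, and tracks
# are added in batches of 100, the most user_playlist_add_tracks accepts
# per call):
def enrich_playlist(sp, username, playlist_id, playlist_tracks):
    index = 0
    while index < len(playlist_tracks):
        sp.user_playlist_add_tracks(username, playlist_id,
                                    list(playlist_tracks[index:index + 100]))
        index += 100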
### SMS spam filter with Naive Bayes ###
# Read in data
spam_collection = pd.read_csv('SMSSpamCollection', sep='\t', header=None, names=['Label', 'SMS'])
# Randomize the data set
randomized_collection = spam_collection.sample(frac=1, random_state=3)
# Calculate index for the split-up
training_test_index = round(len(randomized_collection) * 0.8)
# Training/Test split-up (the assignment lines were truncated in this
# snippet; the straightforward completion, with assumed names)
training_set = randomized_collection[:training_test_index].reset_index(drop=True)
test_set = randomized_collection[training_test_index:].reset_index(drop=True)
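# A quick sanity check that the randomized split preserved the spam/ham
# ratio in both halves (assumes the training_set/test_set names above):
print(training_set['Label'].value_counts(normalize=True))
print(test_set['Label'].value_counts(normalize=True))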
### While in Python environment ###
import nltk
# Download stopwords
nltk.download('stopwords')
# Download punkt sentence tokenizer
nltk.download('punkt')
# Download wordnet
nltk.download('wordnet')
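# These resources feed the text-cleaning step that turns training_set into
# the training_set_final used below. That step is not in the snippet, so
# this is a minimal sketch (assumptions: punkt tokenization, stopword
# removal, WordNet lemmatization, an SMS column of word lists so that
# apply(len) below counts words, and one count column per vocabulary word
# so that spam_df[unique_word] holds per-message counts):
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_sms(sms):
    # Tokenize, lower-case, keep alphabetic tokens, drop stopwords, lemmatize
    tokens = word_tokenize(sms.lower())
    return [lemmatizer.lemmatize(t) for t in tokens
            if t.isalpha() and t not in stop_words]

training_set_final = training_set.copy()
training_set_final['SMS'] = training_set_final['SMS'].apply(clean_sms)

# Build the vocabulary and one count column per unique word
vocabulary = sorted(set(w for sms in training_set_final['SMS'] for w in sms))
word_counts = {w: [sms.count(w) for sms in training_set_final['SMS']]
               for w in vocabulary}
training_set_final = pd.concat([training_set_final, pd.DataFrame(word_counts)], axis=1)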
# Filter the spam and ham dataframes
spam_df = training_set_final[training_set_final['Label'] == 'spam'].copy()
ham_df = training_set_final[training_set_final['Label'] == 'ham'].copy()
# Calculate P(Spam) and P(Ham)
p_spam = spam_df.shape[0] / training_set_final.shape[0]
p_ham = ham_df.shape[0] / training_set_final.shape[0]
# Calculate Nspam, Nham and Nvocabulary (only the first assignment survived
# in this snippet; the rest is completed per the comment, with vocabulary
# taken to be the list of unique words built during cleaning)
spam_words_per_message = spam_df['SMS'].apply(len)
n_spam = spam_words_per_message.sum()
ham_words_per_message = ham_df['SMS'].apply(len)
n_ham = ham_words_per_message.sum()
n_vocabulary = len(vocabulary)
# Create two dictionaries that match each unique word with the respective probability value.
parameters_spam = {unique_word: 0 for unique_word in vocabulary}
parameters_ham = {unique_word: 0 for unique_word in vocabulary}
# Laplace smoothing constant (assumption: the project uses alpha = 1)
alpha = 1
# Iterate over the vocabulary and for each word, calculate P(wi|Spam) and P(wi|Ham)
for unique_word in vocabulary:
    p_unique_word_spam = (spam_df[unique_word].sum() + alpha) / (n_spam + alpha * n_vocabulary)
    p_unique_word_ham = (ham_df[unique_word].sum() + alpha) / (n_ham + alpha * n_vocabulary)
    # Update the calculated probabilities in the dictionaries
    parameters_spam[unique_word] = p_unique_word_spam
    parameters_ham[unique_word] = p_unique_word_ham
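# With the priors and per-word likelihoods in hand, classifying a new
# message is Bayes' rule under the conditional-independence assumption:
# P(Spam|w1..wn) is proportional to P(Spam) * product of P(wi|Spam).
# A minimal sketch (classify is a hypothetical name; it reuses the
# clean_sms helper sketched above):
def classify(message):
    words = clean_sms(message)
    # Start from the priors and multiply in each word's likelihood
    p_spam_given_message = p_spam
    p_ham_given_message = p_ham
    for word in words:
        if word in parameters_spam:
            p_spam_given_message *= parameters_spam[word]
        if word in parameters_ham:
            p_ham_given_message *= parameters_ham[word]
    if p_spam_given_message > p_ham_given_message:
        return 'spam'
    if p_ham_given_message > p_spam_given_message:
        return 'ham'
    return 'needs human classification'

print(classify('WINNER!! You have been selected to receive a prize!'))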