This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the audio features of every track in each curated playlist,
# keyed by its Spotify playlist ID.
# (Cleaned: trailing " | |" scraping artifacts made these lines invalid Python.)
df_dinner = fetch_audio_features(sp, username, '37SqXO5bm81JmGCiuhin0L')
df_party = fetch_audio_features(sp, username, '2m75Xwwn4YqhwsxHH7Qc9W')
df_lounge = fetch_audio_features(sp, username, '6Jbi3Y7ZNNgSrPaZF4DpUp')
df_pop = fetch_audio_features(sp, username, '3u2nUYNuI08yUg877JE5FI')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fetch_audio_features(sp, username, playlist_id): | |
""" | |
Returns the selected audio features of every track, | |
for the given playlist. | |
""" | |
# Use the fetch_playlist_tracks function to fetch all of the tracks | |
playlist = fetch_playlist_tracks(sp, username, playlist_id) | |
index = 0 | |
audio_features = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a sample from the Pop playlist: keep only the Pop tracks whose
# 'score' exceeds the Party playlist's mean 'score'.
# (NOTE(review): semantics of the 'score' column are defined elsewhere — confirm.)
df_pop_sample_III = df_pop[df_pop['score'] > df_party['score'].mean()].copy()

# Concatenate the original Party playlist with the sample and inspect
# the summary statistics of the expanded playlist.
df_party_exp_III = pd.concat([df_party, df_pop_sample_III])
df_party_exp_III.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a sample from the Pop playlist: keep only the Pop tracks that are
# both danceable (danceability > 69.55) and upbeat (valence > 51.89).
# (NOTE(review): thresholds look like percentile cutoffs computed elsewhere — confirm.)
df_pop_sample_II = df_pop[(df_pop['danceability'] > 69.55) & (df_pop['valence'] > 51.89)].copy()

# Concatenate the original Party playlist with the sample and inspect
# the summary statistics of the expanded playlist.
df_party_exp_II = pd.concat([df_party, df_pop_sample_II])
df_party_exp_II.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a weighted random sample of 40 tracks from the Pop playlist,
# favouring tracks with higher danceability; random_state pins the
# seed so the sample is reproducible.
df_pop_sample_I = df_pop.sample(n=40, weights='danceability', random_state=1)
df_pop_sample_I.describe()

# Concatenate the original Party playlist with the sample and inspect
# the summary statistics of the expanded playlist.
df_party_exp_I = pd.concat([df_party, df_pop_sample_I])
df_party_exp_I.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make a temporary list of tracks to add — the DataFrame index is used
# as the track identifiers (presumably Spotify track IDs set by
# fetch_audio_features — TODO confirm against that helper).
list_track = df_party_exp_III.index

# Push the expanded track list into the target playlist.
enrich_playlist(sp, username, '779Uv1K6LcYiiWxblSDjx7', list_track)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read in the SMS spam data set: tab-separated, no header row, so we
# name the two columns explicitly.
spam_collection = pd.read_csv('SMSSpamCollection', sep='\t', header=None, names=['Label', 'SMS'])

# Shuffle the whole data set (frac=1 keeps every row); random_state pins
# the seed so the shuffle is reproducible.
randomized_collection = spam_collection.sample(frac=1, random_state=3)

# Calculate the row index for an 80/20 training/test split.
training_test_index = round(len(randomized_collection) * 0.8)

# Training/Test split-up
# NOTE(review): the statements performing the actual split appear to be
# truncated in this excerpt — later snippets reference `training_set_final`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### While in Python environment ###
import nltk

# Download stopwords
# (fixed: the original used typographic quotes ‘…’, which are a SyntaxError in Python)
nltk.download('stopwords')
# Download punkt sentence tokenizer
nltk.download('punkt')
# Download wordnet (statement restored — the original excerpt was truncated
# after this comment; the download call is what the comment announces)
nltk.download('wordnet')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the training set into separate spam and ham frames; .copy() avoids
# chained-assignment warnings on later mutation.
spam_df = training_set_final[training_set_final['Label'] == 'spam'].copy()
ham_df = training_set_final[training_set_final['Label'] == 'ham'].copy()

# Calculate the class priors P(Spam) and P(Ham) as the fraction of
# training messages in each class.
p_spam = spam_df.shape[0] / training_set_final.shape[0]
p_ham = ham_df.shape[0] / training_set_final.shape[0]

# Calculate N_spam, N_ham and N_vocabulary.
# Assumes the 'SMS' column holds tokenized word lists, so len() is the
# per-message word count — TODO confirm against the preprocessing step.
spam_words_per_message = spam_df['SMS'].apply(len)
# NOTE(review): the summation into n_spam / n_ham / n_vocabulary appears
# to be truncated in this excerpt.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create two dictionaries that map each unique word to its conditional
# probability P(wi|Spam) / P(wi|Ham), initialized to 0.
parameters_spam = {unique_word: 0 for unique_word in vocabulary}
parameters_ham = {unique_word: 0 for unique_word in vocabulary}

# Iterate over the vocabulary and, for each word, calculate P(wi|Spam) and
# P(wi|Ham) with additive (Laplace) smoothing controlled by `alpha`.
for unique_word in vocabulary:
    p_unique_word_spam = (spam_df[unique_word].sum() + alpha) / (n_spam + alpha * n_vocabulary)
    p_unique_word_ham = (ham_df[unique_word].sum() + alpha) / (n_ham + alpha * n_vocabulary)
    # Update the calculated probabilities to the dictionaries.
    # (Restored: the original excerpt was truncated after the comment above,
    # leaving the computed values unused.)
    parameters_spam[unique_word] = p_unique_word_spam
    parameters_ham[unique_word] = p_unique_word_ham