# makispl/calc_constant.py

## calc_constant.py
# Compute the constants used by the Naive Bayes spam filter.
#
# Reads two names that must already exist in scope: `training_set_final`
# (a DataFrame with at least the 'Label' and 'SMS' columns) and `vocabulary`
# (any sized collection).

# Split the training set by label. `.copy()` makes each subset its own
# DataFrame object rather than a selection of `training_set_final`.
spam_df = training_set_final[training_set_final['Label'] == 'spam'].copy()
ham_df = training_set_final[training_set_final['Label'] == 'ham'].copy()

# P(Spam) and P(Ham): the fraction of training rows carrying each label.
p_spam = spam_df.shape[0] / training_set_final.shape[0]
p_ham = ham_df.shape[0] / training_set_final.shape[0]

# N_spam and N_ham: the summed `len()` of every 'SMS' entry in each class.
# NOTE(review): this is a word count only if 'SMS' holds tokenised word lists;
# on raw strings `len` would count characters -- confirm against the
# preprocessing step that builds `training_set_final`.
spam_words_per_message = spam_df['SMS'].apply(len)
n_spam = spam_words_per_message.sum()

ham_words_per_message = ham_df['SMS'].apply(len)
n_ham = ham_words_per_message.sum()

# N_vocabulary: the number of entries in `vocabulary`.
n_vocabulary = len(vocabulary)

# Smoothing parameter; alpha = 1 corresponds to Laplace smoothing.
alpha = 1