Skip to content

Instantly share code, notes, and snippets.

View Midvel's full-sized avatar

Pavlo Horbonos Midvel

View GitHub Profile
@Midvel
Midvel / AccumulatorMapInit.cpp
Last active January 29, 2020 12:21
AccumulatorMapInit
// Creates new_Map from an AccumulatorMap seeded with initialValue and then fed
// three (key, value) pairs through chained operator() calls.
// A declarator with a parenthesised initializer cannot be followed by further
// call expressions, so the chain is applied to a temporary and its result
// initialises new_Map.
// NOTE(review): this needs AccumulatorMap to be copy- or move-constructible
// from whatever operator() returns - confirm against the class definition.
AccumulatorMap new_Map = AccumulatorMap(initialValue)
    ("firstKey", firstValue)
    ("secondKey", secondValue)
    ("thirdKey", thirdValue);
@Midvel
Midvel / AccumulatorMapGenerator.cpp
Created January 29, 2020 12:24
AccumulatorMapGenerator
// Builds the returned ConstructedObject by constructing an AccumulatorMap from
// `data` and chaining two (name, value) calls onto it.
// NOTE(review): value1 / value2 are not declared in this snippet - presumably
// placeholders for values derived from `data`; confirm.
// NOTE(review): the result of the call chain must convert to ConstructedObject;
// that conversion is not visible here.
ConstructedObject GenerateObject(const Data& data)
{
return AccumulatorMap(data)
("firstProperty", value1)
("secondProperty", value2);
}
@Midvel
Midvel / AccumulatorMap.cpp
Created January 29, 2020 13:15
AccumulatorMap
class AccumulatorMap
{
public:
AccumulatorMap() = delete;
explicit AccumulatorMap(double initialValue) : m_value(initialValue)
{}
AccumulatorMap& operator()(Operation op, double value)
{
m_value = m_calc[op].calculate(m_value, value);
@Midvel
Midvel / AccumulatorMapUsage.cpp
Created January 29, 2020 13:16
AccumulatorMapUsage
/// Computes the area of a circle through an AccumulatorMap call chain.
///
/// The chain starts from `radius`, applies (EXP, 2) and then (MULTI, pi);
/// presumably EXP raises to a power and MULTI multiplies, giving pi * radius^2.
///
/// @param radius Circle radius.
/// @return The value the AccumulatorMap chain converts to, as a double.
double CircleArea(double radius)
{
    // Full double-precision pi: the former literal 3.14 made every result
    // roughly 0.05% too small.
    constexpr double kPi = 3.14159265358979323846;

    return AccumulatorMap(radius)
        (EXP, 2)
        (MULTI, kPi);
}
double SphereVolume(double radius)
{
return AccumulatorMap(radius)
(EXP, 3)
@Midvel
Midvel / sms_data_cleaning.py
Created January 30, 2020 18:34
sms_data_cleaning
# Normalise every SMS text: turn each run of non-word characters into a single
# space, collapse whitespace, trim, lower-case, and split into a list of words.
# The patterns are raw strings ('\W' / '\s' are invalid escapes in plain string
# literals) and regex=True is explicit, because pandas >= 2.0 treats the pattern
# as literal text by default.
sms_data_clean['SMS'] = (
    sms_data_clean['SMS']
    .str.replace(r'\W+', ' ', regex=True)
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
    .str.lower()
    .str.split()
)
@Midvel
Midvel / sms_data_split.py
Last active January 30, 2020 21:40
sms_data_split
# Draw 80% of the rows for training. The sample keeps its original index labels
# here so that exactly those rows can be excluded when building the test set;
# resetting the index first would make drop() remove rows 0..k-1 instead of the
# sampled rows, leaving train and test overlapping.
train_data = sms_data_clean.sample(frac=0.8, random_state=1)
test_data = sms_data_clean.drop(train_data.index).reset_index(drop=True)
train_data = train_data.reset_index(drop=True)
# Percentage of each label relative to the row count of sms_data.
sms_data_clean['Label'].value_counts() / sms_data.shape[0] * 100
'''
ham 86.593683
spam 13.406317
Name: Label, dtype: float64
'''
@Midvel
Midvel / sms_data_vocabulary.py
Last active January 30, 2020 21:42
sms_data_vocabulary
# Vocabulary: every distinct word found in any training message. A set
# comprehension visits each word once; summing the lists would build a new,
# ever-growing list per row and fails on an empty frame.
vocabulary = list({word for sms in train_data['SMS'] for word in sms})
# One row per message and one column per vocabulary word, holding how many times
# the word occurs in that message. The 'SMS' column is read by label rather than
# by position inside each row.
word_counts_per_sms = pd.DataFrame(
    [[sms.count(word) for word in vocabulary] for sms in train_data['SMS']],
    columns=vocabulary)
# reset_index() adds the old index as the first column; iloc[:, 1:] drops it again.
train_data = pd.concat([train_data.reset_index(), word_counts_per_sms], axis=1).iloc[:,1:]
@Midvel
Midvel / sms_data_functions.py
Created January 30, 2020 21:44
sms_data_functions
def p_w_spam(word):
    """Return the smoothed score of ``word`` among rows labelled 'spam'.

    For a word that has a column in ``train_data`` the result is
    ``(column sum over 'spam' rows + alpha) / (Nspam + alpha*Nvoc)``.
    A word without a column yields 1, which leaves a running product unchanged.
    """
    if word not in train_data.columns:
        return 1

    spam_rows = train_data['Label'] == 'spam'
    occurrences = train_data.loc[spam_rows, word].sum()
    return (occurrences + alpha) / (Nspam + alpha*Nvoc)
def p_w_ham(word):
if word in train_data.columns:
return (train_data.loc[train_data['Label'] == 'ham', word].sum() + alpha) / (Nham + alpha*Nvoc)
else:
@Midvel
Midvel / sms_data_classifier.py
Created January 30, 2020 21:45
sms_data_classifier
# Label a message as 'ham' or 'spam'.
# `message` is iterated word by word. Two running products start at Pspam and
# Pham (presumably the class priors - confirm) and are multiplied by
# p_w_spam(word) and p_w_ham(word) for every word; the label with the larger
# product is returned.
# NOTE(review): when both products are equal neither visible branch matches;
# the snippet may continue past this point - confirm before relying on it.
def classify(message):
p_spam_given_message = Pspam
p_ham_given_message = Pham
for word in message:
p_spam_given_message *= p_w_spam(word)
p_ham_given_message *= p_w_ham(word)
if p_ham_given_message > p_spam_given_message:
return 'ham'
elif p_ham_given_message < p_spam_given_message:
return 'spam'
@Midvel
Midvel / foursquare_categories.py
Created February 2, 2020 15:30
foursquare_categories
import requests
import pandas as pd
def extract_categories(cat_list, cat):
    """Flatten a nested category tree into ``cat_list``.

    Args:
        cat_list: List that receives one ``[name, id]`` pair per category; it
            is extended in place.
        cat: Iterable of category mappings. A mapping may hold its children
            under the ``'categories'`` key.

    Children are visited before their parent is appended, so nested
    categories appear ahead of the category that contains them.
    """
    for element in cat:
        # Truthiness covers both an empty list and a null/None value; calling
        # len() on None would raise TypeError.
        if 'categories' in element and element['categories']:
            extract_categories(cat_list, element['categories'])
        if 'id' in element and 'name' in element:
            cat_list.append([element['name'], element['id']])