This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# columns to group by
cols = ['Day', 'Ocean']

# aggregation(s) to apply to each value column, e.g. 'mean'
group = df_example.groupby(cols).agg({
    '1': ['mean'],
    '2': ['mean'],
})

# Passing lists of aggregations yields MultiIndex columns made of
# (column, aggregation) pairs; flatten each pair into one name, e.g. '1_mean'.
group.columns = ["_".join(x) for x in group.columns.to_flat_index()]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries
import pandas as pd
import numpy as np

# numeric data for columns "1" and "2": 100 rows of random integers in [0, 100)
df_example = pd.DataFrame(
    np.random.randint(0, 100, size=(100, 2)),
    columns=list('12'),
)

# categorical values that will be used for grouping
oceans = ['Pacific', 'Atlantic']
days = ['Monday', 'Tuesday', 'Wednesday']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# sample records for one text column, mixing words, digits and punctuation
data = {'text_field': ['words word word12341****341234', '2132word word$$%3412', 'word 24234']}
df = pd.DataFrame(data, columns=['text_field'])
import re


def cleaning_funciton(x):
    """Return *x* lower-cased with everything but letters and whitespace removed.

    The (misspelled) function name is kept as-is so existing callers keep
    working.

    Args:
        x: The text to clean; must be a ``str``.

    Returns:
        The lower-cased text with every character that is neither an ASCII
        letter nor whitespace stripped out.
    """
    x = x.lower()
    # Raw string so that ``\s`` reaches the regex engine untouched instead of
    # being treated as an (invalid) string escape sequence.
    return re.sub(r'[^a-zA-Z\s]', '', x)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# text and numeric classes that use sklearn base libraries
class TextTransformer(BaseEstimator, TransformerMixin): | |
""" | |
Transform text features | |
""" | |
def __init__(self, key): | |
self.key = key | |
def fit(self, X, y=None, *parg, **kwarg): | |
return self | |
def transform(self, X): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# text and numeric classes that use sklearn base libraries
class TextTransformer(BaseEstimator, TransformerMixin): | |
""" | |
Transform text features | |
""" | |
def __init__(self, key): | |
self.key = key | |
def fit(self, X, y=None, *parg, **kwarg): | |
return self | |
def transform(self, X): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# unite the features and classifier together: the 'features' step runs first
# and its output is passed on to the 'clf' step
pipe = Pipeline([
    ('features', features),
    ('clf', clf),
])
# create grid | |
param_grid = { | |
'n_estimators': [200, 300, 400], | |
'max_features': ['auto', 'sqrt', 'log2'], | |
'max_depth' : [4,6,8,10,20], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# common classifiers that can be optimized with parameter tuning
# NOTE: every assignment below rebinds `clf`, so only the last one is still
# bound afterwards; keep just the line for the model you want to tune.
clf = RandomForestClassifier()
clf = MultinomialNB()
clf = LogisticRegression()
clf = svm.SVC()
clf = KNeighborsClassifier()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer | |
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.pipeline import Pipeline, FeatureUnion | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn import svm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void setup(){ | |
size(500, 700); | |
windmill1 = new Windmill(#E3D7CE, 100,265,1); | |
windmill2 = new Windmill(#E3D7CE, 250,265,.5); | |
windmill3 = new Windmill(#E3D7CE, 400, 265, 1); | |
boat = new Boat (255, #5D4108, #483206, 0, 375, 1, 1); | |
background = new Background(255, 500, 1); | |
water1 = new Water(#354164, 0, 5, 75); | |
water2 = new Water(#394F90, 0, 1, 75); | |
water3 = new Water(#445FAD, -25, 1, 75); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
# sklearn reference: https://scikit-learn.org/0.19/about.html#citing-scikit-learn | |
# pandas reference: https://pandas.pydata.org/ | |
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold | |
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer | |
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.pipeline import Pipeline, FeatureUnion | |
import matplotlib.pyplot as plt |
NewerOlder