Skip to content

Instantly share code, notes, and snippets.

View mprzybyla123's full-sized avatar

Matt Przybyla mprzybyla123

View GitHub Profile
@mprzybyla123
mprzybyla123 / .py
Created June 7, 2021 16:55
aggregation
# Columns to group by.
cols = ['Day', 'Ocean']
# Aggregation(s) to apply per value column, e.g. 'mean'. Passing a list per
# column makes the result's columns a two-level (column, aggregation) index.
group = df_example.groupby(cols).agg({
    '1': ['mean'],
    '2': ['mean'],
})
# Flatten the two-level header into single names such as '1_mean'.
# Iterating the column index yields the (column, aggregation) tuples directly,
# so no intermediate .ravel() call is needed.
group.columns = ["_".join(x) for x in group.columns]
@mprzybyla123
mprzybyla123 / .py
Last active June 7, 2021 16:54
dataset creation
# import libraries
import pandas as pd
import numpy as np
# Numeric sample data: 100 rows of random integers in [0, 100) for the
# columns named "1" and "2".
df_example = pd.DataFrame(
    np.random.randint(low=0, high=100, size=(100, 2)),
    columns=['1', '2'],
)
# Category labels for the columns the data will be grouped by.
oceans = ['Pacific', 'Atlantic']
days = ['Monday', 'Tuesday', 'Wednesday']
@mprzybyla123
mprzybyla123 / .py
Created July 21, 2020 21:23
cleaning
# Small sample frame with one free-text column containing letters mixed with
# digits and symbols.
data = {
    'text_field': [
        'words word word12341****341234',
        '2132word word$$%3412',
        'word 24234',
    ],
}
# The column list is taken from the dict keys, i.e. ['text_field'].
df = pd.DataFrame(data, columns=list(data))
import re
def cleaning_funciton(x: str) -> str:
    """Lower-case ``x`` and drop everything except ASCII letters and whitespace.

    The (misspelled) function name is kept as-is so existing callers keep
    working.

    Args:
        x: Text to clean.

    Returns:
        The lower-cased text with digits, punctuation and any other
        non-letter, non-whitespace characters removed. Whitespace is left
        untouched, so gaps left by removed characters are not collapsed.
    """
    x = x.lower()
    # Raw string so ``\s`` reaches the regex engine unchanged instead of being
    # parsed as an invalid string escape. re.sub already returns a str, so no
    # further joining is required.
    return re.sub(r'[^a-zA-Z\s]', '', x)
@mprzybyla123
mprzybyla123 / .py
Created July 21, 2020 21:02
classes
# text and numeric classes that use sklearn base libraries
class TextTransformer(BaseEstimator, TransformerMixin):
"""
Transform text features
"""
def __init__(self, key):
self.key = key
def fit(self, X, y=None, *parg, **kwarg):
return self
def transform(self, X):
@mprzybyla123
mprzybyla123 / .py
Created July 21, 2020 21:01
classes
# text and numeric classes that use sklearn base libraries
class TextTransformer(BaseEstimator, TransformerMixin):
"""
Transform text features
"""
def __init__(self, key):
self.key = key
def fit(self, X, y=None, *parg, **kwarg):
return self
def transform(self, X):
@mprzybyla123
mprzybyla123 / .py
Created June 23, 2020 00:58
grid-search
# Chain the feature-extraction step and the classifier into one estimator;
# the step names 'features' and 'clf' identify each stage.
pipe = Pipeline(steps=[
    ('features', features),
    ('clf', clf),
])
# create grid
param_grid = {
'n_estimators': [200, 300, 400],
'max_features': ['auto', 'sqrt', 'log2'],
'max_depth' : [4,6,8,10,20],
@mprzybyla123
mprzybyla123 / .py
Created June 23, 2020 00:54
models
# common classifiers that can be optimized with parameter tuning
# NOTE: every line below rebinds the same name `clf`, so if this is run top to
# bottom only the last assignment (KNeighborsClassifier) remains in effect.
# Keep just the line for the model you want to use.
clf = RandomForestClassifier()
clf = MultinomialNB()
clf = LogisticRegression()
clf = svm.SVC()
clf = KNeighborsClassifier()
@mprzybyla123
mprzybyla123 / .py
Last active May 29, 2020 21:38
classification-models
# import libraries
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
@mprzybyla123
mprzybyla123 / .java
Last active May 27, 2020 23:52
Processing
void setup(){
size(500, 700);
windmill1 = new Windmill(#E3D7CE, 100,265,1);
windmill2 = new Windmill(#E3D7CE, 250,265,.5);
windmill3 = new Windmill(#E3D7CE, 400, 265, 1);
boat = new Boat (255, #5D4108, #483206, 0, 375, 1, 1);
background = new Background(255, 500, 1);
water1 = new Water(#354164, 0, 5, 75);
water2 = new Water(#394F90, 0, 1, 75);
water3 = new Water(#445FAD, -25, 1, 75);
@mprzybyla123
mprzybyla123 / .py
Last active May 16, 2020 21:35
fake-news-predictor
# import libraries
# sklearn reference: https://scikit-learn.org/0.19/about.html#citing-scikit-learn
# pandas reference: https://pandas.pydata.org/
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
import matplotlib.pyplot as plt