Skip to content

Instantly share code, notes, and snippets.

View JarrydWannenburg's full-sized avatar
🤔
Learning

Jarryd Wannenburg JarrydWannenburg

🤔
Learning
View GitHub Profile
@JarrydWannenburg
JarrydWannenburg / altair_EDA.py
Last active August 18, 2022 16:10
House_Prices_Adv_Reg_EDA
import altair as alt
import pandas as pd
# Ignore size limits
alt.data_transformers.enable('default', max_rows=None)
# Create a function that takes the dataset and column name and returns an interactive histogram
def chart(dataset, column_name, target_var):
w = 500
single = alt.selection_single()
@JarrydWannenburg
JarrydWannenburg / altair_EDA_save.py
Last active August 17, 2022 19:31
House_Prices_Adv_Reg
# Build one chart per column of `dataset`, collecting them in `myl`.
# Charting is best-effort: a column that `chart` cannot handle is skipped
# so the remaining columns still get their chart.
myl = []
for col in dataset.columns:
    try:
        myl.append(chart(dataset, col, target_var))
    except Exception:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt and SystemExit still propagate and the loop
        # can be interrupted.
        continue
@JarrydWannenburg
JarrydWannenburg / Simple_bar_chart.py
Last active August 17, 2022 20:12
Easiest_python_visualization
import altair as alt
import pandas as pd
# Small demo table: one row per programming language with its usage value.
source = pd.DataFrame(
    {
        'Language': ['Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp'],
        'Usage': [10, 8, 6, 4, 2, 1],
    }
)
alt.Chart(source, title = "Programming language usage").mark_bar().encode(
x=alt.X('Language', sort='-y'),
@JarrydWannenburg
JarrydWannenburg / plt_bar_chart.py
Last active August 17, 2022 20:09
Easiest_python_visualization
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
# Category labels, one per bar.
objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')
# Integer positions 0 .. len(objects)-1; despite the name these are passed
# to plt.bar as the x coordinates of the bars.
y_pos = np.arange(len(objects))
# Bar heights, listed in the same order as `objects`.
performance = [10,8,6,4,2,1]
# Draw the bars centred on their positions, half transparent.
plt.bar(y_pos, performance, align='center', alpha=0.5)
# Replace the numeric tick labels with the category names.
plt.xticks(y_pos, objects)
@JarrydWannenburg
JarrydWannenburg / libraries.py
Last active August 26, 2022 19:07
Scalable_Pipeline_Article
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer, make_column_selector as Selector
from sklearn.decomposition import PCA
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split, GridSearchCV
@JarrydWannenburg
JarrydWannenburg / CategoricalTransformer.py
Last active August 26, 2022 18:44
Scalable_Pipeline_Article
class CategoricalTransformer(BaseEstimator, TransformerMixin):
    """Configurable transformer for the categorical columns.

    The constructor only records its options; nothing is computed or
    validated here. How the options are used is not shown in this snippet,
    so the descriptions below are inferred from the names (confirm against
    the full implementation).

    Parameters
    ----------
    sep_passengerId : default True
        Presumably toggles splitting of the passenger id column.
    sep_cabin : default True
        Presumably toggles splitting of the cabin column.
    convert_bools : default True
        Presumably toggles conversion of boolean columns.
    excluded_features : default ['Name']
        Presumably the columns to leave out.
    """

    def __init__(
        self,
        sep_passengerId=True,
        sep_cabin=True,
        convert_bools=True,
        excluded_features=['Name'],
    ):
        # NOTE(review): the default list is a single object shared by every
        # instance created without an explicit value; it is only stored
        # here, never mutated in the visible code.
        self.excluded_features = excluded_features
        self.convert_bools = convert_bools
        self.sep_cabin = sep_cabin
        self.sep_passengerId = sep_passengerId

    def fit(self, X, y=None):
        """Learn nothing from ``X``; return the instance unchanged."""
        return self
@JarrydWannenburg
JarrydWannenburg / CatFeatureEngineerTransformer.py
Last active August 26, 2022 18:44
Scalable_Pipeline_Article
class CatFeatureEngineerTransformer(BaseEstimator, TransformerMixin):
    """Add engineered categorical features to a DataFrame.

    Parameters
    ----------
    home_dest : default True
        When truthy, ``transform`` adds a ``HomeDest`` column made of the
        ``HomePlanet`` and ``Destination`` values joined by ``':'``.
    """

    def __init__(self, home_dest=True):
        self.home_dest = home_dest

    def fit(self, X, y=None):
        """Learn nothing from ``X``; return the instance unchanged."""
        return self

    def transform(self, X, y=None):
        """Add the engineered columns to ``X`` and return it.

        ``X`` must expose ``HomePlanet`` and ``Destination`` columns when
        ``home_dest`` is enabled. The new column is written onto ``X``
        itself (the input is modified in place, as before) and the same
        object is returned so the step can be chained in a Pipeline or
        used through ``fit_transform``.
        """
        if self.home_dest:
            X['HomeDest'] = X.HomePlanet + ':' + X.Destination
        # Without an explicit return the next pipeline step would receive
        # None instead of the data.
        return X
@JarrydWannenburg
JarrydWannenburg / Nominal_Columns.py
Last active August 26, 2022 18:52
Scalable_Pipeline_Article
class Nominal_Columns(BaseEstimator, TransformerMixin):
    """Select the nominal columns by dropping the excluded ones.

    Parameters
    ----------
    exclude : default ['Deck', 'Deck2']
        Column names removed by ``transform``. These would be your ordinal
        columns; be sure to understand their order too.
    """

    def __init__(self, exclude=['Deck','Deck2']):
        # NOTE(review): the default list is shared between instances; it is
        # stored as-is and never mutated in this class.
        self.exclude = exclude

    def fit(self, X, y=None):
        """Learn nothing from ``X``; return the instance unchanged."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` without the ``exclude`` columns.

        The input frame is left untouched. A ``KeyError`` from pandas
        propagates if any excluded column is missing from ``X``.
        """
        # `drop` without `inplace` already returns a new frame, so the
        # caller's data is not modified and the result is handed back
        # instead of being discarded.
        return X.drop(columns=self.exclude)
@JarrydWannenburg
JarrydWannenburg / Ordinal_Columns.py
Created August 26, 2022 18:55
Scalable_Pipeline_Article
class Ordinal_Columns(BaseEstimator, TransformerMixin):
def __init__(self, names=['Deck','Deck2']):
    """Record which columns are to be treated as ordinal.

    ``names`` is stored on the instance exactly as given; no copy is made
    and nothing is validated here.
    """
    self.names = names
def fit(self, X, y=None):
    """Learn nothing from ``X`` or ``y``; return the instance unchanged."""
    return self
def transform(self, X, y=None):
if len(self.names) > 0:
if len(self.names) == 1:
@JarrydWannenburg
JarrydWannenburg / cat_pipe.py
Last active August 26, 2022 20:10
Scalable_Pipeline_Article
cat_pipe = Pipeline([
('CategoricalTransformer', CategoricalTransformer()), # Manually included fillna(mode) similar to simple_imputer
('CatFeatureEngineer', CatFeatureEngineerTransformer()),
('Subtypes', FeatureUnion([
('ordinal', Pipeline([
('cols', Ordinal_Columns()),
('encoder', OrdinalEncoder(categories=[['T','G','F', 'E', 'D', 'C', 'B', 'A'],# Need row of values (low to high)
['G','F', 'E', 'D', 'C', 'B', 'A']], # can use auto instead of categories
# We see 11 total records with 'T' but external research doesn't support that value.
handle_unknown='use_encoded_value',