This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import altair as alt | |
import pandas as pd | |
# Ignore size limits | |
alt.data_transformers.enable('default', max_rows=None) | |
# Create a function that takes the dataset and column name and returns an interactive histogram | |
def chart(dataset, column_name, target_var): | |
w = 500 | |
single = alt.selection_single() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one chart per column of `dataset`, collecting the results in `myl`.
# Columns for which `chart` raises are skipped (best-effort collection).
myl = []
for col in dataset.columns:
    try:
        myl.append(chart(dataset, col, target_var))
    except Exception:
        # Deliberately skip columns that cannot be charted. Catching
        # `Exception` instead of using a bare `except:` lets
        # KeyboardInterrupt and SystemExit propagate, so a long-running
        # loop can still be interrupted.
        continue
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import altair as alt | |
import pandas as pd | |
# Demo data for the bar chart: one (Language, Usage) pair per row.
source = pd.DataFrame(
    [
        ('Python', 10),
        ('C++', 8),
        ('Java', 6),
        ('Perl', 4),
        ('Scala', 2),
        ('Lisp', 1),
    ],
    columns=['Language', 'Usage'],
)
alt.Chart(source, title = "Programming language usage").mark_bar().encode( | |
x=alt.X('Language', sort='-y'), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt; plt.rcdefaults() | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# Category labels for the x-axis, in display order.
objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')

# One integer slot per label: 0, 1, ..., len(objects) - 1.
y_pos = np.arange(len(objects))

# Bar heights, matched index-for-index with `objects`.
performance = [10, 8, 6, 4, 2, 1]

# Draw half-transparent bars centred on each slot, then replace the numeric
# tick positions with the category labels.
plt.bar(
    y_pos,
    performance,
    align='center',
    alpha=0.5,
)
plt.xticks(y_pos, objects)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.compose import ColumnTransformer, make_column_selector as Selector | |
from sklearn.decomposition import PCA | |
from sklearn.experimental import enable_iterative_imputer | |
from sklearn.impute import IterativeImputer, SimpleImputer | |
from sklearn.svm import SVC | |
from sklearn.feature_selection import SelectKBest, chi2 | |
from sklearn.model_selection import train_test_split, GridSearchCV |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CategoricalTransformer(BaseEstimator, TransformerMixin): | |
def __init__(self, sep_passengerId = True, sep_cabin = True, convert_bools = True, excluded_features = ['Name']): | |
self.sep_passengerId = sep_passengerId | |
self.sep_cabin = sep_cabin | |
self.convert_bools = convert_bools | |
self.excluded_features = excluded_features | |
def fit(self, X, y=None): | |
return self | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CatFeatureEngineerTransformer(BaseEstimator, TransformerMixin): | |
def __init__(self, home_dest = True): | |
self.home_dest = home_dest | |
def fit(self, X, y=None): | |
return self | |
def transform(self, X, y=None): | |
if self.home_dest: | |
X['HomeDest'] = X.HomePlanet + ':' + X.Destination |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Nominal_Columns(BaseEstimator, TransformerMixin): | |
def __init__(self, exclude=['Deck','Deck2']): # These would be your ordinal columns. Be sure to understand the order too. | |
self.exclude = exclude | |
def fit(self, X, y=None): | |
return self | |
def transform(self, X, y=None): | |
_X = X.copy() | |
_X.drop(columns=self.exclude, inplace=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Ordinal_Columns(BaseEstimator, TransformerMixin): | |
def __init__(self, names=['Deck','Deck2']): #,'Deck2' | |
self.names = names | |
def fit(self, X, y=None): | |
return self | |
def transform(self, X, y=None): | |
if len(self.names) > 0: | |
if len(self.names) == 1: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cat_pipe = Pipeline([ | |
('CategoricalTransformer', CategoricalTransformer()), # Manually included fillna(mode) similar to simple_imputer | |
('CatFeatureEngineer', CatFeatureEngineerTransformer()), | |
('Subtypes', FeatureUnion([ | |
('ordinal', Pipeline([ | |
('cols', Ordinal_Columns()), | |
('encoder', OrdinalEncoder(categories=[['T','G','F', 'E', 'D', 'C', 'B', 'A'],# Need row of values (low to high) | |
['G','F', 'E', 'D', 'C', 'B', 'A']], # can use auto instead of categories | |
# We see 11 total records with 'T' but external research doesn't support that value. | |
handle_unknown='use_encoded_value', |
OlderNewer