Skip to content

Instantly share code, notes, and snippets.

View xiaowei1234's full-sized avatar

Xiao Wei xiaowei1234

View GitHub Profile
import numpy as np
from sklearn.linear_model import PoissonRegressor, Lasso, Ridge
import statsmodels.api as sm
# Toy data: a constant column plus one feature, with small count targets.
X_array = np.array([[1, 2], [1, 3], [1, 4], [1, 3]])
y = np.array([2, 2, 3, 2])

# First fit: alpha=1 on the features as given (no separate intercept term,
# the constant column in X_array plays that role).
Preg_alpha_1 = PoissonRegressor(alpha=1.0, fit_intercept=False)
Preg_alpha_1.fit(X_array, y)
print('alpha 1 Poisson Reg', Preg_alpha_1.coef_)

# Second fit: alpha=2 on the features multiplied by 4, printed for comparison.
Preg_alpha_2 = PoissonRegressor(alpha=2.0, fit_intercept=False)
Preg_alpha_2.fit(X_array * 4.0, y)
print('alpha 2 Poisson Reg', Preg_alpha_2.coef_)
@xiaowei1234
xiaowei1234 / GLM vs OLS Lasso penalty.py
Created March 4, 2022 17:02
A comparison of GLM vs OLS lasso penalty regression
import numpy as np
from sklearn.linear_model import PoissonRegressor, Lasso
# Shared toy data for every model below: constant column + one feature.
X_array = np.array([[1, 2], [1, 3], [1, 4], [1, 3]])
y = np.array([2, 2, 3, 2])

# Poisson GLM with alpha=1 on the unscaled features.
Preg_alpha_1 = PoissonRegressor(alpha=1.0, fit_intercept=False)
Preg_alpha_1.fit(X_array, y)
print('alpha 1', Preg_alpha_1.coef_)

# Poisson GLM with alpha=2 on the features divided by 2.
Preg_alpha_2 = PoissonRegressor(alpha=2.0, fit_intercept=False)
Preg_alpha_2.fit(X_array / 2.0, y)
print('alpha 2', Preg_alpha_2.coef_)

# Linear Lasso counterpart with alpha=1 on the unscaled features.
Lreg_alpha_1 = Lasso(alpha=1.0, fit_intercept=False)
Lreg_alpha_1.fit(X_array, y)
@xiaowei1234
xiaowei1234 / Expression_isnull.py
Created July 27, 2019 00:57
create binary column on null values
# Derive a binary flag from 'col_with_nulls': the expression evaluates to 0
# when the value is null and 1 otherwise.
# Fix: the (column, transformer) tuple was missing its closing parenthesis,
# which made the statement a SyntaxError.
wrap1 = DataFrameMapper([
    ('col_with_nulls', ExpressionTransformer("0 if pandas.isnull(X[0]) else 1")),
])
@xiaowei1234
xiaowei1234 / Alias.py
Last active June 1, 2019 01:37
Status Code aliases
from sklearn.pipeline import make_pipeline
from sklearn2pmml.decoration import Alias
from sklearn.pipeline import FeatureUnion
# Build one (column, transformer) pair per status column. Each column gets
# its own LookupTransformer (202 -> 1, default 0) wrapped in an Alias that
# names the output 'status_<n>_202'.
wrap1 = DataFrameMapper([
    (
        f'Status {n}',
        Alias(LookupTransformer({202: 1}, 0), f'status_{n}_202', prefit=True),
    )
    for n in (1, 2, 3)
])
@xiaowei1234
xiaowei1234 / status_codes.py
Last active April 24, 2019 15:53
Status Code Transformation
from sklearn.pipeline import make_pipeline
# Each of the three status columns is passed through its own lookup
# (203 -> 1, default 0).
wrap2 = DataFrameMapper([
    (f'Status {n}', LookupTransformer({203: 1}, 0))
    for n in (1, 2, 3)
])

# Expression that adds the first three input columns together.
union = ExpressionTransformer("X[0]+X[1]+X[2]")
@xiaowei1234
xiaowei1234 / LookupTransformer.py
Last active July 16, 2019 00:53
LookupTransformer example
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer
from sklearn_pandas import DataFrameMapper
# Drop column 0 and pass every remaining column through unchanged;
# sparse_threshold=0.0 is set explicitly.
col_trans2 = ColumnTransformer(
    transformers=[('first', 'drop', [0])],
    remainder='passthrough',
    sparse_threshold=0.0,
)
mapper = DataFrameMapper([
('code1', [CategoricalDomain(missing_value_treatment = "as_value", missing_value_replacement = '!')
,LookupTransformer({'a': 'b', 'b': 'd', 'c': 'd'}, 'a')
@xiaowei1234
xiaowei1234 / CutTransformer.py
Last active April 18, 2019 21:04
Binning using CutTransformer
from sklearn2pmml.preprocessing import CutTransformer
from sklearn.impute import SimpleImputer
# Two bins with edges 0 / 250 / 2200, labelled 0.3 and 0.4 respectively.
bins = CutTransformer(
    bins=[0, 250, 2200],
    labels=[0.3, 0.4],
)

# 'amount' is imputed first (SimpleImputer defaults), then binned.
wrap = DataFrameMapper([
    ('amount', [SimpleImputer(), bins]),
])
@xiaowei1234
xiaowei1234 / ContinuousDomain.py
Last active April 18, 2019 20:58
ContinuousDomain example
from sklearn2pmml.decoration import ContinuousDomain
from sklearn.impute import SimpleImputer
# Domain decorator for a continuous field: NaN and -1 are declared as missing
# and replaced by 350; outliers are handled 'as_extreme_values' with the
# bounds 300 and 1500.
cont_d = ContinuousDomain(
    missing_value_replacement=350,
    missing_value_treatment='as_value',
    missing_values=[float("NaN"), -1],
    outlier_treatment='as_extreme_values',
    low_value=300,
    high_value=1500,
)
wrap = DataFrameMapper([
('amount', [cont_d, SimpleImputer(), FunctionTransformer(np.log1p, validate=False)])
@xiaowei1234
xiaowei1234 / scorers.py
Last active October 1, 2020 12:58
binary classification scorers
import numpy as np
import pandas as pd
from scipy.stats import ks_2samp
from sklearn.metrics import make_scorer, roc_auc_score, log_loss
from sklearn.model_selection import GridSearchCV
def ks_stat(y, yhat):
    """Return the two-sample Kolmogorov-Smirnov statistic between class scores.

    Splits ``yhat`` into the scores whose label in ``y`` equals 1 and the
    scores of every other label, then compares the two samples with
    ``scipy.stats.ks_2samp``.

    Args:
        y: Array-like of labels; entries equal to 1 form the positive group.
        yhat: Array-like of scores, aligned position-by-position with ``y``.

    Returns:
        The ``statistic`` attribute of the ``ks_2samp`` result.
    """
    # Coerce to ndarrays so plain Python sequences work and the boolean
    # mask is always applied positionally.
    labels = np.asarray(y)
    scores = np.asarray(yhat)
    positive = labels == 1
    return ks_2samp(scores[positive], scores[~positive]).statistic
@xiaowei1234
xiaowei1234 / pdf_pipe_decorator.py
Last active November 9, 2018 00:37
pipe decorator example
def cell_wrapper(df, func, field, drop=True, new_name=None):
"""
decorator function for pandas pipe api
takes func which applies function to one value in field
returns modified dataframe
df (pandas dataframe): the dataframe to apply transformation on
func (function): function to apply to each value of field
field (str): name of column in df
drop (boolean): whether to drop 'field' after transformation
new_name (str): whether to rename transformed 'field' column to new_name