Skip to content

Instantly share code, notes, and snippets.

@baatout
baatout / train_test_split.py
Last active September 8, 2018 11:06
Train/test split
from pandas import read_csv
from sklearn.model_selection import train_test_split
# Location of the Pima Indians diabetes CSV that is loaded below.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"

# Names assigned to the input columns, followed by the name of the target column.
features = [
    'preg',
    'plas',
    'pres',
    'skin',
    'test',
    'mass',
    'pedi',
    'age',
]
label = 'label'

# Column names are passed explicitly via `names` (features first, label last).
dataframe = read_csv(url, names=[*features, label])

# Separate the model inputs from the target.
X, Y = dataframe[features], dataframe[label]

# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)
@baatout
baatout / save_model_coefficients.py
Last active September 8, 2018 11:16
Save model coefficients
# with X_train, X_test, Y_train, Y_test
import json

from sklearn.linear_model import LogisticRegression

# Fit a logistic regression on the training split and print its score on
# the held-out test split.
clf = LogisticRegression()
clf.fit(X_train, Y_train)
print(clf.score(X_test, Y_test))

# Persist the learned coefficients as JSON. `coef_` is converted with
# `.tolist()` first because json cannot serialize the array object directly.
# NOTE(review): only `coef_` is written; if the consumer needs to reproduce
# predictions it will presumably also need `clf.intercept_` -- confirm.
with open('logreg_coefs', 'w') as f:
    json.dump(clf.coef_.tolist(), f)
@baatout
baatout / pmml_export.py
Last active September 8, 2018 11:15
PMML export
# with X_train, X_test, Y_train, Y_test
import numpy as np
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
clf = PMMLPipeline([
("mapper", DataFrameMapper([
(['mass'], FunctionTransformer(np.log1p)),
@baatout
baatout / pmml_output.xml
Last active September 8, 2018 11:13
PMML output
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_3" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.3">
<Header>
<Application name="JPMML-SkLearn" version="1.5.6"/>
<Timestamp>2018-09-08T11:13:03Z</Timestamp>
</Header>
<MiningBuildTask>
<Extension>PMMLPipeline(steps=[('mapper', DataFrameMapper(default=False, df_out=False,
features=[(['mass'], FunctionTransformer(accept_sparse=False, func=&lt;ufunc 'log1p'&gt;,
inv_kw_args=None, inverse_func=None, kw_args=None,
@baatout
baatout / pmml_failure.py
Last active September 8, 2018 11:18
PMML failure
# with X_train, X_test, Y_train, Y_test
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
def is_adult(x):
    """Return the result of comparing ``x`` against 18 with ``>``.

    The comparison is strict, so a value of exactly 18 yields ``False``.
    NOTE(review): confirm that excluding 18 itself is the intended threshold.
    """
    over_eighteen = x > 18
    return over_eighteen
clf = PMMLPipeline([
("mapper", DataFrameMapper([
@baatout
baatout / pickle_export.py
Last active September 8, 2018 11:22
Pickle export
# with X_train, X_test, Y_train, Y_test
import dill
from sklearn_pandas import DataFrameMapper
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
def is_adult(x):
    """Return the result of comparing ``x`` against 18 with ``>``.

    The comparison is strict, so a value of exactly 18 yields ``False``.
    NOTE(review): confirm that excluding 18 itself is the intended threshold.
    """
    over_eighteen = x > 18
    return over_eighteen
clf = Pipeline([
@baatout
baatout / run_pickle.py
Last active September 8, 2018 11:24
Run pickle
# run this anywhere and change the pipeline.pk path
import dill
from pandas import read_csv
# Location of the Pima Indians diabetes CSV that is loaded below.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"

# Names assigned to the input columns, followed by the name of the target column.
features = [
    'preg',
    'plas',
    'pres',
    'skin',
    'test',
    'mass',
    'pedi',
    'age',
]
label = 'label'

# Column names are passed explicitly via `names` (features first, label last).
dataframe = read_csv(url, names=[*features, label])

# Separate the model inputs from the target.
X, Y = dataframe[features], dataframe[label]
@baatout
baatout / adder.py
Last active July 1, 2018 12:05
A function with no types
def add(a, b):
    """Return ``a + b``.

    The parameters are deliberately left unannotated, so any pair of
    operands that supports ``+`` is accepted.
    """
    return a + b
# Import `add` from the `adder` module and call it with two integer literals.
from adder import add
x = add(4, 6)
@baatout
baatout / adder.py
Created July 1, 2018 12:11
Typed add function
def add(a: int, b: int) -> int:
    """Return the sum of the integers ``a`` and ``b``."""
    return a + b