Skip to content

Instantly share code, notes, and snippets.

@baatout
baatout / pmml_export.py
Last active September 8, 2018 11:15
PMML export
# Assumes X_train, X_test, Y_train, Y_test already exist (see train_test_split.py)
import numpy as np
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
clf = PMMLPipeline([
("mapper", DataFrameMapper([
(['mass'], FunctionTransformer(np.log1p)),
@baatout
baatout / save_model_coefficients.py
Last active September 8, 2018 11:16
Save model coefficients
# Assumes X_train, X_test, Y_train, Y_test already exist (see train_test_split.py)
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier on the training split and print the
# value returned by `score` on the held-out test split (mean accuracy for
# scikit-learn classifiers).
clf = LogisticRegression()
clf.fit(X_train, Y_train)
print(clf.score(X_test, Y_test))

# Persist the learned coefficients as JSON. `coef_` is converted with
# `.tolist()` because `json` cannot serialize array objects directly.
# NOTE(review): only `coef_` is written; `intercept_` is not saved here, so
# the file alone may not be enough to reproduce predictions -- confirm.
import json
with open('logreg_coefs', 'w') as f:
    json.dump(clf.coef_.tolist(), f)
@baatout
baatout / train_test_split.py
Last active September 8, 2018 11:06
Train/test split
from pandas import read_csv
from sklearn.model_selection import train_test_split
# Column layout of the CSV: eight feature columns followed by the target.
features = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
]
label = 'label'

# Column names are supplied explicitly through `names=` when loading the
# remote CSV.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"
dataframe = read_csv(url, names=[*features, label])

# Separate the feature matrix from the target column.
X, Y = dataframe[features], dataframe[label]

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)