Amine Baatout baatout

## pmml_export.py
# Assumes X_train, X_test, Y_train, Y_test are already defined (see train_test_split.py below).
import numpy as np
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer

clf = PMMLPipeline([
   ("mapper", DataFrameMapper([
       (['mass'], FunctionTransformer(np.log1p)),

## save_model_coefficients.py
# Assumes X_train, X_test, Y_train, Y_test are already defined (see train_test_split.py below).
from sklearn.linear_model import LogisticRegression

import json

# Train a logistic-regression classifier with default settings on the
# training split.
clf = LogisticRegression()
clf.fit(X_train, Y_train)

# Print the fitted model's score on the held-out test split.
test_score = clf.score(X_test, Y_test)
print(test_score)

# Convert the fitted coefficient array to nested lists so it can be
# serialized, then write it as JSON to the file 'logreg_coefs'.
coefficients = clf.coef_.tolist()
with open('logreg_coefs', 'w') as coef_file:
    coef_file.write(json.dumps(coefficients))

## train_test_split.py
from pandas import read_csv
from sklearn.model_selection import train_test_split

# Source CSV (Pima Indians diabetes data) and the column names applied to it.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"
label = 'label'
features = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']

# Read the file, naming the columns explicitly: the eight features followed
# by the label column.
column_names = [*features, label]
dataframe = read_csv(url, names=column_names)

# Separate the model inputs from the target column.
X, Y = dataframe[features], dataframe[label]

# Split into train and test sets with test_size=0.33 and a fixed random_state.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)
	# Assumes X_train, X_test, Y_train, Y_test are already defined by the train/test split snippet.
	import numpy as np
	from sklearn_pandas import DataFrameMapper
	from sklearn2pmml import PMMLPipeline, sklearn2pmml
	from sklearn.linear_model import LogisticRegression
	from sklearn.preprocessing import FunctionTransformer

	clf = PMMLPipeline([
	("mapper", DataFrameMapper([
	(['mass'], FunctionTransformer(np.log1p)),
	# Assumes X_train, X_test, Y_train, Y_test are already defined by the train/test split snippet.
	from sklearn.linear_model import LogisticRegression

	# Train a logistic-regression classifier with default settings on the
	# training split, then print its score on the held-out test split.
	clf = LogisticRegression()
	clf.fit(X_train, Y_train)
	print(clf.score(X_test, Y_test))

	import json
	# Serialize the fitted coefficients (converted to nested lists) as JSON
	# into the file 'logreg_coefs'.
	with open('logreg_coefs', 'w') as f:
		# The body has to be indented one level under the `with` header;
		# left at the header's own indent it is a syntax error.
		json.dump(clf.coef_.tolist(), f)
	from pandas import read_csv
	from sklearn.model_selection import train_test_split

	# Source CSV (Pima Indians diabetes data) and the column names applied to it.
	url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"
	label = 'label'
	features = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']

	# Read the file, naming the columns explicitly: the eight features followed
	# by the label column.
	column_names = [*features, label]
	dataframe = read_csv(url, names=column_names)

	# Separate the model inputs from the target column.
	X, Y = dataframe[features], dataframe[label]

	# Split into train and test sets with test_size=0.33 and a fixed random_state.
	X_train, X_test, Y_train, Y_test = train_test_split(
		X,
		Y,
		test_size=0.33,
		random_state=42,
	)