Amine Baatout baatout

## train_test_split.py
from pandas import read_csv
from sklearn.model_selection import train_test_split

# Source CSV for the Pima Indians diabetes data.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"

# Column layout: the eight feature columns, then the target column.
label = 'label'
features = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']

# Column names are supplied explicitly, features first and the label last.
dataframe = read_csv(url, names=[*features, label])

# Separate the feature columns from the target column.
X, Y = dataframe[features], dataframe[label]

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

## save_model_coefficients.py
# Requires X_train, X_test, Y_train, Y_test from train_test_split.py to be in scope
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default settings on the training split.
clf = LogisticRegression().fit(X_train, Y_train)

# Print the model's score on the held-out split.
test_score = clf.score(X_test, Y_test)
print(test_score)

import json

# Persist the fitted coefficients as a JSON list in the file 'logreg_coefs'.
coefficients = clf.coef_.tolist()
with open('logreg_coefs', 'w') as coef_file:
    json.dump(coefficients, coef_file)

## pmml_export.py
# Requires X_train, X_test, Y_train, Y_test from train_test_split.py to be in scope
import numpy as np
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer

clf = PMMLPipeline([
   ("mapper", DataFrameMapper([
       (['mass'], FunctionTransformer(np.log1p)),

## pmml_output.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_3" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.3">
	<Header>
		<Application name="JPMML-SkLearn" version="1.5.6"/>
		<Timestamp>2018-09-08T11:13:03Z</Timestamp>
	</Header>
	<MiningBuildTask>
		<Extension>PMMLPipeline(steps=[('mapper', DataFrameMapper(default=False, df_out=False,
        features=[(['mass'], FunctionTransformer(accept_sparse=False, func=&lt;ufunc 'log1p'&gt;,
          inv_kw_args=None, inverse_func=None, kw_args=None,

## pmml_failure.py
# Requires X_train, X_test, Y_train, Y_test from train_test_split.py to be in scope
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer

def is_adult(x):
    """Return whether ``x`` is strictly greater than 18.

    The check is delegated to ``x > 18``, so the result has whatever type
    that comparison produces for ``x`` (a plain ``bool`` for numbers).
    """
    over_eighteen = x > 18
    return over_eighteen

clf = PMMLPipeline([
    ("mapper", DataFrameMapper([

## pickle_export.py
# Requires X_train, X_test, Y_train, Y_test from train_test_split.py to be in scope
import dill
from sklearn_pandas import DataFrameMapper
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer

def is_adult(x):
    """Tell whether ``x`` exceeds 18 (the value 18 itself does not count).

    The result is exactly what ``x > 18`` evaluates to, so its type follows
    the comparison semantics of ``x``.
    """
    exceeds_threshold = x > 18
    return exceeds_threshold

clf = Pipeline([

## run_pickle.py
# Run this anywhere, after changing the pipeline.pk path to point at your file
import dill
from pandas import read_csv

# Source CSV for the Pima Indians diabetes data.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"

# Column layout: the eight feature columns, then the target column.
label = 'label'
features = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']

# Column names are supplied explicitly, features first and the label last.
dataframe = read_csv(url, names=[*features, label])

# Separate the feature columns from the target column.
X, Y = dataframe[features], dataframe[label]

## adder.py
def add(a, b):
    """Return ``a + b``.

    No type checks are made: any pair of operands that supports ``+`` is
    accepted, and the result is whatever that operator yields for them.
    """
    total = a + b
    return total

## run_add_int.py
from adder import add

# Sum two integer literals through the imported helper; x ends up as 10.
x = add(a=4, b=6)

## adder.py
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``.

    Args:
        a: First addend.
        b: Second addend.

    Returns:
        The value of ``a + b``.

    The ``int`` annotations are hints for static type checkers; nothing in
    this function enforces them at runtime.
    """
    # The function ends here. Nothing may follow the return inside this
    # body: statements placed after it would never execute.
    return a + b