Created
August 14, 2019 12:12
-
-
Save adrinjalali/de9ac56c61f3931b38b24e577f54d083 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#%%
# coding: utf-8
"""
@file
@brief Wraps runtime into a :epkg:`scikit-learn` transformer.
"""
import numpy as np
from mlprodict.onnxrt import OnnxInference
from skl2onnx import convert_sklearn
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
from skl2onnx.common.data_types import FloatTensorType
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted
class OnnxModel(BaseEstimator, OnnxOperatorMixin):
    """
    Runs an :epkg:`ONNX` model through ``OnnxInference`` following the
    :epkg:`scikit-learn` API so that it can be included in a
    :epkg:`scikit-learn` pipeline.

    Parameters
    ----------
    obj : estimator, ONNX model or bytes
        what to run: a fitted :epkg:`scikit-learn` estimator (converted
        with ``convert_sklearn`` when *fit* is called), an object exposing
        ``SerializeToString`` (serialized when *fit* is called), or
        anything else, which is handed to ``OnnxInference`` unchanged
        (presumably already serialized bytes)
    output_name : string
        requested output name, or None to have *transform* return
        every output
    enforce_float32 : boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    runtime : string
        runtime name forwarded to ``OnnxInference``
    """

    def __init__(self, obj, output_name=None, enforce_float32=True,
                 runtime='onnxruntime1'):
        # The class does not inherit from TransformerMixin, so only the
        # BaseEstimator initializer is invoked.
        BaseEstimator.__init__(self)
        self.obj = obj
        self.output_name = output_name
        self.enforce_float32 = enforce_float32
        self.runtime = runtime

    def __repr__(self):  # pylint: disable=W0222
        """
        Returns a short description; the model payload is truncated
        to its first and last ten items when longer than twenty.
        """
        if hasattr(self, "onnx_bytes_"):
            ob = self.onnx_bytes_
        else:
            ob = repr(self.obj)
        if not isinstance(ob, (bytes, str)):
            ob = repr(ob)
        if len(ob) > 20:
            # The separator must match the payload type: mixing *str*
            # and *bytes* raises TypeError.
            dots = b"..." if isinstance(ob, bytes) else "..."
            ob = ob[:10] + dots + ob[-10:]
        return "{0}(onnx_bytes={1}, output_name={2}, enforce_float32={3}, runtime='{4}')".format(
            self.__class__.__name__, ob, self.output_name,
            self.enforce_float32, self.runtime)

    def fit(self, X, y=None, **fit_params):
        """
        Loads the :epkg:`ONNX` model.

        Parameters
        ----------
        X : only used when *obj* is a :epkg:`scikit-learn` estimator:
            its number of columns defines the width of the declared
            float input ``'X'``
        y : unused
        fit_params : unused

        Returns
        -------
        self
        """
        if isinstance(self.obj, BaseEstimator):
            # np.shape also accepts nested lists, not only arrays.
            n_features = np.shape(X)[1]
            onx = convert_sklearn(
                self.obj,
                initial_types=[('X', FloatTensorType([None, n_features]))])
            self.onnx_bytes_ = onx.SerializeToString()
        elif hasattr(self.obj, 'SerializeToString'):
            self.onnx_bytes_ = self.obj.SerializeToString()
        else:
            self.onnx_bytes_ = self.obj
        self.onnxrt_ = OnnxInference(self.onnx_bytes_, runtime=self.runtime)
        self.inputs_ = self.onnxrt_.input_names
        return self

    def _onnx_run(self, X):
        """
        Validates *X* and feeds it to the runtime as the first input.

        Returns whatever ``OnnxInference.run`` returns; the other
        methods index it by output name.
        """
        check_is_fitted(self, 'onnxrt_')
        dtype = 'float32' if self.enforce_float32 else 'numeric'
        X = check_array(X, dtype=dtype)
        return self.onnxrt_.run({self.inputs_[0]: X})

    def _dict_to_array(self, data):
        """
        Converts a sequence of dictionaries sharing the same keys into
        a 2D array whose columns follow the sorted keys of the first item.
        An array is returned unchanged.
        """
        if isinstance(data, np.ndarray):
            return data
        keys = sorted(data[0])
        return np.asarray([[item[k] for k in keys] for item in data])

    def transform(self, X, y=None, **inputs):
        """
        Runs the model on *X*.

        Parameters
        ----------
        X : array-like, fed to the first input of the model
        y : unused
        inputs : unused, only *X* is forwarded to the runtime

        Returns
        -------
        all outputs indexed by name if *output_name* is None,
        otherwise only the output called *output_name*
        """
        outputs = self._onnx_run(X)
        if self.output_name is None:
            return outputs
        return outputs[self.output_name]

    def predict(self, X):
        """
        Runs the model on *X* and returns the output named
        ``'output_label'``.
        """
        return self._onnx_run(X)['output_label']

    def predict_proba(self, X):
        """
        Runs the model on *X* and returns the output named
        ``'output_probability'`` as an array, one column per sorted key.
        """
        res = self._onnx_run(X)['output_probability']
        return self._dict_to_array(res)

    def fit_transform(self, X, y=None, **inputs):
        """
        Loads the *ONNX* model and runs it on *X*.

        Parameters
        ----------
        X : array-like, fed to the first input of the model
        y : unused
        inputs : forwarded to *fit* as extra keyword arguments

        Returns
        -------
        same as *transform*
        """
        return self.fit(X, y=y, **inputs).transform(X, y)
#%%
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Demo cell: train a small pipeline on iris, wrap it in OnnxModel and
# compare its predictions with the held-out labels.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Candidate decomposition / scaling steps (not used further in this cell).
dec_models = [PCA(n_components=1), PCA(n_components=2), StandardScaler()]

pipeline = make_pipeline(
    PCA(n_components=2),
    LogisticRegression(solver='lbfgs', multi_class='auto'),
)
pipeline.fit(X_train, y_train)

# fit() converts the fitted pipeline to ONNX; X_train only gives the
# number of input columns.
model = OnnxModel(pipeline)
model.fit(X_train)

model.predict(X_test)
model.predict_proba(X_test)

cl = classification_report(y_test, model.predict(X_test))
print(cl)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment