#%%
# coding: utf-8
"""
@file
@brief Wraps runtime into a :epkg:`scikit-learn` transformer.
"""
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted
from skl2onnx import convert_sklearn
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
from skl2onnx.common.data_types import FloatTensorType
from mlprodict.onnxrt import OnnxInference


class OnnxModel(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
    """
    Calls :epkg:`onnxruntime` inference following the :epkg:`scikit-learn` API
    so that it can be included in a :epkg:`scikit-learn` pipeline.

    Parameters
    ----------
    obj : bytes or fitted :epkg:`scikit-learn` estimator
        serialized :epkg:`ONNX` model, or an estimator converted during *fit*
    output_name : string
        requested output name, or None to request all outputs and
        have method *transform* store all of them in a dataframe
    enforce_float32 : boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    runtime : string
        runtime passed to :epkg:`OnnxInference` (default is 'onnxruntime1')
    """

    def __init__(self, obj, output_name=None, enforce_float32=True,
                 runtime='onnxruntime1'):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self.obj = obj
        self.output_name = output_name
        self.enforce_float32 = enforce_float32
        self.runtime = runtime

    def __repr__(self):  # pylint: disable=W0222
        """
        usual
        """
        if hasattr(self, "onnx_bytes_"):
            ob = self.onnx_bytes_
        else:
            ob = repr(self.obj)
        if len(ob) > 20:
            dots = b"..." if isinstance(ob, bytes) else "..."
            ob = ob[:10] + dots + ob[-10:]
        return "{0}(onnx_bytes={1}, output_name={2}, enforce_float32={3}, runtime='{4}')".format(
            self.__class__.__name__, ob, self.output_name,
            self.enforce_float32, self.runtime)

    def fit(self, X, y=None, **fit_params):
        """
        Loads the :epkg:`ONNX` model, converting the wrapped
        :epkg:`scikit-learn` estimator first if needed.

        Parameters
        ----------
        X : array-like, only used to infer the number of input features
            when a :epkg:`scikit-learn` estimator has to be converted
        y : unused

        Returns
        -------
        self
        """
        if isinstance(self.obj, BaseEstimator):
            self.onnx_bytes_ = convert_sklearn(
                self.obj,
                initial_types=[('X', FloatTensorType([None, X.shape[1]]))]
            ).SerializeToString()
        else:
            self.onnx_bytes_ = (self.obj
                                if not hasattr(self.obj, 'SerializeToString')
                                else self.obj.SerializeToString())
        self.onnxrt_ = OnnxInference(self.onnx_bytes_, runtime=self.runtime)
        self.inputs_ = self.onnxrt_.input_names
        return self

    def _onnx_run(self, X):
        check_is_fitted(self, 'onnxrt_')
        dtype = 'float32' if self.enforce_float32 else 'numeric'
        X = check_array(X, dtype=dtype)
        rt_inputs = {self.inputs_[0]: X}
        doutputs = self.onnxrt_.run(rt_inputs)
        return doutputs

    def _dict_to_array(self, data):
        # skl2onnx classifiers return probabilities as a list of dictionaries
        # mapping class label to probability; stack them into a dense
        # (n_samples, n_classes) array with columns in sorted label order.
        keys = sorted(list(data[0].keys()))
        return np.asarray([[item[k] for k in keys] for item in data])

    def transform(self, X, y=None, **inputs):
        """
        Runs the predictions. If *X* is a dataframe,
        the function assumes every column is a separate input,
        otherwise, *X* is considered as the first input and *inputs*
        can be used to specify the extra ones.

        Parameters
        ----------
        X : iterable, data to process (or first input if several expected)
        y : unused
        inputs : :epkg:`ONNX` graphs support multiple inputs,
            each column of a dataframe is converted into as many inputs if
            *X* is a dataframe, otherwise, *X* is considered as the first input
            and *inputs* can be used to specify the other ones

        Returns
        -------
        dictionary mapping every output name to the computed values
        """
        return self._onnx_run(X)

    def predict(self, X):
        return self._onnx_run(X)['output_label']

    def predict_proba(self, X):
        res = self._onnx_run(X)['output_probability']
        return self._dict_to_array(res)

    def fit_transform(self, X, y=None, **inputs):
        """
        Loads the *ONNX* model and runs the predictions.

        Parameters
        ----------
        X : iterable, data to process (or first input if several expected)
        y : unused
        inputs : :epkg:`ONNX` graphs support multiple inputs,
            each column of a dataframe is converted into as many inputs if
            *X* is a dataframe, otherwise, *X* is considered as the first input
            and *inputs* can be used to specify the other ones

        Returns
        -------
        dictionary mapping every output name to the computed values
        """
        return self.fit(X, y=y, **inputs).transform(X, y)
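
#%%
# A minimal sketch, not part of the original gist: besides a fitted estimator,
# ``obj`` can also be the serialized bytes of an :epkg:`ONNX` model. The
# scaler and variable names below are illustrative assumptions only;
# *transform* then returns the raw dictionary of outputs from the runtime.
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

X_demo = load_iris().data
scaler = StandardScaler().fit(X_demo)
onx = convert_sklearn(
    scaler, initial_types=[('X', FloatTensorType([None, X_demo.shape[1]]))])
onnx_transformer = OnnxModel(onx.SerializeToString()).fit(X_demo)
print(onnx_transformer.transform(X_demo[:2]))
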
#%%
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

dec_models = [
    PCA(n_components=1),
    PCA(n_components=2),
    StandardScaler(),
]

pipeline = make_pipeline(PCA(n_components=2),
                         LogisticRegression(
                             solver='lbfgs',
                             multi_class='auto')).fit(X_train, y_train)

model = OnnxModel(pipeline).fit(X_train)
model.predict(X_test)
model.predict_proba(X_test)

cl = classification_report(y_test, model.predict(X_test))
print(cl)
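
#%%
# A minimal sketch, not part of the original gist: sanity-check that the
# ONNX-backed predictions agree with the original scikit-learn pipeline.
# Probabilities are expected to differ slightly because the ONNX graph runs
# in float32 while scikit-learn computes in float64.
X_test32 = X_test.astype(np.float32)
print((pipeline.predict(X_test32) == model.predict(X_test)).all())
print(abs(pipeline.predict_proba(X_test32) - model.predict_proba(X_test)).max())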