#%%
# coding: utf-8
"""
@file
@brief Wraps runtime into a :epkg:`scikit-learn` transformer.
"""
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted
from skl2onnx import convert_sklearn
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
from skl2onnx.common.data_types import FloatTensorType
from mlprodict.onnxrt import OnnxInference


class OnnxModel(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
    """
    Calls :epkg:`onnxruntime` inference following the :epkg:`scikit-learn` API
    so that it can be included in a :epkg:`scikit-learn` pipeline.

    Parameters
    ----------
    obj : bytes or fitted :epkg:`scikit-learn` estimator
        serialized :epkg:`ONNX` model, or an estimator converted during *fit*
    output_name : string
        requested output name, or None to request all outputs and
        have method *transform* store all of them in a dataframe
    enforce_float32 : boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    runtime : string
        runtime passed to :epkg:`OnnxInference` (default is 'onnxruntime1')
    """

    def __init__(self, obj, output_name=None, enforce_float32=True,
                 runtime='onnxruntime1'):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self.obj = obj
        self.output_name = output_name
        self.enforce_float32 = enforce_float32
        self.runtime = runtime

    def __repr__(self):  # pylint: disable=W0222
        """
        usual
        """
        if hasattr(self, "onnx_bytes_"):
            ob = self.onnx_bytes_
        else:
            ob = repr(self.obj)
        if len(ob) > 20:
            dots = b"..." if isinstance(ob, bytes) else "..."
            ob = ob[:10] + dots + ob[-10:]
        return "{0}(onnx_bytes={1}, output_name={2}, enforce_float32={3}, runtime='{4}')".format(
            self.__class__.__name__, ob, self.output_name,
            self.enforce_float32, self.runtime)

    def fit(self, X, y=None, **fit_params):
        """
        Loads the :epkg:`ONNX` model, converting the wrapped
        :epkg:`scikit-learn` estimator first if needed.

        Parameters
        ----------
        X : array-like, only used to infer the number of input features
            when a :epkg:`scikit-learn` estimator has to be converted
        y : unused

        Returns
        -------
        self
        """
        if isinstance(self.obj, BaseEstimator):
            self.onnx_bytes_ = convert_sklearn(
                self.obj,
                initial_types=[('X', FloatTensorType([None, X.shape[1]]))]
            ).SerializeToString()
        else:
            self.onnx_bytes_ = (self.obj
                                if not hasattr(self.obj, 'SerializeToString')
                                else self.obj.SerializeToString())
        self.onnxrt_ = OnnxInference(self.onnx_bytes_, runtime=self.runtime)
        self.inputs_ = self.onnxrt_.input_names
        return self

    def _onnx_run(self, X):
        check_is_fitted(self, 'onnxrt_')
        dtype = 'float32' if self.enforce_float32 else 'numeric'
        X = check_array(X, dtype=dtype)
        rt_inputs = {self.inputs_[0]: X}
        doutputs = self.onnxrt_.run(rt_inputs)
        return doutputs

    def _dict_to_array(self, data):
        # skl2onnx classifiers return probabilities as a list of dictionaries
        # mapping class label to probability; stack them into a dense
        # (n_samples, n_classes) array with columns in sorted label order.
        keys = sorted(list(data[0].keys()))
        return np.asarray([[item[k] for k in keys] for item in data])

    def transform(self, X, y=None, **inputs):
        """
        Runs the predictions. If *X* is a dataframe,
        the function assumes every column is a separate input,
        otherwise, *X* is considered as the first input and *inputs*
        can be used to specify the extra ones.

        Parameters
        ----------
        X : iterable, data to process (or first input if several expected)
        y : unused
        inputs : :epkg:`ONNX` graphs support multiple inputs,
            each column of a dataframe is converted into as many inputs if
            *X* is a dataframe, otherwise, *X* is considered as the first input
            and *inputs* can be used to specify the other ones

        Returns
        -------
        dictionary mapping every output name to the computed values
        """
        return self._onnx_run(X)

    def predict(self, X):
        return self._onnx_run(X)['output_label']

    def predict_proba(self, X):
        res = self._onnx_run(X)['output_probability']
        return self._dict_to_array(res)

    def fit_transform(self, X, y=None, **inputs):
        """
        Loads the *ONNX* model and runs the predictions.

        Parameters
        ----------
        X : iterable, data to process (or first input if several expected)
        y : unused
        inputs : :epkg:`ONNX` graphs support multiple inputs,
            each column of a dataframe is converted into as many inputs if
            *X* is a dataframe, otherwise, *X* is considered as the first input
            and *inputs* can be used to specify the other ones

        Returns
        -------
        dictionary mapping every output name to the computed values
        """
        return self.fit(X, y=y, **inputs).transform(X, y)
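
#%%
# A minimal sketch, not part of the original gist: besides a fitted estimator,
# ``obj`` can also be the serialized bytes of an :epkg:`ONNX` model. The
# scaler and variable names below are illustrative assumptions only;
# *transform* then returns the raw dictionary of outputs from the runtime.
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

X_demo = load_iris().data
scaler = StandardScaler().fit(X_demo)
onx = convert_sklearn(
    scaler, initial_types=[('X', FloatTensorType([None, X_demo.shape[1]]))])
onnx_transformer = OnnxModel(onx.SerializeToString()).fit(X_demo)
print(onnx_transformer.transform(X_demo[:2]))
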
#%%
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

dec_models = [
    PCA(n_components=1),
    PCA(n_components=2),
    StandardScaler(),
]

pipeline = make_pipeline(PCA(n_components=2),
                         LogisticRegression(
                             solver='lbfgs',
                             multi_class='auto')).fit(X_train, y_train)

model = OnnxModel(pipeline).fit(X_train)
model.predict(X_test)
model.predict_proba(X_test)

cl = classification_report(y_test, model.predict(X_test))
print(cl)
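
#%%
# A minimal sketch, not part of the original gist: sanity-check that the
# ONNX-backed predictions agree with the original scikit-learn pipeline.
# Probabilities are expected to differ slightly because the ONNX graph runs
# in float32 while scikit-learn computes in float64.
X_test32 = X_test.astype(np.float32)
print((pipeline.predict(X_test32) == model.predict(X_test)).all())
print(abs(pipeline.predict_proba(X_test32) - model.predict_proba(X_test)).max())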