# jnothman/transform_feature_names.py

## transform_feature_names.py
from singledispatch import singledispatch

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.feature_selection.base import SelectorMixin
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.preprocessing import Imputer
from sklearn.preprocessing import FunctionTransformer

#from eli5 import explain_weights
#@explain_weights.register(Pipeline)
#def explain_weights_pipeline(estimator, feature_names=None, **kwargs):
#    last_estimator = estimator.steps[-1][1]
#    transform_pipeline = Pipeline(estimator.steps[:-1])
#    feature_names = transform_feature_names(transform_pipeline, feature_names)
#    out = explain_weights(last_estimator, feature_names=feature_names)
#    out.estimator = repr(estimator)
#    return out


@singledispatch
def transform_feature_names(transformer, in_names=None):
    """Return the names of the features produced by ``transformer``.

    This is the generic fallback of a single-dispatch function; specialised
    implementations are registered per transformer type.

    Parameters
    ----------
    transformer : object
        The transformer whose output feature names are wanted.
    in_names : optional
        Names of the input features. The fallback does not use them.

    Returns
    -------
    The result of ``transformer.get_feature_names()``.

    Raises
    ------
    NotImplementedError
        If ``transformer`` has no ``get_feature_names`` attribute.
    """
    if not hasattr(transformer, 'get_feature_names'):
        message = 'transform_feature_names not available for {}'.format(
            transformer)
        raise NotImplementedError(message)
    return transformer.get_feature_names()


@transform_feature_names.register(Pipeline)
def _pipeline_names(est, in_names=None):
    """Thread feature names through the steps of a ``Pipeline`` in order.

    Each step that is not ``None`` maps the names produced so far through
    ``transform_feature_names``; ``None`` steps leave the names untouched.
    Returns the names after the last step (``in_names`` itself when every
    step is ``None`` or there are no steps).
    """
    current = in_names
    for _, step in est.steps:
        if step is None:
            continue
        current = transform_feature_names(step, current)
    return current


@transform_feature_names.register(FeatureUnion)
def _union_names(est, in_names=None):
    """Concatenate the output names of every transformer in a union.

    Every transformer yielded by ``est._iter()`` receives the same
    ``in_names``; each of its output names is prefixed with the transformer's
    own name as ``'<transformer name>:<feature name>'``. The third element of
    each yielded triple is not used here.
    """
    prefixed = []
    for trans_name, trans, _ in est._iter():
        for feat_name in transform_feature_names(trans, in_names):
            prefixed.append('{}:{}'.format(trans_name, feat_name))
    return prefixed


@transform_feature_names.register(SelectorMixin)
def _select_names(est, in_names=None):
    """Return the input names at the indices reported by the selector.

    The indices come from ``est.get_support(indices=True)`` and are used to
    index ``in_names`` directly, so ``in_names`` must be indexable.
    """
    kept_indices = est.get_support(indices=True)
    return [in_names[idx] for idx in kept_indices]


def _formatted_names(fmt):
    """Build a name transformer that wraps each input name with ``fmt``.

    ``fmt`` is a ``str.format`` template with one positional field. The
    returned function takes ``(estimator, in_names)`` and returns a list with
    ``fmt.format(name)`` for every name in ``in_names``; the estimator argument
    is ignored.
    """
    def transform_names(self, in_names=None):
        return list(map(fmt.format, in_names))
    return transform_names


def _component_names(fmt, attr):
    """Build a name transformer that numbers the output components.

    The returned function takes ``(estimator, in_names)``, reads the integer
    stored in the estimator attribute named ``attr`` and returns
    ``fmt.format(i)`` for ``i`` in ``range`` of that integer. ``in_names`` is
    ignored.
    """
    def transform_names(self, in_names=None):
        n_components = getattr(self, attr)
        return list(map(fmt.format, range(n_components)))
    return transform_names


# These transformers are registered with a formatter that wraps every input
# name, e.g. 'tfidf(<name>)'.
transform_feature_names.register(TfidfTransformer)(_formatted_names('tfidf({})'))
transform_feature_names.register(Imputer)(_formatted_names('impute({})'))


def _lda_topic_names(est, in_names=None):
    """Return ``'topic(i)'`` for every topic of a LatentDirichletAllocation.

    scikit-learn 0.19 renamed the ``n_topics`` parameter to ``n_components``:
    from then on ``n_topics`` defaults to ``None`` and it is removed in 0.21,
    so reading ``n_topics`` alone breaks on those releases. ``n_topics`` is
    still preferred when it is set, which keeps the behaviour on older
    releases and honours an explicitly passed deprecated value.
    ``in_names`` is ignored.
    """
    n_topics = getattr(est, 'n_topics', None)
    if n_topics is None:
        n_topics = est.n_components
    return ['topic({})'.format(i) for i in range(n_topics)]


transform_feature_names.register(LatentDirichletAllocation)(_lda_topic_names)


class FunctionTransformer(FunctionTransformer):
    """``FunctionTransformer`` that also carries a feature-name function.

    Shadows the imported ``FunctionTransformer`` and adds one constructor
    parameter, ``feature_name_func``, which is stored unchanged on the
    instance. Every other parameter is forwarded to the base class.

    Parameters
    ----------
    feature_name_func : callable or None
        Called with the input feature names by the function registered for
        this class with ``transform_feature_names``.
    """

    def __init__(self, func=None, inverse_func=None,
                 feature_name_func=None,
                 validate=True,
                 accept_sparse=False, pass_y=False,
                 kw_args=None, inv_kw_args=None):
        # Name the class explicitly: ``super(type(self), self)`` resolves to
        # the subclass whenever this class is itself subclassed, which makes
        # __init__ call itself until the recursion limit is hit. The global
        # name is looked up at call time, when it already refers to this
        # class rather than to the shadowed base.
        super(FunctionTransformer, self).__init__(
            func=func, inverse_func=inverse_func, validate=validate,
            accept_sparse=accept_sparse, pass_y=pass_y,
            kw_args=kw_args, inv_kw_args=inv_kw_args)
        self.feature_name_func = feature_name_func


@transform_feature_names.register(FunctionTransformer)
def _function_transformer_names(est, in_names=None):
    """Map feature names through the transformer's ``feature_name_func``.

    Returns whatever ``est.feature_name_func(in_names)`` returns.

    Raises
    ------
    NotImplementedError
        If ``est.feature_name_func`` is ``None`` (the constructor default).
        This matches the error of the generic ``transform_feature_names``
        fallback instead of failing with "'NoneType' object is not callable".
    """
    name_func = est.feature_name_func
    if name_func is None:
        raise NotImplementedError(
            'transform_feature_names not available for {}: '
            'no feature_name_func was given'.format(est))
    return name_func(in_names)
	from singledispatch import singledispatch

	from sklearn.pipeline import Pipeline, FeatureUnion
	from sklearn.feature_selection.base import SelectorMixin
	from sklearn.feature_extraction.text import TfidfTransformer
	from sklearn.decomposition import LatentDirichletAllocation
	from sklearn.preprocessing import Imputer
	from sklearn.preprocessing import FunctionTransformer

	#from eli5 import explain_weights
	#@explain_weights.register(Pipeline)
	#def explain_weights_pipeline(estimator, feature_names=None, **kwargs):
	# last_estimator = estimator.steps[-1][1]
	# transform_pipeline = Pipeline(estimator.steps[:-1])
	# feature_names = transform_feature_names(transform_pipeline, feature_names)
	# out = explain_weights(last_estimator, feature_names=feature_names)
	# out.estimator = repr(estimator)
	# return out


	@singledispatch
	def transform_feature_names(transformer, in_names=None):
	    """Return the names of the features produced by ``transformer``.

	    Generic fallback of the single-dispatch function: delegates to
	    ``transformer.get_feature_names()`` when that attribute exists and
	    ignores ``in_names``.

	    Raises
	    ------
	    NotImplementedError
	        If ``transformer`` has no ``get_feature_names`` attribute.
	    """
	    if hasattr(transformer, 'get_feature_names'):
	        return transformer.get_feature_names()
	    raise NotImplementedError('transform_feature_names not available for '
	                              '{}'.format(transformer))


	@transform_feature_names.register(Pipeline)
	def _pipeline_names(est, in_names=None):
	    """Thread feature names through the steps of a ``Pipeline`` in order.

	    Steps that are ``None`` are skipped; every other step maps the names
	    produced so far through ``transform_feature_names``.
	    """
	    names = in_names
	    for name, trans in est.steps:
	        if trans is not None:
	            names = transform_feature_names(trans, names)
	    return names


	@transform_feature_names.register(FeatureUnion)
	def _union_names(est, in_names=None):
	    """Concatenate the output names of every transformer in a union.

	    Each transformer yielded by ``est._iter()`` receives the same
	    ``in_names``; its output names are prefixed with the transformer's name
	    as ``'<transformer name>:<feature name>'``.
	    """
	    return ['{}:{}'.format(trans_name, feat_name)
	            for trans_name, trans, _ in est._iter()
	            for feat_name in transform_feature_names(trans, in_names)]


	@transform_feature_names.register(SelectorMixin)
	def _select_names(est, in_names=None):
	    """Return the input names at the indices reported by the selector.

	    The indices come from ``est.get_support(indices=True)`` and index
	    ``in_names`` directly.
	    """
	    return [in_names[i] for i in est.get_support(indices=True)]


	def _formatted_names(fmt):
	    """Build a name transformer that wraps each input name with ``fmt``.

	    The returned function takes ``(estimator, in_names)`` and returns
	    ``fmt.format(name)`` for every name in ``in_names``; the estimator
	    argument is ignored.
	    """
	    def transform_names(self, in_names=None):
	        return [fmt.format(name) for name in in_names]
	    return transform_names


	def _component_names(fmt, attr):
	    """Build a name transformer that numbers the output components.

	    The returned function takes ``(estimator, in_names)``, reads the
	    integer stored in the estimator attribute named ``attr`` and returns
	    ``fmt.format(i)`` for each ``i`` below it. ``in_names`` is ignored.
	    """
	    def transform_names(self, in_names=None):
	        return [fmt.format(i) for i in range(getattr(self, attr))]
	    return transform_names


	# These transformers are registered with a formatter that wraps every
	# input name, e.g. 'tfidf(<name>)'.
	transform_feature_names.register(TfidfTransformer)(_formatted_names('tfidf({})'))
	transform_feature_names.register(Imputer)(_formatted_names('impute({})'))


	def _lda_topic_names(est, in_names=None):
	    """Return ``'topic(i)'`` for every topic of a LatentDirichletAllocation.

	    scikit-learn 0.19 renamed ``n_topics`` to ``n_components``; from then
	    on ``n_topics`` defaults to ``None`` and it is removed in 0.21.
	    ``n_topics`` is used when it is set, otherwise ``n_components``.
	    ``in_names`` is ignored.
	    """
	    n_topics = getattr(est, 'n_topics', None)
	    if n_topics is None:
	        n_topics = est.n_components
	    return ['topic({})'.format(i) for i in range(n_topics)]


	transform_feature_names.register(LatentDirichletAllocation)(_lda_topic_names)


	class FunctionTransformer(FunctionTransformer):
	    """``FunctionTransformer`` that also carries a feature-name function.

	    Adds the ``feature_name_func`` constructor parameter, stored unchanged
	    on the instance; every other parameter is forwarded to the base class.
	    """

	    def __init__(self, func=None, inverse_func=None,
	                 feature_name_func=None,
	                 validate=True,
	                 accept_sparse=False, pass_y=False,
	                 kw_args=None, inv_kw_args=None):
	        # ``super(type(self), self)`` recurses forever once this class is
	        # subclassed, so the class is named explicitly. The name is looked
	        # up at call time, when it refers to this class.
	        super(FunctionTransformer, self).__init__(
	            func=func, inverse_func=inverse_func, validate=validate,
	            accept_sparse=accept_sparse, pass_y=pass_y,
	            kw_args=kw_args, inv_kw_args=inv_kw_args)
	        self.feature_name_func = feature_name_func


	@transform_feature_names.register(FunctionTransformer)
	def _function_transformer_names(est, in_names=None):
	    """Map feature names through the transformer's ``feature_name_func``.

	    Raises
	    ------
	    NotImplementedError
	        If ``est.feature_name_func`` is ``None`` (the constructor default),
	        instead of failing with "'NoneType' object is not callable".
	    """
	    name_func = est.feature_name_func
	    if name_func is None:
	        raise NotImplementedError(
	            'transform_feature_names not available for {}: '
	            'no feature_name_func was given'.format(est))
	    return name_func(in_names)