Skip to content

Instantly share code, notes, and snippets.

@dashesy
Last active December 19, 2015 05:19
Show Gist options
  • Save dashesy/5902939 to your computer and use it in GitHub Desktop.
Save dashesy/5902939 to your computer and use it in GitHub Desktop.
Joint classifier inspired by FeatureUnion
'''
Created on June 26, 2013
@author: dashesy
Purpose: Like FeatureUnion but can apply different X,y to different transformers
Each transformer can accept a different number of samples or features
but the result should be concatenatable
'''
import numpy as np
import warnings
from scipy import sparse
from scipy import linalg
from sklearn.pipeline import FeatureUnion, _fit_one_transformer, _transform_one, _fit_transform_one
from sklearn.utils import array2d
from sklearn.externals.joblib import Parallel, delayed
class JointClassifier(FeatureUnion):
    '''Concatenates results of multiple transformer objects.

    Unlike FeatureUnion, can accept a different X, y pair for each
    transformer; each transformer may see a different number of samples
    or features, but the transformed results must be concatenatable
    column-wise.

    Parameters
    ----------
    transformer_list : list of (name, transformer)
        List of transformer objects to be applied to the data.
    n_jobs : int, optional
        Number of jobs to run in parallel (default 1).
    transformer_weights : dict, optional
        Multiplicative weights for features per transformer.
        Keys are transformer names, values the weights.
    '''

    def __init__(self, transformer_list, n_jobs=1, transformer_weights=None):
        super(JointClassifier, self).__init__(
            transformer_list, n_jobs=n_jobs,
            transformer_weights=transformer_weights)

    def _check_args(self, n_args, per_transformer):
        # Fail fast with a clear message instead of an opaque IndexError
        # when the number of positional arguments does not match the
        # number of transformers.
        expected = per_transformer * len(self.transformer_list)
        if n_args != expected:
            raise ValueError(
                "expected %d positional arguments for %d transformers, "
                "got %d" % (expected, len(self.transformer_list), n_args))

    def _hstack(self, Xs):
        # Column-wise concatenation shared by fit_transform and transform:
        # stay sparse (CSR) if any individual result is sparse.
        if any(sparse.issparse(f) for f in Xs):
            return sparse.hstack(Xs).tocsr()
        return np.hstack(Xs)

    def fit(self, *Xy):
        """Fit n'th transformer using n'th X and y.

        Parameters
        ----------
        *Xy : positional arguments with series of X followed by y
            (X1, y1, X2, y2, ...)
            X : array-like or sparse matrix, shape (n_samples, n_features)
                Input data, used to fit transformers.
            y : array-like, shape = [n_samples]
                Label for data (pass None if not needed).

        Returns
        -------
        self
        """
        self._check_args(len(Xy), 2)
        transformers = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_one_transformer)(trans, Xy[idx * 2], Xy[idx * 2 + 1])
            for idx, (name, trans) in enumerate(self.transformer_list))
        # BUG FIX: with n_jobs > 1 fitting happens in worker processes, so
        # the fitted transformers returned by Parallel must be written back
        # (previously they were discarded, leaving self unfitted).
        self.transformer_list = [
            (name, fitted) for (name, _), fitted
            in zip(self.transformer_list, transformers)]
        return self

    def fit_transform(self, *Xy, **fit_params):
        """Fit n'th transformer using n'th X and y, transform that data
        and concatenate results.

        Parameters
        ----------
        *Xy : positional arguments with series of X followed by y
            (X1, y1, X2, y2, ...)
            X : array-like or sparse matrix, shape (n_samples, n_features)
                Input data, used to fit transformers.
            y : array-like, shape = [n_samples]
                Label for data (pass None if not needed).

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.
        """
        self._check_args(len(Xy), 2)
        Xs = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_transform_one)(trans, name, Xy[idx * 2],
                                        Xy[idx * 2 + 1],
                                        self.transformer_weights,
                                        **fit_params)
            for idx, (name, trans) in enumerate(self.transformer_list))
        return self._hstack(Xs)

    def transform(self, *X):
        """Transform n'th X separately by n'th transformer, concatenate
        results.

        Parameters
        ----------
        *X : positional arguments with series of X (X1, X2, ...)
            X : array-like or sparse matrix, shape (n_samples, n_features)
                Input data to be transformed.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.
        """
        self._check_args(len(X), 1)
        Xs = Parallel(n_jobs=self.n_jobs)(
            delayed(_transform_one)(trans, name, X[idx],
                                    self.transformer_weights)
            for idx, (name, trans) in enumerate(self.transformer_list))
        return self._hstack(Xs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment