Last active
December 19, 2015 05:19
-
-
Save dashesy/5902939 to your computer and use it in GitHub Desktop.
Joint classifier inspired by FeatureUnion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Created on June 26, 2013
@author: dashesy
Purpose: Like FeatureUnion but can apply different X,y to different transformers
Each transformer could accept a different number of samples or features,
but the results should be concatenable
'''
import numpy as np | |
import warnings | |
from scipy import sparse | |
from scipy import linalg | |
from sklearn.pipeline import FeatureUnion, _fit_one_transformer, _transform_one, _fit_transform_one | |
from sklearn.utils import array2d | |
from sklearn.externals.joblib import Parallel, delayed | |
class JointClassifier(FeatureUnion):
    """Concatenate the results of multiple transformer objects.

    Unlike ``FeatureUnion``, each transformer receives its own ``X`` (and
    ``y``): the n'th entry of ``transformer_list`` is paired with the n'th
    input passed to ``fit``, ``fit_transform`` and ``transform``.  The
    per-transformer outputs are stacked horizontally, so they must be
    compatible with ``numpy.hstack`` / ``scipy.sparse.hstack``.

    Parameters
    ----------
    transformer_list : list of (name, transformer)
        List of transformer objects to be applied to the data.
    n_jobs : int, optional
        Number of jobs to run in parallel (default 1).
    transformer_weights : dict, optional
        Multiplicative weights for features per transformer.
        Keys are transformer names, values the weights.
    """

    def __init__(self, transformer_list, n_jobs=1, transformer_weights=None):
        super(JointClassifier, self).__init__(
            transformer_list, n_jobs=n_jobs,
            transformer_weights=transformer_weights)

    def _check_input_count(self, inputs, per_transformer, method):
        """Raise ``ValueError`` unless every transformer has its inputs.

        ``inputs`` is the tuple of positional arguments received by
        ``method``; ``per_transformer`` is how many of them each transformer
        consumes (2 for X,y pairs, 1 for X only).
        """
        n_transformers = len(self.transformer_list)
        expected = per_transformer * n_transformers
        if len(inputs) != expected:
            raise ValueError(
                "%s() expected %d positional arguments (%d for each of the "
                "%d transformers), got %d"
                % (method, expected, per_transformer, n_transformers,
                   len(inputs)))

    @staticmethod
    def _hstack_outputs(Xs):
        """Stack per-transformer outputs column-wise.

        Returns a CSR matrix as soon as one output is sparse, a dense
        ``numpy`` array otherwise.
        """
        if any(sparse.issparse(f) for f in Xs):
            return sparse.hstack(Xs).tocsr()
        return np.hstack(Xs)

    def fit(self, *Xy):
        """Fit the n'th transformer using the n'th X and y.

        Parameters
        ----------
        *Xy : positional arguments, a series of X each followed by its y
            (X1, y1, X2, y2, ...), one pair per transformer.

            X : array-like or sparse matrix, shape (n_samples, n_features)
                Input data, used to fit the transformer.
            y : array-like, shape = [n_samples]
                Labels for the data (pass None if not needed).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the number of positional arguments is not exactly two per
            transformer.
        """
        self._check_input_count(Xy, 2, "fit")
        # NOTE(review): the values returned by the helper are discarded; with
        # n_jobs != 1 joblib may run the fit on copies of the transformers in
        # worker processes -- confirm the fitted state reaches
        # self.transformer_list for the sklearn version in use.
        Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_one_transformer)(trans, Xy[2 * idx], Xy[2 * idx + 1])
            for idx, (_, trans) in enumerate(self.transformer_list))
        return self

    def fit_transform(self, *Xy, **fit_params):
        """Fit the n'th transformer using the n'th X and y, transform that
        data and concatenate the results.

        Parameters
        ----------
        *Xy : positional arguments, a series of X each followed by its y
            (X1, y1, X2, y2, ...), one pair per transformer.

            X : array-like or sparse matrix, shape (n_samples, n_features)
                Input data, used to fit the transformer.
            y : array-like, shape = [n_samples]
                Labels for the data (pass None if not needed).
        **fit_params : keyword arguments
            Forwarded unchanged to the fit/transform helper of every
            transformer.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.

        Raises
        ------
        ValueError
            If the number of positional arguments is not exactly two per
            transformer.
        """
        self._check_input_count(Xy, 2, "fit_transform")
        Xs = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_transform_one)(
                trans, name, Xy[2 * idx], Xy[2 * idx + 1],
                self.transformer_weights, **fit_params)
            for idx, (name, trans) in enumerate(self.transformer_list))
        return self._hstack_outputs(Xs)

    def transform(self, *X):
        """Transform the n'th X by the n'th transformer, concatenate results.

        Parameters
        ----------
        *X : positional arguments, a series of X (X1, X2, ...), one per
            transformer.

            X : array-like or sparse matrix, shape (n_samples, n_features)
                Input data to be transformed.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.

        Raises
        ------
        ValueError
            If the number of positional arguments differs from the number of
            transformers.
        """
        self._check_input_count(X, 1, "transform")
        Xs = Parallel(n_jobs=self.n_jobs)(
            delayed(_transform_one)(
                trans, name, X[idx], self.transformer_weights)
            for idx, (name, trans) in enumerate(self.transformer_list))
        return self._hstack_outputs(Xs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment