# pjbull/SparseInteractions.py

## SparseInteractions.py
from itertools import combinations

import numpy as np
from scipy import sparse
from sklearn.base import BaseEstimator, TransformerMixin


class SparseInteractions(BaseEstimator, TransformerMixin):
    """Transformer that appends products of column combinations to a matrix.

    For each ``k`` in ``2..degree`` and each combination of ``k`` distinct
    input columns, the element-wise product of those columns is appended
    after the original columns. The input is converted to a
    ``scipy.sparse.csc_matrix`` and the products are computed on sparse
    columns.

    Parameters
    ----------
    degree : int, default=2
        Largest number of columns multiplied together. Values below 2
        produce no interaction columns.
    feature_name_separator : str, default="_"
        String placed between the source column names when naming an
        interaction column.

    Attributes
    ----------
    orig_col_names : numpy.ndarray of str
        Names of the input columns; set by ``transform``.
    feature_names : list of str
        Names of every output column, in output order; set by ``transform``.
    """

    def __init__(self, degree=2, feature_name_separator="_"):
        self.degree = degree
        self.feature_name_separator = feature_name_separator

    def fit(self, X, y=None):
        """Do nothing and return ``self``; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``X`` with the interaction columns stacked on its right.

        Column names are taken from ``X.columns`` when that attribute exists,
        otherwise the positional indices ``"0"``, ``"1"``, ... are used.
        Also sets ``orig_col_names`` and ``feature_names`` on the instance.
        """
        # Read the labels BEFORE the sparse conversion: a csc_matrix has no
        # ``columns`` attribute, so checking afterwards always loses them.
        col_names = None
        if hasattr(X, "columns"):
            # Coerce to str so non-string labels can be joined into names.
            col_names = [str(c) for c in X.columns]

        if not sparse.isspmatrix_csc(X):
            X = sparse.csc_matrix(X)

        if col_names is None:
            col_names = [str(i) for i in range(X.shape[1])]
        self.orig_col_names = np.array(col_names, dtype=str)

        return self._create_sparse_interactions(X)

    def get_feature_names(self):
        """Return the output column names recorded by the last ``transform``.

        Raises ``AttributeError`` if ``transform`` has not been called yet.
        """
        return self.feature_names

    def _create_sparse_interactions(self, X):
        """Build the interaction columns for CSC matrix ``X`` and stack them.

        Stores the names of all output columns in ``self.feature_names`` and
        returns ``sparse.hstack`` of ``X`` followed by the product columns.
        """
        names = self.orig_col_names.tolist()
        separator = self.feature_name_separator

        # Slice every column once up front instead of re-slicing it for each
        # combination it takes part in.
        columns = [X[:, j] for j in range(X.shape[1])]

        feature_names = list(names)
        interactions = []
        for sub_degree in range(2, self.degree + 1):
            for col_ixs in combinations(range(len(columns)), sub_degree):
                # Name of the new column: source names joined by the separator.
                feature_names.append(separator.join(names[j] for j in col_ixs))

                # Element-wise product of the selected columns.
                product = columns[col_ixs[0]]
                for j in col_ixs[1:]:
                    product = product.multiply(columns[j])
                interactions.append(product)

        self.feature_names = feature_names
        return sparse.hstack([X] + interactions)
	from sklearn.base import BaseEstimator, TransformerMixin
	from scipy import sparse
	from itertools import combinations


	class SparseInteractions(BaseEstimator, TransformerMixin):
	    """Transformer that appends products of column combinations to a matrix.

	    For each ``k`` in ``2..degree`` and each combination of ``k`` distinct
	    input columns, the element-wise product of those columns is appended
	    after the original columns. The input is converted to a
	    ``scipy.sparse.csc_matrix`` and the products are computed on sparse
	    columns.

	    Parameters
	    ----------
	    degree : int, default=2
	        Largest number of columns multiplied together. Values below 2
	        produce no interaction columns.
	    feature_name_separator : str, default="_"
	        String placed between the source column names when naming an
	        interaction column.

	    Attributes
	    ----------
	    orig_col_names : numpy.ndarray of str
	        Names of the input columns; set by ``transform``.
	    feature_names : list of str
	        Names of every output column, in output order; set by ``transform``.
	    """

	    def __init__(self, degree=2, feature_name_separator="_"):
	        self.degree = degree
	        self.feature_name_separator = feature_name_separator

	    def fit(self, X, y=None):
	        """Do nothing and return ``self``; nothing is learned from the data."""
	        return self

	    def transform(self, X):
	        """Return ``X`` with the interaction columns stacked on its right.

	        Column names are taken from ``X.columns`` when that attribute exists,
	        otherwise the positional indices ``"0"``, ``"1"``, ... are used.
	        Also sets ``orig_col_names`` and ``feature_names`` on the instance.
	        """
	        # Read the labels BEFORE the sparse conversion: a csc_matrix has no
	        # ``columns`` attribute, so checking afterwards always loses them.
	        col_names = None
	        if hasattr(X, "columns"):
	            # Coerce to str so non-string labels can be joined into names.
	            col_names = [str(c) for c in X.columns]

	        if not sparse.isspmatrix_csc(X):
	            X = sparse.csc_matrix(X)

	        if col_names is None:
	            col_names = [str(i) for i in range(X.shape[1])]
	        self.orig_col_names = np.array(col_names, dtype=str)

	        return self._create_sparse_interactions(X)

	    def get_feature_names(self):
	        """Return the output column names recorded by the last ``transform``.

	        Raises ``AttributeError`` if ``transform`` has not been called yet.
	        """
	        return self.feature_names

	    def _create_sparse_interactions(self, X):
	        """Build the interaction columns for CSC matrix ``X`` and stack them.

	        Stores the names of all output columns in ``self.feature_names`` and
	        returns ``sparse.hstack`` of ``X`` followed by the product columns.
	        """
	        names = self.orig_col_names.tolist()
	        separator = self.feature_name_separator

	        # Slice every column once up front instead of re-slicing it for each
	        # combination it takes part in.
	        columns = [X[:, j] for j in range(X.shape[1])]

	        feature_names = list(names)
	        interactions = []
	        for sub_degree in range(2, self.degree + 1):
	            for col_ixs in combinations(range(len(columns)), sub_degree):
	                # Name of the new column: source names joined by the separator.
	                feature_names.append(separator.join(names[j] for j in col_ixs))

	                # Element-wise product of the selected columns.
	                product = columns[col_ixs[0]]
	                for j in col_ixs[1:]:
	                    product = product.multiply(columns[j])
	                interactions.append(product)

	        self.feature_names = feature_names
	        return sparse.hstack([X] + interactions)