Skip to content

Instantly share code, notes, and snippets.

@walterreade
Last active January 16, 2016 22:56
Show Gist options
  • Save walterreade/1e7dec8682e044ae9ed9 to your computer and use it in GitHub Desktop.
Save walterreade/1e7dec8682e044ae9ed9 to your computer and use it in GitHub Desktop.
Create Sparse Dummies
# http://www.dataiku.com/blog/2015/08/24/xgboost_and_dss.html
import numpy as np
try:
    # Top-level export; works on current pandas releases.
    from pandas import Categorical
except ImportError:
    # Legacy module path, kept only for pandas builds lacking the top-level export.
    from pandas.core.categorical import Categorical
from scipy.sparse import csr_matrix
def sparse_dummies(categorical_values):
    """Return a one-hot (dummy) encoding of ``categorical_values`` as a CSR matrix.

    Args:
        categorical_values: 1-D sequence of category labels (list, NumPy
            array, pandas Series, ...). Anything accepted by the
            ``Categorical`` constructor works.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape ``(n_rows, n_categories)``
        holding ``1.0`` at ``(i, code_i)``, where ``code_i`` is the integer
        code pandas assigns to the label in row ``i``. Rows whose value is
        missing (``None`` / NaN) are left entirely zero.

    Notes:
        The width is taken from the number of categories rather than inferred
        from the largest observed code, so input that is already categorical
        keeps one column per declared category, and empty input yields a
        ``(0, 0)`` matrix instead of raising.
    """
    # ``Categorical.from_array`` has been removed from pandas; calling the
    # constructor directly performs the same label -> integer-code encoding.
    encoded = Categorical(categorical_values)
    codes = np.asarray(encoded.codes)
    n_rows = len(codes)
    n_cols = len(encoded.categories)

    # pandas marks missing values with code -1, which is not a valid column
    # index for a sparse matrix. Keep only the rows that carry a real code.
    present = codes >= 0
    row_numbers = np.flatnonzero(present)
    col_numbers = codes[present]
    ones = np.ones(len(row_numbers))

    # Pass the shape explicitly: once rows can be dropped (or the input is
    # empty) it can no longer be inferred from the index arrays.
    return csr_matrix((ones, (row_numbers, col_numbers)), shape=(n_rows, n_cols))
# Usage example. NOTE(review): `df` is not defined anywhere in this snippet;
# presumably it is a pandas DataFrame with columns VAR_0001 and VAR_0002
# supplied by the caller/notebook session -- confirm before running.
# Encode a single column. The result is not bound to a name, so it is only
# visible when evaluated interactively (e.g. as the last line of a notebook cell).
sparse_dummies(df.VAR_0001)
# Concatenate with other dummies: encode each column separately, then stack
# the resulting sparse matrices side by side.
from scipy.sparse import hstack
cat1 = sparse_dummies(df.VAR_0001)
cat2 = sparse_dummies(df.VAR_0002)
# format="csr" asks hstack for a CSR result. As above, the value is not
# assigned, so it is discarded when this runs as a plain script.
hstack((cat1,cat2), format="csr")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment