Skip to content

Instantly share code, notes, and snippets.

@saihttam
Created October 24, 2015 16:05
Show Gist options
  • Save saihttam/cad6d3d223fc8d769227 to your computer and use it in GitHub Desktop.
Save saihttam/cad6d3d223fc8d769227 to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
def one_hot_dataframe(data, cols, replace=False):
    """One-hot encode selected columns of a DataFrame with a DictVectorizer.

    Every row of ``data[cols]`` is converted to a ``{column: value}`` dict
    and passed to a dense ``DictVectorizer``. The encoded result is returned
    as a DataFrame that shares ``data``'s index and whose columns are the
    vectorizer's feature names.

    Args:
        data: The source pandas DataFrame.
        cols: List of column labels in ``data`` to encode.
        replace: When exactly ``True``, the returned data has ``cols``
            dropped and the encoded columns joined in their place.
            For any other value the data is returned as passed in.

    Returns:
        A 3-tuple ``(data, vec_data, vec)``: the (possibly replaced) data,
        the encoded DataFrame, and the fitted vectorizer.
    """
    vec = DictVectorizer(sparse=False)

    # One dict per row, in row order. Going through ``.T.to_dict()`` instead
    # keys the dicts by index label, so rows sharing a label collapse into
    # one and the encoded frame no longer lines up with ``data.index``.
    records = data[cols].to_dict(orient="records")
    encoded = vec.fit_transform(records)

    # ``get_feature_names`` was removed in scikit-learn 1.2; prefer its
    # replacement and fall back only on releases that predate it.
    if hasattr(vec, "get_feature_names_out"):
        feature_names = list(vec.get_feature_names_out())
    else:
        feature_names = vec.get_feature_names()

    vec_data = pd.DataFrame(encoded, index=data.index, columns=feature_names)

    # Identity check kept on purpose: only a literal ``True`` triggers the
    # replacement, matching what existing callers rely on.
    if replace is True:
        # NOTE: ``join`` aligns on index labels, so the result is only
        # row-for-row when ``data.index`` is unique.
        data = data.drop(cols, axis=1).join(vec_data)

    return (data, vec_data, vec)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment