Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
from sklearn.feature_extraction import DictVectorizer
def one_hot_dataframe(data, cols, replace=False):
""" Takes a dataframe and a list of columns that need to be encoded.
Returns a 3-tuple comprising the data, the vectorized data,
and the fitted vectorizer."""
vec = DictVectorizer(sparse=False)
vecData = pd.DataFrame(vec.fit_transform(data[cols].T.to_dict().values()))
vecData.columns = vec.get_feature_names()