Skip to content

Instantly share code, notes, and snippets.

@ramhiser
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ramhiser/9126072 to your computer and use it in GitHub Desktop.
Save ramhiser/9126072 to your computer and use it in GitHub Desktop.
Create a Pandas DataFrame with columns named using a MultiIndex
from itertools import chain, repeat

import numpy as np
import pandas as pd

# Build a small random DataFrame whose columns form a two-level MultiIndex:
# level 0 holds the feature label, level 1 the value label within a feature.

# Seed NumPy's global RNG so the generated numbers are reproducible.
np.random.seed(42)
num_rows = 10
num_features = 5
num_feature_values = 3

# Level labels: 'Feature0', 'Feature1', ... and 'Value0', 'Value1', ...
features = ['Feature' + str(i) for i in range(num_features)]
feature_values = ['Value' + str(i) for i in range(num_feature_values)]

# Repeat every feature label once per value label
# (Feature0, Feature0, Feature0, Feature1, ...) so that it lines up with the
# value labels tiled below. `itertools.izip` is not used because it does not
# exist on Python 3; this form runs on both Python 2 and Python 3.
features_list = list(chain.from_iterable(
    repeat(feature, num_feature_values) for feature in features))
feature_values_list = feature_values * num_features

# Materialize the pairs: on Python 3 `zip` is a one-shot iterator, and a list
# keeps `feature_tuples` reusable after the index has been built.
feature_tuples = list(zip(features_list, feature_values_list))
index = pd.MultiIndex.from_tuples(feature_tuples)

# Standard-normal draws scaled by 0.1 and shifted by 0.5, one column per
# (feature, value) pair.
df = 0.1 * np.random.randn(num_rows, num_features * num_feature_values) + 0.5
df = pd.DataFrame(df, columns=index)

# Preview of the first rows; only displayed in an interactive session.
df.head()
# The following is a much simpler approach.
# Uses MultiIndex.from_product.
import numpy as np
import pandas as pd
# Reproducible demo frame: random values under a two-level column index that
# is the cartesian product of the feature labels and the value labels.
np.random.seed(42)
num_rows, num_features, num_feature_values = 10, 5, 3

# Labels for each level of the column index.
features = list(map('Feature{}'.format, range(num_features)))
feature_values = list(map('Value{}'.format, range(num_feature_values)))

# from_product pairs every feature with every value, feature-major.
index = pd.MultiIndex.from_product([features, feature_values])

# One column per (feature, value) pair; draws are scaled by 0.1 and
# shifted by 0.5.
df = pd.DataFrame(
    0.1 * np.random.randn(num_rows, len(index)) + 0.5,
    columns=index,
)

# Preview of the first rows; only displayed in an interactive session.
df.head()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment