Skip to content

Instantly share code, notes, and snippets.

@jseabold
Created September 8, 2011 21:21
Show Gist options
  • Save jseabold/1204771 to your computer and use it in GitHub Desktop.
Save jseabold/1204771 to your computer and use it in GitHub Desktop.
pandas PanelIndex from MultiIndex
from pandas import MultiIndex, Factor
import numpy as np
import pandas
def _ensure_like_indices(time, panels):
n_time = len(time)
n_panel = len(panels)
u_panels = np.unique(panels) # this sorts!
u_time = np.unique(time)
if len(u_time) == n_time:
time = np.tile(u_time, len(u_panels))
if len(u_panels) == n_panel:
panels = np.repeat(u_panels, len(u_time))
return time, panels
class PanelIndex(MultiIndex):
    """
    MultiIndex subclass with two levels named 'time' and 'panel'.

    Parameters
    ----------
    time : sequence
        Time identifiers; passed through ``_ensure_like_indices`` together
        with `panels` before being factorized.
    panels : sequence
        Panel identifiers; handled the same way as `time`.

    Notes
    -----
    The levels, labels and names are derived from the constructor arguments
    in both ``__new__`` and ``__init__``.  Nothing is stored on the class
    itself, so building one PanelIndex never modifies attributes shared
    with other instances.
    """

    @staticmethod
    def _index_parts(time, panels):
        """Return ``(levels, labels, names)`` for the given identifiers."""
        # ensure we're given full indices, else make them
        time, panels = _ensure_like_indices(time, panels)
        time_factor = Factor(time)
        panel_factor = Factor(panels)
        labels = [time_factor.labels, panel_factor.labels]
        levels = [time_factor.levels, panel_factor.levels]
        names = ['time', 'panel']
        return levels, labels, names

    def __new__(cls, time, panels):
        levels, labels, names = cls._index_parts(time, panels)
        return MultiIndex.__new__(cls, levels, labels, sortorder=None,
                                  names=names)

    def __init__(self, time, panels):
        # Recomputed from the arguments rather than read back from
        # class-level attributes, which would be shared (and overwritten)
        # across every PanelIndex that gets constructed.
        levels, labels, names = self._index_parts(time, panels)
        super(PanelIndex, self).__init__(levels=levels, labels=labels,
                                         sortorder=None, names=names)
def panel_index(time, panels):
    """
    Build a plain two-level MultiIndex named ('time', 'panel').

    Both inputs are first passed through ``_ensure_like_indices`` and then
    wrapped in ``Factor``; the factors' ``levels`` and ``labels`` become the
    levels and labels of the returned index (time first, panel second).

    Parameters
    ----------
    time : sequence
        Time identifiers.
    panels : sequence
        Panel identifiers.

    Returns
    -------
    MultiIndex
    """
    full_time, full_panels = _ensure_like_indices(time, panels)
    # Time first, panel second: this order fixes the level order below.
    factors = [Factor(full_time), Factor(full_panels)]
    codes = [factor.labels for factor in factors]
    uniques = [factor.levels for factor in factors]
    return MultiIndex(uniques, codes, sortorder=None,
                      names=['time', 'panel'])
if __name__ == "__main__":
    # Small demo: build a (time, panel) index two ways and use one of them
    # to index a DataFrame of dummy data.
    import string

    n_panel = 5
    n_items = 3
    # ascii_uppercase is available on both Python 2 and 3 and does not
    # depend on the locale, unlike the Python-2-only string.uppercase.
    panels = list(string.ascii_uppercase[:n_panel])
    years = np.arange(1960, 2001, 5)
    # Derived from the data so the reshape below cannot drift out of sync
    # with the year range.
    n_years = len(years)

    panel_idx = PanelIndex(years, panels)
    panel_idx2 = panel_index(years, panels)

    # One row per (year, panel) observation, one column per item.
    raw_panel = np.arange(n_panel * n_years * n_items).reshape(
        n_panel * n_years, n_items)
    # Transposed, so the (time, panel) index ends up on the columns.
    panel_df = pandas.DataFrame(raw_panel,
                                columns=['col1', 'col2', 'col3'],
                                index=panel_idx).T
    # Look up a single (time, panel) column; the result is not used.
    panel_df[1960, 'A']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment