Skip to content

Instantly share code, notes, and snippets.

@vitiral
Last active August 29, 2015 14:15
Show Gist options
  • Save vitiral/13278001b1a0b0cde647 to your computer and use it in GitHub Desktop.
Save vitiral/13278001b1a0b0cde647 to your computer and use it in GitHub Desktop.
Functions that should be standard to pandas dataframe objects
import pandas as pd
from . import tools as _t
def resolve_header(header):
    '''Normalise a user-supplied header.

    A dict is treated as a template: its structure is converted to a
    header by ``_t.get_header``. Any other value, ``None`` included, is
    handed back unchanged.'''
    if isinstance(header, dict):
        return _t.get_header(header)
    # ``None`` and ready-made headers pass straight through.
    return header
def dataframe_dict(data, index=None, filler='', header=None):
    '''Build a DataFrame from plain python containers.

    ``data`` may be a dict (used as given) or a sequence of dicts, in
    which case it is first unpacked into a dict with
    ``_t.unpack_dicts`` according to the header.

    When ``header`` is not supplied it is detected with ``_t.get_header``
    (from ``data`` itself for a dict, from ``data[0]`` otherwise); a dict
    passed as ``header`` is converted by ``resolve_header``. Keys are
    padded with ``filler`` by ``_t.fill_dict`` before the frame is built,
    so nested input ends up with multiindex columns.

    If ``index`` is given, it is set as the index and the frame is sorted
    by it.'''
    is_mapping = isinstance(data, dict)

    header = resolve_header(header)
    if header is None:
        # Nothing supplied: detect the header from the data itself.
        header = _t.get_header(data if is_mapping else data[0])

    if not is_mapping:
        # Only non-dict input needs unpacking; for dict input the header
        # is not used any further.
        data = _t.unpack_dicts(data, header)

    frame = pd.DataFrame.from_dict(_t.fill_dict(data, filler))
    if index is not None:
        frame.set_index(index, inplace=True)
        frame.sort_index(inplace=True)
    return frame
from unittest import TestCase
from pandas.util.testing import assert_frame_equal
from . import dataframe
from . import tools
# Shared fixture: one record mapping each letter to its position, plus a
# nested copy of that mapping under 'many'; the list holds ten references
# to that same record.
strings = 'abcdefg'
testdata = dict(zip(strings, range(len(strings))))
testdata['many'] = dict(testdata)
testdata = [testdata] * 10
class TestLoad(TestCase):
    '''Loading checks for ``dataframe.dataframe_dict`` using ``testdata``.'''

    def test_list(self):
        '''Load the list-of-dicts fixture and print the resulting frame.'''
        print()
        frame = dataframe.dataframe_dict(testdata)
        print(frame)

    def test_dict(self):
        '''Loading pre-unpacked columns must equal loading the raw rows.'''
        print()
        columns = tools.unpack_dicts(testdata, tools.get_header(testdata[0]))
        print(columns)
        from_columns = dataframe.dataframe_dict(columns)
        print(from_columns)
        from_rows = dataframe.dataframe_dict(testdata)
        assert_frame_equal(from_columns, from_rows, check_names=True)
def dict_depth(d, depth=0):
    '''Return the deepest level of dict nesting in ``d``, added to ``depth``.

    A value that is not a dict, or a dict that is empty, adds no level.'''
    if isinstance(d, dict) and d:
        # One level for this dict, then the deepest of its children.
        return max(dict_depth(child, depth + 1) for child in d.values())
    return depth
def get_header(item, extra_levels=None, filler=''):
    '''Return the header of a (possibly nested) dictionary.

    The header is a list of tuples, one per non-dict value. Each tuple
    holds the keys on the path from the top level down to that value,
    listed in the dictionary's iteration order. Nested dicts are expanded
    recursively; an empty nested dict contributes no entries.

    ``extra_levels`` and ``filler`` have never influenced the result.
    They are still accepted so existing callers keep working, but they
    are ignored (the depth of ``item`` is no longer computed for them).'''
    keys = []
    for key, value in item.items():
        if isinstance(value, dict):
            # Prefix every path found below with the key leading to it.
            keys.extend((key,) + tail for tail in get_header(value))
        else:
            keys.append((key,))
    return keys
def get_item(dic, item):
    '''Look up a nested value by following the keys in ``item`` one by one.

    Each element of ``item`` is used to subscript the result of the
    previous lookup, starting from ``dic``. An empty ``item`` returns
    ``dic`` itself.'''
    node = dic
    for part in item:
        node = node[part]
    return node
def fill_dict(data, filler):
    '''Return a new dict whose keys are all tuples of the same length.

    Keys that are not tuples are wrapped in a one-element tuple. Every
    key shorter than the longest one is then right-padded with
    ``filler``. Values are carried over untouched and the original order
    is kept. An empty ``data`` yields an empty dict.'''
    # Convert all keys to tuples, keeping each paired with its value.
    pairs = [
        (key if isinstance(key, tuple) else (key,), value)
        for key, value in data.items()
    ]
    if not pairs:
        # Nothing to pad; max() below would raise on an empty sequence.
        return {}
    width = max(len(key) for key, _ in pairs)
    return {
        key + (filler,) * (width - len(key)): value
        for key, value in pairs
    }
def unpack_dicts(data, header):
    '''Turn a list of dictionaries into a dictionary of lists.

    The result has one list per entry of ``header``. For every
    dictionary in ``data``, in order, the value found by ``get_item``
    for each header entry is appended to that entry's list.'''
    columns = {}
    for path in header:
        columns[path] = []
    # Walk the rows once, filling every column as we go.
    for record in data:
        for path in header:
            columns[path].append(get_item(record, path))
    return columns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment