Skip to content

Instantly share code, notes, and snippets.

@wilsonfreitas
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wilsonfreitas/597532ea0915d224b354 to your computer and use it in GitHub Desktop.
Save wilsonfreitas/597532ea0915d224b354 to your computer and use it in GitHub Desktop.
Quick and dirty implementation of a data frame for getting data properly structured.

tinydf

tinydf is a Python module that provides a very simple DataFrame implementation, focused on a few key features that I find particularly relevant:

  • export DataFrame as JSON, CSV or Python dict
  • create attributes
  • ...
import csv
import json

try:
    import StringIO
except ImportError:
    # Python 3 has no StringIO module; io offers the same StringIO class, so
    # alias it to keep StringIO.StringIO() working on both major versions.
    import io as StringIO
class _MissingAttribute(AttributeError, KeyError):
    """Raised when a DataFrame has no attribute with the requested name.

    It is an AttributeError so that hasattr(), getattr() with a default and
    the copy/pickle machinery behave normally, and also a KeyError so that
    code written against the earlier behaviour (a bare KeyError) still works.
    """


class DataFrame(object):
    """Minimal column-oriented data frame with free-form attributes.

    Columns are passed to the constructor as keyword arguments, each mapping
    a column name to a sequence of values. Any other attribute assigned to an
    instance is recorded in ``attributes`` and is included in the ``csv``,
    ``dict`` and ``json`` exports.
    """

    # Names stored directly on the instance; every other name assigned to an
    # instance is routed into ``attributes`` by __setattr__.
    keys = ('data', 'attributes', 'headers')

    def __init__(self, **kwargs):
        """Create a data frame whose columns are the given keyword arguments."""
        # A real list is required: headers are indexed and concatenated
        # later, which a Python 3 dict view supports neither of.
        self.headers = list(kwargs)
        self.data = kwargs
        self.attributes = {}

    def __setattr__(self, key, value):
        """Store reserved names on the instance and the rest in ``attributes``."""
        if key in DataFrame.keys:
            self.__dict__[key] = value
        else:
            self.attributes[key] = value

    def __getattr__(self, key):
        """Look up ``key`` among the user-defined attributes.

        Python calls this only after regular attribute lookup has failed.

        Raises:
            AttributeError (also a KeyError): if no such attribute exists.
        """
        if key not in DataFrame.keys:
            # Go through __dict__ so that an instance created without
            # __init__ (for example while being copied or unpickled) reports
            # a missing attribute instead of failing on ``attributes`` itself.
            attributes = self.__dict__.get('attributes')
            if attributes is not None and key in attributes:
                return attributes[key]
        raise _MissingAttribute(key)

    def add(self, **row):
        """Append one row, given as ``column=value`` keyword arguments.

        Keys of ``row`` that are not headers are ignored.

        Raises:
            KeyError: if ``row`` lacks a value for one of the headers. The
                data frame is left untouched in that case.
        """
        # Validate before mutating so a bad row cannot leave the columns
        # with different lengths.
        for h in self.headers:
            if h not in row:
                raise KeyError(h)
        for h in self.headers:
            self.data.setdefault(h, []).append(row[h])

    def _rows(self):
        """Return the data as a list of rows, each ordered like ``headers``."""
        if not self.data:
            return []
        # The first column determines the number of rows.
        n = len(self.data[self.headers[0]])
        return [[self.data[h][i] for h in self.headers] for i in range(n)]

    @property
    def csv(self):
        """CSV text: a header line, then one line per row.

        The attributes are appended as extra columns whose value is repeated
        on every row.
        """
        attr_names = list(self.attributes)
        attr_values = [self.attributes[name] for name in attr_names]
        buf = StringIO.StringIO()
        try:
            writer = csv.writer(buf)
            writer.writerow(self.headers + attr_names)
            for row in self._rows():
                writer.writerow(row + attr_values)
            return buf.getvalue()
        finally:
            # Release the buffer even when writing a row fails.
            buf.close()

    @property
    def dict(self):
        """A copy of the attributes, plus the columns under ``'data'`` if any."""
        df_dict = self.attributes.copy()
        if self.data:
            df_dict['data'] = self.data
        return df_dict

    @property
    def json(self):
        """JSON text of the data frame, with the data as a list of row objects.

        With attributes present the result is an object holding them plus the
        rows under ``"data"``; with data but no attributes it is the bare
        list of rows.
        """
        payload = self.attributes.copy()
        if self.data:
            table = [dict(zip(self.headers, row)) for row in self._rows()]
            if payload:
                payload['data'] = table
            else:
                payload = table
        return json.dumps(payload)
if __name__ == '__main__':
    # Self-checks for the ``dict`` export, run only when executed as a script.

    # Case 1: nothing stored at all -> empty dict.
    df = DataFrame()
    assert df.dict == {}

    # Case 2: a single attribute and no columns.
    df = DataFrame()
    df.attr1 = 1
    assert df.dict == {'attr1': 1}

    # Case 3: two columns plus one attribute; columns appear under 'data'.
    data = {'key1': range(5), 'key2': ['a'] * 5}
    df = DataFrame(**data)
    df.attr1 = 'attr1'
    assert df.dict == {'attr1': 'attr1', 'data': data}

    # Case 4: column names that are not valid identifiers, two attributes.
    data = {'1 key': range(5), '2 key': ['a'] * 5}
    df = DataFrame(**data)
    df.attr1 = 'attr1'
    df.attr2 = 1
    assert df.dict == {'attr1': 'attr1', 'attr2': 1, 'data': data}

    # Uncomment to inspect the serialized forms by eye:
    # print(df.json)
    # print(df.csv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment