Skip to content

Instantly share code, notes, and snippets.

@ZwodahS
Created September 21, 2015 11:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZwodahS/d627dd3210fcf4d3a613 to your computer and use it in GitHub Desktop.
Save ZwodahS/d627dd3210fcf4d3a613 to your computer and use it in GitHub Desktop.
"""
data_matrix.py
Author: Eric
github: ZwodahS
data_matrix is a simple module that helps you count entries in an N-dimensional matrix.
terminology:
data: a single entry in the matrix
tag: a tag is a single label given to a data
tags are like the rows/columns of a 2-dimensional matrix
"""
# Supported tag value types (used as the "type" of a tag definition).
INT = "INT"
STRING = "STR"
BOOL = "BOOL"
# Supported tag range kinds (used as the "range" of a tag definition).
RANGE = "RANGE"  # only for int
DISTINCT = "DISTINCT"


class DataMatrix(object):
    """Count entries along an arbitrary number of tagged dimensions.

    Counts are kept in a nested dict with one level per tag, in the order
    of ``self._tag_order``. The innermost dict of every fully specified
    path holds the tally under the key ``"count"``.

    Note: ``RANGE`` is accepted as the range kind of an ``INT`` tag, but no
    range query is implemented here; such tags are counted by exact value,
    exactly like ``DISTINCT`` ones.
    """

    def __init__(self, tags):
        """Create an empty matrix.

        tags : defines what each data point are and their valid values
            {
                "<name>" : { "type": (INT | STRING | BOOL),
                             "range": (RANGE | DISTINCT) }
            }

        Raises Exception if a definition has an unknown type, or a range
        kind that is not valid for its type.
        """
        self._init_tags(tags)

    def _init_tags(self, tags):
        """Validate every tag definition, then reset all internal state."""
        for tag in tags.values():
            self._assert_type(tag.get("type"))
            self._assert_range(tag.get("range"), tag.get("type"))
        self.tags = tags
        self._datas = []
        self._matrix = {}
        # Just fix on an order; which one doesn't really matter for now.
        self._tag_order = list(tags)

    def _assert_type(self, data_type):
        """Raise Exception unless data_type is INT, STRING or BOOL."""
        if data_type not in (INT, STRING, BOOL):
            raise Exception("Invalid data type {0}".format(data_type))

    def _assert_range(self, data_range, data_type):
        """Raise Exception unless data_range is allowed for data_type."""
        if data_type == INT:
            valid = (RANGE, DISTINCT)
        elif data_type in (STRING, BOOL):
            valid = (DISTINCT,)
        else:
            # Unknown type: nothing is valid.
            valid = ()
        if data_range not in valid:
            raise Exception("Invalid tag range '{0}' for type '{1}'".format(data_range, data_type))

    def _clean_value_for_tag(self, tag_name, value):
        """Coerce/validate value according to the declared type of tag_name.

        INT values go through int(), BOOL values through bool(), and STRING
        values must already be str instances. Returns the cleaned value.

        Raises Exception if tag_name is not a defined tag or the value
        cannot be accepted for the tag's type.
        """
        tag = self.tags.get(tag_name)
        if tag is None:
            raise Exception("Invalid name for tag {0}".format(tag_name))
        tag_type = tag["type"]
        if tag_type == INT:
            try:
                return int(value)
            except (TypeError, ValueError, OverflowError):
                raise Exception("Invalid value for tag {0} : {1}".format(tag_name, value))
        if tag_type == STRING:
            if not isinstance(value, str):
                raise Exception("Invalid value for tag {0} : {1}".format(tag_name, value))
            return value
        if tag_type == BOOL:
            try:
                return bool(value)
            except Exception:
                # A user-defined __bool__ may raise anything.
                raise Exception("Invalid value for tag {0} : {1}".format(tag_name, value))
        return value

    def _assert_known_tags(self, tag_names):
        """Raise Exception if any of tag_names is not a defined tag."""
        for tag_name in tag_names:
            if tag_name not in self.tags:
                raise Exception("tag {0} is not in defined tag".format(tag_name))

    def set_data(self, data, **tags):
        """Record one entry; if data is None, only increase the count.

        Every defined tag must be given exactly once as a keyword argument.
        Values are cleaned with _clean_value_for_tag before being counted,
        so e.g. age="7" and age=7 land in the same cell.

        Raises Exception on an unknown tag, a missing tag or an invalid
        value. Nothing is stored or counted when an exception is raised.
        """
        # Validate everything first so a rejected call leaves no trace
        # in either self._datas or self._matrix.
        self._assert_known_tags(tags)
        if len(tags) != len(self.tags):
            raise Exception("All data must be tag to all defined tags")
        cleaned = {}
        for tag_name in self._tag_order:
            cleaned[tag_name] = self._clean_value_for_tag(tag_name, tags[tag_name])

        if data is not None:
            self._datas.append((data, cleaned))

        # Walk (creating as needed) one nesting level per tag.
        node = self._matrix
        for tag_name in self._tag_order:
            node = node.setdefault(cleaned[tag_name], {})
        node["count"] = node.get("count", 0) + 1

    def get_count(self, **tags):
        """Return how many entries match the given tag values.

        Tags that are omitted (or passed as None) act as wildcards. Given
        values are cleaned the same way as in set_data.

        Raises Exception on an unknown tag name or an invalid value.
        """
        # An unknown name is almost certainly a typo; ignoring it would
        # silently widen the query.
        self._assert_known_tags(tags)
        tag_query = []
        for tag_name in self._tag_order:
            value = tags.get(tag_name)
            if value is not None:
                value = self._clean_value_for_tag(tag_name, value)
            tag_query.append(value)
        return self._get_count(self._matrix, tag_query)

    def _get_count(self, current_data, tags_list):
        """Recursively sum the counts below current_data.

        tags_list holds one entry per remaining nesting level; None means
        "every value at this level". current_data is None when a requested
        value was never recorded.
        """
        if current_data is None:
            return 0
        if not tags_list:
            # Default to 0 so a never-incremented leaf is not None.
            return current_data.get("count", 0)
        current_tag = tags_list[0]
        remaining = tags_list[1:]
        if current_tag is None:
            return sum(self._get_count(child, remaining) for child in current_data.values())
        return self._get_count(current_data.get(current_tag), remaining)
if __name__ == "__main__":
    # Self-check: fill a five-dimensional matrix with a fixed sample and
    # verify a handful of one- and two-tag queries against known totals.
    matrix = DataMatrix(tags={
        "school": {"type": STRING, "range": DISTINCT},
        "age": {"type": INT, "range": RANGE},
        "gender": {"type": STRING, "range": DISTINCT},
        "town": {"type": STRING, "range": DISTINCT},
        "vegetarian": {"type": BOOL, "range": DISTINCT},
    })

    # (number of identical entries, tag values), in insertion order.
    samples = [
        (6, dict(school="School_A", age=7, gender="F", town="Town_A", vegetarian=False)),
        (6, dict(school="School_A", age=7, gender="F", town="Town_B", vegetarian=False)),
        (6, dict(school="School_A", age=7, gender="M", town="Town_B", vegetarian=True)),
        (6, dict(school="School_A", age=8, gender="F", town="Town_A", vegetarian=True)),
        (6, dict(school="School_B", age=8, gender="F", town="Town_A", vegetarian=True)),
        (6, dict(school="School_A", age=8, gender="F", town="Town_B", vegetarian=False)),
        (6, dict(school="School_A", age=8, gender="M", town="Town_A", vegetarian=True)),
        (6, dict(school="School_B", age=8, gender="M", town="Town_A", vegetarian=False)),
        (6, dict(school="School_B", age=8, gender="M", town="Town_A", vegetarian=True)),
        (6, dict(school="School_B", age=7, gender="F", town="Town_B", vegetarian=True)),
        (6, dict(school="School_A", age=7, gender="M", town="Town_B", vegetarian=False)),
        (6, dict(school="School_B", age=7, gender="M", town="Town_B", vegetarian=False)),
        (6, dict(school="School_A", age=7, gender="F", town="Town_B", vegetarian=True)),
        (6, dict(school="School_B", age=7, gender="F", town="Town_A", vegetarian=False)),
        (6, dict(school="School_B", age=8, gender="M", town="Town_B", vegetarian=True)),
        (6, dict(school="School_B", age=8, gender="F", town="Town_B", vegetarian=False)),
        (3, dict(school="School_A", age=8, gender="M", town="Town_B", vegetarian=True)),
    ]
    for repetitions, tag_values in samples:
        for _ in range(repetitions):
            # data=None: only the count is increased, nothing is stored.
            matrix.set_data(None, **tag_values)

    # (query tags, expected number of matching entries)
    expectations = [
        (dict(school="School_A"), 51),
        (dict(school="School_B"), 48),
        (dict(gender="M"), 45),
        (dict(gender="F"), 54),
        (dict(age=7), 48),
        (dict(age=8), 51),
        (dict(vegetarian=True), 51),
        (dict(vegetarian=False), 48),
        (dict(town="Town_A"), 42),
        (dict(town="Town_B"), 57),
        (dict(school="School_A", town="Town_A"), 18),
        (dict(school="School_A", town="Town_B"), 33),
        (dict(school="School_B", town="Town_A"), 24),
        (dict(school="School_B", town="Town_B"), 24),
    ]
    for query, expected in expectations:
        assert matrix.get_count(**query) == expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment