Last active
April 8, 2022 07:35
-
-
Save timgianitsos/d7eb3b8055cde6961d70a8a5a2d3115f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import chain, combinations | |
import json | |
import warnings | |
__author__ = 'Tim Gianitsos' | |
class PowerSetCounter: | |
''' | |
Author: Tim Gianitsos | |
Count all combinations of occurences of the given iterable and their attributes | |
Example: | |
>>> p = PowerSetCounter(['cat', 'dog', 'fish'], initialize_with_all=True) | |
>>> p | |
{ | |
"()": {}, | |
"('cat',)": {}, | |
"('dog',)": {}, | |
"('cat', 'dog')": {}, | |
"('fish',)": {}, | |
"('cat', 'fish')": {}, | |
"('dog', 'fish')": {}, | |
"('cat', 'dog', 'fish')": {} | |
} | |
>>> p.update(['cat', 'dog']) # +1 for ('cat', 'dog') | |
>>> p.update(['dog', 'cat']) # retains ordering, another +1 for ('cat', 'dog') | |
>>> p.update(['rat', 'dog', 'horse', 'bat']) # ignores irrelevant items, +1 ('dog') | |
>>> p | |
{ | |
"()": {}, | |
"('cat',)": {}, | |
"('dog',)": { | |
"count": 1 | |
}, | |
"('cat', 'dog')": { | |
"count": 2 | |
}, | |
"('fish',)": {}, | |
"('cat', 'fish')": {}, | |
"('dog', 'fish')": {}, | |
"('cat', 'dog', 'fish')": {} | |
} | |
''' | |
def __init__(self, iterable, initialize_with_all=False): | |
self.val_to_index = {elem: i for i, elem in enumerate(iterable)} # keep ordering | |
if initialize_with_all and len(self.val_to_index) > 10: | |
warnings.warn(f'The set of all subsets of a collection of size' | |
f' {len(self.val_to_index)} is very large at: {2 ** len(self.val_to_index)}.' | |
f' Consider using `initialize_with_all=False`') | |
self.data = { | |
elem: {} | |
for elem in chain.from_iterable(combinations(self.val_to_index, r) | |
for r in range(len(self.val_to_index) + 1)) | |
} if initialize_with_all else {} | |
def __repr__(self): | |
''' | |
String representation converts tuples into strings | |
This necessary for the json module to json-ify this object. | |
''' | |
# View keys in sorted order | |
return json.dumps({ | |
repr(k1): { | |
k2: { | |
k3: { | |
repr(k4): v4 | |
for k4, v4 in v3.items() | |
} | |
for k3, v3 in v2.items() | |
} if isinstance(v2, dict) else v2 # v2 is not always a dict | |
for k2, v2 in v1.items() | |
} | |
# Ensure items are sorted based on natural ordering (not insertion order) | |
for k1, v1 in sorted( | |
self.data.items(), | |
# Every item v in tuple has an "index" of `self.val_to_index[v]` which came | |
# from original ordering when the PowerSetCounter was initialized | |
# e.g. PSC('abc') -> a:0, b:1, c;2 | |
# Utilize this "index" to create a natural ordering so that all subsets | |
# of {'a', 'b', 'c'} can be compared e.g., {'c','b'} > {'b','a'} | |
# Each element of {'a','b','c'} can be concieved of as bitstring digit | |
# e.g. PSC('abc') -> a:1, b:2, c;4. The subset {'c','b'} corresponds | |
# to bits 110. The subset {'b','a'} corresponds to 011 | |
# Therefore, {'c','b'} > {'b','a'} because 110 > 011 | |
key=lambda elems: sum(2**self.val_to_index[v] for v in elems[0]) | |
) | |
}, indent=4) | |
def update(self, elements, attr_to_func=None): | |
# remove irrelevant elements and sort | |
elements = tuple(sorted( | |
self.val_to_index.keys() & set(elements), | |
key=lambda e: self.val_to_index[e] | |
)) | |
# increment counter | |
ckey = 'count' | |
d = self.data.setdefault(elements, {}) | |
d[ckey] = d.get(ckey, 0) + 1 | |
# set attributes | |
if attr_to_func: | |
for attr, func in attr_to_func.items(): | |
attr_dict = self.data[elements].setdefault('attrs', {}).setdefault(attr, {}) | |
key = tuple(func(element) for element in elements) | |
attr_dict[key] = attr_dict.get(key, 0) + 1 | |
def filter(self): | |
'''Remove keys with empty values''' | |
self.data = {k: v for k, v in self.data.items() if v} | |
if __name__ == '__main__': | |
p = PowerSetCounter(['cat', 'dog', 'fish'], initialize_with_all=False) | |
print(f'Counter before updates: {p}') | |
p.update(['cat', 'dog']) # +1 for ('cat', 'dog') | |
p.update(['dog', 'cat']) # retains ordering, another +1 for ('cat', 'dog') | |
p.update(['rat', 'dog', 'horse', 'bat']) # ignores irrelevant items, +1 ('dog') | |
print(f'Counter after updates: {p}') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment