Skip to content

Instantly share code, notes, and snippets.

@timgianitsos
Last active April 8, 2022 07:35
Show Gist options
  • Save timgianitsos/d7eb3b8055cde6961d70a8a5a2d3115f to your computer and use it in GitHub Desktop.
Save timgianitsos/d7eb3b8055cde6961d70a8a5a2d3115f to your computer and use it in GitHub Desktop.
from itertools import chain, combinations
import json
import warnings
__author__ = 'Tim Gianitsos'
class PowerSetCounter:
'''
Author: Tim Gianitsos
Count all combinations of occurences of the given iterable and their attributes
Example:
>>> p = PowerSetCounter(['cat', 'dog', 'fish'], initialize_with_all=True)
>>> p
{
"()": {},
"('cat',)": {},
"('dog',)": {},
"('cat', 'dog')": {},
"('fish',)": {},
"('cat', 'fish')": {},
"('dog', 'fish')": {},
"('cat', 'dog', 'fish')": {}
}
>>> p.update(['cat', 'dog']) # +1 for ('cat', 'dog')
>>> p.update(['dog', 'cat']) # retains ordering, another +1 for ('cat', 'dog')
>>> p.update(['rat', 'dog', 'horse', 'bat']) # ignores irrelevant items, +1 ('dog')
>>> p
{
"()": {},
"('cat',)": {},
"('dog',)": {
"count": 1
},
"('cat', 'dog')": {
"count": 2
},
"('fish',)": {},
"('cat', 'fish')": {},
"('dog', 'fish')": {},
"('cat', 'dog', 'fish')": {}
}
'''
def __init__(self, iterable, initialize_with_all=False):
self.val_to_index = {elem: i for i, elem in enumerate(iterable)} # keep ordering
if initialize_with_all and len(self.val_to_index) > 10:
warnings.warn(f'The set of all subsets of a collection of size'
f' {len(self.val_to_index)} is very large at: {2 ** len(self.val_to_index)}.'
f' Consider using `initialize_with_all=False`')
self.data = {
elem: {}
for elem in chain.from_iterable(combinations(self.val_to_index, r)
for r in range(len(self.val_to_index) + 1))
} if initialize_with_all else {}
def __repr__(self):
'''
String representation converts tuples into strings
This necessary for the json module to json-ify this object.
'''
# View keys in sorted order
return json.dumps({
repr(k1): {
k2: {
k3: {
repr(k4): v4
for k4, v4 in v3.items()
}
for k3, v3 in v2.items()
} if isinstance(v2, dict) else v2 # v2 is not always a dict
for k2, v2 in v1.items()
}
# Ensure items are sorted based on natural ordering (not insertion order)
for k1, v1 in sorted(
self.data.items(),
# Every item v in tuple has an "index" of `self.val_to_index[v]` which came
# from original ordering when the PowerSetCounter was initialized
# e.g. PSC('abc') -> a:0, b:1, c;2
# Utilize this "index" to create a natural ordering so that all subsets
# of {'a', 'b', 'c'} can be compared e.g., {'c','b'} > {'b','a'}
# Each element of {'a','b','c'} can be concieved of as bitstring digit
# e.g. PSC('abc') -> a:1, b:2, c;4. The subset {'c','b'} corresponds
# to bits 110. The subset {'b','a'} corresponds to 011
# Therefore, {'c','b'} > {'b','a'} because 110 > 011
key=lambda elems: sum(2**self.val_to_index[v] for v in elems[0])
)
}, indent=4)
def update(self, elements, attr_to_func=None):
# remove irrelevant elements and sort
elements = tuple(sorted(
self.val_to_index.keys() & set(elements),
key=lambda e: self.val_to_index[e]
))
# increment counter
ckey = 'count'
d = self.data.setdefault(elements, {})
d[ckey] = d.get(ckey, 0) + 1
# set attributes
if attr_to_func:
for attr, func in attr_to_func.items():
attr_dict = self.data[elements].setdefault('attrs', {}).setdefault(attr, {})
key = tuple(func(element) for element in elements)
attr_dict[key] = attr_dict.get(key, 0) + 1
def filter(self):
'''Remove keys with empty values'''
self.data = {k: v for k, v in self.data.items() if v}
if __name__ == '__main__':
p = PowerSetCounter(['cat', 'dog', 'fish'], initialize_with_all=False)
print(f'Counter before updates: {p}')
p.update(['cat', 'dog']) # +1 for ('cat', 'dog')
p.update(['dog', 'cat']) # retains ordering, another +1 for ('cat', 'dog')
p.update(['rat', 'dog', 'horse', 'bat']) # ignores irrelevant items, +1 ('dog')
print(f'Counter after updates: {p}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment