Skip to content

Instantly share code, notes, and snippets.

@mesejo
Created October 9, 2021 15:56
Show Gist options
  • Save mesejo/e9e709a1085266219abdf0e1d8013537 to your computer and use it in GitHub Desktop.
Save mesejo/e9e709a1085266219abdf0e1d8013537 to your computer and use it in GitHub Desktop.
import random
import perfplot
from collections import defaultdict, Counter
from itertools import chain
from pyroaring import BitMap
def setup(k):
lss = [f"list{i}" for i in range(1, k + 1)]
labels = [f"label{i}" for i in range(1, 101)]
data = {ls: random.sample(labels, k=10) for ls in lss}
return data
def baseline(data):
data = {key: set(value) for key, value in data.items()}
output = {key1: {key2: 0 for key2 in data.keys()} for key1 in data.keys()}
for key1, value1 in data.items():
for key2, value2 in data.items():
for element in value1:
if element in value2:
count = output[key1][key2]
output[key1][key2] = count + 1
return output
def inverted_index(data):
index = defaultdict(list)
for key, values in data.items():
for value in values:
index[value].append(key)
result = {key: Counter(chain.from_iterable(index[label] for label in labels)) for key, labels in data.items()}
return {key: {k: value.get(k, 0) for k in data} for key, value in result.items()}
def roaring_bitmap(data):
# all labels
labels = set().union(*data.values())
# lookup mapping to an integer
lookup = {key: value for value, key in enumerate(labels)}
roaring_data = {key: BitMap(lookup[v] for v in value) for key, value in data.items()}
result = defaultdict(dict)
for k_out, outer in roaring_data.items():
for k_in, inner in roaring_data.items():
result[k_out][k_in] = len(outer & inner)
return result
def equality_check(a, b):
for k_out, d in a.items():
for k_in, value in d.items():
try:
if b[k_out][k_in] != value:
return False
except KeyError:
return False
return True
if __name__ == "__main__":
perfplot.show(
setup=setup,
n_range=[length for length in range(100, 5000, 100)],
kernels=[baseline, inverted_index, roaring_bitmap],
labels=["baseline", "inverted_index", "roaring_bitmap"],
xlabel="len(data)",
equality_check=equality_check,
relative_to=0,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment