Skip to content

Instantly share code, notes, and snippets.

@MatteoLacki
Last active June 10, 2022 06:18
Show Gist options
  • Save MatteoLacki/682a0cb7665d94d52e71022239aa7481 to your computer and use it in GitHub Desktop.
Save MatteoLacki/682a0cb7665d94d52e71022239aa7481 to your computer and use it in GitHub Desktop.
import numpy as np
# holy settings
K = 100
# static sequences
sequences = [
list(
zip(
np.sort(np.random.randint(low=0, high=100_000, size=1_000)),
np.random.randint(low=0, high=100_000, size=1_000),
)
)
for _ in range(K)
]
# infinite sequences
def random_growing_sequence(key_start=None, step_high=10, value_low=0, value_high=100_000):
random_step = lambda: int(np.random.randint(low=0, high=step_high))
random_value = lambda: int(np.random.randint(low=value_low, high=value_high))
key = key_start
if key is None:
key = random_step()
while True:
value = random_value()
yield (key, value)
key += random_step()
sequences2 = [random_growing_sequence() for _ in range(K)]
# Tomek Wilczyński
from sortedcontainers import SortedDict
def matteo2(input):
p = SortedDict()
for sublist in input:
for element in sublist:
if(p.get(element[0]) is None):
p[element[0]] = element[1]
else:
p.update({element[0]: p.get(element[0]) + element[1]})
return [x for x in p.items()]
%%timeit
matteo2(sequences)
# Mateusz Łącki
import heapq
def deduplicate(*sequences):
merged_key_values = heapq.merge(*sequences, key=lambda x: x[0])
prev_key, prev_value = next(merged_key_values)
for key, value in merged_key_values:
if key == prev_key:
prev_value += value
else:
yield (prev_key, prev_value)
prev_key = key
prev_value = value
it = deduplicate(*sequences2)
next(it)
# Racing
%%timeit
aa = list(deduplicate(*sequences))
%%timeit
bb = matteo2(sequences)
# QC
err = []
for i, (a, b) in enumerate(zip(aa, bb)):
if a != b:
err.append((i,a,b))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment