Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Add IDs and their hashes to a KMinHash key backed by a Redis sorted set, truncating the set back to k elements after every addition
def update_min_hash(self, element_id):
    """Offer ``element_id`` to the k-min-hash sketch kept in a Redis sorted set.

    The element's hash (from ``self.__element_hash``) is used as its score
    in the sorted set at ``self.key``.  The set is held to at most
    ``self.k`` members by dropping the highest-scored ones after each
    successful insertion.

    Reads and updates ``self.elements_added`` (local count of members in
    the set) and ``self.max_min_hash`` (largest score currently kept).

    Args:
        element_id: Identifier to add; stored as the sorted-set member.

    Returns:
        None.  The sketch is updated in place.
    """
    min_hash = self.__element_hash(element_id)

    # A full sketch only changes if the new hash is strictly below the
    # largest hash it currently keeps.
    if self.elements_added == self.k and min_hash >= self.max_min_hash:
        return

    # Insert BEFORE evicting.  ZADD reports how many members were newly
    # created, so a repeated element_id is detected here without an extra
    # round trip and never causes a legitimate member to be evicted.
    # NOTE(review): positional (score, member) order matches the original
    # call; newer redis-py releases expect a mapping instead -- confirm
    # against the client version in use.
    newly_added = self.redis_client.zadd(self.key, min_hash, element_id)
    if not newly_added:
        return
    self.elements_added += newly_added

    if self.elements_added > self.k:
        # Drop everything ranked past the k smallest scores, then re-read
        # the new maximum.  The set is non-empty here (for k >= 1), so the
        # [0][1] lookup cannot hit an empty reply the way evicting first
        # could when k == 1.
        self.elements_added -= self.redis_client.zremrangebyrank(self.key, self.k, -1)
        self.max_min_hash = self.redis_client.zrange(self.key, -1, -1, withscores=True)[0][1]
    else:
        self.max_min_hash = max(self.max_min_hash, min_hash)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment