Skip to content

Instantly share code, notes, and snippets.

@alexforster
Created September 25, 2023 20:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexforster/3cd1c51f3d1988814e365dab9d92cb57 to your computer and use it in GitHub Desktop.
Save alexforster/3cd1c51f3d1988814e365dab9d92cb57 to your computer and use it in GitHub Desktop.
Streaming Heavy Hitters
#!/usr/bin/env python3
import sys
import random
from typing import *
from dataclasses import dataclass
from ipaddress import IPv4Address
@dataclass
class HeavyHittersTableEntry:
item: Any
count: int
class HeavyHittersTable:
def __init__(self, capacity: int):
self.capacity = capacity # type: int
self.entries = list() # type: List[HeavyHittersTableEntry]
def sample(self, item: Any):
# try to find the item in an existing table entry
for entry in self.entries:
if entry.item == item:
# we found the item, so increment its count and stop searching
entry.count += 1
break
else:
# we didn't find the item
if len(self.entries) < self.capacity:
# we still have unused table capacity, so append a new table entry to the end of the table
self.entries.append(HeavyHittersTableEntry(item, 1))
else:
# the table is full, so replace the last entry in the table with this entry
self.entries.pop()
self.entries.append(HeavyHittersTableEntry(item, 1))
# re-sort the table
self.entries.sort(key=lambda entry: entry.count, reverse=True)
def __iter__(self):
return iter(self.entries)
def main() -> int:
# create a heavy hitters table and simulate three top IPs
hh = HeavyHittersTable(capacity=25)
top_ips = [IPv4Address(random.randbytes(4)), IPv4Address(random.randbytes(4)), IPv4Address(random.randbytes(4))]
for i in range(10000):
if random.random() < 0.2:
# 20% chance this is one of the three "top IPs"
random_ip = random.choice(top_ips)
else:
# 80% chance this is a random IP
random_ip = IPv4Address(random.randbytes(4))
# sample the IP in the heavy hitters table
hh.sample(random_ip)
# print the heavy hitters table
for entry in hh:
print(entry.item, entry.count)
return 0
if __name__ == '__main__':
sys.exit(main())
# Example output:
#
# ~# ./hh.py
# 103.124.248.231 679
# 7.206.8.42 678
# 122.92.108.109 657
# 134.197.126.208 1
# 26.90.72.67 1
# 221.143.101.0 1
# 190.234.230.152 1
# 69.62.101.189 1
# 230.112.137.45 1
# 73.175.51.45 1
# 189.13.248.134 1
# 87.94.143.18 1
# 145.13.55.254 1
# 116.87.65.104 1
# 202.22.116.138 1
# 48.252.17.176 1
# 167.92.52.213 1
# 203.67.81.165 1
# 65.121.29.182 1
# 241.143.141.107 1
# 2.12.49.64 1
# 195.198.98.127 1
# 182.239.36.119 1
# 248.226.103.26 1
# 136.9.121.18 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment