Skip to content

Instantly share code, notes, and snippets.

@Riebart
Last active September 20, 2017 19:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Riebart/5c63ea5d006572e99578564eb07dcdcf to your computer and use it in GitHub Desktop.
Save Riebart/5c63ea5d006572e99578564eb07dcdcf to your computer and use it in GitHub Desktop.
from random import random
def accumulate(iterator):
    """Yield the running totals of the numbers in *iterator*.

    After each item is consumed, the sum of every item seen so far is
    yielded, so ``list(accumulate([1, 2, 3, 4, 5]))`` is
    ``[1, 3, 6, 10, 15]``.  An empty iterable yields nothing.
    """
    total = 0
    for item in iterator:
        total += item
        # Yield inside the loop so that every partial sum is produced,
        # not just the grand total.
        yield total
# Generate synthetic packet timestamps so that we have something to bucket.
# Each packet arrives at most 10ms (random() / 100.0) after the previous one,
# because we want a representative sample size in every bucket.
#
# accumulate() takes the progressive partial sums of the gaps. For example:
# - list(accumulate([1,2,3,4,5])) = [1, 3, 6, 10, 15]
input_packets = list(accumulate(random() / 100.0 for _ in xrange(1000000)))

# Peek at a few of the generated timestamps. The parenthesised form prints
# the same thing as the bare print statement under Python 2.
print(input_packets[1:10])

# Group the timestamps into buckets keyed by whole number.
# round(_, 0) rounds to 0 digits after the decimal point, i.e. to the NEAREST
# whole number -- it does not truncate -- so the bucket keyed N collects the
# timestamps that lie within about half a unit either side of N.
buckets = dict()
for packet in input_packets:
    canonical_timestamp = round(packet, 0)
    # setdefault creates the bucket's list the first time its key is seen.
    buckets.setdefault(canonical_timestamp, []).append(packet)
# Walk every bucket and split each packet record into its fields.
#
# NOTE(review): this loop expects every packet to be a
# "timestamp,ip_address,isp" text record, but the packets placed into
# `buckets` earlier in this script are bare floats, which have no .split().
# As written it raises AttributeError on the first packet. Feed it real
# comma-separated records (and derive the bucket key from their timestamp
# field) before relying on it -- TODO confirm the intended input format.
for bucket_timestamp, packets in buckets.iteritems():
    for packet in packets:
        timestamp, ip_address, isp = packet.split(",")
        # Take only the first three octets of an IP address
        short_ip_address = ".".join(ip_address.split(".")[:3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment