Last active
September 20, 2017 19:18
-
-
Save Riebart/5c63ea5d006572e99578564eb07dcdcf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict
from random import random
def accumulate(iterator):
    """Lazily yield the running (prefix) sums of *iterator*.

    The running total starts at 0 and each item is added to it with ``+=``,
    so the items must support addition with a number. One value is yielded
    per input item; an empty input yields nothing.

    Example:
        list(accumulate([1, 2, 3, 4, 5])) == [1, 3, 6, 10, 15]
    """
    total = 0
    for item in iterator:
        total += item
        yield total
# Generate a list of a bunch of values with random offsets so we can bucket them.
# Only step at most 10ms per 'packet' (random() / 100.0 is always below 0.01),
# because we want a representative sample size in every bucket.
#
# accumulate yields the progressive partial sums of its input. For example:
# - list(accumulate([1,2,3,4,5])) = [1, 3, 6, 10, 15]
#
# NOTE: range(), print(...) and dict.items() are used instead of xrange, the
# print statement and iteritems so the script behaves the same on Python 2
# and Python 3.
input_packets = list(accumulate(random() / 100.0 for _ in range(1000000)))

# Peek at a few of the earliest synthetic timestamps (indices 1 through 9).
print(input_packets[1:10])

# Group the packets by their timestamp rounded to the nearest whole unit.
# round(_, 0) rounds to 0 digits after the decimal point; it rounds to the
# nearest value rather than truncating, and returns a float, so the bucket
# keys are 0.0, 1.0, 2.0, ...
buckets = defaultdict(list)
for packet in input_packets:
    canonical_timestamp = round(packet, 0)
    buckets[canonical_timestamp].append(packet)

for bucket_timestamp, packets in buckets.items():
    for packet in packets:
        # Real packets are expected to be "timestamp,ip_address,isp" records.
        # The synthetic packets generated above are bare float timestamps with
        # no IP/ISP columns, and calling .split on a float raises
        # AttributeError, so go through str() and skip anything that has no
        # comma-separated fields at all.
        fields = str(packet).split(",")
        if len(fields) == 1:
            continue

        # A record with some other number of fields still raises ValueError
        # here, exactly as the original unpacking did.
        timestamp, ip_address, isp = fields

        # Take only the first three octets of an IP address.
        # NOTE(review): short_ip_address is not consumed anywhere in the
        # visible code; presumably later processing was meant to use it.
        short_ip_address = ".".join(ip_address.split(".")[:3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment