Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@Milek7

Milek7/plot.py Secret

Created November 24, 2020 12:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Milek7/0b8a91443c3d2462a1327265aa79614f to your computer and use it in GitHub Desktop.
Save Milek7/0b8a91443c3d2462a1327265aa79614f to your computer and use it in GitHub Desktop.
"""
zcat -f access* | grep userfiles | grep GET | awk -F' ' '{if ($6 == "\"GET" && $9 == "200") print $10;}' | sort | uniq -c | sort -rn > histo_dump
"""
import matplotlib.pyplot as pl
import numpy as np
import csv
# Load the histogram: every non-empty row is "<request count> <size in bytes>".
# Each row becomes (bandwidth in GiB, size in GiB), where the bandwidth is the
# request count multiplied by the size.
BYTES_PER_GIB = 1048576.0 * 1024.0
data = []
with open('histo_dump', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ', skipinitialspace=True)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on row[1].
            continue
        size_gib = float(row[1]) / BYTES_PER_GIB
        bandwidth_gib = float(row[0]) * size_gib
        data.append((bandwidth_gib, size_gib))

# Heaviest bandwidth consumers first, so the plot shows how much traffic the
# "hottest" fraction of the stored data is responsible for.
data.sort(key=lambda tup: tup[0], reverse=True)
totalstor = sum(size for _, size in data)

# Walk the sorted entries and cut them into bins that each cover `step`
# (1 %) of the total stored size.
step = 0.01
stored = 0.0    # GiB of storage accounted for so far
bin_index = 1   # the current bin ends at bin_index * step * totalstor
cbin = 0.0      # bandwidth collected in the bin currently being filled
used = 0.0      # bandwidth of all bins closed so far (running total)
bins = []
labels = []
for bandwidth_gib, size_gib in data:
    stored += size_gib
    cbin += bandwidth_gib
    # The edge is derived from an integer index rather than by repeatedly
    # adding `step`, so rounding error does not build up across bins.
    # Comparing against the edge (instead of dividing by totalstor) also
    # avoids a ZeroDivisionError when every recorded size is zero.
    # A single large entry may cross several edges; the extra bins it
    # closes carry zero bandwidth.
    while stored > bin_index * step * totalstor:
        edge = bin_index * step * totalstor
        bins.append(cbin)
        labels.append(edge)
        used += cbin
        print(
            f"after accounting for {round(edge, 1)} GiB of stored data, "
            f"{round(used)} GiB of bandwidth was used"
        )
        bin_index += 1
        cbin = 0.0

# Whatever is left after the last crossed edge forms the final bin.
if cbin != 0:
    bins.append(cbin)
    labels.append(bin_index * step * totalstor)

# Bars show the cumulative bandwidth at each storage edge; each bar is 80 %
# of a bin wide so neighbouring bars do not touch.
cumulative = np.add.accumulate(bins)
pl.bar(labels, cumulative, totalstor * step * 0.8)
pl.xlabel('Stored files sorted by bandwidth usage [GiB]')
pl.ylabel('Cumulative yearly bandwidth used [GiB]')
pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment