Skip to content

Instantly share code, notes, and snippets.

@edsu
Last active March 1, 2022 17:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edsu/d0051fd6c89099af34621986f90b90ce to your computer and use it in GitHub Desktop.
Save edsu/d0051fd6c89099af34621986f90b90ce to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
from datetime import datetime
# Total number of objects expected to be archived; used to extrapolate the
# sampled request rate and file size into whole-job estimates.
total_objects = 4300000


def _average_request_rate(log_path):
    """Return the average number of requests per second recorded in a log.

    Every non-blank line of the file must begin with a timestamp in the form
    ``YYYY-MM-DD HH:MM:SS`` followed by ``' - '``.  Each such line is counted
    as one request, and the elapsed time is the span between the first and
    the last timestamp in the file.

    Raises:
        ValueError: if a line's timestamp cannot be parsed, or if the log
            does not span a positive amount of time (e.g. it is empty or
            holds a single entry), so no rate can be computed.
    """
    first = None
    last = None
    count = 0
    # The context manager guarantees the log is closed even if parsing fails.
    with open(log_path) as log:
        for line in log:
            # Tolerate blank lines (such as a trailing newline) instead of
            # letting strptime fail on them.
            if not line.strip():
                continue
            stamp = datetime.strptime(line.split(' - ')[0], "%Y-%m-%d %H:%M:%S")
            if first is None:
                first = stamp
            last = stamp
            count += 1

    # Summing the gaps between consecutive timestamps telescopes to
    # (last - first), so only the two endpoints need to be remembered.
    elapsed = (last - first).total_seconds() if count else 0.0
    if elapsed <= 0:
        raise ValueError(
            "cannot compute a request rate from {!r}: {} request(s) spanning "
            "{} second(s)".format(log_path, count, elapsed)
        )
    return count / float(elapsed)


def _average_file_size(archive_dir):
    """Return the mean size in bytes of all files beneath ``archive_dir``.

    The directory tree is walked recursively with ``os.walk``.

    Raises:
        ValueError: if no files are found (``os.walk`` yields nothing for a
            missing directory, so that case ends up here as well).
    """
    file_count = 0
    total_bytes = 0
    for root, _dirs, files in os.walk(archive_dir):
        for name in files:
            file_count += 1
            total_bytes += os.path.getsize(os.path.join(root, name))
    if file_count == 0:
        raise ValueError("no files found under {!r}".format(archive_dir))
    # float() keeps this a true division on Python 2 as well.
    return total_bytes / float(file_count)


def main(log_path='archive-fedora.log', archive_dir='archive', total=total_objects):
    """Print estimated total time and disk space for archiving ``total`` objects.

    Args:
        log_path: request log used to measure the average requests/second.
        archive_dir: directory of already-archived files used to measure the
            average file size.
        total: number of objects the estimates are extrapolated to.

    Raises:
        ValueError: if the log or the archive directory does not contain
            enough data to compute an average.
    """
    reqs_per_sec = _average_request_rate(log_path)
    avg_file_size_bytes = _average_file_size(archive_dir)

    estimated_time_days = (total / reqs_per_sec) / 60 / 60 / 24
    estimated_size_gb = (total * avg_file_size_bytes) / 1024 / 1024 / 1024.0

    # Single-argument print(...) calls behave identically on Python 2 and 3.
    print("")
    print("avg req/sec: {}".format(reqs_per_sec))
    print("estimated total time: {} days".format(estimated_time_days))
    print("")
    print("avg bytes/file: {}".format(avg_file_size_bytes))
    print("estimated total size: {} GB".format(estimated_size_gb))
    print("")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment