Skip to content

Instantly share code, notes, and snippets.

@ricardocchaves
Created June 2, 2021 11:37
Show Gist options
  • Save ricardocchaves/8a11e08f0f91e2ddc8ec5f52a5db1a81 to your computer and use it in GitHub Desktop.
Save ricardocchaves/8a11e08f0f91e2ddc8ec5f52a5db1a81 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# Calculates stats for the entire test scene (all OBUs)
# WARNING: Must be adapted to your use case.
# Usage example:
# import ipfs.parseStats as ipfs
# downloads = ipfs.parse_data(fname)
# s,r,dup,dow,a,p = ipfs.detail_results(downloads)
import time
import datetime
import sys
import numpy as np
# IPFS hashes: maps content CID -> expected full file size in bytes.
# Used by detail_results() to compute the received-bytes ratio
# (size_bytes / expected size) for each download.
h = {'bafykbzaceclef4u2n4ojgik2z7nkefuyw7wkeidv2z43fe3byirydy7dhsshy':\
54277586, 'bafykbzaceb6mibp5ebrdegcftiqjjzm3zjhknxsut5gtcyjlkbbqbrnofsyqq':\
4766573}
def main():
    """Print file-transfer statistics.

    With a filename argument, parse that single log and print per-download
    details. With no argument, aggregate the hard-coded per-OBU logs and
    print scene-wide statistics (bytes received, duplicates, peers,
    download times, throughput).
    """
    if len(sys.argv) > 1:
        fname = sys.argv[1]
        downloads = parse_data(fname)
        detail_results(downloads, True)
        return

    files = ["obu_3164/insight_file_transfer_filtered.log",
             "obu_3166/insight_file_transfer_filtered.log"]
    sizes = []
    recv_ratios = []
    dups_ratios = []
    download_times = []
    avg_throughputs = []
    peers = []
    for fname in files:
        downloads = parse_data(fname)
        s, r, dup, dow, a, p = detail_results(downloads, True)
        sizes += s
        recv_ratios += r
        dups_ratios += dup
        download_times += dow
        avg_throughputs += a
        peers += p
    print("## Transfers (downloads): {}".format(len(sizes)))
    print("## Bytes received")
    print_stats(recv_ratios)
    total = round(sum(sizes)/1024/1024, 2)
    print("Total: {} MB".format(total))
    print("## Duplicates (if there are 100 unique chunks, and one of them is \
received extra twice, duplicates=2%)")
    print_stats(dups_ratios)
    print("## Peers")
    print_stats(peers, False)
    print("## Download times (s)")
    print_stats(download_times, False)
    # BUG FIX: get_fastest_download() returns -1 when no download reached a
    # receive ratio >= 1; the original then printed download_times[-1]
    # (the last transfer's time) as "Fastest", which is wrong.
    fastest_idx = get_fastest_download(recv_ratios, download_times)
    if fastest_idx >= 0:
        print("Fastest: {} s".format(download_times[fastest_idx]))
    else:
        print("Fastest: n/a (no complete download)")
    print("## Throughput (MB/s)")
    print_stats(avg_throughputs, False)
def get_fastest_download(recv_ratios, download_times):
    """Return the index of the quickest fully-received download.

    A download counts as complete when its receive ratio is >= 1.
    Ties are resolved in favour of the earliest index; returns -1 when
    no download is complete.
    """
    complete = [idx for idx, ratio in enumerate(recv_ratios) if ratio >= 1]
    if not complete:
        return -1
    return min(complete, key=lambda idx: download_times[idx])
def print_stats(data, percentage=True):
    """Print one tab-separated line with avg, median, std, min and max of
    ``data``.

    When ``percentage`` is true, values are scaled by 100 and suffixed
    with '%'. Avg/median/std are rounded to 2 decimals; min/max are not.
    """
    scale, unit = (100, "%") if percentage else (1, "")
    values = [
        round(np.mean(data) * scale, 2),
        round(np.median(data) * scale, 2),
        round(np.std(data) * scale, 2),
        np.min(data) * scale,
        np.max(data) * scale,
    ]
    labels = ("Avg", "Median", "Std", "Min", "Max")
    print("\t".join("{}: {}{}".format(label, value, unit)
                    for label, value in zip(labels, values)))
# Returns (sizes[],recv_ratios[],dups_ratios[],download_times[],avg_throughputs[],peers[])
def detail_results(downloads, verbose=False):
    """Compute per-download statistics from parse_data() output.

    Skips the 117-byte transfer-start records and downloads whose
    timestamp span is implausibly large (corrupt timestamps).

    Parameters:
        downloads -- list of transfer dicts as produced by parse_data()
        verbose   -- when true, print a detailed report per download

    Returns six parallel lists:
        (sizes, recv_ratios, dups_ratios, download_times,
         avg_throughputs, peers)
    """
    sizes = []
    recv_ratios = []
    dups_ratios = []
    download_times = []
    avg_throughputs = []
    peers = []
    for d in downloads:
        # 117-byte entries mark the start of a transfer (see parse_data),
        # they carry no payload worth measuring.
        if d['size_bytes'] == 117:
            continue
        dups_ratio = get_dups(d['content'])
        # Expected size comes from the known CID -> size table `h`.
        expected_size = h[d['content'][0]['cid']]
        recv_ratio = round(d['size_bytes']/expected_size, 3)
        first_recv = d['content'][0]['ts']
        last_recv = d['content'][-1]['ts']
        d_time = abs(last_recv - first_recv)
        # Outlier guard against corrupt timestamps.
        if d_time > 159519200:
            continue
        # Average throughput in MB/s; 0 when all blocks share one timestamp.
        if d_time == 0:
            avg = 0
        else:
            avg = round(d['size_bytes']/d_time/1024/1024, 2)
        # NOTE: a stricter filter (d_time < 50 or recv_ratio < 1 or
        # first_recv < 0) and an `if recv_ratio >= 1` gate existed here but
        # were deliberately disabled — every surviving download is included.
        if verbose:
            print("-------")
            print("obu: {}".format(d['obu']))
            print("size: {} bytes / {} bytes, ratio {}".format(d['size_bytes'],
                                                               expected_size, recv_ratio))
            print("blocks received: {}".format(len(d['content'])))
            print("peers: {}".format(d['peer']))
            print("duplicates/seen: {}".format(dups_ratio))
            print("download time: [{},{}], {} secs ## {} MB/s avg".format(
                datetime.datetime.fromtimestamp(first_recv),
                datetime.datetime.fromtimestamp(last_recv),
                d_time,
                avg))
            print("first_recv: {} ### last_recv: {}".format(first_recv, last_recv))
        sizes.append(d['size_bytes'])
        recv_ratios.append(recv_ratio)
        dups_ratios.append(dups_ratio)
        download_times.append(d_time)
        peers.append(len(d['peer']))
        avg_throughputs.append(avg)
    return sizes, recv_ratios, dups_ratios, download_times, avg_throughputs, peers
def parse_data(fname):
    """Parse an insight file-transfer log into a list of transfer dicts.

    Each transfer dict has keys:
        action     -- always 'recv' (parsing aborts on anything else)
        size_bytes -- total bytes received for the transfer
        content    -- list of {'cid': str, 'ts': int} block-receive records
        peer       -- dict mapping peer id -> blocks received from that peer
        obu        -- 4-character OBU id extracted from the file path

    Returns the list of transfers, or None (with a warning printed) when a
    non-'recv' action is encountered.
    """
    transfers = []
    tmp_transfer = {}
    ts = -1
    # The OBU id is embedded in the path, e.g. "obu_3164/...": take the
    # four characters right after "obu_".
    o_idx = fname.find('obu')
    obu = fname[o_idx+4:o_idx+8]
    # BUG FIX: use a context manager so the file is closed even on the
    # early `return` below — the original leaked the handle on that path.
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if 'file_transfer' in line:
                # Start of a new log record: reset the scratch transfer.
                tmp_transfer = {'action': '', 'size_bytes': 0, 'content': [], 'peer': {}, 'obu': obu}
                # Timestamp raw format: '2020-07-21 05:20:05.922'
                ts = line.split('+')[0][1:-1]  # drop the surrounding quotes
                ts = time.mktime(datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S.%f").timetuple())
                ts = int(ts)
            elif 'action.name' in line:
                action = line.split("=")[1].replace(' ', '')
                if action != 'recv':
                    print("WARNING! Not recv")
                    return
                tmp_transfer['action'] = action
            elif 'cid' in line:
                cid = line.split("=")[1].replace(' ', '')
                tmp_transfer['content'] = [{'cid': cid, 'ts': ts}]
            elif 'size_bytes' in line:
                size_bytes = int(line.split("=")[1].replace(' ', ''))
                if size_bytes == 117:
                    # A 117-byte record marks the start of a new transfer.
                    transfers.append(tmp_transfer)
                else:
                    # Ongoing transfer: accumulate into the latest one.
                    transfers[-1]['size_bytes'] += size_bytes
                    transfers[-1]['content'].append(tmp_transfer['content'][0])
                tmp_transfer['size_bytes'] = size_bytes
            elif 'peer' in line:
                peer = line.split("=")[1].replace(' ', '')
                if peer in transfers[-1]["peer"]:
                    transfers[-1]["peer"][peer] += 1
                else:
                    transfers[-1]["peer"][peer] = 1
    return transfers
# Returns duplicate ratio in `data`
def get_dups(data):
    """Return the fraction of distinct CIDs in ``data`` that appear more
    than once, rounded to 3 decimal places.

    ``data`` is a list of dicts each carrying a 'cid' key.
    """
    counts = {}
    for entry in data:
        cid = entry['cid']
        counts[cid] = counts.get(cid, 0) + 1
    duplicated = [cid for cid, n in counts.items() if n > 1]
    return round(len(duplicated) / len(counts), 3)
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment