Created
June 2, 2021 11:37
-
-
Save ricardocchaves/8a11e08f0f91e2ddc8ec5f52a5db1a81 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# Calculates stats for the entire test scene (all OBUs) | |
# WARNING: Must be adapted to your use case. | |
# Usage example: | |
# import ipfs.parseStats as ipfs | |
# downloads = ipfs.parse_data(fname) | |
# s,r,dup,dow,a,p = ipfs.detail_results(downloads) | |
import datetime
import sys
import time
from collections import Counter

import numpy as np
# IPFS hashes | |
h = {'bafykbzaceclef4u2n4ojgik2z7nkefuyw7wkeidv2z43fe3byirydy7dhsshy':\ | |
54277586, 'bafykbzaceb6mibp5ebrdegcftiqjjzm3zjhknxsut5gtcyjlkbbqbrnofsyqq':\ | |
4766573} | |
def main():
    """Entry point.

    With a filename argument: print per-download details for that log only.
    Without arguments: aggregate stats across the default OBU log files and
    print summary statistics for the whole test scene.
    """
    if len(sys.argv) > 1:
        # Single explicit log file: detailed per-download report only.
        detail_results(parse_data(sys.argv[1]), True)
        return

    log_files = [
        "obu_3164/insight_file_transfer_filtered.log",
        "obu_3166/insight_file_transfer_filtered.log",
    ]
    # Parallel accumulators, one entry per download across all files.
    sizes = []
    recv_ratios = []
    dups_ratios = []
    download_times = []
    avg_throughputs = []
    peers = []
    for log in log_files:
        s, r, dup, dow, a, p = detail_results(parse_data(log), True)
        sizes.extend(s)
        recv_ratios.extend(r)
        dups_ratios.extend(dup)
        download_times.extend(dow)
        avg_throughputs.extend(a)
        peers.extend(p)

    print("## Transfers (downloads): {}".format(len(sizes)))
    print("## Bytes received")
    print_stats(recv_ratios)
    total = round(sum(sizes) / 1024 / 1024, 2)
    print("Total: {} MB".format(total))
    print("## Duplicates (if there are 100 unique chunks, and one of them is received extra twice, duplicates=2%)")
    print_stats(dups_ratios)
    print("## Peers")
    print_stats(peers, False)
    print("## Download times (s)")
    print_stats(download_times, False)
    fastest_idx = get_fastest_download(recv_ratios, download_times)
    print("Fastest: {} s".format(download_times[fastest_idx]))
    print("## Throughput (MB/s)")
    print_stats(avg_throughputs, False)
def get_fastest_download(recv_ratios, download_times):
    """Return the index of the fastest *complete* download.

    A download counts as complete when its receive ratio is >= 1
    (at least all expected bytes arrived).

    Args:
        recv_ratios: per-download received/expected byte ratios.
        download_times: per-download durations, same length/order.

    Returns:
        Index into the input lists of the complete download with the
        smallest time, or -1 when no download completed.
    """
    complete = [i for i, ratio in enumerate(recv_ratios) if ratio >= 1]
    if not complete:
        return -1
    # min() returns the first minimal element, preserving the original
    # strict-less-than tie-breaking (earliest index wins on ties).
    return min(complete, key=lambda i: download_times[i])
# Prints a line with the mean (average), median, standard deviation, min and max.
def print_stats(data, percentage=True):
    """Print mean/median/std/min/max of `data` on one tab-separated line.

    Args:
        data: non-empty sequence of numbers.
        percentage: when True, scale values by 100 and suffix them with '%'.
    """
    if percentage:
        mult = 100
        string = "Avg: {}%\tMedian: {}%\tStd: {}%\tMin: {}%\tMax: {}%"
    else:
        mult = 1
        string = "Avg: {}\tMedian: {}\tStd: {}\tMin: {}\tMax: {}"
    # Round min/max as well: previously they were printed unrounded, which
    # leaked float noise (e.g. 95.30000000000001%) next to rounded stats.
    print(string.format(
        round(np.mean(data) * mult, 2),
        round(np.median(data) * mult, 2),
        round(np.std(data) * mult, 2),
        round(np.min(data) * mult, 2),
        round(np.max(data) * mult, 2)))
def detail_results(downloads, verbose=False):
    """Compute per-download statistics for the transfers from parse_data().

    Args:
        downloads: list of transfer dicts with keys 'obu', 'size_bytes',
            'content' (list of {'cid','ts'}) and 'peer' (peer -> count).
        verbose: when True, print a detailed report for every download.

    Returns:
        Tuple of parallel lists, one entry per retained download:
        (sizes, recv_ratios, dups_ratios, download_times, avg_throughputs, peers).
    """
    sizes = []
    recv_ratios = []
    dups_ratios = []
    download_times = []
    avg_throughputs = []
    peers = []
    for d in downloads:
        # 117-byte records are transfer-start markers (see parse_data),
        # not real payloads — skip them.
        if d['size_bytes'] == 117:
            continue
        dups_ratio = get_dups(d['content'])
        # Expected full size is looked up by the CID of the first block.
        expected_size = h[d['content'][0]['cid']]
        recv_ratio = round(d['size_bytes'] / expected_size, 3)
        first_recv = d['content'][0]['ts']
        last_recv = d['content'][-1]['ts']
        d_time = abs(last_recv - first_recv)
        # Discard entries with absurd durations (corrupted timestamps).
        # NOTE(review): threshold looks dataset-specific — confirm.
        if d_time > 159519200:
            continue
        # Guard the division: single-block downloads have zero duration.
        if d_time == 0:
            avg = 0
        else:
            avg = round(d['size_bytes'] / d_time / 1024 / 1024, 2)
        if verbose:
            print("-------")
            print("obu: {}".format(d['obu']))
            print("size: {} bytes / {} bytes, ratio {}".format(d['size_bytes'],
                                                               expected_size, recv_ratio))
            print("blocks received: {}".format(len(d['content'])))
            print("peers: {}".format(d['peer']))
            print("duplicates/seen: {}".format(dups_ratio))
            print("download time: [{},{}], {} secs ## {} MB/s avg".format(
                datetime.datetime.fromtimestamp(first_recv),
                datetime.datetime.fromtimestamp(last_recv),
                d_time,
                avg))
            print("first_recv: {} ### last_recv: {}".format(first_recv, last_recv))
        # NOTE: to restrict stats to complete downloads only, filter here
        # on recv_ratio >= 1 (the original kept this toggle as dead code).
        sizes.append(d['size_bytes'])
        recv_ratios.append(recv_ratio)
        dups_ratios.append(dups_ratio)
        download_times.append(d_time)
        peers.append(len(d['peer']))
        avg_throughputs.append(avg)
    return sizes, recv_ratios, dups_ratios, download_times, avg_throughputs, peers
def parse_data(fname):
    """Parse a filtered insight file-transfer log into a list of transfers.

    Args:
        fname: path to the log file; the OBU id is extracted from the four
            characters following 'obu' in the path (e.g. "obu_3164/..." -> "3164").

    Returns:
        List of transfer dicts {'action', 'size_bytes', 'content', 'peer', 'obu'},
        or None when a non-'recv' action is encountered (parsing aborts).
    """
    o_idx = fname.find('obu')
    obu = fname[o_idx + 4:o_idx + 8]
    transfers = []
    tmp_transfer = {}
    ts = -1
    # 'with' guarantees the handle is closed even on the early return below
    # (the old code leaked the file in that case).
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if 'file_transfer' in line:
                # Start of a new log record: reset the scratch transfer.
                tmp_transfer = {'action': '', 'size_bytes': 0, 'content': [],
                                'peer': {}, 'obu': obu}
                # Timestamp raw format: '2020-07-21 05:20:05.922'
                ts = line.split('+')[0][1:-1]  # drop surrounding delimiter chars
                ts = time.mktime(datetime.datetime.strptime(
                    ts, "%Y-%m-%d %H:%M:%S.%f").timetuple())
                ts = int(ts)
            elif 'action.name' in line:
                action = line.split("=")[1].replace(' ', '')
                if action != 'recv':
                    # Only 'recv' records are expected; abort the whole parse.
                    print("WARNING! Not recv")
                    return
                tmp_transfer['action'] = action
            elif 'cid' in line:
                cid = line.split("=")[1].replace(' ', '')
                tmp_transfer['content'] = [{'cid': cid, 'ts': ts}]
            elif 'size_bytes' in line:
                size_bytes = int(line.split("=")[1].replace(' ', ''))
                if size_bytes == 117:
                    # 117-byte records mark the start of a new transfer.
                    transfers.append(tmp_transfer)
                else:
                    # Ongoing transfer: accumulate into the latest one.
                    transfers[-1]['size_bytes'] += size_bytes
                    transfers[-1]['content'].append(tmp_transfer['content'][0])
                tmp_transfer['size_bytes'] = size_bytes
            elif 'peer' in line:
                peer = line.split("=")[1].replace(' ', '')
                counts = transfers[-1]["peer"]
                counts[peer] = counts.get(peer, 0) + 1
    return transfers
# Returns duplicate ratio in `data`
def get_dups(data):
    """Return the fraction of distinct CIDs in `data` seen more than once.

    Args:
        data: list of block records shaped like {'cid': ..., 'ts': ...}.

    Returns:
        round(duplicated_distinct_cids / distinct_cids, 3).
        Returns 0.0 for empty input (previously raised ZeroDivisionError).
    """
    if not data:
        return 0.0
    counts = Counter(block['cid'] for block in data)
    duplicated = sum(1 for n in counts.values() if n > 1)
    return round(duplicated / len(counts), 3)
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment