Skip to content

Instantly share code, notes, and snippets.

@peterbe
Created March 29, 2017 19:56
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterbe/a9f28f0a9df19285f94a5057d2ae3939 to your computer and use it in GitHub Desktop.
Save peterbe/a9f28f0a9df19285f94a5057d2ae3939 to your computer and use it in GitHub Desktop.
import os
import io
from gzip import GzipFile
from urllib.parse import urlparse
import requests
import boto3
# Both benchmark files live under the same public S3 location, so the shared
# prefix is spelled out once and each URL only adds its own object path.
_SYMBOLS_ROOT = (
    "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public"
)

# ~18.4MB compressed
URL_BIG = _SYMBOLS_ROOT + "/v1/xul.pdb/C017F3ED83534FCE9CAA2057D8BCEE322/xul.sym"

# ~1.2MB compressed
URL_SMALL = (
    _SYMBOLS_ROOT
    + "/v1/wntdll.pdb/D74F79EB1F8D4A45ABCD2F476CCABACC2/wntdll.sym"
)
# Module-level boto3 S3 resource, created once at import time and reused by
# f4() for every download. 'us-west-2' is passed positionally as the region
# name (second parameter of boto3's resource() factory).
s3 = boto3.resource('s3', 'us-west-2')
def f1(url):
    """Download ``url`` with one plain (non-streaming) ``requests.get`` call.

    Returns the length, in bytes, of the response body as exposed by
    ``Response.content``.
    """
    response = requests.get(url)
    body = response.content
    return len(body)
def f2(url):
    """Stream ``url`` in 512-byte chunks into an in-memory buffer.

    Returns the total number of bytes accumulated in the buffer.
    """
    response = requests.get(url, stream=True)
    sink = io.BytesIO()
    for piece in response.iter_content(chunk_size=512):
        # Empty chunks carry no data; skip them.
        if not piece:
            continue
        sink.write(piece)
    payload = sink.getvalue()
    return len(payload)
def f3(url):  # same as f2 but bigger chunk size
    """Stream ``url`` in 1024-byte chunks into an in-memory buffer.

    Returns the total number of bytes accumulated in the buffer.
    """
    response = requests.get(url, stream=True)
    sink = io.BytesIO()
    for piece in response.iter_content(chunk_size=1024):
        # Empty chunks carry no data; skip them.
        if not piece:
            continue
        sink.write(piece)
    payload = sink.getvalue()
    return len(payload)
def f4(url):
    """Fetch the object behind ``url`` through boto3 instead of HTTP.

    The URL path is split as ``/<bucket>/<key>``: the first path segment is
    used as the bucket name and everything after it as the object key. The
    whole body is read into memory and a gzip decompression is attempted;
    if that fails with ``OSError`` the raw bytes are used as-is.

    Returns the length, in bytes, of the resulting payload.
    """
    path = urlparse(url).path
    # Leading '/' yields an empty first element, which is discarded.
    _, bucket_name, key = path.split('/', 2)
    s3_object = s3.Object(bucket_name=bucket_name, key=key)
    raw = io.BytesIO(s3_object.get()["Body"].read())
    try:
        payload = GzipFile(fileobj=raw, mode='rb').read()
    except OSError:
        # Treated as "the object was not gzip-compressed": rewind and
        # take the bytes exactly as downloaded.
        raw.seek(0)
        payload = raw.read()
    return len(payload)
def _stats(r):
# returns the median, average and standard deviation of a sequence
tot = sum(r)
avg = tot/len(r)
sdsq = sum([(i-avg)**2 for i in r])
s = list(r)
s.sort()
return s[len(s)//2], avg, (sdsq/(len(r)-1 or 1))**.5
if __name__ == '__main__':
    # Benchmark driver: runs every download function against both URLs for
    # three rounds and prints, per (function, file), the median and the
    # standard deviation of the elapsed time in seconds.

    # Optional sanity checks on the expected payload sizes; disabled because
    # each one performs a full download.
    # assert f1(URL_BIG) == 87794590
    # assert f1(URL_SMALL) == 1244266
    # assert f2(URL_BIG) == 87794590
    # assert f2(URL_SMALL) == 1244266
    # assert f3(URL_BIG) == 87794590
    # assert f3(URL_SMALL) == 1244266
    # assert f4(URL_BIG) == 87794590
    # assert f4(URL_SMALL) == 1244266
    import random
    import time

    functions = [f1, f2, f3, f4]
    results = {}
    for _ in range(3):
        # Re-shuffle each round so no function always runs in the same slot.
        random.shuffle(functions)
        for url in URL_BIG, URL_SMALL:
            for func in functions:
                key = (func.__name__, os.path.basename(url))
                # perf_counter() is monotonic and high-resolution; time.time()
                # can jump with system clock adjustments and skew timings.
                t0 = time.perf_counter()
                res = func(url)
                t1 = time.perf_counter()
                # print(key, res, t1 - t0)
                results.setdefault(key, []).append((t1 - t0, res))

    for key in sorted(results):
        times = [elapsed for elapsed, _size in results[key]]
        # The mean is computed by _stats but intentionally not reported.
        med, _avg, std = _stats(times)
        print(key[0], '\t', key[1], '\t', round(med, 3), '\t', round(std, 3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment