Created
October 9, 2020 06:20
-
-
Save dreness/b64c09158f3d1c045e1fec3d0642cf3e to your computer and use it in GitHub Desktop.
Sum Content-Lengths of a bunch of HTTP resources on servers that might not return Content-Length in HEAD responses.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import requests | |
import fileinput | |
import sys | |
from humanfriendly import format_size | |
# Location of the URL list (one URL per line). The first command-line argument
# is used when given; otherwise fall back to the original hard-coded path so
# running the script with no arguments behaves as before.
url_list_path = sys.argv[1] if len(sys.argv) > 1 else "/Users/andre/Desktop/urls.txt"
f = open(url_list_path, "r")  # consumed by the request loop further down

# A single session is shared by every request in the run.
s = requests.session()

i = 0    # number of requests made
cl = 0   # running sum of the Content-Length values seen so far
got = 0  # number of resources that reported a usable Content-Length
def is_integer(n):
    """Return True if *n* represents a whole number, otherwise False.

    *n* may be a number or a numeric string; it is interpreted with
    ``float()``, so ``"12"``, ``"12.0"`` and ``-1`` are all whole numbers
    while ``"1.5"``, ``"nan"`` and ``"inf"`` are not.

    Values that ``float()`` cannot interpret at all -- non-numeric strings
    (``ValueError``) as well as ``None`` and other non-numeric objects
    (``TypeError``) -- yield False instead of raising.
    """
    try:
        # Convert once and reuse the result for the whole-number check.
        value = float(n)
    except (TypeError, ValueError):
        return False
    return value.is_integer()
# Request every URL in the list and add up the Content-Length values reported.
# `with f` closes the list file once the loop finishes, or if an unexpected
# error escapes it.
with f:
    for url in f:
        url = url.strip()
        if not url:
            # Blank lines (e.g. a trailing newline) are not URLs; requesting
            # one would raise instead of being skipped.
            continue
        i += 1
        # Content-Length isn't required in HEAD responses, so do a GET but
        # use stream=True so only the headers are fetched, not the body.
        try:
            # Leaving the `with` closes the streamed response, releasing the
            # connection without downloading the body. The timeout keeps one
            # unresponsive server from stalling the whole run.
            with s.get(url, stream=True, allow_redirects=True, timeout=30) as r:
                thisCL = r.headers.get("content-length")
        except requests.RequestException as err:
            # One bad URL should not discard the totals gathered so far.
            print(f"Skipping {url}: {err}", file=sys.stderr)
            continue
        # A missing header is None and must not be counted: the previous
        # default of -1 passed the integer check, so it was tallied as a
        # found resource and subtracted one byte from the total.
        if thisCL is None or not is_integer(thisCL):
            continue
        # Go through float() so every form is_integer() accepts ("10.0",
        # "1e3") converts cleanly; int() alone raises on those.
        size = int(float(thisCL))
        if size < 0:
            # A negative length is malformed; ignore it rather than shrink
            # the total.
            continue
        cl += size
        got += 1

# Summarize once every URL has been tried.
fs = format_size(cl)
print(f"After {i} requests, found {got} resources whose cummulative size is {fs}")
Author
dreness
commented
Oct 9, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment