Skip to content

Instantly share code, notes, and snippets.

@bactisme
Created April 26, 2016 13:59
Show Gist options
  • Save bactisme/8c56aa6f04f3df1fce7b4346a2522049 to your computer and use it in GitHub Desktop.
Save bactisme/8c56aa6f04f3df1fce7b4346a2522049 to your computer and use it in GitHub Desktop.
Take a file containing a list of URLs; for each page, print the number of images and the sum, average and max image file size
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from lxml import html
import sys
def sum_avg_max(images):
    """Return the total, average and maximum size of a list of images.

    Args:
        images: sequence of ``(url, size)`` pairs, where ``size`` is an
            integer (the caller in this file passes Content-Length values).

    Returns:
        A ``(total, average, max_size)`` tuple. When ``images`` is empty the
        result is ``(0, 0, 0)`` instead of a ZeroDivisionError.
    """
    if not images:
        # A page with no matching images (or whose HEAD requests all failed)
        # yields an empty list; report zeros rather than dividing by zero.
        return (0, 0, 0)

    sizes = [size for _url, size in images]
    total = sum(sizes)
    # Floor division gives the same integer result as "/" on Python 2 ints,
    # which is what the "%d" formatting in the caller expects.
    average = total // len(sizes)
    return (total, average, max(sizes))
def get_page_content(url):
    """Fetch a page and return the size of every image in its post content.

    Downloads ``url``, collects the ``src`` attribute of each ``<img>`` found
    under a ``<div>`` whose ``class`` attribute is exactly ``post-content``,
    and issues a HEAD request per image to read its ``content-length``
    header.

    Example page this was written against:
    http://www.frandroid.com/produits-android/smartphone/355381_xiaomi-mi-max-apercu-snapdragon-652

    Args:
        url: absolute URL of the page to inspect.

    Returns:
        A list of ``(image_url, size)`` tuples, ``size`` being the integer
        value of the ``content-length`` header. Images whose HEAD request
        fails, or whose header is missing or not an integer, are skipped.

    Raises:
        requests.RequestException: propagated from the initial page fetch
            (e.g. connection failure or timeout).
    """
    page = requests.get(url, timeout=15)
    tree = html.fromstring(page.content)
    srcs = tree.xpath('//div[@class="post-content"]//img/@src')

    results = []
    for image in srcs:
        # Protocol-relative URLs ("//host/path") need a scheme for requests.
        if image.startswith("//"):
            image = "http:" + image
        try:
            head = requests.head(image, timeout=5)
            size = int(head.headers["content-length"])
        except (requests.RequestException, KeyError, ValueError):
            # Best effort: skip images that cannot be reached (including
            # relative or non-HTTP srcs) or that expose no usable
            # content-length, without hiding KeyboardInterrupt/SystemExit
            # the way a bare "except:" would.
            continue
        results.append((image, size))
    return results
def checkimagesize(file):
    """Print image-size statistics for every page URL listed in a file.

    Reads ``file`` line by line. Each non-blank line is treated as a page
    URL (lines starting with ``www`` get an ``http://`` prefix), the page's
    images are measured with ``get_page_content``, and one CSV-style line is
    printed per page::

        <url>,<image count>,<total size>,<average size>,<max size>

    Args:
        file: path to a text file containing one URL per line.

    Returns:
        None. Results are written to standard output.
    """
    # "with" guarantees the handle is closed even if a request raises.
    with open(file, "r") as fp:
        for line in fp:
            line = line.strip(' \t\n\r')
            if not line:
                # Blank lines are not URLs; requesting "" would raise.
                continue
            if line.startswith("www"):
                line = "http://" + line
            images = get_page_content(line)
            if images:
                stats = sum_avg_max(images)
            else:
                # No measurable images on this page: report zeros instead of
                # computing an average over an empty list.
                stats = (0, 0, 0)
            # Single parenthesised argument prints identically on Python 2
            # and Python 3.
            print(line + (",%d" % len(images)) + (",%d,%d,%d" % stats))
if __name__ == "__main__":
    # Guarded so that importing this module does not start network requests.
    if len(sys.argv) < 2:
        # Give a usage hint instead of a bare IndexError traceback.
        sys.exit("usage: %s FILE_WITH_URLS" % sys.argv[0])
    checkimagesize(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment