Skip to content

Instantly share code, notes, and snippets.

@wasi0013
Created March 23, 2019 10:51
Show Gist options
  • Save wasi0013/e6a51de8f9d0cdb2a657969f44967fa3 to your computer and use it in GitHub Desktop.
Save wasi0013/e6a51de8f9d0cdb2a657969f44967fa3 to your computer and use it in GitHub Desktop.
Find unoptimized images on a webpage using the requests_html Python library
from urllib.parse import urljoin

import requests_html
def _image_size_kb(response):
    """Return the size of a downloaded image response in decimal kilobytes.

    The ``Content-Length`` header is used when it is present and numeric;
    otherwise (for example on chunked responses, which carry no such header)
    the length of the already-downloaded body is used instead.
    """
    try:
        size_bytes = int(response.headers.get("Content-Length"))
    except (TypeError, ValueError):
        # Header missing (None -> TypeError) or not a number (ValueError).
        size_bytes = len(response.content)
    return size_bytes / 1000


def unoptimized_images(url, *, min_size_kb=1024):
    """Find large (likely unoptimized) images referenced by a webpage.

    Fetches ``url``, resolves the ``src`` of every ``<img>`` element against
    that element's base URL, downloads each image, and reports the ones whose
    size is at least ``min_size_kb``. Images that cannot be fetched, or that
    do not answer with HTTP 200, are skipped.

    Sizes are expressed in decimal kilobytes (bytes / 1000).

    :param url: address of the webpage to inspect
    :param min_size_kb: smallest size, in KB, at which an image is reported;
        the default of 1024 flags images of roughly 1 MB and above
    :return: tuple ``(image_count, images)`` where ``images`` is a list of
        ``{'url': ..., 'size(KB)': ...}`` dicts and ``image_count`` is the
        length of that list
    """
    images = []
    # The context manager closes the session (and its pooled connections)
    # once the scan is finished, even if fetching the page raises.
    with requests_html.HTMLSession() as session:
        response = session.get(url)
        for element in response.html.find("img"):
            src = element.attrs.get("src")
            if not src:
                continue
            try:
                # urljoin leaves absolute URLs untouched and correctly
                # resolves relative, root-relative ("/x.png") and
                # protocol-relative ("//cdn/x.png") sources.
                image_url = urljoin(element.base_url, src)
                image_response = session.get(image_url)
            except (OSError, ValueError):
                # requests' exceptions derive from OSError (invalid-URL ones
                # also from ValueError); one bad image must not abort the
                # scan of the remaining ones.
                continue
            if image_response.status_code != 200:
                continue
            size_kb = _image_size_kb(image_response)
            if size_kb >= min_size_kb:
                images.append({
                    'url': image_url,
                    'size(KB)': size_kb,
                })
    return len(images), images
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment