Find unoptimized images on a webpage using requests_html (Python)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.parse import urljoin

import requests_html
def unoptimized_images(url, *, min_size_kb=1024):
    """
    Find unoptimized (oversized) images in a webpage.

    Every ``<img>`` element that has a ``src`` attribute is requested, and the
    ``Content-Length`` header of the response is taken as the image size.
    Images that cannot be fetched, do not answer with HTTP 200, or do not
    report a usable ``Content-Length`` are skipped.

    :param url: webpage_url
    :param min_size_kb: smallest size, in KB (1 KB = 1000 bytes), at which an
        image is reported; the default of 1024 KB is roughly 1 MB
    :return: tuple of image_count in int, images list of dict; every dict has
        the keys ``'url'`` (absolute image URL) and ``'size(KB)'``
    """
    images = []
    # Using the session as a context manager closes it (and its pooled
    # connections) even if the page request below raises.
    with requests_html.HTMLSession() as session:
        response = session.get(url)
        for element in response.html.find("img"):
            src = element.attrs.get("src")
            if not src:
                continue
            try:
                # urljoin leaves absolute URLs untouched and resolves relative,
                # root-relative and protocol-relative ones against the base.
                image_url = urljoin(element.base_url, src)
                # stream=True fetches only the headers; the image body is
                # never downloaded because the response is closed right away.
                with session.get(image_url, stream=True) as image_response:
                    status_code = image_response.status_code
                    content_length = image_response.headers.get("Content-Length")
            except (OSError, ValueError):
                # requests' RequestException derives from IOError (OSError),
                # so this covers connection errors and unsupported schemes
                # such as "data:" URIs; ValueError covers malformed URLs.
                continue
            if status_code != 200:
                continue
            try:
                image_size = int(content_length) / 1000
            except (TypeError, ValueError):
                # Header missing (None) or not a number.
                print("Error fetching image size for:", image_url)
                continue
            if image_size >= min_size_kb:
                images.append({
                    'url': image_url,
                    'size(KB)': image_size,
                })
    return len(images), images
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment