Created
February 28, 2019 16:55
-
-
Save nottrobin/a50772d46f7afc104272e6d3c326be8b to your computer and use it in GitHub Desktop.
To replace images in www.ubuntu.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# Standard library | |
import re | |
from io import BytesIO | |
from glob import glob | |
from PIL import Image | |
from urllib.parse import urlparse, parse_qs | |
from urllib.request import urlopen | |
from xml.etree import ElementTree | |
# Packages | |
from bs4 import BeautifulSoup | |
from canonicalwebteam import image_template | |
# Images are never emitted wider than this many pixels.
_MAX_WIDTH = 1040


def _parse_pixels(value):
    """Convert a pixel length such as "120" or "120.5px" to a rounded int."""
    return round(float(value.rstrip("px")))


def _svg_dimensions(svg_source):
    """Return the (width, height) in pixels declared by an SVG document.

    Explicit "width"/"height" attributes on the root element take priority.
    A missing attribute falls back to the matching component of "viewBox"
    (min-x, min-y, width, height; whitespace and/or comma separated).

    Raises:
        ValueError: if a dimension is declared neither directly nor through
            a four-value viewBox, or a declared value is not numeric.
    """
    root = ElementTree.fromstring(svg_source)
    view_box = re.split(r"[\s,]+", root.attrib.get("viewBox", "").strip())

    dimensions = []
    for index, name in enumerate(("width", "height")):
        if name in root.attrib:
            dimensions.append(_parse_pixels(root.attrib[name]))
        elif len(view_box) == 4:
            # viewBox components 2 and 3 are the width and height
            dimensions.append(round(float(view_box[2 + index])))
        else:
            raise ValueError(f"SVG declares no {name} and no usable viewBox")

    return tuple(dimensions)


def get_properties(img_tag):
    """Work out the image_template() arguments for one "<img ...>" tag.

    The target size comes from the tag's width/height attributes; for images
    hosted on assets.ubuntu.com the "w"/"h" query parameters override them.
    If either dimension is still unknown the image is downloaded and its real
    size is used to fill in the gap, keeping the aspect ratio. The final
    width is capped at 1040px.

    Args:
        img_tag: HTML source of a single <img> element.

    Returns:
        A dict with keys "url", "alt", "width", "height" and "attributes"
        (the remaining tag attributes, with "class" joined into one string).

    Raises:
        AttributeError: if no <img> element can be parsed from img_tag.
        KeyError: if the tag has no "src" or no "alt" attribute.
        ValueError: if a dimension is not numeric, or an SVG declares no size.
        Whatever urlopen() / Image.open() raise when the download fails.
    """
    img = BeautifulSoup(img_tag, "html.parser").find("img")

    width = _parse_pixels(img.attrs["width"]) if "width" in img.attrs else None
    height = (
        _parse_pixels(img.attrs["height"]) if "height" in img.attrs else None
    )

    url = img.attrs["src"]
    alt = img.attrs["alt"]
    url_parts = urlparse(url)

    if url_parts.netloc == "assets.ubuntu.com":
        # Use the assets server to resize the image
        # so we aren't caching more than we need in cloudinary
        params = parse_qs(url_parts.query)

        if "w" in params:
            width = int(params["w"][0])
        if "h" in params:
            # Was assigned to `width`, clobbering it and losing the height
            height = int(params["h"][0])

    # Only known once the image has actually been downloaded
    real_width = real_height = None

    if not (width and height):
        # Download image
        with urlopen(url) as response:
            image_data = response.read()

        if url_parts.path.lower().endswith(".svg"):
            real_width, real_height = _svg_dimensions(image_data)
        else:
            real_width, real_height = Image.open(BytesIO(image_data)).size

        # NOTE(review): a requested width is ignored when the real image is
        # wider than 1040px (the last branch applies) - kept as in the
        # original; confirm this is intended.
        if width and width < real_width <= _MAX_WIDTH:
            # If we have width, calculate the relative height
            ratio = width / real_width
            height = round(real_height * ratio)
        elif height and real_height > height:
            # If we have height, calculate the relative width
            ratio = height / real_height
            width = round(real_width * ratio)
        else:
            # Never upscale: fall back to the image's own size
            width = real_width
            height = real_height

    if width > _MAX_WIDTH:
        # Images never wider than 1040px
        if real_width:
            height = round(real_height * (_MAX_WIDTH / real_width))
        else:
            # Size came entirely from the tag, so nothing was downloaded:
            # scale the requested height by the same factor as the width.
            height = round(height * (_MAX_WIDTH / width))
        width = _MAX_WIDTH

    # Everything returned explicitly is removed from the pass-through attrs
    del img.attrs["src"]
    del img.attrs["alt"]
    img.attrs.pop("width", None)
    img.attrs.pop("height", None)

    if "class" in img.attrs:
        # The parser yields class as a list of names; flatten it to a string
        img.attrs["class"] = " ".join(img.attrs["class"])

    return {
        "url": url,
        "alt": alt,
        "width": width,
        "height": height,
        "attributes": img.attrs,
    }
# Walk every HTML template below the current directory and print the
# image_template() output for each <img> tag found in it.
for template_path in glob("**/*.html", recursive=True):
    # Explicit encoding so the result does not depend on the platform locale
    with open(template_path, encoding="utf-8") as template_file:
        template_content = template_file.read()

    for img_tag in re.findall(r"<img[^>]+>", template_content):
        try:
            img_properties = get_properties(img_tag)
        except Exception:
            # Report which tag failed, then open a debugger if one is
            # installed, and finally let the original error propagate.
            print(f"File: {template_path}")
            print(f"Image tag: {img_tag}")

            try:
                import ipdb
            except ImportError:
                # A missing debugger must not hide the real failure
                pass
            else:
                ipdb.set_trace()

            raise

        print(image_template(**img_properties))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment