Skip to content

Instantly share code, notes, and snippets.

@pigeonflight
Last active April 22, 2021 05:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pigeonflight/2eb2ccfb7d140c11f4b9110f45f93dba to your computer and use it in GitHub Desktop.
Save pigeonflight/2eb2ccfb7d140c11f4b9110f45f93dba to your computer and use it in GitHub Desktop.
Python script to remove unused images from a web project folder. It works for Webflow projects but should work in other circumstances as well.

Background

This is a horribly ugly (and lazy) implementation. The goal is to remove images that are not referenced by either the css or html files of a web project.

Additionally, it converts large PNG files (larger than 250000 bytes) into JPG files, since these are most often photographs. It then goes through each HTML and CSS file and replaces every reference to the PNG with a reference to the JPG file. Finally, it removes the old PNG files to reduce the overall size of the site folder. Externally stored images are skipped.

The script includes a 'sed-like' function based on something found at stackoverflow (https://stackoverflow.com/questions/12714415/python-equivalent-to-sed/40843600#40843600)

Assumption

All images are located in images/ and all css is located in css/. PNG files whose names begin with '_' are skipped during the PNG-to-JPG conversion.

Usage

To use this script, edit the css pattern at the beginning of the file. By default, the script looks for css files that end with ".webflow.css".

python cleanimages.py site_folder
import re
import os
import sys
import shutil
from tempfile import mkstemp
from PIL import Image
# PNG files larger than this many bytes are converted to JPG.
large_png = 250000
# Only css files whose name ends with this suffix are scanned and rewritten.
css_file_pattern = '.webflow.css'
#---- no need to change anything below here ----#
# Fail with a usage message instead of a bare IndexError traceback when the
# site folder argument is missing.
if len(sys.argv) < 2:
    sys.exit('usage: python cleanimages.py site_folder')
directory = sys.argv[1]
if not os.path.isdir(directory):
    sys.exit('{} is not a directory'.format(directory))
css_directory = '{}/css'.format(directory)
image_directory = '{}/images'.format(directory)
# Image references collected from the html files.
output = []
# Image references collected from the css files.
css_output = []
# this function is "lifted" from stackoverflow
# see https://stackoverflow.com/questions/12714415/python-equivalent-to-sed/40843600#40843600
def sed(pattern, replace, source, dest=None, count=0):
    """Copy a file line by line, applying a regex substitution to each line.

    Every line of ``source`` is passed through
    ``re.sub(pattern, replace, line)`` and written to the destination.

    Args:
        pattern (str): regular expression to match (a compiled pattern
            also works, since it is handed straight to ``re.sub``).
        replace (str): replacement, as accepted by ``re.sub``.
        source (str): input filename.
        dest (str): destination filename; if not given, ``source`` is
            overwritten (the output is built in a temporary file first).
        count (int): maximum number of lines to modify; ``0`` (the
            default) means no limit. Once the limit is reached the rest of
            the file is copied through unchanged.
    """
    tmp_name = None
    if dest:
        fout = open(dest, 'w')
    else:
        # Wrap the descriptor returned by mkstemp instead of opening the
        # path a second time, so the descriptor is not leaked.
        fd, tmp_name = mkstemp()
        fout = os.fdopen(fd, 'w')

    try:
        with fout, open(source, 'r') as fin:
            num_replaced = 0
            for line in fin:
                out = re.sub(pattern, replace, line)
                fout.write(out)
                if out != line:
                    num_replaced += 1
                    if count and num_replaced >= count:
                        break
            # Copy whatever is left after an early break; this is a no-op
            # when the loop above consumed the whole file.
            fout.writelines(fin)
    except Exception:
        # Do not leave a stray temporary file behind on failure.
        if tmp_name is not None:
            os.remove(tmp_name)
        raise

    if tmp_name is not None:
        # mkstemp creates the file with restrictive permissions; keep the
        # original file's mode when replacing it.
        shutil.copymode(source, tmp_name)
        shutil.move(tmp_name, source)
def replace_in_files(old, new):
    """Replace the literal text ``old`` with ``new`` in the site's files.

    The files rewritten in place are every ``.html`` file directly inside
    ``directory`` and every file in ``css_directory`` whose name ends with
    ``css_file_pattern``.

    Args:
        old (str): text to search for (treated literally, not as a regex).
        new (str): text to put in its place.
    """
    # sed() hands its arguments to re.sub, so escape both sides: otherwise
    # the '.' in a file name matches any character and names containing
    # regex metacharacters misbehave or raise.
    pattern = re.escape(old)
    replacement = new.replace('\\', '\\\\')

    for filename in os.listdir(directory):
        if filename.endswith(".html"):
            sed(pattern, replacement, os.path.join(directory, filename))

    for filename in os.listdir(css_directory):
        if filename.endswith(css_file_pattern):
            sed(pattern, replacement, os.path.join(css_directory, filename))
# get html matches
# Collect every image-looking path referenced by the html files directly
# inside the site folder. A path is any run of characters without quotes,
# '=' or whitespace that ends in a known image extension. svg and webp are
# included and matching is case-insensitive, so those images are not later
# treated as unused and deleted.
pattern = re.compile(
    r'[^\"\'=\s]+\.(?:jpe?g|png|gif|ico|svg|webp)', re.IGNORECASE)
for filename in os.listdir(directory):
    if not filename.endswith(".html"):
        continue
    with open('{}/{}'.format(directory, filename)) as html_file:
        content = html_file.read()
    output.extend(pattern.findall(content))
# get css matches
# Collect image paths referenced as url(../<path>) in the css files. The
# url may be single-quoted, double-quoted or unquoted, and any common image
# extension counts, so images referenced only from css are not later
# treated as unused and deleted.
css_url_pattern = re.compile(r'url\(\s*[\'"]?\.\./([^\'")]+)[\'"]?\s*\)')
css_image_extensions = (
    '.jpg', '.jpeg', '.png', '.gif', '.ico', '.svg', '.webp')
for filename in os.listdir(css_directory):
    if not filename.endswith(css_file_pattern):
        continue
    with open('{}/{}'.format(css_directory, filename)) as css_file:
        content = css_file.read()
    # strip() drops the padding an unquoted "url( ../x.png )" leaves behind.
    matches = [
        match.strip() for match in css_url_pattern.findall(content)
        if match.strip().lower().endswith(css_image_extensions)
    ]
    css_output.extend(matches)
# get images
# Everything referenced from html or css, as paths relative to the site folder.
html = set(output)
css = set(css_output)
file_images = html.union(css)
# Only regular files are candidates for removal: os.remove() raises on a
# sub-directory, which would abort the script halfway through.
directory_images = set(
    "images/{}".format(item)
    for item in os.listdir(image_directory)
    if os.path.isfile(os.path.join(image_directory, item))
)
unused_images = directory_images.difference(file_images)
for filename in sorted(unused_images):
    print("removing ",filename)
    os.remove('{}/{}'.format(directory,filename))
# Convert every referenced png larger than large_png bytes to jpg, delete
# the png and point the html/css references at the new file.
for filename in file_images:
    if not filename.endswith('png'):
        continue
    # Externally hosted images and files whose name starts with '_' are
    # left alone.
    if filename.startswith('http') or os.path.basename(filename).startswith('_'):
        print('skipping {}'.format(filename))
        continue
    print('processing {}'.format(filename))
    filename_new = '{}.jpg'.format(os.path.splitext(filename)[0])
    file_path = "{}/{}".format(directory,filename)
    file_path_new = "{}/{}".format(directory,filename_new)
    # A reference does not guarantee a local file (absolute or
    # protocol-relative URLs, dangling references); skip instead of
    # crashing in os.stat().
    if not os.path.isfile(file_path):
        print('skipping {} (file not found)'.format(filename))
        continue
    if os.stat(file_path).st_size <= large_png:
        continue
    # Never overwrite a different image that already uses the jpg name.
    if os.path.exists(file_path_new):
        print('skipping {} ({} already exists)'.format(filename, filename_new))
        continue
    print('converting {} to jpg'.format(file_path))
    # Close the source image before deleting it. Converting to RGB drops
    # any alpha channel, which JPEG cannot store.
    with Image.open(file_path) as im:
        im.convert('RGB').save(file_path_new)
    os.remove(file_path)
    replace_in_files(filename, filename_new)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment