|
import re |
|
import os |
|
import sys |
|
import shutil |
|
from tempfile import mkstemp |
|
from PIL import Image |
|
|
|
# Size threshold in bytes: a referenced PNG whose file size exceeds this is
# converted to JPEG by the conversion loop further down.
large_png = 250000
# File-name suffix of the stylesheet(s) that are scanned and rewritten.
css_file_pattern = '.webflow.css'


#---- no need to change anything below here ----#
# The site directory is the one required command-line argument. Exit with a
# usage message instead of an unexplained IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: <script> <site-directory>')

directory = sys.argv[1]
css_directory = '{}/css'.format(directory)
image_directory = '{}/images'.format(directory)
# Image references collected from the HTML files and the CSS files.
output = []
css_output = []
|
|
|
# This function is adapted from a Stack Overflow answer; see
# https://stackoverflow.com/questions/12714415/python-equivalent-to-sed/40843600#40843600
|
def sed(pattern, replace, source, dest=None, count=0):
    """Read a source file and write it back with substitutions applied.

    Each line of ``source`` is passed through ``re.subn``; the result is
    written to ``dest``, or back over ``source`` when ``dest`` is not given.

    Args:
        pattern (str): pattern to match (can be a compiled re pattern)
        replace (str): replacement str, interpreted as by ``re.sub``
        source (str): input filename
        dest (str): destination filename, if not given, source will be
            over written.
        count (int): maximum number of occurrences to replace across the
            whole file; 0 (the default) replaces every occurrence.

    Raises:
        OSError: if ``source`` cannot be read or the output cannot be
            written.
    """
    limited = bool(count) and count > 0
    # re.subn treats count=0 as "no limit", which is what unlimited mode needs.
    remaining = count if limited else 0
    tmp_name = None

    # Open the source first so that a missing source never creates or
    # truncates the destination.
    with open(source, 'r') as fin:
        if dest:
            fout = open(dest, 'w')
        else:
            fd, tmp_name = mkstemp()
            # Wrap the descriptor mkstemp already opened rather than opening
            # the path a second time, which would leak the descriptor.
            fout = os.fdopen(fd, 'w')

        try:
            with fout:
                for line in fin:
                    out, made = re.subn(pattern, replace, line, count=remaining)
                    fout.write(out)
                    if limited:
                        remaining -= made
                        if remaining == 0:
                            # Budget used up: stop substituting.
                            break
                # Copy whatever the loop did not consume, unchanged (this is
                # a no-op unless the loop stopped early).
                fout.writelines(fin)
        except BaseException:
            # Do not leave a stray temporary file behind on failure.
            if tmp_name is not None:
                os.remove(tmp_name)
            raise

    if tmp_name is not None:
        # mkstemp creates the file with owner-only permissions; carry the
        # original file's mode over before replacing it.
        shutil.copymode(source, tmp_name)
        shutil.move(tmp_name, source)
|
|
|
def replace_in_files(old, new):
    """Replace the literal text ``old`` with ``new`` in the site's files.

    Rewrites, in place, every file directly inside ``directory`` whose name
    ends with ``.html`` and every file inside ``css_directory`` whose name
    ends with ``css_file_pattern``.

    Args:
        old (str): text to search for; matched literally, not as a regex.
        new (str): text to put in its place; inserted literally.
    """
    # sed() hands both arguments to the re module. Escape the search text so
    # that, for example, the "." in "images/a.png" only matches a dot, and
    # double any backslashes so the replacement is not read as a template.
    literal_pattern = re.escape(old)
    literal_replacement = new.replace('\\', '\\\\')

    targets = [(directory, ".html"), (css_directory, css_file_pattern)]
    for folder, suffix in targets:
        for filename in os.listdir(folder):
            if filename.endswith(suffix):
                sed(literal_pattern, literal_replacement,
                    os.path.join(folder, filename))
|
|
|
|
|
# get html matches
# Any run of characters free of quotes, "=" and whitespace that ends in one
# of the listed image extensions counts as an image reference.
pattern = r'[^\"\'=\s]+\.(?:jpe?g|png|PNG|gif|ico)'
for filename in os.listdir(directory):
    if filename.endswith(".html"):
        with open(os.path.join(directory, filename)) as html_file:
            content = html_file.read()
        output.extend(re.findall(pattern, content))


# get css matches
for filename in os.listdir(css_directory):
    if filename.endswith(css_file_pattern):
        with open(os.path.join(css_directory, filename)) as css_file:
            content = css_file.read()
        # Capture the path inside url('../<path>'), i.e. without the
        # leading "../".
        css_refs = re.findall(r'url\(\'\.\.\/([^)]+)\'\)', content)
        css_output.extend(
            ref for ref in css_refs if ref.endswith(('.jpg', '.png'))
        )


# get images
html = set(output)
css = set(css_output)
file_images = html.union(css)
# Only regular files are candidates for removal; os.remove() would raise on
# a sub-directory of the image directory.
directory_images = set(
    "images/{}".format(item)
    for item in os.listdir(image_directory)
    if os.path.isfile(os.path.join(image_directory, item))
)

# Delete every image file that no HTML or CSS file refers to.
unused_images = directory_images.difference(file_images)
for filename in unused_images:
    print("removing ", filename)
    os.remove('{}/{}'.format(directory, filename))

# Convert large referenced PNG files to JPEG and update the references.
for filename in file_images:
    if not filename.endswith('png'):
        continue
    if filename.startswith('http') or os.path.basename(filename).startswith('_'):
        print('skipping {}'.format(filename))
        continue

    print('processing {}'.format(filename))
    filename_new = '{}.jpg'.format(os.path.splitext(filename)[0])
    # Built with format() rather than os.path.join() on purpose: join()
    # would discard `directory` if a reference started with "/".
    file_path = "{}/{}".format(directory, filename)
    file_path_new = "{}/{}".format(directory, filename_new)

    # A reference may name a file that is not present under `directory`;
    # skip it instead of letting os.stat() abort the run.
    if not os.path.isfile(file_path):
        print('skipping {} (file not found)'.format(file_path))
        continue

    if os.stat(file_path).st_size > large_png:
        print('converting {} to jpg'.format(file_path))
        # Close the image before deleting the file it was read from.
        with Image.open(file_path) as im:
            rgb_im = im.convert('RGB')
            rgb_im.save(file_path_new)
        os.remove(file_path)

        # Rewrite references only once the .jpg actually exists.
        replace_in_files(filename, filename_new)