Skip to content

Instantly share code, notes, and snippets.

@nico202
Last active November 13, 2019 19:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nico202/80d24b82d1ff1bde2dd40234bccd8125 to your computer and use it in GitHub Desktop.
Save nico202/80d24b82d1ff1bde2dd40234bccd8125 to your computer and use it in GitHub Desktop.
Includes external resources in html files. Use with: python build.py input.html output.html (adapted from https://stackoverflow.com/questions/28258579/modify-html-file-to-embed-all-external-scripts-and-css-into-script-and-style)
# This takes an html file, looks for external script and stylesheet tags,
# downloads them (when necessary) and then embeds them, merged and minified,
# directly into the output html file.
import io
import os
import re
import sys
from collections import deque
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup, Tag
from cssmin import cssmin
from jsmin import jsmin
# Both parameters are required; fail with a usage line instead of an IndexError.
if len(sys.argv) < 3:
    sys.exit("usage: python %s input.html output.html" % sys.argv[0])
# html param
html = sys.argv[1]
# target param
target = sys.argv[2]
# Directory prefix of the html param: either "" or a path ending in a
# separator, so it can be prepended to relative references. os.path is used
# so that the platform's own separator is recognised, not only "/".
path = os.path.join(os.path.dirname(html), "")


def _resolve_ref(ref):
    """Return the location from which the referenced resource is read.

    Absolute http(s) URLs are returned untouched: prefixing them with the
    html file's directory would turn them into bogus local paths.
    Protocol-relative references ("//host/file.js") are fetched over https.
    Every other reference is taken relative to the html file.
    """
    if ref.startswith(("http://", "https://")):
        return ref
    if ref.startswith("//"):
        return "https:" + ref
    return path + ref


# open html file (closed again as soon as it has been parsed)
with open(html) as html_file:
    soup = BeautifulSoup(html_file, features="html.parser")
# get all scripts containing src attribute (= external scripts)
scripts = soup.find_all("script", attrs={"src": True})
# get all links to css stylesheets that actually carry an href
stylesheets = soup.find_all("link", attrs={"rel": "stylesheet", "href": True})
# The last tag of each kind is the anchor point that is later replaced by the
# merged content, so at least one of each must be present.
if not scripts or not stylesheets:
    sys.exit("%s must contain at least one <script src=...> and one "
             "<link rel=\"stylesheet\" href=...> tag" % html)
# find last script as anchorpoint
lastScript = scripts[-1]
# find last style link as anchorpoint
lastStylesheet = stylesheets[-1]
# create list of script srcs
# TODO: download (local_or_url_read) should happen here
print("\nRead Scripts:")
scriptsSrc = deque()
for script in scripts:
    script_location = _resolve_ref(script.attrs["src"])
    scriptsSrc.append(script_location)
    print("\t" + script_location)
# create list of stylesheets srcs
print("\nRead Stylesheets:")
stylesheetsSrc = deque()
for stylesheet in stylesheets:
    stylesheet_location = _resolve_ref(stylesheet.attrs["href"])
    stylesheetsSrc.append(stylesheet_location)
    print("\t" + stylesheet_location)
def local_or_url_read(fname, output):
    """Copy the text of *fname* into *output*, downloading it first if needed.

    Args:
        fname: Path of a local file, or an http(s) URL. A URL is downloaded
            into the current working directory under the base name of its
            path; an existing file of that name is reused instead of being
            downloaded again.
        output: Any object with a ``write(str)`` method; receives the content
            line by line.

    Raises:
        FileNotFoundError: *fname* is neither an existing file nor an
            http(s) URL.
        requests.RequestException: the download failed, timed out or
            returned an HTTP error status.
    """
    if not os.path.isfile(fname):
        if not fname.startswith(("http://", "https://")):
            raise FileNotFoundError(
                "The file %s is missing and I don't know how to download it!"
                % fname)
        urlpath = fname
        fname = os.path.basename(urlparse(urlpath).path)
        if os.path.isfile(fname):
            print("File %s has already been downloaded, skipping" %
                  fname, end="")
        else:
            print("downloading %s" % urlpath, end="")
            # Fetch and validate before touching the disk: a failed request
            # must not leave an empty file behind, because the next run would
            # mistake it for a finished download.
            response = requests.get(urlpath, timeout=30)
            response.raise_for_status()
            with open(fname, "w") as ofile:
                ofile.write(response.text)
    with open(fname) as infile:
        for line in infile:
            output.write(line)
# Merge, minify and embed the collected scripts and stylesheets, then write
# the resulting document to the target file.
#
# The merged sources are held in memory instead of temp.js / temp.css in the
# working directory, so an unrelated file of that name is never overwritten
# or deleted and nothing is left behind if a later step fails. The progress
# labels keep their historical wording.

# merge scripts
print("\nMerge Scripts:")
print("\t", end="")
merged_js = io.StringIO()
for fname in scriptsSrc:
    # start every script on a fresh line
    merged_js.write("\n")
    print("~", end="")
    local_or_url_read(fname, merged_js)
print("\n")
# merge stylesheets
print("Merge Stylesheets:")
print("\t", end="")
merged_css = io.StringIO()
for fname in stylesheetsSrc:
    # start every stylesheet on a fresh line
    merged_css.write("\n")
    print("~", end="")
    local_or_url_read(fname, merged_css)
print("\n")
# minify javascript
print("Minify temp.js\n\t~")
minified_js = jsmin(merged_js.getvalue())
# minify css
print("\nMinify temp.css\n\t~")
minified_css = cssmin(merged_css.getvalue())
# replace the last external script / stylesheet link with the merged and
# minified content as an embedded tag
print("\nReplacing and deleting\n\t~")
tag = soup.new_tag("script")
tag["type"] = "text/javascript"
tag.append(minified_js)
lastScript.replace_with(tag)
tag = soup.new_tag("style")
tag["type"] = "text/css"
tag.append(minified_css)
lastStylesheet.replace_with(tag)
# remove the original external script and stylesheet tags
for script in scripts:
    script.decompose()
for stylesheet in stylesheets:
    stylesheet.decompose()
# save html as target; the context manager closes the file even on error
with open(target, "w") as out_file:
    out_file.write(soup.prettify())
print("\nFIN\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment