Created
October 31, 2016 19:04
-
-
Save Diviei/b20ed9dcf103ee451199e52a81f6117b to your computer and use it in GitHub Desktop.
A script that builds a dependency-free HTML file (inlined .js and .css, base64-encoded images)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import base64
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Build a self-contained copy of a web page: external .js files are merged
# into one inline <script>, external .css files into one inline <style>, and
# <img> sources are replaced by base64 data URIs.

# Page to snapshot and where the self-contained copy is written.
BASE_URL = "http://getbootstrap.com/"
OUTPUT_PATH = "result.html"
# Upper bound, in seconds, on any single HTTP request so that one stalled
# asset cannot hang the whole run.
REQUEST_TIMEOUT = 30


def absolute_url(page_url, reference):
    """Return *reference* as an absolute URL, resolved against *page_url*.

    Delegates to ``urljoin`` so that root-relative (``/a.js``),
    parent-relative (``../a.js``), document-relative (``a.js``),
    protocol-relative (``//cdn/a.js``) and already-absolute references are
    all handled, instead of gluing strings together.
    """
    return urljoin(page_url, reference)


def data_uri(content_type, payload):
    """Return a ``data:`` URI embedding *payload* (bytes) as base64.

    *content_type* is the value of the HTTP ``Content-Type`` header, or
    ``None`` when the server sent none, in which case
    ``application/octet-stream`` is used.
    """
    mime = content_type or "application/octet-stream"
    # Drop whitespace such as the space in "image/svg+xml; charset=utf-8".
    mime = "".join(mime.split())
    # b64encode returns bytes; decode so the result is a plain str.
    encoded = base64.b64encode(payload).decode("ascii")
    return f"data:{mime};base64,{encoded}"


def fetch(url):
    """GET *url* and return the response.

    Raises ``requests.RequestException`` on transport errors, timeouts and
    non-2xx statuses, so an error page is never inlined as if it were the
    asset.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def inline_scripts(soup, page_url):
    """Merge every external ``.js`` script of *soup* into one inline script.

    Scripts that cannot be downloaded are left untouched in the document.
    The merged ``<script>`` is appended to ``<head>`` (or to the document
    root when the page has no ``<head>``).
    """
    merged = []
    for tag in soup.find_all("script"):
        src = tag.get("src")
        if not src or not src.endswith(".js"):
            continue
        script_url = absolute_url(page_url, src)
        try:
            merged.append(fetch(script_url).text)
        except requests.RequestException as error:
            # Best effort: keep the external reference rather than lose the script.
            print(f"Skipping script {script_url}: {error}")
            continue
        tag.extract()

    new_script = soup.new_tag("script", type="text/javascript")
    new_script.string = "".join(source + "\n" for source in merged)
    (soup.head or soup).append(new_script)


def inline_stylesheets(soup, page_url):
    """Merge every linked ``.css`` file of *soup* into one inline style tag.

    Stylesheets that cannot be downloaded are left untouched in the
    document. The CSS text is inserted verbatim; ``url(...)`` references
    inside it are not rewritten.
    """
    merged = []
    for tag in soup.find_all("link"):
        href = tag.get("href")
        if not href or ".css" not in href:
            continue
        css_url = absolute_url(page_url, href)
        try:
            merged.append(fetch(css_url).text)
        except requests.RequestException as error:
            print(f"Skipping stylesheet {css_url}: {error}")
            continue
        tag.extract()

    # Keep a reference to the new tag: looking it up again via soup.head.style
    # would return the first <style> in <head>, which may be a different one.
    new_style = soup.new_tag("style", type="text/css")
    new_style.string = "\n".join(merged)
    (soup.head or soup).append(new_style)


def inline_images(soup, page_url):
    """Replace the ``src`` of every ``<img>`` in *soup* with a data URI.

    Images without a ``src``, images that are already ``data:`` URIs and
    images that cannot be downloaded are left as they are.
    """
    for tag in soup.find_all("img"):
        src = tag.get("src")
        if not src or src.startswith("data:"):
            continue
        image_url = absolute_url(page_url, src)
        try:
            response = fetch(image_url)
        except requests.RequestException as error:
            print(f"Skipping image {image_url}: {error}")
            continue
        tag["src"] = data_uri(response.headers.get("content-type"), response.content)


def main(page_url=BASE_URL, output_path=OUTPUT_PATH):
    """Download *page_url*, inline its assets and write it to *output_path*.

    Raises ``requests.RequestException`` if the page itself cannot be
    fetched; failures on individual assets are reported and skipped.
    """
    soup = BeautifulSoup(fetch(page_url).text, "html.parser")

    print("Converting and merging js scripts")
    inline_scripts(soup, page_url)

    print("Converting and merging css styles")
    inline_stylesheets(soup, page_url)

    print("Converting all images to base64")
    inline_images(soup, page_url)

    # Open the output only once the document is complete, so a failed run
    # does not leave a truncated file behind, and always close the handle.
    with open(output_path, "w", encoding="utf-8") as output:
        output.write(str(soup))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment