Skip to content

Instantly share code, notes, and snippets.

@Diviei
Created October 31, 2016 19:04
Show Gist options
  • Save Diviei/b20ed9dcf103ee451199e52a81f6117b to your computer and use it in GitHub Desktop.
Save Diviei/b20ed9dcf103ee451199e52a81f6117b to your computer and use it in GitHub Desktop.
A script that creates a dependency-free HTML file (external .js and .css are inlined, and images are embedded as base64 data URIs)
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import base64
import io

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

# Page to convert into a single self-contained HTML file.
base_url = "http://getbootstrap.com/"
# Where the self-contained page is written.
output_path = "result.html"
# Seconds to wait for any single HTTP request before giving up on it.
request_timeout = 30


def resolve_url(base, reference):
    """Return *reference* as an absolute URL, resolved against *base*.

    Root-relative ("/x"), parent-relative ("../x"), plain relative ("x"),
    protocol-relative ("//host/x") and already-absolute references are all
    handled by ``urljoin``; plain string concatenation gets several of
    these wrong (double slashes, missing scheme).
    """
    return urljoin(base, reference)


def to_data_uri(content, content_type):
    """Build a ``data:`` URI from raw bytes and a Content-Type header value.

    *content_type* may be ``None`` (header absent); a generic binary type is
    used in that case. Whitespace around ``;``-separated parameters is
    removed so the resulting URI contains no spaces.
    """
    mime = content_type or "application/octet-stream"
    mime = ";".join(part.strip() for part in mime.split(";"))
    # b64encode returns bytes on Python 3; decode so it can be joined to str.
    encoded = base64.b64encode(content).decode("ascii")
    return "data:" + mime + ";base64," + encoded


def fetch(url):
    """Download *url*; return the response, or ``None`` if the request failed.

    Failures (connection errors, timeouts, HTTP error statuses, unsupported
    schemes) are reported and swallowed so that one broken asset does not
    abort the whole conversion.
    """
    try:
        response = requests.get(url, timeout=request_timeout)
        response.raise_for_status()
    except requests.RequestException as error:
        print("Controlled exception: %s (%s)" % (url, error))
        return None
    return response


def insertion_point(soup):
    """Return ``<head>`` if the document has one, else the document itself."""
    head = soup.head
    return head if head is not None else soup


def inline_scripts(soup, base):
    """Replace external ``.js`` scripts with one merged inline ``<script>``.

    Scripts that cannot be downloaded keep their original ``src`` reference.
    """
    merged = []
    for script in soup.find_all("script"):
        src = script.get("src")
        if not src or not src.endswith(".js"):
            continue
        response = fetch(resolve_url(base, src))
        if response is None:
            continue
        merged.append(response.text)
        script.extract()
    if merged:
        tag = soup.new_tag("script", type="text/javascript")
        tag.string = "\n".join(merged) + "\n"
        insertion_point(soup).append(tag)


def inline_styles(soup, base):
    """Replace ``<link>`` tags pointing at ``.css`` with one inline ``<style>``.

    Links that cannot be downloaded are left untouched.
    """
    merged = []
    for link in soup.find_all("link"):
        href = link.get("href")
        if not href or ".css" not in href:
            continue
        response = fetch(resolve_url(base, href))
        if response is None:
            continue
        merged.append(response.text)
        link.extract()
    if merged:
        # Fill the tag we just created; looking it up again via
        # ``soup.head.style`` could return a pre-existing <style> instead.
        tag = soup.new_tag("style", type="text/css")
        tag.string = "\n".join(merged)
        insertion_point(soup).append(tag)


def inline_images(soup, base):
    """Rewrite every ``<img src>`` to a base64 ``data:`` URI.

    Images that are already ``data:`` URIs, or that cannot be downloaded,
    are left untouched.
    """
    for image in soup.find_all("img"):
        src = image.get("src")
        if not src or src.startswith("data:"):
            continue
        response = fetch(resolve_url(base, src))
        if response is None:
            continue
        image["src"] = to_data_uri(
            response.content, response.headers.get("content-type")
        )


def main():
    """Download ``base_url``, inline its assets and write ``output_path``."""
    page = requests.get(base_url, timeout=request_timeout)
    page.raise_for_status()
    # Resolve assets against the final URL so redirects are honoured.
    base = page.url
    soup = BeautifulSoup(page.text, "html.parser")

    print("Converting and merging js scripts")
    inline_scripts(soup, base)

    print("Converting and merging css styles")
    inline_styles(soup, base)

    print("Converting all images to base64")
    inline_images(soup, base)

    # Open the output only once everything succeeded, so a failed run does
    # not leave a truncated file behind; ``with`` guarantees it is closed.
    with io.open(output_path, "w", encoding="utf-8") as out:
        out.write(soup.decode())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment