Skip to content

Instantly share code, notes, and snippets.

@fherbine
Last active March 21, 2020 20:10
Show Gist options
  • Save fherbine/59fa5bed4397fc2cbbfe796d49e273c4 to your computer and use it in GitHub Desktop.
Save fherbine/59fa5bed4397fc2cbbfe796d49e273c4 to your computer and use it in GitHub Desktop.
Useful to remove all JS scripts from a web page
#!/usr/bin/python3
import datetime
import sys
from hashlib import md5
import requests
import sh
from bs4 import BeautifulSoup
# Browser command used by `-o` when no usable browser name is supplied.
DEFAULT_WEBBROWSER = 'firefox'

# Destination of the cleaned page.  The file name is the MD5 hex digest of the
# timestamp taken when this module is loaded, so it differs from run to run.
_run_stamp = str(datetime.datetime.now())
DEFAULT_DST = '/tmp/{}.html'.format(md5(_run_stamp.encode()).hexdigest())
def dispatch_help():
    """Print the command-line usage summary to standard output."""
    # The leading and trailing empty entries reproduce the blank lines that
    # frame the usage text.
    usage_lines = (
        '',
        '======= clean-js =======',
        'remove-js and generate .html',
        'usage:',
        '`clean-js <source-url> [options]`',
        'options:',
        '-h or --help: dispatch usage',
        '-o [web-browser] open in a web-browser (default is firefox)',
        '',
    )
    print('\n'.join(usage_lines))
def remove_scripts(string):
    """Return *string* as pretty-printed HTML with all scripts removed.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend, every
    ``<script>`` element (the tag together with its contents) is dropped, and
    the remaining tree is serialized with ``prettify()``.

    The input is parsed as-is instead of being lower-cased first:
    lower-casing the whole document also rewrote visible text, attribute
    values and case-sensitive URLs.  ``html.parser`` reports tag names in
    lower case, so upper-case ``<SCRIPT>`` elements are still matched.

    Args:
        string: HTML markup to clean.

    Returns:
        The prettified markup without any ``<script>`` elements.
    """
    soup = BeautifulSoup(string, 'html.parser')
    for script in soup.find_all('script'):
        # decompose() removes the element from the tree and discards it.
        script.decompose()
    return soup.prettify()
if __name__ == '__main__':
    # Command-line entry point: download <source-url>, strip its <script>
    # elements, write the result to DEFAULT_DST and, with `-o`, open that
    # file in a web browser.
    if len(sys.argv) < 2 or '-h' in sys.argv or '--help' in sys.argv:
        dispatch_help()
        sys.exit(0)

    sys.argv.pop(0)  # pop the program name
    url = sys.argv.pop(0)

    r = requests.get(url)
    if r.status_code not in (200, 201, 202, 203):
        # Name the actual failure on stderr; the usage text alone gives no
        # hint that the request itself was rejected.
        print(
            'clean-js: {} answered with HTTP status {}'.format(
                url, r.status_code
            ),
            file=sys.stderr,
        )
        dispatch_help()
        sys.exit(-1)

    # r.text is already a str, so it is passed on directly.
    content = remove_scripts(r.text)

    # Write as UTF-8 explicitly so pages with non-ASCII characters do not
    # fail under a locale whose default encoding cannot represent them.
    with open(DEFAULT_DST, 'w', encoding='utf-8') as dst:
        dst.write(content)

    if '-o' in sys.argv:
        idx = sys.argv.index('-o')
        sys.argv.remove('-o')
        # With '-o' removed, the browser name (if one was given) is at idx.
        try:
            getattr(sh, sys.argv[idx])(DEFAULT_DST)
        except Exception:
            # Best-effort fallback: no browser name was given, or the named
            # browser could not be launched.  Catching Exception rather than
            # everything lets Ctrl-C and SystemExit propagate.
            getattr(sh, DEFAULT_WEBBROWSER)(DEFAULT_DST)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment