Skip to content

Instantly share code, notes, and snippets.

@harryposner
Created June 24, 2020 17:34
Show Gist options
  • Save harryposner/9312a16bdaf77cb6122707092b2b018f to your computer and use it in GitHub Desktop.
Save harryposner/9312a16bdaf77cb6122707092b2b018f to your computer and use it in GitHub Desktop.
Remove <script> tags from HTML files
#!/usr/bin/env python3
"""Remove <script> tags from HTML files

Usage: `remove_script_tags PATH`

If `PATH` is a file, it is rewritten in place. If `PATH` is a directory,
all `.html` files beneath it are processed recursively.
"""
import html.parser
import os
import os.path
import sys
# The two characters HTML allows as delimiters around an attribute value.
DOUBLE_QUOTE = '"'
SINGLE_QUOTE = "'"
class RemoveScriptTags(html.parser.HTMLParser):
    """Re-serialise parsed HTML while leaving out every <script> element.

    Each parser callback appends the markup it was given to ``self.output``
    (a list of string fragments); joining the list yields the cleaned
    document.  ``<script>`` start tags, end tags and the text between them
    are not appended.

    Unless the caller passes ``convert_charrefs`` explicitly, the parser is
    created with ``convert_charrefs=False`` so that character references in
    text (``&lt;``, ``&#169;`` ...) are written back as references instead
    of being decoded into raw ``<`` / ``&`` characters.  Attribute values
    are always decoded by the parser, so they are re-escaped on output.
    """

    def __init__(self, *args, **kwargs):
        """Create the parser; arguments are forwarded to ``HTMLParser``."""
        # Decoding references in text and writing the result back verbatim
        # would turn "&lt;script&gt;" into a live "<script>" tag.
        kwargs.setdefault("convert_charrefs", False)
        super().__init__(*args, **kwargs)
        self.output = []
        # True between a <script> start tag and its matching end tag.
        self._in_script = False

    def clean_file(self, fname):
        """Strip <script> elements from the file *fname*, rewriting it in place.

        The file is read completely and parsed before it is reopened for
        writing, so a read or parse error leaves the file untouched.  Both
        opens use the platform default text encoding.
        """
        # Start from a clean state so that reusing one instance for several
        # files does not carry output over from the previous file.
        self.reset()
        self.output = []
        self._in_script = False
        with open(fname, "r") as infile:
            self.feed(infile.read())
        # Flush whatever the parser is still buffering (e.g. an unfinished
        # construct at the very end of the file).
        self.close()
        with open(fname, "w") as outfile:
            outfile.write("".join(self.output))

    @staticmethod
    def _format_tag(tag, attrs, closer):
        """Return the markup for a start tag, terminated by *closer*."""
        parts = [f"<{tag}"]
        for attr, val in attrs:
            if val is None:
                # Attribute written without a value, e.g. <input disabled>.
                parts.append(f" {attr}")
            else:
                # `html` is bound by the file's `import html.parser`.
                # Escaping &, <, > and both quote characters keeps the value
                # valid inside double quotes whatever it contains.
                parts.append(f' {attr}="{html.escape(val, quote=True)}"')
        parts.append(closer)
        return "".join(parts)

    def handle_starttag(self, tag, attrs):
        """Emit a start tag, or begin skipping if it opens a script."""
        if tag == "script":
            self._in_script = True
            return
        self.output.append(self._format_tag(tag, attrs, ">"))

    def handle_startendtag(self, tag, attrs):
        """Emit an XHTML-style empty tag such as ``<br/>`` as one tag.

        The inherited implementation would report it as a start tag followed
        by an end tag, which would be written out as ``<br></br>``.
        """
        if tag == "script":
            # No content follows a self-closed tag, so nothing to skip.
            return
        self.output.append(self._format_tag(tag, attrs, " />"))

    def handle_endtag(self, tag):
        """Emit an end tag, or stop skipping if it closes a script."""
        if tag == "script":
            self._in_script = False
            return
        self.output.append(f"</{tag}>")

    def handle_data(self, data):
        """Emit text content unless it is the body of a script."""
        # The flag is tracked explicitly instead of looking at the most
        # recent start tag, so text that follows </script> is kept and the
        # check does not depend on how the tag name was capitalised.
        if not self._in_script:
            self.output.append(data)

    def handle_entityref(self, name):
        """Emit a named character reference (``&name;``) unchanged."""
        if not self._in_script:
            self.output.append(f"&{name};")

    def handle_charref(self, name):
        """Emit a numeric character reference (``&#NNN;`` / ``&#xHH;``)."""
        if not self._in_script:
            self.output.append(f"&#{name};")

    def handle_comment(self, comment):
        """Emit an HTML comment unchanged."""
        self.output.append(f"<!--{comment}-->")

    def handle_decl(self, decl):
        """Emit a declaration such as ``<!DOCTYPE html>``."""
        self.output.append(f"<!{decl}>")

    def handle_pi(self, data):
        """Emit a processing instruction (``<?...>``)."""
        self.output.append(f"<?{data}>")

    def unknown_decl(self, data):
        """Emit a marked section (``<![...]>``) reported by the parser."""
        # NOTE(review): the parser does not pass the terminator along; CDATA
        # sections are assumed to end with "]]>" and other sections (such as
        # "<![if ...]>") with "]>" -- confirm against the targeted Python
        # version if such markup matters.
        closer = "]]>" if data[:6].upper() == "CDATA[" else "]>"
        self.output.append(f"<![{data}{closer}")
def main():
    """Command-line entry point: clean the path given as the first argument.

    A regular file is cleaned in place and the process exits with status 0.
    A directory is walked recursively; the path of every file whose name
    ends in ``.html`` is printed and the file is cleaned in place.

    Exits with status 2 when no path argument was supplied and with
    status 1 when the path is neither an existing file nor a directory.
    """
    if len(sys.argv) < 2:
        print("usage: remove_script_tags PATH", file=sys.stderr)
        sys.exit(2)
    path = sys.argv[1]

    if os.path.isfile(path):
        RemoveScriptTags().clean_file(path)
        sys.exit(0)

    # os.walk() yields nothing for a missing path, which would otherwise
    # look like a successful run that simply found no files.
    if not os.path.isdir(path):
        print(
            f"remove_script_tags: no such file or directory: {path}",
            file=sys.stderr,
        )
        sys.exit(1)

    for dirpath, __, filenames in os.walk(path):
        for fname in filenames:
            if fname.endswith(".html"):
                path_to_file = os.path.join(dirpath, fname)
                print(path_to_file)
                # A fresh parser per file keeps each file's output separate.
                RemoveScriptTags().clean_file(path_to_file)
# Run the command-line entry point only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment