Skip to content

Instantly share code, notes, and snippets.

@lightstrike
Last active August 29, 2015 14:04
Show Gist options
  • Save lightstrike/08322a945b7d8966a903 to your computer and use it in GitHub Desktop.
Save lightstrike/08322a945b7d8966a903 to your computer and use it in GitHub Desktop.
Simple CSS class extraction script from HTML file
"""
Extract the CSS class attribute values from an HTML file and print an
empty CSS rule for each one.

Usage: get_classes_from_html.py path/to/file.html
"""
def convert_html_to_string(html_file_path):
    """Return the entire contents of the file at *html_file_path* as a string.

    The file is opened in text read mode and is closed before returning,
    even if reading fails.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    # The context manager guarantees the handle is released; the previous
    # version left the file open until garbage collection.
    with open(html_file_path, 'r') as html_file:
        return html_file.read()
def get_css_classes(html_string, attribute='class'):
    """Build an empty CSS rule stub for every *attribute* value in the HTML.

    Scans *html_string* for occurrences of ``attribute="value"`` or
    ``attribute='value'`` (optional whitespace around ``=``) and returns one
    string per occurrence, in document order, of the form ``.value {``,
    a newline, ``}`` and a trailing newline.

    Args:
        html_string: HTML markup to scan.
        attribute: name of the attribute whose values are extracted.
            Defaults to ``'class'``. The generated selector always uses the
            ``.`` prefix regardless of the attribute name.

    Returns:
        A list of CSS rule stub strings; empty if nothing matched.
        Duplicates are kept, and a value holding several space-separated
        classes is emitted unsplit as a single entry.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # TODO: split up multiple classes
    # - The lookbehind rejects longer attribute names such as "data-class".
    # - The back-reference requires the closing quote to match the opening
    #   one, so an unterminated value is skipped instead of being truncated.
    # - DOTALL lets a value span line breaks.
    pattern = re.compile(
        r'''(?<![\w-])%s\s*=\s*(["'])(.*?)\1''' % re.escape(attribute),
        re.DOTALL,
    )
    return [
        ''.join(['.', match.group(2), ' {\n}\n'])
        for match in pattern.finditer(html_string)
    ]
import sys
if __name__ == "__main__":
    # Exit with a usage hint (written to stderr, exit status 1) rather than
    # an IndexError traceback when the path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: get_classes_from_html.py path/to/file.html")
    html_file_path = sys.argv[1]
    html_string = convert_html_to_string(html_file_path)
    css_classes = get_css_classes(html_string)
    for css_class in css_classes:
        # Parenthesised single-argument form runs on both Python 2 and 3.
        print(css_class)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment