Skip to content

Instantly share code, notes, and snippets.

@davidnuon
Created June 10, 2012 18:52
Show Gist options
  • Save davidnuon/2906907 to your computer and use it in GitHub Desktop.
Save davidnuon/2906907 to your computer and use it in GitHub Desktop.
Python script that goes through an html file and prints a css file with selectors seen.
#!/usr/bin/env python
"""
css_gen.py < stdin
Reads through the input (should be an html file), looking for classes and ids,
and then grouping them by similar name. It then prints out the text for a CSS file.
The CSS file has all selectors; for each selector it has no properties.
Ex:
<div class="wrapper" id="foo"></div>
->
.wrapper {}
#foo {}
"""
import sys

# pyquery is the only third-party dependency. Without it the script cannot
# parse anything, so stop immediately with a clear message on stderr rather
# than printing to stdout (where the generated CSS goes) and failing later
# with a NameError on `pq`.
try:
    from pyquery import PyQuery as pq
except ImportError:
    sys.exit("You need pyquery installed")
# Writes one empty CSS rule for a selector name. If `id` is true it writes
# an ID rule ('#name'), otherwise a class rule ('.name').
def css(selector, id=False):
    """Return the text of an empty CSS rule for *selector*.

    selector -- the bare class or id name (no leading '.' or '#'); it is
                converted with str() before being used.
    id       -- when true, emit an ID selector ('#'); otherwise emit a
                class selector ('.').

    The returned string is the selector followed by an empty declaration
    block spread over three lines, ending with '} \\n'.
    """
    prefix = '#' if id else '.'
    return prefix + str(selector) + ' {\n\n} \n'
# Adds an item to a list iff it is unique
def add_if_new(list, item):
    """Append *item* to *list* in place unless an equal element is present.

    list -- the list to mutate (the name shadows the builtin; it is kept
            unchanged so existing callers are unaffected).
    item -- the value to add.

    Returns None; the list is modified in place and keeps insertion order.
    """
    if item not in list:
        list.append(item)
# String to be printed. It starts with a small boilerplate header; the
# generated (empty) rules are appended to it further down.
main_string = """
body, html { width:100%; height:100%; margin:0; padding:0; }
img, a { outline:0; border:0; }
body {}
"""

# Gather every class name and id that appears in the document on stdin.
# The script takes no command-line arguments (usage: css_gen.py < file.html).
css_classes = []
css_ids = []
html = sys.stdin.read()
# NOTE(review): find('*') presumably returns descendants only, so attributes
# on the root element itself may not be collected -- confirm against pyquery.
html_nodes = pq(html).find('*')
for node in html_nodes:
    attrib = node.attrib
    # split() with no argument splits on any run of whitespace and never
    # yields empty strings, so class="" or doubled spaces cannot produce an
    # empty class name (which would be emitted as a bogus '. {}' rule).
    for _c in attrib.get('class', '').split():
        add_if_new(css_classes, _c)
    # Skip blank ids for the same reason: they would become '# {}'.
    _id = attrib.get('id', '').strip()
    if _id:
        add_if_new(css_ids, _id)
css_classes = sorted(css_classes)
css_ids = sorted(css_ids)

# Group classes with similar names: consecutive (sorted) classes that share
# the text before the first '-' go into the same sub-list.
# Not needed to be done for IDs because IDs are supposed to be unique.
css_classes_org = []
# None can never equal a real prefix, so the first class always opens a
# group -- even when its prefix is the empty string (e.g. "-foo").
css_current = None
for co in css_classes:
    split_up = co.replace('.', '').split('-')[0]
    if split_up != css_current:
        css_current = split_up
        css_classes_org.append([])
    css_classes_org[-1].append(co)

# Emit each class group under a '/* prefix */' comment and followed by a
# blank line, then all ids. Pieces are collected and joined once instead of
# repeatedly concatenating onto main_string.
pieces = [main_string]
for _cat in css_classes_org:
    pieces.append('/* %s */\n' % _cat[0].replace('.', '').split('-')[0])
    pieces.extend(css(_c) for _c in _cat)
    pieces.append('\n')
pieces.extend(css(_id, True) for _id in css_ids)
main_string = ''.join(pieces)

# Single-argument call form prints identically on Python 2 and Python 3.
print(main_string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment