Created
July 6, 2011 01:41
-
-
Save nfeldman/1066357 to your computer and use it in GitHub Desktop.
markup checkup script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @copyright 2011 all rights reserved
 * @author Noah Feldman
 * This was originally written as a greasemonkey script and used to demonstrate to a vendor
 * that the markup they'd delivered was worse than inadequate. In addition to being
 * so inconsistent that their efforts to style it consistently resulted in bloated,
 * unwieldy CSS, they'd also made it impossible to have any confidence in programmatic
 * manipulation of the documents returned. This file, coupled with a custom stylesheet
 * <https://gist.github.com/gists/1066284> made the majority of issues painfully obvious.
 * This is far from perfect, but it served its purpose and is useful enough that I
 * want to keep it somewhere semi-permanent for the next time I need something similar.
 */
(function (global, doc) { | |
if (typeof [].forEach === 'function') { | |
// Shared DOM handles, tiny lookup helpers, and the NSF namespace used by the
// rest of the script. Every helper takes an optional `context` node and falls
// back to the document when it is omitted.
var panela = doc.createElement('div'),
    body = doc.body,
    style = doc.createElement('link'),
    head = doc.getElementsByTagName('head'),
    // NOTE(review): getElementById only exists on documents, so `context`
    // must itself be a document here -- confirm before passing an element.
    byId = function (id, context) {
        var r = context || doc;
        return r.getElementById(id);
    },
    byCN = function (CN, context) {
        var r = context || doc;
        return r.getElementsByClassName(CN);
    },
    byQ = function (q, context) {
        var r = context || doc;
        return r.querySelector(q);
    },
    byQA = function (qa, context) {
        var r = context || doc;
        return r.querySelectorAll(qa);
    },
    // copy any array-like (NodeList, HTMLCollection, arguments) into a real Array
    toArray = function (arg) {
        return [].slice.call(arg);
    },
    GenXpath,
    // unsafeWindow is the greasemonkey reference to the global object
    NSF = unsafeWindow.NSF || {};

// publish the namespace on the page's global object
unsafeWindow.NSF = NSF;

// pull in the companion stylesheet that colours the flagged elements
style.type = 'text/css';
style.rel = 'stylesheet';
style.href = 'http://example.com/markup_checkup.css';
head[0].appendChild(style);

// help / colour-key panel; appended to the page once the filters have run
panela.id = "panel1";
panela.innerHTML =
    '<h1 class="nobg" title="HELP">?</h1>' +
    '<h2 class="nobg">COLOR KEY</h2>' +
    '<h3 class="nobg">Headings:</h3>' +
    '<ul>' +
    '<li><span class="h1"> H1 </span></li>' +
    '<li><span class="h2"> H2 </span></li>' +
    '<li><span class="h3"> H3 </span></li>' +
    '<li><span class="h4"> H4 </span></li>' +
    '<li><span class="h5"> H5 </span></li>' +
    '</ul>' +
    '<h3 class="nobg">Others:</h3>' +
    '<ul>' +
    '<li><span style="background-color:#FFF!important"><span style="foo:bar">light green overlay</span></span> = the presence of an inline style <strong>if this is a p > a > strong or similar, there\'s a very good chance this is also a fake heading</strong></li>' +
    '<li class="Div">red-dotted outline = presence of a div from the old site</li>' +
    '<li class="pasbr">grey, an element containing 1 whitespace character and nothing else, hover for more information</li>' +
    '</ul>' +
    '<h2 class="nobg">NOTES</h2>' +
    // the sample markup is entity-escaped so it is displayed as text instead
    // of being parsed into (invisible) unknown elements
    '<p>For the most part, I have attempted to call out problematic elements with some descriptive copy and/or color. However, it is more than likely that you will run into cases where there\'s more wrong than the script and CSS are able to call out. When you edit the content in question, pay attention, and work on the raw HTML. Remember that H2 - H6 function like headings in an outline, and apply them appropriately. Remove all style attributes (style="anything"). Remove needless nesting, things that look like: "&lt;foo&gt;&lt;bar&gt;;&lt;foo&gt;" are pointless.</p>';
// silly at the moment, but we can flesh it out later if we need it
/**
 * Build a tooltip element of the form <div class="ttipText"><p>tip</p></div>.
 *
 * @param {string} tip - Tooltip content. It is assigned to innerHTML, so any
 *     markup in it is parsed; callers must escape text that should be shown
 *     literally.
 * @returns {Element} The detached wrapper div; the caller attaches it.
 */
NSF.tooltips = function (tip) {
    // use the injected document reference for both elements
    var div = doc.createElement('div'),
        p = doc.createElement('p');
    div.className = "ttipText";
    p.innerHTML = tip;
    div.appendChild(p);
    return div;
};
// DOM Manipulation Utilities
/**
 * Evaluate an XPath expression against the document and tag every match.
 *
 * After construction:
 *   this.exp     - the expression that was evaluated
 *   this.result  - the raw ordered-snapshot XPathResult
 *   this.results - Array of matched nodes, in reverse snapshot order
 *
 * @param {string} exp - XPath expression.
 * @param {string} [clss] - Class name appended to each match; nothing is
 *     touched when empty/omitted.
 * @param {string} [t] - Label prepended to each match's title as "[t] ";
 *     nothing is touched when empty/omitted.
 * @param {Function} [callback] - Called once per match with the node as `this`.
 */
GenXpath = function (exp, clss, t, callback) {
    this.exp = exp;
    this.init(exp, clss || '', t || '', callback);
    return this;
};

GenXpath.prototype = {
    constructor: GenXpath,

    /**
     * Run the query and decorate the matches. See the constructor for the
     * meaning of the arguments.
     */
    init: function (exp, clss, t, callback) {
        var i, item;
        this.results = [];
        this.result = doc.evaluate(exp, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
        i = this.result.snapshotLength;
        // the snapshot is walked backwards, so `results` ends up reversed
        while (i--) {
            item = this.result.snapshotItem(i);
            // only touch className when there is a class to add, otherwise an
            // existing value would pick up a trailing space
            if (clss) {
                item.className = item.className ? item.className + ' ' + clss : clss;
            }
            // only touch title when there is a label, otherwise an existing
            // title would be rewritten as "[] title"
            if (t) {
                item.title = item.title ? ('[' + t + '] ' + item.title) : ('[' + t + ']');
            }
            if (callback) {
                callback.call(item);
            }
            this.results.push(item);
        }
    }
};
/**
 * Wrap `node` in a newly created element, in place.
 *
 * @param {Node} node - Node to wrap.
 * @param {string} el - Tag name of the wrapper to create.
 * @param {string} [clss] - Class name for the wrapper (defaults to '').
 * @returns {Element} The new wrapper, now containing `node`.
 */
NSF.wrap = function (node, el, clss) {
    var parent = node.parentNode,
        wrapper = doc.createElement(el);
    wrapper.className = clss || '';
    // a detached node has no parent to insert into; it is still wrapped so
    // the caller always gets a usable wrapper back
    if (parent) {
        parent.insertBefore(wrapper, node);
    }
    wrapper.appendChild(node);
    return wrapper;
};
// callout bad classnames (is more general, actually)
// TODO, generalize
/**
 * Wrap every element of every list passed in with a <div class="ttip"> and
 * append a tooltip naming the element and its class attribute.
 *
 * Elements whose className contains "asbr" are skipped. An element that
 * appears in more than one list is only wrapped once.
 *
 * @param {...(Array|NodeList|HTMLCollection)} lists - Any number of
 *     array-likes of elements.
 */
NSF.classMill = function () {
    var lists = [].slice.call(arguments),
        seen = [],
        // class names come from the page being audited; escape them before
        // they are concatenated into tooltip markup
        escapeHtml = function (text) {
            return String(text)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;');
        };

    lists.forEach(function (list) {
        // copy first so live collections and plain array-likes both work
        [].slice.call(list).forEach(function (el) {
            if (seen.indexOf(el) !== -1 || /asbr/.test(el.className)) {
                return;
            }
            seen.push(el);
            NSF.wrap(el, 'div', 'ttip').appendChild(NSF.tooltips('<code>' + el.nodeName + '</code> with class <code>' + escapeHtml(el.className) + '</code> found here'));
        });
    });
};
/* ****************************
   FILTERS, ADD WHATEVER IS USEFUL
   ***************************** */
/**
 * Run all markup checks against the document, then invoke `callback`.
 *
 * Populates on NSF:
 *   pheading  - GenXpath of <strong> elements matched inside <p>
 *   wspcXpath - GenXpath of childless nodes holding a single character
 *   trap      - map of lower-cased tag name -> Array of whitespace-only elements
 *   plist     - GenXpath of short sibling paragraphs (classed "plist")
 *   brtags    - GenXpath of every <br>, each wrapped in <span class="brtag">
 *
 * @param {Function} [callback] - Called with no arguments once all checks ran.
 */
NSF.filters = function (callback) {
    var brs, i;

    // find paragraphs that should be headings
    NSF.pheading = new GenXpath('//p[count(strong)=1]/strong[position()=1 and last()]', '', '');
    NSF.trap = {}; // storage

    // get nodes with 1 text character and no children
    NSF.wspcXpath = new GenXpath('//*[string-length()=1 and count(*)=0]', '', '');

    // add class name and title attr to results containing only whitespace
    NSF.wspcXpath.results.forEach(function (el) {
        var n;
        // the query above only yields single-character nodes, so the test
        // must accept a lone whitespace character
        if (/^\s+$/.test(el.textContent)) {
            n = el.nodeName.toLowerCase();
            el.className = (n + 'asbr ' + el.className).trim();
            // anchored, so "span" or "pre" are not mistaken for "p"
            el.title = (el.title + ' WARNING: empty ' + n + (/^(?:p|div|td)$/.test(n) ? ' used as spacer' : ' detected')).trim();
            // collect per tag name, creating the bucket on first use
            (NSF.trap[n] = NSF.trap[n] || []).push(el);
        }
    });

    NSF.plist = new GenXpath('//div[@id="inner-content"]/*/p[string-length() < 80 and preceding-sibling::p[string-length() < 80]]', 'plist', 'WARNING: this is a vary short paragraph with at least one equally short sibling paragraph. Maybe this should be a list?');

    NSF.pheading.results.forEach(function (el) {
        var parent = el.parentNode;
        // only flag the parent when its very first child node is an element
        if (parent.firstChild.nodeType === 1) {
            // keep a separator so existing class/title values are not fused
            // with the text being added
            parent.className = parent.className ? parent.className + ' fakeheading' : 'fakeheading';
            parent.title = (parent.title ? parent.title + ' ' : '') + 'WARNING: this is a ' + parent.nodeName + ' element masquarading as a heading';
            parent.appendChild(NSF.tooltips('WARNING: this is a <code>' + parent.nodeName + ' element</code> masquarading as a heading'));
        }
    });

    NSF.classMill(toArray(byCN('leftmenu')), toArray(byCN('title')), toArray(byCN('link')), toArray(byQA('[class$="Div"]')));

    NSF.brtags = new GenXpath('//br');
    brs = NSF.brtags.results;
    i = brs.length;
    while (i--) {
        NSF.wrap(brs[i], 'span', 'brtag');
    }

    if (typeof callback === 'function') {
        callback();
    }
};
// Only run the checks when the body carries both a "logged-in" and a "front"
// class, then show the help panel once the filters have finished.
// "logged-in" must start a class token (start of string or after whitespace)
// so it matches as the first class too, while still not matching inside a
// longer hyphenated name such as "not-logged-in".
// NOTE(review): /\bfront\b/ also matches hyphenated names such as "not-front";
// left as written because tightening it changes which pages run -- confirm intent.
if (body && /(?:^|\s)logged-in\b/.test(body.className) && /\bfront\b/.test(body.className)) {
    NSF.filters(function () {
        body.appendChild(panela);
    });
}
} | |
}(window, window.document)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment