Skip to content

Instantly share code, notes, and snippets.

@darobin
Created November 20, 2012 15:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darobin/4118747 to your computer and use it in GitHub Desktop.
Save darobin/4118747 to your computer and use it in GitHub Desktop.
Extracting alt text in context
var phrasing = {};
"a abbr area audio b bdi bdo br button canvas cite code command datalist del dfn em embed i iframe img input " +
"ins kbd keygen label map mark math meter noscript object output progress q ruby s samp script select small " +
"span strong sub sup svg textarea time u var video wbr"
.split(" ")
.forEach(function (el) { phrasing[el] = true; })
;
/**
 * Tells whether a node is anything other than an element listed in the
 * `phrasing` lookup table.
 *
 * @param {Object} el - Node to test; may be null or undefined.
 * @returns {boolean} true when `el` is missing, is not an element node
 *     (nodeType other than 1), or its lower-cased tag name has no truthy
 *     entry of its own in `phrasing`; false otherwise.
 */
function isNotPhrasing (el) {
    if (!el || el.nodeType !== 1) return true;
    var name = el.tagName.toLowerCase();
    // Only the table's own entries count: an inherited member such as
    // Object.prototype.constructor must not make <constructor> look phrasing.
    var listed = Object.prototype.hasOwnProperty.call(phrasing, name) && phrasing[name];
    return !listed;
}
// ... later ...
// Normalise the document before extracting text.
// NOTE(review): `doc` is presumably a DOM Document, in which case this merges
// adjacent text nodes; confirm against the elided setup code.
doc.normalize();
// For each image with a non-empty alt attribute, push onto `results` the alt
// text plus the text preceding and following the image within its nearest
// ancestor that is not phrasing content.
$("img[alt]").each(function () {
    var $img = $(this);
    if (!$img.attr("alt").length) return;
    // Tag the image with a fresh id so its copy can be found inside the cloned
    // ancestor below. The attribute is left on the original image.
    inc++;
    $img.attr("data-id", inc);
    // parents() is ordered closest-first, so the first hit is the nearest
    // non-phrasing ancestor. Work on a clone so the text substitution below
    // does not rewrite the live document.
    var $parent;
    $img.parents().each(function () {
        if (isNotPhrasing(this)) {
            $parent = $(this).clone();
            return false; // stop at the closest match
        }
    });
    // No non-phrasing ancestor means there is no context to extract; skip the
    // image instead of throwing on an undefined $parent.
    if (!$parent) return;
    // The attribute value is quoted because a bare number is not a valid CSS
    // identifier.
    var $imgClone = $parent.find("img[data-id=\"" + inc + "\"]");
    // Replace the cloned image with its alt text wrapped in U+3037 sentinels so
    // the flattened text can be split into before / alt / after.
    $imgClone.replaceWith(doc.createTextNode("\u3037" + $imgClone.attr("alt") + "\u3037"));
    var texts = $parent
        .text()
        .replace(/\s+/g, " ")
        .trim()
        .split("\u3037");
    results.push({
        preceding: texts[0]
    ,   alt: texts[1]
    ,   following: texts[2]
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment