Created
November 6, 2015 19:36
-
-
Save JordanDelcros/30b4421e47b1eb18ad13 to your computer and use it in GitHub Desktop.
Parse HTML file and highlight it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts raw HTML source text into syntax-highlighted markup.
 *
 * The input is first entity-escaped so that it is displayed rather than
 * interpreted, then tags, attributes, attribute values, comments, CDATA
 * sections, stray "<" characters and tabs are wrapped in temporary markers of
 * the form "##/name/##" (open) and "##\name\##" (close). The text is split on
 * "\n"; markers left open on one line are closed at the end of that line and
 * re-opened on the next one so that every line is balanced on its own. Finally
 * each line is wrapped in <pre>…</pre> and the markers are turned into
 * <span class="name">…</span>.
 *
 * Classes emitted: tag, bracket, attribute, name, value, comment, cdata,
 * error, tabulation.
 *
 * @param {string} html - HTML source to highlight (null/undefined is treated as "").
 * @returns {string} One <pre> element per input line, concatenated without separators.
 */
var parser = function (html) {
  // A whole tag on the escaped text: "&lt;" + optional "/" + name + attributes
  // + closing "&gt;". An unterminated tag swallows the rest of the input.
  var TAG = /(&lt;(?!\!)\/?(?:\w+)((?:\s*?[\w-]+(?:\s*=\s*)?(?:\"[^\"]*(?:\")?|\'[^\']*(?:\')?)?)*)?(?:\s*\/?\s*&gt;|[\s\S]*$)?)/gi;

  // One attribute (capture group 1) inside an already matched tag.
  var ATTRIBUTE = /(?:&lt;\/?\w+)?(\s*[\w-]+(?:=?\"[^\"]*(?:\")?|=\'[^\']*(?:\')?)?)?(?:\/?&gt;|&lt;)?/gi;

  // Any temporary marker, opening or closing.
  var ANY_MARKER = /##(\/|\\)\w+\1##/g;

  // Wraps `text` in the opening/closing marker pair for `name`.
  var mark = function (name, text) {
    return "##/" + name + "/##" + text + "##\\" + name + "\\##";
  };

  // Removes every temporary marker from `text`.
  var stripMarkers = function (text) {
    return text.replace(ANY_MARKER, "");
  };

  // "##/x/##" -> "##\x\##"
  var toCloser = function (marker) {
    return marker.replace(/\//g, "\\");
  };

  // "##\x\##" -> "##/x/##"
  var toOpener = function (marker) {
    return marker.replace(/\\/g, "/");
  };

  // Returns the inverted form of every entry of `markers` that has no partner
  // left in `counterparts`; each counterpart can be consumed only once.
  var findUnmatched = function (markers, counterparts, invert) {
    var pool = counterparts.slice(0);
    var missing = [];
    for (var k = 0; k < markers.length; k++) {
      var inverted = invert(markers[k]);
      var at = pool.indexOf(inverted);
      if (at === -1) {
        missing.push(inverted);
      } else {
        pool.splice(at, 1);
      }
    }
    return missing;
  };

  // Replace callback for ATTRIBUTE: marks the attribute name and, when
  // present, its value inside the matched chunk.
  var highlightAttribute = function (global, matchAttribute) {
    if (!matchAttribute) {
      return global;
    }

    var parts;
    var marked;

    if (/^\s*[\w-]+\s*$/.test(matchAttribute)) {
      // Bare attribute name without a value.
      marked = mark("attribute", mark("name", matchAttribute));
    } else if (/^\s*[\w-]+=/.test(matchAttribute)) {
      // name=value: split yields [name, "=", value, ""].
      parts = matchAttribute.split(/(=)([\s\S]*)/);
      parts[0] = mark("name", parts[0]);
      parts[2] = mark("value", parts[2]);
      marked = mark("attribute", parts.join(""));
    } else if (/^\s*[\w-]+[\"\']/.test(matchAttribute)) {
      // name"value" with the "=" missing.
      parts = matchAttribute.split(/([\"\'][\s\S]+)/);
      parts[0] = mark("name", parts[0]);
      parts[1] = mark("value", parts[1] || "");
      marked = mark("attribute", parts.join(""));
    } else {
      return global;
    }

    // A function replacement is required here: `marked` contains user text,
    // and a string replacement would interpret "$$", "$&", "$'" … inside it.
    return global.replace(matchAttribute, function () {
      return marked;
    });
  };

  var lines = String(html == null ? "" : html)
    // Only "&lt", "&nbsp" and "&amp" are re-escaped; other ampersands pass through.
    .replace(/&(lt|nbsp|amp)/gim, "&amp;$1")
    .replace(/•/gi, "·")
    // "#" is escaped so that user text can never form a "##…##" marker.
    .replace(/#/gi, "&#35;")
    .replace(/</gim, "&lt;")
    .replace(/>/gim, "&gt;")
    .replace(TAG, function (matchTag) {
      var tag = matchTag.replace(ATTRIBUTE, highlightAttribute);
      return mark("tag", tag.replace(/(^&lt;\/?|\/?&gt;$)/gi, "##/bracket/##$1##\\bracket\\##"));
    })
    // A "<" followed by whitespace or the end of the input cannot start a tag.
    .replace(/(&lt;\s|&lt;$)/g, "##/error/##$1##\\error\\##")
    .replace(/(&lt;)(?=[\s\n]+)/g, "##/error/##$1##\\error\\##")
    // Comments are tagged in two passes: first flag every "<!--", then wrap
    // the whole comment and drop any markers that ended up inside it.
    .replace(/(&lt;\!--[\s\S]*?(?:--&gt;|$))/gi, function (global) {
      return global.replace(/(&lt;\!--)/g, "##/comment/##$1");
    })
    .replace(/(&lt;\!\[CDATA\[[\s\S]*?(?:\]\]&gt;|$))/gi, function (global) {
      return mark("cdata", stripMarkers(global));
    })
    .replace(/(##\/comment\/##[\s\S]*?(?:--&gt;|$))/g, function (global) {
      return mark("comment", stripMarkers(global));
    })
    // Attribute values must not contain nested highlighting.
    .replace(/(##\/value\/##[\s\S]*?(?:##\\value\\##|$))/gi, function (global) {
      return mark("value", stripMarkers(global));
    })
    .replace(/(\t)/gim, "##/tabulation/##$1##\\tabulation\\##")
    .split(/\n/g);

  var unopened;
  var unclosed;

  for (var line = 0; line < lines.length; line++) {
    var opened = lines[line].match(/##\/\w+\/##/g) || [];
    var closed = lines[line].match(/##\\\w+\\##/g) || [];

    if (unclosed && unclosed.length > 0) {
      // Markers are still open from the previous line: re-open them here.
      var carried = unclosed.slice(0);
      for (var i = 0; i < carried.length; i++) {
        var opener = toOpener(carried[i]);
        lines[line] = opener + lines[line];
        opened.unshift(opener);

        if (lines[line].indexOf(carried[i]) !== -1) {
          // The matching closer is on this line.
          unclosed.shift();
        } else {
          // Still open at the end of this line: close it for now.
          lines[line] += carried[i];
          closed.push(carried[i]);
        }
      }

      // Openers on this line without a closer carry over to the next line.
      unclosed = unclosed.concat(findUnmatched(opened, closed, toCloser));
    } else {
      unopened = [];
      unclosed = [];

      if (opened.length > closed.length) {
        unclosed = findUnmatched(opened, closed, toCloser);
      } else if (closed.length > opened.length) {
        unopened = findUnmatched(closed, opened, toOpener);
      }

      // reverse() works in place, so `unclosed` stays reversed for the next line.
      lines[line] = unopened.join("") + lines[line] + unclosed.reverse().join("");
    }

    lines[line] = "<pre>" + lines[line] + "</pre>";
  }

  return lines
    .join("")
    .replace(/##\/(\w+)\/##/g, '<span class="$1">')
    .replace(/##\\\w+\\##/g, "</span>");
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment