Skip to content

Instantly share code, notes, and snippets.

@balupton
Created October 1, 2012 10:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save balupton/3810925 to your computer and use it in GitHub Desktop.
Save balupton/3810925 to your computer and use it in GitHub Desktop.
highlightjs-escape
/*
Syntax highlighting with language autodetection.
http://softwaremaniacs.org/soft/highlight/
*/
var highlightAfter = function() {
/* Utility functions */
/*
Makes a string safe to embed in HTML markup by replacing `&`, `<` and `>` with
their entities. Ampersands are handled first so the entities produced by the
later replacements are not escaped a second time.
*/
function escape(value) {
  var ampersandsDone = value.replace(/&/g, '&amp;');
  var lessThanDone = ampersandsDone.replace(/</g, '&lt;');
  return lessThanDone.replace(/>/g, '&gt;');
}
/*
Looks for a CODE child of a `pre` node. Children are scanned in order; text
nodes whose value contains whitespace are stepped over, and the search gives
up (returning undefined) at the first child of any other kind.
*/
function findCode(pre) {
  var candidate = pre.firstChild;
  while (candidate) {
    if (candidate.nodeName == 'CODE') {
      return candidate;
    }
    var isTextNode = candidate.nodeType == 3;
    if (!isTextNode || !candidate.nodeValue.match(/\s+/)) {
      return undefined;
    }
    candidate = candidate.nextSibling;
  }
  return undefined;
}
/*
Collects the text of a DOM subtree as one string. Text nodes contribute their
value (with "\n" characters removed when ignoreNewLines is truthy), BR nodes
contribute a single "\n", and every other node contributes the text of its
own children.
*/
function blockText(block, ignoreNewLines) {
  function textOf(child) {
    if (child.nodeType == 3) {
      var raw = child.nodeValue;
      return ignoreNewLines ? raw.replace(/\n/g, '') : raw;
    }
    return child.nodeName == 'BR' ? '\n' : blockText(child, ignoreNewLines);
  }
  var pieces = Array.prototype.map.call(block.childNodes, textOf);
  return pieces.join('');
}
/*
Determines the language requested for a code block from the class names of
the block and of its parent. A leading "language-" prefix is stripped from
each class. Returns the first class that is either a registered language or
the special value 'no-highlight'; returns undefined when none qualifies.
*/
function blockLanguage(block) {
  var classNames = (block.className + ' ' + block.parentNode.className).split(/\s+/);
  for (var i = 0; i < classNames.length; i++) {
    var candidate = classNames[i].replace(/^language-/, '');
    if (candidate == 'no-highlight') {
      return candidate;
    }
    // Own-property check: a plain `languages[candidate]` lookup would also
    // find inherited members such as "constructor" or "toString" and report
    // them as languages.
    if (Object.prototype.hasOwnProperty.call(languages, candidate) && languages[candidate]) {
      return candidate;
    }
  }
}
/* Stream merging */
/*
Flattens the element structure below `node` into a list of events. Every
element other than BR produces a 'start' and a 'stop' event carrying the text
offset at which it opens and closes. Offsets advance by the length of each
text node and by one for each BR.
*/
function nodeStream(node) {
  var events = [];

  // Walks the children of `parent`, starting at text offset `position`, and
  // returns the offset reached after the last child.
  function walk(parent, position) {
    var child = parent.firstChild;
    while (child) {
      if (child.nodeType == 3) {
        position += child.nodeValue.length;
      } else if (child.nodeName == 'BR') {
        position += 1;
      } else if (child.nodeType == 1) {
        events.push({event: 'start', offset: position, node: child});
        position = walk(child, position);
        events.push({event: 'stop', offset: position, node: child});
      }
      child = child.nextSibling;
    }
    return position;
  }

  walk(node, 0);
  return events;
}
/*
Merges two event streams (as produced by nodeStream) over the same text into
a single HTML string. `value` is the plain text both streams refer to; it is
escaped piecewise as it is copied to the output. When a node has to be closed
while nodes opened after it are still open, those are closed first and then
reopened, so the generated markup stays properly nested.

Both stream arrays are consumed (emptied) by this function.
*/
function mergeStreams(stream1, stream2, value) {
  var processed = 0;
  var result = '';
  var nodeStack = [];

  // Picks the stream whose next event comes first. On equal offsets the order
  // is chosen so that stream1 always starts first and closes last, which
  // avoids opening an element right before it has to be closed again.
  function selectStream() {
    if (!stream1.length || !stream2.length) {
      return stream1.length ? stream1 : stream2;
    }
    if (stream1[0].offset != stream2[0].offset) {
      return (stream1[0].offset < stream2[0].offset) ? stream1 : stream2;
    }
    return stream2[0].event == 'start' ? stream1 : stream2;
  }

  // Tag names are lowercased for both opening and closing tags so the two
  // always agree.
  function tagName(node) {
    return node.nodeName.toLowerCase();
  }

  // Attribute values are wrapped in double quotes, so `"` has to be escaped
  // in addition to what escape() already covers.
  function attrStr(attribute) {
    return ' ' + attribute.nodeName + '="' + escape(attribute.value).replace(/"/g, '&quot;') + '"';
  }

  function open(node) {
    return '<' + tagName(node) + Array.prototype.map.call(node.attributes, attrStr).join('') + '>';
  }

  while (stream1.length || stream2.length) {
    var current = selectStream().shift();
    result += escape(value.slice(processed, current.offset));
    processed = current.offset;
    if (current.event == 'start') {
      result += open(current.node);
      nodeStack.push(current.node);
    } else if (current.event == 'stop') {
      // Close everything down to (and including) the node being stopped...
      var node, i = nodeStack.length;
      do {
        i--;
        node = nodeStack[i];
        result += '</' + tagName(node) + '>';
      } while (node != current.node);
      nodeStack.splice(i, 1);
      // ...then reopen the nodes that were only closed to keep nesting valid.
      while (i < nodeStack.length) {
        result += open(nodeStack[i]);
        i++;
      }
    }
  }
  return result + escape(value.slice(processed));
}
/* Initialization */
/*
Prepares a language definition for highlighting. The definition object and all
modes reachable from it (through `contains` and `starts`) are modified in
place: pattern strings are compiled to RegExp objects, keyword lists are
turned into lookup tables and defaults are filled in. Modes that are already
marked as compiled are left alone, so calling this repeatedly is cheap.
*/
function compileLanguage(language) {
// Builds a RegExp from a pattern string. The 'm' flag is always set, 'i' is
// added when the language is case-insensitive and 'g' when `global` is truthy.
function langRe(value, global) {
return RegExp(
value,
'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '')
);
}
// Compiles a single mode. `parent` is the containing mode; it is undefined
// for the language (top-level) mode itself.
function compileMode(mode, parent) {
// Guard against compiling twice; also stops the recursion for 'self' references.
if (mode.compiled)
return;
mode.compiled = true;
var keywords = []; // used later with beginWithKeyword but filled as a side-effect of keywords compilation
if (mode.keywords) {
// Maps each keyword to [className, relevance].
var compiled_keywords = {};
// `str` is a space-separated keyword list; an entry may carry its own
// relevance as "word|N", otherwise the relevance is 1.
function flatten(className, str) {
str.split(' ').forEach(function(kw) {
var pair = kw.split('|');
compiled_keywords[pair[0]] = [className, pair[1] ? Number(pair[1]) : 1];
keywords.push(pair[0]);
});
}
// NOTE(review): `hljs` is not defined in this function; it is expected to
// exist in an enclosing/global scope.
mode.lexemsRe = langRe(mode.lexems || hljs.IDENT_RE, true);
if (typeof mode.keywords == 'string') { // string
flatten('keyword', mode.keywords)
} else {
// Object form: each own property name is the class name for its keyword list.
for (var className in mode.keywords) {
if (!mode.keywords.hasOwnProperty(className))
continue;
flatten(className, mode.keywords[className]);
}
}
mode.keywords = compiled_keywords;
}
// Begin/end handling only applies to nested modes, not to the language itself.
if (parent) {
if (mode.beginWithKeyword) {
// The mode starts at any of its own keywords followed by whitespace.
mode.begin = '\\b(' + keywords.join('|') + ')\\s';
}
// '\\B|\\b' matches at any position, i.e. "no specific begin/end".
mode.beginRe = langRe(mode.begin ? mode.begin : '\\B|\\b');
if (!mode.end && !mode.endsWithParent)
mode.end = '\\B|\\b';
if (mode.end)
mode.endRe = langRe(mode.end);
// terminator_end is the pattern source that ends this mode: its own end,
// plus the parent's terminator_end when the mode ends with its parent.
mode.terminator_end = mode.end || '';
if (mode.endsWithParent && parent.terminator_end)
mode.terminator_end += (mode.end ? '|' : '') + parent.terminator_end;
}
if (mode.illegal)
mode.illegalRe = langRe(mode.illegal);
if (mode.relevance === undefined)
mode.relevance = 1;
if (!mode.contains) {
mode.contains = [];
}
for (var i = 0; i < mode.contains.length; i++) {
// The string 'self' is a placeholder for the mode containing itself.
if (mode.contains[i] == 'self') {
mode.contains[i] = mode;
}
compileMode(mode.contains[i], mode);
}
if (mode.starts) {
// A `starts` mode is compiled with the same parent as the current mode.
compileMode(mode.starts, parent);
}
// Everything that can interrupt plain text inside this mode: the begin of
// any sub-mode, the end of the mode itself and the illegal pattern.
var terminators = [];
for (var i = 0; i < mode.contains.length; i++) {
terminators.push(mode.contains[i].begin);
}
if (mode.terminator_end) {
terminators.push(mode.terminator_end);
}
if (mode.illegal) {
terminators.push(mode.illegal);
}
// With nothing to look for, a stub whose exec() never matches is used in
// place of a RegExp.
mode.terminators = terminators.length ? langRe(terminators.join('|'), true) : {exec: function(s) {return null;}};
}
compileMode(language);
}
/*
Core highlighting function. Accepts a language name and a string with the
code to highlight. Returns an object with the following properties:
- relevance (int)
- keyword_count (int)
- value (an HTML string with highlighting markup)
- language (the language name that was passed in)
If the code contains a construct that the language marks as illegal,
relevance and keyword_count are 0 and value is the escaped input without any
highlighting markup.
*/
function highlight(language_name, value) {

  // Returns the first sub-mode of `mode` whose begin pattern matches at the
  // very start of `lexem`, or undefined when there is none.
  function subMode(lexem, mode) {
    for (var i = 0; i < mode.contains.length; i++) {
      var match = mode.contains[i].beginRe.exec(lexem);
      if (match && match.index == 0) {
        return mode.contains[i];
      }
    }
  }

  // Returns the mode that `lexem` ends: `mode` itself when its end pattern
  // matches, otherwise an ancestor when the mode ends together with its
  // parent. Returns undefined when the lexem ends nothing.
  function endOfMode(mode, lexem) {
    if (mode.end && mode.endRe.test(lexem)) {
      return mode;
    }
    if (mode.endsWithParent) {
      return endOfMode(mode.parent, lexem);
    }
  }

  function isIllegal(lexem, mode) {
    return mode.illegal && mode.illegalRe.test(lexem);
  }

  // Looks up a lexem match in the mode's keyword table. Returns the
  // [className, relevance] entry or a falsy value.
  function keywordMatch(mode, match) {
    var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
    return Object.prototype.hasOwnProperty.call(mode.keywords, match_str) && mode.keywords[match_str];
  }

  // Renders the buffered text of the current mode, wrapping keywords in spans.
  // Lexems are searched in the raw text and every piece is escaped on output;
  // searching the escaped text instead would let entity names such as the
  // "lt" in "&lt;" be mistaken for keywords.
  function processKeywords() {
    if (!top.keywords)
      return escape(mode_buffer);
    var result = '';
    var last_index = 0;
    top.lexemsRe.lastIndex = 0;
    var match = top.lexemsRe.exec(mode_buffer);
    while (match) {
      result += escape(mode_buffer.substr(last_index, match.index - last_index));
      var keyword_match = keywordMatch(top, match);
      if (keyword_match) {
        keyword_count += keyword_match[1];
        result += '<span class="'+ keyword_match[0] +'">' + escape(match[0]) + '</span>';
      } else {
        result += escape(match[0]);
      }
      last_index = top.lexemsRe.lastIndex;
      match = top.lexemsRe.exec(mode_buffer);
    }
    return result + escape(mode_buffer.substr(last_index));
  }

  // Renders the buffered text by highlighting it as another language: the
  // one named by the mode, or an auto-detected one when the name is empty.
  function processSubLanguage() {
    if (top.subLanguage && !languages[top.subLanguage]) {
      return escape(mode_buffer);
    }
    var result = top.subLanguage ? highlight(top.subLanguage, mode_buffer) : highlightAuto(mode_buffer);
    // Counting embedded language score towards the host language may be disabled
    // with zeroing the containing mode relevance. Usecase in point is Markdown that
    // allows XML everywhere and makes every XML snippet to have a much larger Markdown
    // score.
    if (top.relevance > 0) {
      keyword_count += result.keyword_count;
      relevance += result.relevance;
    }
    return '<span class="' + result.language + '">' + result.value + '</span>';
  }

  function processBuffer() {
    return top.subLanguage !== undefined ? processSubLanguage() : processKeywords();
  }

  // Enters `mode`: emits its opening span (if it has a class name) and decides
  // whether the begin lexem goes outside the span, into the mode's buffer, or
  // is left in the input to be matched again (returnBegin).
  function startNewMode(mode, lexem) {
    var markup = mode.className? '<span class="' + mode.className + '">': '';
    if (mode.returnBegin) {
      result += markup;
      mode_buffer = '';
    } else if (mode.excludeBegin) {
      result += escape(lexem) + markup;
      mode_buffer = '';
    } else {
      result += markup;
      mode_buffer = lexem;
    }
    // The active mode is a fresh object inheriting from the mode definition,
    // so `parent` can be recorded without touching the shared definition.
    top = Object.create(mode, {parent: {value: top}});
    relevance += mode.relevance;
  }

  // Handles one step of the scan: `buffer` is the plain text before the
  // terminator and `lexem` the terminator itself (undefined at end of input).
  // Returns a truthy value when the lexem must be scanned again.
  function processModeInfo(buffer, lexem) {
    mode_buffer += buffer;
    if (lexem === undefined) {
      result += processBuffer();
      return;
    }
    var new_mode = subMode(lexem, top);
    if (new_mode) {
      result += processBuffer();
      startNewMode(new_mode, lexem);
      return new_mode.returnBegin;
    }
    var end_mode = endOfMode(top, lexem);
    if (end_mode) {
      if (!(end_mode.returnEnd || end_mode.excludeEnd)) {
        mode_buffer += lexem;
      }
      result += processBuffer();
      // Close every mode up to and including the one that ended.
      do {
        if (top.className) {
          result += '</span>';
        }
        top = top.parent;
      } while (top != end_mode.parent);
      if (end_mode.excludeEnd) {
        result += escape(lexem);
      }
      mode_buffer = '';
      if (end_mode.starts) {
        startNewMode(end_mode.starts, '');
      }
      return end_mode.returnEnd;
    }
    if (isIllegal(lexem, top))
      throw 'Illegal';
  }

  var language = languages[language_name];
  compileLanguage(language);
  var top = language;
  var mode_buffer = '';
  var relevance = 0;
  var keyword_count = 0;
  var result = '';
  try {
    var match, index = 0;
    while (true) {
      top.terminators.lastIndex = index;
      match = top.terminators.exec(value);
      if (!match)
        break;
      var return_lexem = processModeInfo(value.substr(index, match.index - index), match[0]);
      index = match.index + (return_lexem ? 0 : match[0].length);
    }
    processModeInfo(value.substr(index), undefined);
    return {
      relevance: relevance,
      keyword_count: keyword_count,
      value: result,
      language: language_name
    };
  } catch (e) {
    if (e == 'Illegal') {
      // `language` is reported here as well, so callers that read
      // result.language do not end up with undefined.
      return {
        relevance: 0,
        keyword_count: 0,
        value: escape(value),
        language: language_name
      };
    } else {
      throw e;
    }
  }
}
/*
Highlighting with language detection. The text is highlighted with every
registered language and the candidates are ranked by keyword_count +
relevance. Returns an object with the following properties:
- language (detected language; absent when no language scored above zero)
- relevance (int)
- keyword_count (int)
- value (an HTML string with highlighting markup)
- second_best (object with the same structure for the runner-up language,
  may be absent)
*/
function highlightAuto(text) {
  function score(candidate) {
    return candidate.keyword_count + candidate.relevance;
  }

  // Baseline: plain escaped text with a zero score and no language.
  var best = {
    keyword_count: 0,
    relevance: 0,
    value: escape(text)
  };
  var runnerUp = best;

  Object.keys(languages).forEach(function(name) {
    var attempt = highlight(name, text);
    attempt.language = name;
    if (score(attempt) > score(runnerUp)) {
      runnerUp = attempt;
    }
    if (score(attempt) > score(best)) {
      // The previous winner is demoted to second place.
      runnerUp = best;
      best = attempt;
    }
  });

  // The baseline object has no language and is never reported as second best.
  if (runnerUp.language) {
    best.second_best = runnerUp;
  }
  return best;
}
/*
Post-processing of the highlighted markup:
- when tabReplace is given, TABs at the start of a line (possibly mixed with
  markup tags) are replaced with it; TABs further into the line are kept
- when useBR is truthy, real line-breaks are replaced with '<br>' (meant for
  non-pre containers)
Tab replacement runs first because it relies on the line-breaks being there.
*/
function fixMarkup(value, tabReplace, useBR) {
  var fixed = value;
  if (tabReplace) {
    fixed = fixed.replace(/^((<[^>]+>|\t)+)/gm, function(lineStart) {
      return lineStart.replace(/\t/g, tabReplace);
    });
  }
  return useBR ? fixed.replace(/\n/g, '<br>') : fixed;
}
/*
Applies highlighting to a DOM node containing code. Accepts a DOM node and
two optional parameters for fixMarkup.

The language is taken from the class names of the block (see blockLanguage)
or auto-detected. Blocks marked 'no-highlight' are left untouched. Markup
already present in the block is merged with the highlighting markup. On
return the block has new innerHTML, the language added to its class list
(only when a language is known), and `result` / `second_best` properties
describing the scores.
*/
function highlightBlock(block, tabReplace, useBR) {
  var text = blockText(block, useBR);
  var language = blockLanguage(block);
  if (language == 'no-highlight')
    return;
  var result = language ? highlight(language, text) : highlightAuto(text);
  language = result.language;
  var original = nodeStream(block);
  if (original.length) {
    // The block had markup of its own: parse the highlighted HTML into a
    // scratch element so both markups can be merged as event streams.
    var pre = document.createElement('pre');
    pre.innerHTML = result.value;
    result.value = mergeStreams(original, nodeStream(pre), text);
  }
  result.value = fixMarkup(result.value, tabReplace, useBR);
  var class_name = block.className;
  // No language is known when auto-detection found nothing; in that case the
  // class list is left alone instead of gaining the literal class "undefined".
  if (language) {
    // Token comparison instead of a regex built from the name, so names
    // containing regex metacharacters are handled as well.
    var tokens = class_name.split(/\s+/);
    if (tokens.indexOf(language) == -1 && tokens.indexOf('language-' + language) == -1) {
      class_name = class_name ? (class_name + ' ' + language) : language;
    }
  }
  block.innerHTML = result.value;
  block.className = class_name;
  block.result = {
    language: language,
    kw: result.keyword_count,
    re: result.relevance
  };
  if (result.second_best) {
    block.second_best = {
      language: result.second_best.language,
      kw: result.second_best.keyword_count,
      re: result.second_best.relevance
    };
  }
}
/*
Applies highlighting to all <pre><code>..</code></pre> blocks on a page.
Only the first call does any work; later calls return immediately. All code
nodes are collected before the first one is highlighted.
*/
function initHighlighting() {
  if (initHighlighting.called) {
    return;
  }
  initHighlighting.called = true;

  var pres = document.getElementsByTagName('pre');
  var codeBlocks = [];
  for (var i = 0; i < pres.length; i++) {
    var code = findCode(pres[i]);
    if (code) {
      codeBlocks.push(code);
    }
  }
  codeBlocks.forEach(function(code) {
    highlightBlock(code, hljs.tabReplace);
  });
}
/*
Attaches highlighting to the page load events: initHighlighting is registered
for both 'DOMContentLoaded' and 'load' on the window.
*/
function initHighlightingOnLoad() {
  ['DOMContentLoaded', 'load'].forEach(function(eventName) {
    window.addEventListener(eventName, initHighlighting, false);
  });
}
// Registry of language definitions, keyed by language name. The functions
// above reach it through this local variable; it is the same object that is
// exposed as this.LANGUAGES below.
var languages = {}; // a shortcut to avoid writing "this." everywhere
/* Interface definition */
// Public API: the registry plus the highlighting entry points.
this.LANGUAGES = languages;
this.highlight = highlight;
this.highlightAuto = highlightAuto;
this.fixMarkup = fixMarkup;
this.highlightBlock = highlightBlock;
this.initHighlighting = initHighlighting;
this.initHighlightingOnLoad = initHighlightingOnLoad;
// Common regexps
// These are pattern *strings* (hence the doubled backslashes), not RegExp objects.
this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*'; // identifier starting with a letter
this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*'; // identifier that may also start with "_"
this.NUMBER_RE = '\\b\\d+(\\.\\d+)?'; // digits with an optional fractional part
this.C_NUMBER_RE = '(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
this.BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
// Alternation of operator and punctuation tokens.
// NOTE(review): going by the name, these are the tokens after which a regexp
// literal may start; no use is visible in this section, so confirm against the
// language definitions.
this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';
// Common modes
// Ready-made mode definitions for reuse.
// A backslash followed by any single character (including a line-break).
this.BACKSLASH_ESCAPE = {
begin: '\\\\[\\s\\S]', relevance: 0
};
// Single-quoted string; a line-break inside it is illegal.
this.APOS_STRING_MODE = {
className: 'string',
begin: '\'', end: '\'',
illegal: '\\n',
contains: [this.BACKSLASH_ESCAPE],
relevance: 0
};
// Double-quoted string; a line-break inside it is illegal.
this.QUOTE_STRING_MODE = {
className: 'string',
begin: '"', end: '"',
illegal: '\\n',
contains: [this.BACKSLASH_ESCAPE],
relevance: 0
};
// Comment running from "//" to the end of the line.
this.C_LINE_COMMENT_MODE = {
className: 'comment',
begin: '//', end: '$'
};
// Comment delimited by "/*" and "*/".
this.C_BLOCK_COMMENT_MODE = {
className: 'comment',
begin: '/\\*', end: '\\*/'
};
// Comment running from "#" to the end of the line.
this.HASH_COMMENT_MODE = {
className: 'comment',
begin: '#', end: '$'
};
// Number modes built on the patterns defined above.
this.NUMBER_MODE = {
className: 'number',
begin: this.NUMBER_RE,
relevance: 0
};
this.C_NUMBER_MODE = {
className: 'number',
begin: this.C_NUMBER_RE,
relevance: 0
};
this.BINARY_NUMBER_MODE = {
className: 'number',
begin: this.BINARY_NUMBER_RE,
relevance: 0
};
// Utility functions
/*
Returns a new object holding a shallow copy of every enumerable property of
`parent`, overridden by the enumerable properties of `obj` when `obj` is
given. Neither argument is modified.
*/
this.inherit = function(parent, obj) {
  var merged = {};
  var sources = obj ? [parent, obj] : [parent];
  sources.forEach(function(source) {
    for (var prop in source) {
      merged[prop] = source[prop];
    }
  });
  return merged;
};
}
/*
Syntax highlighting with language autodetection.
http://softwaremaniacs.org/soft/highlight/
*/
var highlightAfter2 = function() {
/* Utility functions */
/*
Makes a string safe to embed in HTML markup by replacing `&`, `<` and `>` with
their entities. All three characters are handled in a single pass over the
string, so entities that are produced are never escaped a second time.
*/
function escape(value) {
  return value.replace(/[&<>]/g, function(symbol) {
    switch (symbol) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      default:
        return '&gt;';
    }
  });
}
/*
Looks for a CODE child of a `pre` node. Children are scanned in order; text
nodes whose value contains whitespace are stepped over, and the search gives
up (returning undefined) at the first child of any other kind.
*/
function findCode(pre) {
  var candidate = pre.firstChild;
  while (candidate) {
    if (candidate.nodeName == 'CODE') {
      return candidate;
    }
    var isTextNode = candidate.nodeType == 3;
    if (!isTextNode || !candidate.nodeValue.match(/\s+/)) {
      return undefined;
    }
    candidate = candidate.nextSibling;
  }
  return undefined;
}
/*
Collects the text of a DOM subtree as one string. Text nodes contribute their
value (with "\n" characters removed when ignoreNewLines is truthy), BR nodes
contribute a single "\n", and every other node contributes the text of its
own children.
*/
function blockText(block, ignoreNewLines) {
  function textOf(child) {
    if (child.nodeType == 3) {
      var raw = child.nodeValue;
      return ignoreNewLines ? raw.replace(/\n/g, '') : raw;
    }
    return child.nodeName == 'BR' ? '\n' : blockText(child, ignoreNewLines);
  }
  var pieces = Array.prototype.map.call(block.childNodes, textOf);
  return pieces.join('');
}
/*
Determines the language requested for a code block from the class names of
the block and of its parent. A leading "language-" prefix is stripped from
each class. Returns the first class that is either a registered language or
the special value 'no-highlight'; returns undefined when none qualifies.
*/
function blockLanguage(block) {
  var classNames = (block.className + ' ' + block.parentNode.className).split(/\s+/);
  for (var i = 0; i < classNames.length; i++) {
    var candidate = classNames[i].replace(/^language-/, '');
    if (candidate == 'no-highlight') {
      return candidate;
    }
    // Own-property check: a plain `languages[candidate]` lookup would also
    // find inherited members such as "constructor" or "toString" and report
    // them as languages.
    if (Object.prototype.hasOwnProperty.call(languages, candidate) && languages[candidate]) {
      return candidate;
    }
  }
}
/* Stream merging */
/*
Flattens the element structure below `node` into a list of events. Every
element other than BR produces a 'start' and a 'stop' event carrying the text
offset at which it opens and closes. Offsets advance by the length of each
text node and by one for each BR.
*/
function nodeStream(node) {
  var events = [];

  // Walks the children of `parent`, starting at text offset `position`, and
  // returns the offset reached after the last child.
  function walk(parent, position) {
    var child = parent.firstChild;
    while (child) {
      if (child.nodeType == 3) {
        position += child.nodeValue.length;
      } else if (child.nodeName == 'BR') {
        position += 1;
      } else if (child.nodeType == 1) {
        events.push({event: 'start', offset: position, node: child});
        position = walk(child, position);
        events.push({event: 'stop', offset: position, node: child});
      }
      child = child.nextSibling;
    }
    return position;
  }

  walk(node, 0);
  return events;
}
/*
Merges two event streams (as produced by nodeStream) over the same text into
a single HTML string. `value` is the plain text both streams refer to; it is
escaped piecewise as it is copied to the output. When a node has to be closed
while nodes opened after it are still open, those are closed first and then
reopened, so the generated markup stays properly nested.

Both stream arrays are consumed (emptied) by this function.
*/
function mergeStreams(stream1, stream2, value) {
  var processed = 0;
  var result = '';
  var nodeStack = [];

  // Picks the stream whose next event comes first. On equal offsets the order
  // is chosen so that stream1 always starts first and closes last, which
  // avoids opening an element right before it has to be closed again.
  function selectStream() {
    if (!stream1.length || !stream2.length) {
      return stream1.length ? stream1 : stream2;
    }
    if (stream1[0].offset != stream2[0].offset) {
      return (stream1[0].offset < stream2[0].offset) ? stream1 : stream2;
    }
    return stream2[0].event == 'start' ? stream1 : stream2;
  }

  // Tag names are lowercased for both opening and closing tags so the two
  // always agree.
  function tagName(node) {
    return node.nodeName.toLowerCase();
  }

  // Attribute values are wrapped in double quotes, so `"` has to be escaped
  // in addition to what escape() already covers.
  function attrStr(attribute) {
    return ' ' + attribute.nodeName + '="' + escape(attribute.value).replace(/"/g, '&quot;') + '"';
  }

  function open(node) {
    return '<' + tagName(node) + Array.prototype.map.call(node.attributes, attrStr).join('') + '>';
  }

  while (stream1.length || stream2.length) {
    var current = selectStream().shift();
    result += escape(value.slice(processed, current.offset));
    processed = current.offset;
    if (current.event == 'start') {
      result += open(current.node);
      nodeStack.push(current.node);
    } else if (current.event == 'stop') {
      // Close everything down to (and including) the node being stopped...
      var node, i = nodeStack.length;
      do {
        i--;
        node = nodeStack[i];
        result += '</' + tagName(node) + '>';
      } while (node != current.node);
      nodeStack.splice(i, 1);
      // ...then reopen the nodes that were only closed to keep nesting valid.
      while (i < nodeStack.length) {
        result += open(nodeStack[i]);
        i++;
      }
    }
  }
  return result + escape(value.slice(processed));
}
/* Initialization */
/*
Prepares a language definition for highlighting. The definition object and all
modes reachable from it (through `contains` and `starts`) are modified in
place: pattern strings are compiled to RegExp objects, keyword lists are
turned into lookup tables and defaults are filled in. Modes that are already
marked as compiled are left alone, so calling this repeatedly is cheap.
*/
function compileLanguage(language) {
// Builds a RegExp from a pattern string. The 'm' flag is always set, 'i' is
// added when the language is case-insensitive and 'g' when `global` is truthy.
function langRe(value, global) {
return RegExp(
value,
'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '')
);
}
// Compiles a single mode. `parent` is the containing mode; it is undefined
// for the language (top-level) mode itself.
function compileMode(mode, parent) {
// Guard against compiling twice; also stops the recursion for 'self' references.
if (mode.compiled)
return;
mode.compiled = true;
var keywords = []; // used later with beginWithKeyword but filled as a side-effect of keywords compilation
if (mode.keywords) {
// Maps each keyword to [className, relevance].
var compiled_keywords = {};
// `str` is a space-separated keyword list; an entry may carry its own
// relevance as "word|N", otherwise the relevance is 1.
function flatten(className, str) {
str.split(' ').forEach(function(kw) {
var pair = kw.split('|');
compiled_keywords[pair[0]] = [className, pair[1] ? Number(pair[1]) : 1];
keywords.push(pair[0]);
});
}
// NOTE(review): `hljs` is not defined in this function; it is expected to
// exist in an enclosing/global scope.
mode.lexemsRe = langRe(mode.lexems || hljs.IDENT_RE, true);
if (typeof mode.keywords == 'string') { // string
flatten('keyword', mode.keywords)
} else {
// Object form: each own property name is the class name for its keyword list.
for (var className in mode.keywords) {
if (!mode.keywords.hasOwnProperty(className))
continue;
flatten(className, mode.keywords[className]);
}
}
mode.keywords = compiled_keywords;
}
// Begin/end handling only applies to nested modes, not to the language itself.
if (parent) {
if (mode.beginWithKeyword) {
// The mode starts at any of its own keywords followed by whitespace.
mode.begin = '\\b(' + keywords.join('|') + ')\\s';
}
// '\\B|\\b' matches at any position, i.e. "no specific begin/end".
mode.beginRe = langRe(mode.begin ? mode.begin : '\\B|\\b');
if (!mode.end && !mode.endsWithParent)
mode.end = '\\B|\\b';
if (mode.end)
mode.endRe = langRe(mode.end);
// terminator_end is the pattern source that ends this mode: its own end,
// plus the parent's terminator_end when the mode ends with its parent.
mode.terminator_end = mode.end || '';
if (mode.endsWithParent && parent.terminator_end)
mode.terminator_end += (mode.end ? '|' : '') + parent.terminator_end;
}
if (mode.illegal)
mode.illegalRe = langRe(mode.illegal);
if (mode.relevance === undefined)
mode.relevance = 1;
if (!mode.contains) {
mode.contains = [];
}
for (var i = 0; i < mode.contains.length; i++) {
// The string 'self' is a placeholder for the mode containing itself.
if (mode.contains[i] == 'self') {
mode.contains[i] = mode;
}
compileMode(mode.contains[i], mode);
}
if (mode.starts) {
// A `starts` mode is compiled with the same parent as the current mode.
compileMode(mode.starts, parent);
}
// Everything that can interrupt plain text inside this mode: the begin of
// any sub-mode, the end of the mode itself and the illegal pattern.
var terminators = [];
for (var i = 0; i < mode.contains.length; i++) {
terminators.push(mode.contains[i].begin);
}
if (mode.terminator_end) {
terminators.push(mode.terminator_end);
}
if (mode.illegal) {
terminators.push(mode.illegal);
}
// With nothing to look for, a stub whose exec() never matches is used in
// place of a RegExp.
mode.terminators = terminators.length ? langRe(terminators.join('|'), true) : {exec: function(s) {return null;}};
}
compileMode(language);
}
/*
Core highlighting function. Accepts a language name and a string with the
code to highlight. Returns an object with the following properties:
- relevance (int)
- keyword_count (int)
- value (an HTML string with highlighting markup)
- language (the language name that was passed in)
If the code contains a construct that the language marks as illegal,
relevance and keyword_count are 0 and value is the escaped input without any
highlighting markup.
*/
function highlight(language_name, value) {

  // Returns the first sub-mode of `mode` whose begin pattern matches at the
  // very start of `lexem`, or undefined when there is none.
  function subMode(lexem, mode) {
    for (var i = 0; i < mode.contains.length; i++) {
      var match = mode.contains[i].beginRe.exec(lexem);
      if (match && match.index == 0) {
        return mode.contains[i];
      }
    }
  }

  // Returns the mode that `lexem` ends: `mode` itself when its end pattern
  // matches, otherwise an ancestor when the mode ends together with its
  // parent. Returns undefined when the lexem ends nothing.
  function endOfMode(mode, lexem) {
    if (mode.end && mode.endRe.test(lexem)) {
      return mode;
    }
    if (mode.endsWithParent) {
      return endOfMode(mode.parent, lexem);
    }
  }

  function isIllegal(lexem, mode) {
    return mode.illegal && mode.illegalRe.test(lexem);
  }

  // Looks up a lexem match in the mode's keyword table. Returns the
  // [className, relevance] entry or a falsy value.
  function keywordMatch(mode, match) {
    var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
    return Object.prototype.hasOwnProperty.call(mode.keywords, match_str) && mode.keywords[match_str];
  }

  // Renders the buffered text of the current mode, wrapping keywords in spans.
  // Lexems are searched in the raw text and every piece is escaped on output;
  // searching the escaped text instead would let entity names such as the
  // "lt" in "&lt;" be mistaken for keywords.
  function processKeywords() {
    if (!top.keywords)
      return escape(mode_buffer);
    var result = '';
    var last_index = 0;
    top.lexemsRe.lastIndex = 0;
    var match = top.lexemsRe.exec(mode_buffer);
    while (match) {
      result += escape(mode_buffer.substr(last_index, match.index - last_index));
      var keyword_match = keywordMatch(top, match);
      if (keyword_match) {
        keyword_count += keyword_match[1];
        result += '<span class="'+ keyword_match[0] +'">' + escape(match[0]) + '</span>';
      } else {
        result += escape(match[0]);
      }
      last_index = top.lexemsRe.lastIndex;
      match = top.lexemsRe.exec(mode_buffer);
    }
    return result + escape(mode_buffer.substr(last_index));
  }

  // Renders the buffered text by highlighting it as another language: the
  // one named by the mode, or an auto-detected one when the name is empty.
  function processSubLanguage() {
    if (top.subLanguage && !languages[top.subLanguage]) {
      return escape(mode_buffer);
    }
    var result = top.subLanguage ? highlight(top.subLanguage, mode_buffer) : highlightAuto(mode_buffer);
    // Counting embedded language score towards the host language may be disabled
    // with zeroing the containing mode relevance. Usecase in point is Markdown that
    // allows XML everywhere and makes every XML snippet to have a much larger Markdown
    // score.
    if (top.relevance > 0) {
      keyword_count += result.keyword_count;
      relevance += result.relevance;
    }
    return '<span class="' + result.language + '">' + result.value + '</span>';
  }

  function processBuffer() {
    return top.subLanguage !== undefined ? processSubLanguage() : processKeywords();
  }

  // Enters `mode`: emits its opening span (if it has a class name) and decides
  // whether the begin lexem goes outside the span, into the mode's buffer, or
  // is left in the input to be matched again (returnBegin).
  function startNewMode(mode, lexem) {
    var markup = mode.className? '<span class="' + mode.className + '">': '';
    if (mode.returnBegin) {
      result += markup;
      mode_buffer = '';
    } else if (mode.excludeBegin) {
      result += escape(lexem) + markup;
      mode_buffer = '';
    } else {
      result += markup;
      mode_buffer = lexem;
    }
    // The active mode is a fresh object inheriting from the mode definition,
    // so `parent` can be recorded without touching the shared definition.
    top = Object.create(mode, {parent: {value: top}});
    relevance += mode.relevance;
  }

  // Handles one step of the scan: `buffer` is the plain text before the
  // terminator and `lexem` the terminator itself (undefined at end of input).
  // Returns a truthy value when the lexem must be scanned again.
  function processModeInfo(buffer, lexem) {
    mode_buffer += buffer;
    if (lexem === undefined) {
      result += processBuffer();
      return;
    }
    var new_mode = subMode(lexem, top);
    if (new_mode) {
      result += processBuffer();
      startNewMode(new_mode, lexem);
      return new_mode.returnBegin;
    }
    var end_mode = endOfMode(top, lexem);
    if (end_mode) {
      if (!(end_mode.returnEnd || end_mode.excludeEnd)) {
        mode_buffer += lexem;
      }
      result += processBuffer();
      // Close every mode up to and including the one that ended.
      do {
        if (top.className) {
          result += '</span>';
        }
        top = top.parent;
      } while (top != end_mode.parent);
      if (end_mode.excludeEnd) {
        result += escape(lexem);
      }
      mode_buffer = '';
      if (end_mode.starts) {
        startNewMode(end_mode.starts, '');
      }
      return end_mode.returnEnd;
    }
    if (isIllegal(lexem, top))
      throw 'Illegal';
  }

  var language = languages[language_name];
  compileLanguage(language);
  var top = language;
  var mode_buffer = '';
  var relevance = 0;
  var keyword_count = 0;
  var result = '';
  try {
    var match, index = 0;
    while (true) {
      top.terminators.lastIndex = index;
      match = top.terminators.exec(value);
      if (!match)
        break;
      var return_lexem = processModeInfo(value.substr(index, match.index - index), match[0]);
      index = match.index + (return_lexem ? 0 : match[0].length);
    }
    processModeInfo(value.substr(index), undefined);
    return {
      relevance: relevance,
      keyword_count: keyword_count,
      value: result,
      language: language_name
    };
  } catch (e) {
    if (e == 'Illegal') {
      // `language` is reported here as well, so callers that read
      // result.language do not end up with undefined.
      return {
        relevance: 0,
        keyword_count: 0,
        value: escape(value),
        language: language_name
      };
    } else {
      throw e;
    }
  }
}
/*
Highlighting with language detection. The text is highlighted with every
registered language and the candidates are ranked by keyword_count +
relevance. Returns an object with the following properties:
- language (detected language; absent when no language scored above zero)
- relevance (int)
- keyword_count (int)
- value (an HTML string with highlighting markup)
- second_best (object with the same structure for the runner-up language,
  may be absent)
*/
function highlightAuto(text) {
  function score(candidate) {
    return candidate.keyword_count + candidate.relevance;
  }

  // Baseline: plain escaped text with a zero score and no language.
  var best = {
    keyword_count: 0,
    relevance: 0,
    value: escape(text)
  };
  var runnerUp = best;

  Object.keys(languages).forEach(function(name) {
    var attempt = highlight(name, text);
    attempt.language = name;
    if (score(attempt) > score(runnerUp)) {
      runnerUp = attempt;
    }
    if (score(attempt) > score(best)) {
      // The previous winner is demoted to second place.
      runnerUp = best;
      best = attempt;
    }
  });

  // The baseline object has no language and is never reported as second best.
  if (runnerUp.language) {
    best.second_best = runnerUp;
  }
  return best;
}
/*
Post-processing of the highlighted markup:
- when tabReplace is given, TABs at the start of a line (possibly mixed with
  markup tags) are replaced with it; TABs further into the line are kept
- when useBR is truthy, real line-breaks are replaced with '<br>' (meant for
  non-pre containers)
Tab replacement runs first because it relies on the line-breaks being there.
*/
function fixMarkup(value, tabReplace, useBR) {
  var fixed = value;
  if (tabReplace) {
    fixed = fixed.replace(/^((<[^>]+>|\t)+)/gm, function(lineStart) {
      return lineStart.replace(/\t/g, tabReplace);
    });
  }
  return useBR ? fixed.replace(/\n/g, '<br>') : fixed;
}
/*
Applies highlighting to a DOM node containing code. Accepts a DOM node and
two optional parameters for fixMarkup.

The language is taken from the class names of the block (see blockLanguage)
or auto-detected. Blocks marked 'no-highlight' are left untouched. Markup
already present in the block is merged with the highlighting markup. On
return the block has new innerHTML, the language added to its class list
(only when a language is known), and `result` / `second_best` properties
describing the scores.
*/
function highlightBlock(block, tabReplace, useBR) {
  var text = blockText(block, useBR);
  var language = blockLanguage(block);
  if (language == 'no-highlight')
    return;
  var result = language ? highlight(language, text) : highlightAuto(text);
  language = result.language;
  var original = nodeStream(block);
  if (original.length) {
    // The block had markup of its own: parse the highlighted HTML into a
    // scratch element so both markups can be merged as event streams.
    var pre = document.createElement('pre');
    pre.innerHTML = result.value;
    result.value = mergeStreams(original, nodeStream(pre), text);
  }
  result.value = fixMarkup(result.value, tabReplace, useBR);
  var class_name = block.className;
  // No language is known when auto-detection found nothing; in that case the
  // class list is left alone instead of gaining the literal class "undefined".
  if (language) {
    // Token comparison instead of a regex built from the name, so names
    // containing regex metacharacters are handled as well.
    var tokens = class_name.split(/\s+/);
    if (tokens.indexOf(language) == -1 && tokens.indexOf('language-' + language) == -1) {
      class_name = class_name ? (class_name + ' ' + language) : language;
    }
  }
  block.innerHTML = result.value;
  block.className = class_name;
  block.result = {
    language: language,
    kw: result.keyword_count,
    re: result.relevance
  };
  if (result.second_best) {
    block.second_best = {
      language: result.second_best.language,
      kw: result.second_best.keyword_count,
      re: result.second_best.relevance
    };
  }
}
/*
Applies highlighting to all <pre><code>..</code></pre> blocks on a page.
Only the first call does any work; later calls return immediately. All code
nodes are collected before the first one is highlighted.
*/
function initHighlighting() {
  if (initHighlighting.called) {
    return;
  }
  initHighlighting.called = true;

  var pres = document.getElementsByTagName('pre');
  var codeBlocks = [];
  for (var i = 0; i < pres.length; i++) {
    var code = findCode(pres[i]);
    if (code) {
      codeBlocks.push(code);
    }
  }
  codeBlocks.forEach(function(code) {
    highlightBlock(code, hljs.tabReplace);
  });
}
/*
Attaches highlighting to the page load events: initHighlighting is registered
for both 'DOMContentLoaded' and 'load' on the window.
*/
function initHighlightingOnLoad() {
  ['DOMContentLoaded', 'load'].forEach(function(eventName) {
    window.addEventListener(eventName, initHighlighting, false);
  });
}
// Registry of language definitions, keyed by language name. The functions
// above reach it through this local variable; it is the same object that is
// exposed as this.LANGUAGES below.
var languages = {}; // a shortcut to avoid writing "this." everywhere
/* Interface definition */
// Public API: the registry plus the highlighting entry points.
this.LANGUAGES = languages;
this.highlight = highlight;
this.highlightAuto = highlightAuto;
this.fixMarkup = fixMarkup;
this.highlightBlock = highlightBlock;
this.initHighlighting = initHighlighting;
this.initHighlightingOnLoad = initHighlightingOnLoad;
// Common regexps
// These are pattern *strings* (hence the doubled backslashes), not RegExp objects.
this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*'; // identifier starting with a letter
this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*'; // identifier that may also start with "_"
this.NUMBER_RE = '\\b\\d+(\\.\\d+)?'; // digits with an optional fractional part
this.C_NUMBER_RE = '(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
this.BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
// Alternation of operator and punctuation tokens.
// NOTE(review): going by the name, these are the tokens after which a regexp
// literal may start; no use is visible in this section, so confirm against the
// language definitions.
this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';
// Common modes
// Ready-made mode definitions for reuse.
// A backslash followed by any single character (including a line-break).
this.BACKSLASH_ESCAPE = {
begin: '\\\\[\\s\\S]', relevance: 0
};
// Single-quoted string; a line-break inside it is illegal.
this.APOS_STRING_MODE = {
className: 'string',
begin: '\'', end: '\'',
illegal: '\\n',
contains: [this.BACKSLASH_ESCAPE],
relevance: 0
};
// Double-quoted string; a line-break inside it is illegal.
this.QUOTE_STRING_MODE = {
className: 'string',
begin: '"', end: '"',
illegal: '\\n',
contains: [this.BACKSLASH_ESCAPE],
relevance: 0
};
// Comment running from "//" to the end of the line.
this.C_LINE_COMMENT_MODE = {
className: 'comment',
begin: '//', end: '$'
};
// Comment delimited by "/*" and "*/".
this.C_BLOCK_COMMENT_MODE = {
className: 'comment',
begin: '/\\*', end: '\\*/'
};
// Comment running from "#" to the end of the line.
this.HASH_COMMENT_MODE = {
className: 'comment',
begin: '#', end: '$'
};
// Number modes built on the patterns defined above.
this.NUMBER_MODE = {
className: 'number',
begin: this.NUMBER_RE,
relevance: 0
};
this.C_NUMBER_MODE = {
className: 'number',
begin: this.C_NUMBER_RE,
relevance: 0
};
this.BINARY_NUMBER_MODE = {
className: 'number',
begin: this.BINARY_NUMBER_RE,
relevance: 0
};
// Utility functions
/*
Returns a new object holding a shallow copy of every enumerable property of
`parent`, overridden by the enumerable properties of `obj` when `obj` is
given. Neither argument is modified.
*/
this.inherit = function(parent, obj) {
  var merged = {};
  var sources = obj ? [parent, obj] : [parent];
  sources.forEach(function(source) {
    for (var prop in source) {
      merged[prop] = source[prop];
    }
  });
  return merged;
};
}
/*
Syntax highlighting with language autodetection.
http://softwaremaniacs.org/soft/highlight/
*/
var highlightBefore = function() {
/* Utility functions */
/*
Escapes `&` and `<` so the text can be embedded in HTML markup; `>` is left
as it is. Ampersands are handled first so the `&lt;` entities produced
afterwards are not escaped a second time.
*/
function escape(value) {
  var ampersandsDone = value.replace(/&/g, '&amp;');
  return ampersandsDone.replace(/</g, '&lt;');
}
/*
Looks for a CODE child of a `pre` node. Children are scanned in order; text
nodes whose value contains whitespace are stepped over, and the search gives
up (returning undefined) at the first child of any other kind.
*/
function findCode(pre) {
  var candidate = pre.firstChild;
  while (candidate) {
    if (candidate.nodeName == 'CODE') {
      return candidate;
    }
    var isTextNode = candidate.nodeType == 3;
    if (!isTextNode || !candidate.nodeValue.match(/\s+/)) {
      return undefined;
    }
    candidate = candidate.nextSibling;
  }
  return undefined;
}
/*
Collects the text of a DOM subtree as one string. Text nodes contribute their
value (with "\n" characters removed when ignoreNewLines is truthy), BR nodes
contribute a single "\n", and every other node contributes the text of its
own children.
*/
function blockText(block, ignoreNewLines) {
  function textOf(child) {
    if (child.nodeType == 3) {
      var raw = child.nodeValue;
      return ignoreNewLines ? raw.replace(/\n/g, '') : raw;
    }
    return child.nodeName == 'BR' ? '\n' : blockText(child, ignoreNewLines);
  }
  var pieces = Array.prototype.map.call(block.childNodes, textOf);
  return pieces.join('');
}
/*
Determines the language requested for a code block from the class names of
the block and of its parent. A leading "language-" prefix is stripped from
each class. Returns the first class that is either a registered language or
the special value 'no-highlight'; returns undefined when none qualifies.
*/
function blockLanguage(block) {
  var classNames = (block.className + ' ' + block.parentNode.className).split(/\s+/);
  for (var i = 0; i < classNames.length; i++) {
    var candidate = classNames[i].replace(/^language-/, '');
    if (candidate == 'no-highlight') {
      return candidate;
    }
    // Own-property check: a plain `languages[candidate]` lookup would also
    // find inherited members such as "constructor" or "toString" and report
    // them as languages.
    if (Object.prototype.hasOwnProperty.call(languages, candidate) && languages[candidate]) {
      return candidate;
    }
  }
}
/* Stream merging */
/*
Flattens the element structure below `node` into a list of events. Every
element other than BR produces a 'start' and a 'stop' event carrying the text
offset at which it opens and closes. Offsets advance by the length of each
text node and by one for each BR.
*/
function nodeStream(node) {
  var events = [];

  // Walks the children of `parent`, starting at text offset `position`, and
  // returns the offset reached after the last child.
  function walk(parent, position) {
    var child = parent.firstChild;
    while (child) {
      if (child.nodeType == 3) {
        position += child.nodeValue.length;
      } else if (child.nodeName == 'BR') {
        position += 1;
      } else if (child.nodeType == 1) {
        events.push({event: 'start', offset: position, node: child});
        position = walk(child, position);
        events.push({event: 'stop', offset: position, node: child});
      }
      child = child.nextSibling;
    }
    return position;
  }

  walk(node, 0);
  return events;
}
/*
Merges two event streams (as produced by nodeStream) over the same text into
a single HTML string. `value` is the plain text both streams refer to; it is
escaped piecewise as it is copied to the output. When a node has to be closed
while nodes opened after it are still open, those are closed first and then
reopened, so the generated markup stays properly nested.

Both stream arrays are consumed (emptied) by this function.
*/
function mergeStreams(stream1, stream2, value) {
  var processed = 0;
  var result = '';
  var nodeStack = [];

  // Picks the stream whose next event comes first. On equal offsets the order
  // is chosen so that stream1 always starts first and closes last, which
  // avoids opening an element right before it has to be closed again.
  function selectStream() {
    if (!stream1.length || !stream2.length) {
      return stream1.length ? stream1 : stream2;
    }
    if (stream1[0].offset != stream2[0].offset) {
      return (stream1[0].offset < stream2[0].offset) ? stream1 : stream2;
    }
    return stream2[0].event == 'start' ? stream1 : stream2;
  }

  // Tag names are lowercased for both opening and closing tags so the two
  // always agree.
  function tagName(node) {
    return node.nodeName.toLowerCase();
  }

  // Attribute values are wrapped in double quotes, so `"` has to be escaped
  // in addition to what escape() already covers.
  function attrStr(attribute) {
    return ' ' + attribute.nodeName + '="' + escape(attribute.value).replace(/"/g, '&quot;') + '"';
  }

  function open(node) {
    return '<' + tagName(node) + Array.prototype.map.call(node.attributes, attrStr).join('') + '>';
  }

  while (stream1.length || stream2.length) {
    var current = selectStream().shift();
    result += escape(value.slice(processed, current.offset));
    processed = current.offset;
    if (current.event == 'start') {
      result += open(current.node);
      nodeStack.push(current.node);
    } else if (current.event == 'stop') {
      // Close everything down to (and including) the node being stopped...
      var node, i = nodeStack.length;
      do {
        i--;
        node = nodeStack[i];
        result += '</' + tagName(node) + '>';
      } while (node != current.node);
      nodeStack.splice(i, 1);
      // ...then reopen the nodes that were only closed to keep nesting valid.
      while (i < nodeStack.length) {
        result += open(nodeStack[i]);
        i++;
      }
    }
  }
  return result + escape(value.slice(processed));
}
/* Initialization */
/*
Compiles a language definition in place so that highlight() can use it.
For the language and (recursively) every mode reachable through `contains`
and `starts` it:
- turns the keyword definition (a string or an object of className -> string)
  into a lookup table {keyword: [className, relevance]},
- builds RegExp objects (lexemsRe, beginRe, endRe, illegalRe, terminators),
- fills in defaults for `relevance` and `contains`.
Modes are marked with `compiled = true`, so compiling twice is a no-op.
*/
function compileLanguage(language) {

  // Builds a RegExp for this language: always multiline, case-insensitive
  // when the language asks for it, global on request.
  function langRe(value, global) {
    return RegExp(
      value,
      'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '')
    );
  }

  function compileMode(mode, parent) {
    if (mode.compiled)
      return;
    mode.compiled = true;

    var keywords = []; // used later with beginWithKeyword but filled as a side-effect of keywords compilation
    if (mode.keywords) {
      var compiled_keywords = {};

      // Adds every space-separated "word" or "word|relevance" entry of `str`
      // to the lookup table under the given class name.
      var flatten = function(className, str) {
        // Lookups lower-case the lexem for case-insensitive languages, so the
        // table has to be keyed in lower case too or nothing would ever match.
        if (language.case_insensitive) {
          str = str.toLowerCase();
        }
        str.split(' ').forEach(function(kw) {
          var pair = kw.split('|');
          compiled_keywords[pair[0]] = [className, pair[1] ? Number(pair[1]) : 1];
          keywords.push(pair[0]);
        });
      };

      mode.lexemsRe = langRe(mode.lexems || hljs.IDENT_RE, true);
      if (typeof mode.keywords == 'string') { // string
        flatten('keyword', mode.keywords);
      } else {
        for (var className in mode.keywords) {
          if (!mode.keywords.hasOwnProperty(className))
            continue;
          flatten(className, mode.keywords[className]);
        }
      }
      mode.keywords = compiled_keywords;
    }
    if (parent) {
      if (mode.beginWithKeyword) {
        mode.begin = '\\b(' + keywords.join('|') + ')\\s';
      }
      // '\\B|\\b' matches at every position, i.e. "begins/ends immediately".
      mode.beginRe = langRe(mode.begin ? mode.begin : '\\B|\\b');
      if (!mode.end && !mode.endsWithParent)
        mode.end = '\\B|\\b';
      if (mode.end)
        mode.endRe = langRe(mode.end);
      mode.terminator_end = mode.end || '';
      if (mode.endsWithParent && parent.terminator_end)
        mode.terminator_end += (mode.end ? '|' : '') + parent.terminator_end;
    }
    if (mode.illegal)
      mode.illegalRe = langRe(mode.illegal);
    if (mode.relevance === undefined)
      mode.relevance = 1;
    if (!mode.contains) {
      mode.contains = [];
    }
    var i;
    for (i = 0; i < mode.contains.length; i++) {
      if (mode.contains[i] == 'self') {
        mode.contains[i] = mode;
      }
      compileMode(mode.contains[i], mode);
    }
    if (mode.starts) {
      compileMode(mode.starts, parent);
    }

    // Everything that can interrupt plain text inside this mode: the start of
    // a sub-mode, the end of the mode itself, or an illegal sequence.
    var terminators = [];
    for (i = 0; i < mode.contains.length; i++) {
      terminators.push(mode.contains[i].begin);
    }
    if (mode.terminator_end) {
      terminators.push(mode.terminator_end);
    }
    if (mode.illegal) {
      terminators.push(mode.illegal);
    }
    mode.terminators = terminators.length ? langRe(terminators.join('|'), true) : {exec: function(s) {return null;}};
  }

  compileMode(language);
}
/*
Core highlighting function. Accepts a language name and a string with the
code to highlight. Returns an object with the following properties:

- relevance (int)
- keyword_count (int)
- value (an HTML string with highlighting markup)
- language (the language name that was passed in)

When the code contains a sequence that is illegal for the language, the result
has zero relevance and keyword_count and `value` is just the escaped input.
Throws an Error when no language is registered under `language_name`.
*/
function highlight(language_name, value) {

  // Returns the first sub-mode of `mode` whose begin pattern matches at the
  // very start of `lexem` (undefined when there is none).
  function subMode(lexem, mode) {
    for (var i = 0; i < mode.contains.length; i++) {
      var match = mode.contains[i].beginRe.exec(lexem);
      if (match && match.index == 0) {
        return mode.contains[i];
      }
    }
  }

  // Returns the mode that `lexem` terminates: `mode` itself or, for modes
  // flagged endsWithParent, the closest ancestor whose end pattern matches.
  function endOfMode(mode, lexem) {
    if (mode.end && mode.endRe.test(lexem)) {
      return mode;
    }
    if (mode.endsWithParent) {
      return endOfMode(mode.parent, lexem);
    }
  }

  function isIllegal(lexem, mode) {
    return mode.illegal && mode.illegalRe.test(lexem);
  }

  // Looks up a lexem match in the compiled keyword table of `mode`.
  // Returns [className, relevance] or false.
  function keywordMatch(mode, match) {
    var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
    return mode.keywords.hasOwnProperty(match_str) && mode.keywords[match_str];
  }

  // Renders the buffered text of the current mode, wrapping keywords in spans.
  // Lexems are searched in the RAW buffer and every piece is escaped on its
  // way out. Escaping first would expose entity names to the lexem regex
  // ("&lt;" contains the lexem "lt") and corrupt the entity whenever such a
  // name happens to be a keyword.
  function processKeywords() {
    if (!top.keywords)
      return escape(mode_buffer);
    var out = '';
    var last_index = 0;
    top.lexemsRe.lastIndex = 0;
    var match = top.lexemsRe.exec(mode_buffer);
    while (match) {
      out += escape(mode_buffer.substr(last_index, match.index - last_index));
      var keyword_match = keywordMatch(top, match);
      if (keyword_match) {
        keyword_count += keyword_match[1];
        out += '<span class="'+ keyword_match[0] +'">' + escape(match[0]) + '</span>';
      } else {
        out += escape(match[0]);
      }
      last_index = top.lexemsRe.lastIndex;
      match = top.lexemsRe.exec(mode_buffer);
    }
    return out + escape(mode_buffer.substr(last_index));
  }

  // Renders the buffered text with another language: the one named by
  // top.subLanguage, or an auto-detected one when the name is empty.
  function processSubLanguage() {
    if (top.subLanguage && !languages[top.subLanguage]) {
      return escape(mode_buffer);
    }
    var result = top.subLanguage ? highlight(top.subLanguage, mode_buffer) : highlightAuto(mode_buffer);
    // Counting embedded language score towards the host language may be disabled
    // with zeroing the containing mode relevance. Usecase in point is Markdown that
    // allows XML everywhere and makes every XML snippet to have a much larger Markdown
    // score.
    if (top.relevance > 0) {
      keyword_count += result.keyword_count;
      relevance += result.relevance;
    }
    return '<span class="' + result.language + '">' + result.value + '</span>';
  }

  function processBuffer() {
    return top.subLanguage !== undefined ? processSubLanguage() : processKeywords();
  }

  // Enters `mode`: emits its opening span (if it has a class name), decides
  // whether the begin lexem belongs to the new mode's buffer, and makes the
  // mode the new `top` with a link back to the previous one.
  function startNewMode(mode, lexem) {
    var markup = mode.className? '<span class="' + mode.className + '">': '';
    if (mode.returnBegin) {
      result += markup;
      mode_buffer = '';
    } else if (mode.excludeBegin) {
      result += escape(lexem) + markup;
      mode_buffer = '';
    } else {
      result += markup;
      mode_buffer = lexem;
    }
    top = Object.create(mode, {parent: {value: top}});
    relevance += mode.relevance;
  }

  // Handles one terminator match: `buffer` is the text before it, `lexem` the
  // matched terminator (undefined at the end of input). A truthy return value
  // tells the main loop not to consume the lexem, so it is scanned again.
  function processModeInfo(buffer, lexem) {
    mode_buffer += buffer;
    if (lexem === undefined) {
      result += processBuffer();
      return;
    }

    var new_mode = subMode(lexem, top);
    if (new_mode) {
      result += processBuffer();
      startNewMode(new_mode, lexem);
      return new_mode.returnBegin;
    }

    var end_mode = endOfMode(top, lexem);
    if (end_mode) {
      if (!(end_mode.returnEnd || end_mode.excludeEnd)) {
        mode_buffer += lexem;
      }
      result += processBuffer();
      // Close every mode up to and including the one that ended.
      do {
        if (top.className) {
          result += '</span>';
        }
        top = top.parent;
      } while (top != end_mode.parent);
      if (end_mode.excludeEnd) {
        result += escape(lexem);
      }
      mode_buffer = '';
      if (end_mode.starts) {
        startNewMode(end_mode.starts, '');
      }
      return end_mode.returnEnd;
    }

    if (isIllegal(lexem, top))
      throw 'Illegal';
  }

  var language = languages[language_name];
  if (!language) {
    throw new Error('Unknown language: "' + language_name + '"');
  }
  compileLanguage(language);
  var top = language;
  var mode_buffer = '';
  var relevance = 0;
  var keyword_count = 0;
  var result = '';
  try {
    var match, index = 0;
    while (true) {
      top.terminators.lastIndex = index;
      match = top.terminators.exec(value);
      if (!match)
        break;
      var return_lexem = processModeInfo(value.substr(index, match.index - index), match[0]);
      index = match.index + (return_lexem ? 0 : match[0].length);
    }
    processModeInfo(value.substr(index), undefined);
    return {
      relevance: relevance,
      keyword_count: keyword_count,
      value: result,
      language: language_name
    };
  } catch (e) {
    if (e == 'Illegal') {
      // Same shape as the success result so callers can always rely on
      // `language` being present.
      return {
        relevance: 0,
        keyword_count: 0,
        value: escape(value),
        language: language_name
      };
    } else {
      throw e;
    }
  }
}
/*
Highlighting with language detection. Runs highlight() once per registered
language and keeps the result with the highest combined score
(keyword_count + relevance). Returns an object with these properties:

- language (name of the winning language; absent when no language scored
  above zero)
- relevance (int)
- keyword_count (int)
- value (an HTML string with highlighting markup; the escaped input when no
  language won)
- second_best (object of the same structure for the runner-up language,
  may be absent)
*/
function highlightAuto(text) {
  function score(candidate) {
    return candidate.keyword_count + candidate.relevance;
  }

  var best = {
    keyword_count: 0,
    relevance: 0,
    value: escape(text)
  };
  var runnerUp = best;

  Object.keys(languages).forEach(function(name) {
    var candidate = highlight(name, text);
    candidate.language = name;
    var candidateScore = score(candidate);
    if (candidateScore > score(runnerUp)) {
      runnerUp = candidate;
    }
    if (candidateScore > score(best)) {
      // The previous winner is demoted to runner-up.
      runnerUp = best;
      best = candidate;
    }
  });

  // The initial placeholder has no language and must not be reported.
  if (runnerUp.language) {
    best.second_best = runnerUp;
  }
  return best;
}
/*
Post-processing of the highlighted markup:

- when `tabReplace` is given, every TAB in the leading run of tabs and tags
  of a line is replaced with it (tabs further into the line are kept)
- when `useBR` is set, real line-breaks are replaced with '<br>' for use in
  non-pre containers
*/
function fixMarkup(value, tabReplace, useBR) {
  var markup = value;
  if (tabReplace) {
    markup = markup.replace(/^((<[^>]+>|\t)+)/gm, function(leading) {
      return leading.replace(/\t/g, tabReplace);
    });
  }
  return useBR ? markup.replace(/\n/g, '<br>') : markup;
}
/*
Applies highlighting to a DOM node containing code. Accepts a DOM node and
two optional parameters for fixMarkup.

The language is taken from the block's classes (see blockLanguage) or
auto-detected. Blocks marked 'no-highlight' are left untouched. Markup that
already exists inside the block is preserved by merging it with the
highlighting markup. On return the block has:
- innerHTML replaced with the highlighted markup,
- the language name appended to className (when one is known and not already
  present, with or without the "language-" prefix),
- `result` ({language, kw, re}) and, if available, `second_best` attached.
*/
function highlightBlock(block, tabReplace, useBR) {
  var text = blockText(block, useBR);
  var language = blockLanguage(block);
  if (language == 'no-highlight')
      return;
  var result = language ? highlight(language, text) : highlightAuto(text);
  // A result may carry no language (e.g. detection found no candidate);
  // fall back to the explicitly requested one, if any.
  language = result.language || language;
  var original = nodeStream(block);
  if (original.length) {
    // Parse the highlighted markup to get its node stream, then interleave it
    // with the markup the block contained originally.
    var pre = document.createElement('pre');
    pre.innerHTML = result.value;
    result.value = mergeStreams(original, nodeStream(pre), text);
  }
  result.value = fixMarkup(result.value, tabReplace, useBR);

  var class_name = block.className;
  if (language) {
    // Plain token comparison: no regex is built from the language name, and
    // an unknown language never ends up as the class "undefined".
    var tokens = class_name.split(/\s+/);
    if (tokens.indexOf(language) == -1 && tokens.indexOf('language-' + language) == -1) {
      class_name = class_name ? (class_name + ' ' + language) : language;
    }
  }
  block.innerHTML = result.value;
  block.className = class_name;
  block.result = {
    language: language,
    kw: result.keyword_count,
    re: result.relevance
  };
  if (result.second_best) {
    block.second_best = {
      language: result.second_best.language,
      kw: result.second_best.keyword_count,
      re: result.second_best.relevance
    };
  }
}
/*
Applies highlighting to all <pre><code>..</code></pre> blocks on a page.
Runs at most once: a `called` flag on the function itself turns every later
call into a no-op. Each block is highlighted with hljs.tabReplace as the TAB
replacement.
*/
function initHighlighting() {
  if (initHighlighting.called) {
    return;
  }
  initHighlighting.called = true;

  // First collect every <code> found by findCode, then highlight them.
  var pres = document.getElementsByTagName('pre');
  var codeBlocks = [];
  for (var i = 0, count = pres.length; i < count; i++) {
    var code = findCode(pres[i]);
    if (code) {
      codeBlocks.push(code);
    }
  }
  for (var j = 0; j < codeBlocks.length; j++) {
    highlightBlock(codeBlocks[j], hljs.tabReplace);
  }
}
/*
Attaches highlighting to the page load events. initHighlighting is registered
for both 'DOMContentLoaded' and 'load'; since it only ever runs once, whichever
event fires first does the work.
*/
function initHighlightingOnLoad() {
  var triggers = ['DOMContentLoaded', 'load'];
  for (var i = 0; i < triggers.length; i++) {
    window.addEventListener(triggers[i], initHighlighting, false);
  }
}
// Registry of language definitions keyed by language name. highlight() looks a
// definition up by name and highlightAuto() iterates over all of them.
var languages = {}; // a shortcut to avoid writing "this." everywhere
/* Interface definition */
// The functions declared above are published as properties of `this`.
// NOTE(review): what `this` is depends on how the enclosing function is
// invoked, which is not visible here — confirm against the caller.
// LANGUAGES is the same object as `languages`, so definitions assigned to it
// are seen by the functions above.
this.LANGUAGES = languages;
this.highlight = highlight;
this.highlightAuto = highlightAuto;
this.fixMarkup = fixMarkup;
this.highlightBlock = highlightBlock;
this.initHighlighting = initHighlighting;
this.initHighlightingOnLoad = initHighlightingOnLoad;
// Common regexps
// These are regular expression SOURCE strings, not RegExp objects (hence the
// doubled backslashes).
// An ASCII letter followed by letters, digits or underscores. Also the default
// lexem pattern used by compileLanguage when a mode defines no `lexems`.
this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*';
// Same as IDENT_RE, but the first character may also be an underscore.
this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*';
// Digits with an optional fractional part, starting at a word boundary.
this.NUMBER_RE = '\\b\\d+(\\.\\d+)?';
this.C_NUMBER_RE = '(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
this.BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
// Alternation of operator and punctuation tokens.
// NOTE(review): judging by the name these are the tokens after which a regexp
// literal may start — confirm against the language definitions that use it.
this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';
// Common modes
// Ready-made mode definitions. `begin`, `end` and `illegal` are regexp source
// strings that compileLanguage compiles (always with the multiline flag, so
// '$' matches at the end of every line). `relevance: 0` means that entering
// the mode adds nothing to the relevance score computed by highlight().

// A backslash followed by any single character, newline included.
this.BACKSLASH_ESCAPE = {
begin: '\\\\[\\s\\S]', relevance: 0
};
// Single-quoted string with backslash escapes; a line break inside is illegal.
this.APOS_STRING_MODE = {
className: 'string',
begin: '\'', end: '\'',
illegal: '\\n',
contains: [this.BACKSLASH_ESCAPE],
relevance: 0
};
// Double-quoted string with backslash escapes; a line break inside is illegal.
this.QUOTE_STRING_MODE = {
className: 'string',
begin: '"', end: '"',
illegal: '\\n',
contains: [this.BACKSLASH_ESCAPE],
relevance: 0
};
// Comment from '//' to the end of the line.
this.C_LINE_COMMENT_MODE = {
className: 'comment',
begin: '//', end: '$'
};
// Comment from '/*' to the next '*/'.
this.C_BLOCK_COMMENT_MODE = {
className: 'comment',
begin: '/\\*', end: '\\*/'
};
// Comment from '#' to the end of the line.
this.HASH_COMMENT_MODE = {
className: 'comment',
begin: '#', end: '$'
};
// Number as matched by NUMBER_RE.
this.NUMBER_MODE = {
className: 'number',
begin: this.NUMBER_RE,
relevance: 0
};
// Number as matched by C_NUMBER_RE.
this.C_NUMBER_MODE = {
className: 'number',
begin: this.C_NUMBER_RE,
relevance: 0
};
// Number as matched by BINARY_NUMBER_RE.
this.BINARY_NUMBER_MODE = {
className: 'number',
begin: this.BINARY_NUMBER_RE,
relevance: 0
};
// Utility functions
// Returns a new object holding a shallow copy of every enumerable property of
// `parent`, overridden/extended by the enumerable properties of `obj` (if
// given). Neither argument is modified.
this.inherit = function(parent, obj) {
  var merged = {};
  var sources = obj ? [parent, obj] : [parent];
  sources.forEach(function(source) {
    for (var name in source) {
      merged[name] = source[name];
    }
  });
  return merged;
};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment