public
Last active — forked from 140bytes/LICENSE.txt

basic js syntax highlighting

  • Download Gist
LICENSE.txt
1 2 3 4 5 6 7 8 9 10 11 12 13
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
 
Copyright (C) 2011 Alex Kloss <alexthkloss@web.de>
 
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
 
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 
0. You just DO WHAT THE FUCK YOU WANT TO.
README.md
Markdown

basic js syntax highlighting

Encapsulates found regexp, strings, comments, keywords, predefined objects, numbers, brackets and operators into t-tags with a matching class.

Usage

Requires a RegExp to match regexp, strings, comments, keywords, predefined objects, numbers, brackets and operators, e.g.:

// Tokenizer for the highlighting filter. Exactly one of the seven capture
// groups is set per match, in this order:
//   1 regexp literal, 2 string, 3 comment, 4 keyword,
//   5 predefined object, 6 number, 7 operator/bracket.
// Group 3 uses [^\n]*\n? so that line comments are also found at the end of
// the input and before CRLF line breaks, and [\s\S] so that block comments
// may contain any character (not only Latin-1).
var re = /(?![\d\w]\s*)(\/[^\/\*][^\n\/]*\/[gi])|(".*?"|'.*?')|(\/\/[^\n]*\n?|\/\*[\s\S]*?\*\/)|(?:\b)(abstract|boolean|break|byte|case|catch|char|class|const|continue|debugger|default|delete|do|double|else|enum|export|extends|false|final|finally|float|for|function|goto|if|implements|import|in|instanceof|int|interface|long|native|new|null|package|private|protected|public|return|short|static|super|switch|synchronized|this|throw|throws|transient|true|try|typeof|var|void|volatile|while|with)(?:\b)|(?:\b)(Array|Boolean|Date|Function|Math|Number|Object|RegExp|String|document|window|arguments)(?:\b)|(\d[\d\.eE]*)|([\x28-\x2b\x2d\x3a-\x3f\x5b\x5d\x5e\x7b-\x7e]+|\x2f|(?=\D)\.(?=\D))/g;

Provides a filter inserting t-tags* with the following classNames:

  • f-1 = regexp
  • f1 = string
  • f2 = comment
  • f3 = keyword
  • f4 = predefined object
  • f5 = number
  • f6 = operator, bracket

Remember to read the code via innerText/firstChild.data instead of innerHTML: innerHTML returns HTML entities (e.g. "&lt;" for "<"), which the regular expression cannot match. "&" must be escaped as "&amp;" beforehand; otherwise it will be interpreted as the start of an entity when the result is reinserted as HTML.

* IE6-8 need a js shim to allow for the non-standard tag: document.createElement('t');

This was created with the 140byt.es homepage in mind, too :-)

annotated.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
function(
c, // code
r // regexp
){
return c.replace(r,
// use a replace callback
function(
f, // full match
i // counter
){
for(
// initialize counter (with count of regexp arguments)
i=7;
// only continue until i is other than -1 (~i becomes 0)
// and arguments[i] is not present (!'' becomes true coerces to 1),
// decrease counter
~i * !arguments[i--]
// found something?
return f ?
// encapsulate it with a t-tag adding the corresponding class name
'<t class=f' + i + '>' +
// replace "<" so it will not be interpreted as a tag
f.replace('<','&lt;') +
'</t>' :
// otherwise return empty string
'';
})
}
index.js
JavaScript
1
function(c,r){return c.replace(r,function(f,i){for(i=7;~i*!arguments[i--];);return i?'<t class=f'+i+'>'+f.replace('<','&lt;')+'</t>':''})}
package.json
JSON
1 2 3 4 5 6 7 8 9 10 11
{
"name": "jsSyntaxHighlighting",
 
"description": "Basic JS Syntax highlighting in 138bytes",
 
"keywords": [
"JavaScript",
"Syntax",
"Highlighting"
]
}
test.html
HTML
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
<!DOCTYPE html>
<title>small syntax highlighting</title>
<style>
/* syntax highlighting styles, shameless ripoff off github ;-) */
body{font-family:sans-serif;}
pre{color:#333;font-size:120%;background:#fff;width:100%;overflow-x:auto;padding:5px;}
/* regexp */
.f-1, .f-1 * {color:#092;}
/* string */
.f1,.f1 * {color:#d14;}
/* comment */
.f2,.f2 * {color:#999;font-style:italic;}
/* keyword */
.f3,.f3 * {color:#000;font-weight: 700;}
/* predefined object */
.f4,.f4 * {color:#08B;}
/* number */
.f5,.f5 * {color:#099;}
/* bracket, operator */
.f6 {color:#000;}
</style>
<div>
<h2>Code with syntax highlighting:</h2>
<pre id="ret">/* index.js */
 
function(c,r){return c.replace(r,function(f,i){for(i=7;~i*!arguments[i--];);return i?'&lt;t class=f'+i+'>'+f.replace('&lt;','&amp;lt;')+'&lt;/t>':''})}
 
/* annotated.js */
 
function(
c, // code
r // regexp
){
return c.replace(r,
// use a replace callback
function(
f, // full match
i // counter
){
for(
// initialize counter (with count of regexp arguments)
i=6;
// only continue until i is other than -1 (~i becomes 0)
// and arguments[i] is not present (!'' becomes true coerces to 1),
// decrease counter
~i * !arguments[i--]
// found something?
return f ?
// encapsulate it with a t-tag adding the corresponding class name
'&lt;t class=f' + i + '>' +
// replace "&lt;" so it will not be interpreted as a tag
f.replace('&lt;','&amp;lt;') +
'&lt;/t>' :
// otherwise return empty string
'';
})
}
 
</pre>
</div>
 
<script>
// RegExp to detect regexp, strings, comments, keywords, predefined object, numbers and operators.
// Exactly one of the seven capture groups is set per match, in that order.
// The comment group uses [^\n]*\n? so that line comments are also found at the
// end of the input and before CRLF line breaks, and [\s\S] so that block
// comments may contain any character (not only Latin-1).
var re = /(?![\d\w]\s*)(\/[^\/\*][^\n\/]*\/[gi])|(".*?"|'.*?')|(\/\/[^\n]*\n?|\/\*[\s\S]*?\*\/)|(?:\b)(abstract|boolean|break|byte|case|catch|char|class|const|continue|debugger|default|delete|do|double|else|enum|export|extends|false|final|finally|float|for|function|goto|if|implements|import|in|instanceof|int|interface|long|native|new|null|package|private|protected|public|return|short|static|super|switch|synchronized|this|throw|throws|transient|true|try|typeof|var|void|volatile|while|with)(?:\b)|(?:\b)(Array|Boolean|Date|Function|Math|Number|Object|RegExp|String|document|window|arguments)(?:\b)|(\d[\d\.eE]*)|([\x28-\x2b\x2d\x3a-\x3f\x5b\x5d\x5e\x7b-\x7e]+|\x2f|(?=\D)\.(?=\D))/g;
// syntax highlighting filter (same code as index.js)
//
// c: code to highlight, r: global RegExp with 7 capture groups.
// Returns c with every match of r wrapped in <t class=fN>…</t>, where N is the
// number of the matching capture group minus 1 for groups 2..7, and -1 when
// none of the groups 2..7 matched (i.e. group 1, regexp).
//
// The loop pre-decrements i from 7 and tests arguments[i+1] (groups 7..2);
// when i reaches 0 the `i--` branch ends the loop and leaves i at -1. It never
// reads arguments[1], so the result does not depend on parameter/arguments
// aliasing. Every "<" of the match is escaped, not only the first one.
var myFunction = function(c,r){return c.replace(r,function(f,i){for(i=7;--i?!arguments[i+1]:i--;);return'<t class=f'+i+'>'+f.replace(/</g,'&lt;')+'</t>'})};
// Run the filter over the raw text of the <pre id="ret"> element and put the
// resulting markup back into it. "&" is escaped first so that it survives
// being reinserted as HTML.
var ret = document.getElementById("ret");
ret.innerHTML = myFunction(
  ret.firstChild.data.replace(/&/g, '&amp;'),
  re
);
</script>

@tsaniel reminded me of non-capture groups. Thanks! That way, I can detect predefined objects, too.

The usage of html code within the source inside a pre tag still leads to errors :-(
ideas, anyone?

Got a solution (will edit the gist accordingly the next minute) :) Since it uses nonstandard tags, it will only work in IE with a small fix:

document.createElement('t');

It still fits inside the 139 bytes, though the filter class names have changed.

Only 1 thing could probably be optimized, which is the following expression inside the for loop:
```~i&&!arguments[i--]```
here coercion and a binary operation instead of the logical `&&` could probably save another byte. Will test it later.

Even more simple, we can use number coercion and the "*" operator!

Next thing I will add is a comment detection :)

Update: done

RegExp are detected now, too.

Syntax highlighting still needs improvement where escaped characters are involved (especially within strings, regexp).

Is this used on the 140byt.es main page? Some of the code snippets there are not highlighted well. Let's see if we can fix this.

  • To match strings with escaped characters change ".*?" to "(?:\\.|\\\r*\n|[^\n"])*" and '.*?' to '(?:\\.|\\\r*\n|[^\n'])*'. This also matches escaped line breaks. I'm not sure if this is allowed in JavaScript, but it makes the syntax highlighting more reliable.
  • To match regular expressions with escaped characters change (?![\d\w]\s*)(\/[^\/\*][^\n\/]*\/[gi]) to (\/(?:\\.|[^\n/*])(?:\\.|[^\n/])*\/[gim]*) (according to this ECMAScript specification). This also fixes some other issues like not allowing /.../gi. The part (?!...) is noneffective and can be omitted, as explained below.
  • Change \/\/.*?\n to \/\/[^\n]*\n, this is faster.
  • Change all (?:\b) to \b, the brackets are not needed.
  • The part (?=\D)\.(?=\D) is a bit strange since it uses two zero-width positive lookaheads. The first (?=\D) looks for a non-digit but does not advance the position. Then \. checks if the same character (which we know is a non-digit) is a dot. Due to this the first bracket is noneffective and can be omitted. Same with the negative lookahead above. It checks if the trailing slash of the regular expression is not a digit or word character. This is always true for a slash. A lookbehind (using (?<=...)) would be appropriate but is not supported in JavaScript unfortunately.

Here is a simple example where both your approach and mine fail.

function(a,b){b/=2;return (a+b)/b;}

Everything between the two slashes is highlighted as a regular expression. I'm not sure how to fix this.
Edit 1: I think the only possibility is to use a matching parenthesis instead of a lookbehind, e.g. ([^\s\w)]\s*)(...). The problem is, you will need to prepend this part of the match to the result.
Edit 2: While commenting @jed's enlink I learned a new trick. Replace (?!...) with \B. This will not solve all issues, but it helps in cases where the slash is preceded by a word character. \B checks if there is no word boundary. A similar trick for matching dots outside of numbers is not possible, unfortunately.

Thanks. I'll look deeper into this issue later.

Here is the full regular expression again with all my suggested changes:

var re = /(\B\/(?:\\.|[^\n/*])(?:\\.|[^\n/])*\/[gim]*)|("(?:\\.|\\\r*\n|[^\n"])*"|'(?:\\.|\\\r*\n|[^\n'])*')|(\/\/[^\n]*\n|\/\*[\x00-\xff\u00\uffff]*?\*\/)|\b(abstract|boolean|break|byte|case|catch|char|class|const|continue|debugger|default|delete|do|double|else|enum|export|extends|false|final|finally|float|for|function|goto|if|implements|import|in|instanceof|int|interface|long|native|new|null|package|private|protected|public|return|short|static|super|switch|synchronized|this|throw|throws|transient|true|try|typeof|var|void|volatile|while|with)\b|\b(Array|Boolean|Date|Function|Math|Number|Object|RegExp|String|document|window|arguments)\b|(\d[\d\.eE]*)|([\x28-\x2b\x2d\x3a-\x3f\x5b\x5d\x5e\x7b-\x7e]+|\x2f|\.(?=\D))/g;

Thank you, @maettig! I'll test it and will integrate it into a) this gist and b) the page soon.

nice one, @maettig!

Why is the "t-tag" used and not span or else?

Sorry for the late answer: span would be longer (>140bytes) and could interfere with other page styles.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.