Created
February 9, 2012 19:51
-
-
Save BonsaiDen/1782410 to your computer and use it in GitHub Desktop.
Tokenizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Copyright (c) 2012 Ivo Wetzel. | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining a copy | |
* of this software and associated documentation files (the "Software"), to deal | |
* in the Software without restriction, including without limitation the rights | |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
* copies of the Software, and to permit persons to whom the Software is | |
* furnished to do so, subject to the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be included in | |
* all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
* THE SOFTWARE. | |
*/ | |
/** | |
* Tokenizer | |
* - Parses stuff into tokens | |
* - Creates basic tree layout with {([ / ])} tokens | |
*/ | |
/**
 * Tokenize the input string into a stream of tokens and group the
 * bracket tokens {} [] () into nested blocks.
 *
 * input      - source string to tokenize
 * tokenData  - { list, map } as produced by generateTokenTree()
 * whitespace - unused; kept for interface compatibility
 *
 * Returns the token stream (actually a tree, once grouping has run).
 */
function tokenize(input, tokenData, whitespace) {
    var parsePos = 0,        // offset where the token in progress started
        inputPos = 0,        // offset of the character being consumed
        searchPos = 0,       // offset used by the greedy regexp matcher
        inputLength = input.length,
        inputLineStart = 0,  // offset of the start of the current line
        value = '',          // characters accumulated for the token in progress
        token = null,        // best (longest) match found so far
        tokenStream = [],
        inputLine = 1;
    while(parsePos < inputLength) {
        var current = input[inputPos],
            endOfInput = inputPos === inputLength;
        value += current;
        // Match the next token
        // first we try the regexpressions since those are greedy
        var mToken = matchExp(input.substring(searchPos), tokenData.list, tokenData.map);
        if (mToken) {
            searchPos += mToken.value.length;
        // Then the simple tokens
        } else {
            searchPos++;
            mToken = matchToken(value, tokenData.list, tokenData.map);
        }
        if (token === null) {
            token = mToken;
        }
        // Once the new match gets shorter than the previous one (or the
        // input ends) the previous match was the longest; emit it.
        if (mToken.value.length < token.value.length || endOfInput) {
            // Re-check against the plain-string tokens only, so e.g. an
            // identifier-shaped keyword gets its keyword type.
            var other = matchToken(token.value, tokenData.list, tokenData.map, true);
            if (other && other.type !== 'unknown' && other.type !== token.type) {
                token.type = other.type;
            }
            var inputColumn = parsePos - inputLineStart + 1;
            // Add unknown token and end parsing
            if (token.type === 'unknown') {
                var lastToken = tokenStream[tokenStream.length - 1];
                tokenStream.push({
                    type: 'unknown', // fixed: was misspelled 'unkown'
                    line: inputLine,
                    // Guard: an unknown token at the very start of the input
                    // has no previous token to anchor against (previously
                    // crashed dereferencing lastToken.end).
                    start: lastToken ? lastToken.end : null,
                    end: -1
                });
                break;
            }
            // Add token to stream
            token.start = {
                line: inputLine,
                column: inputColumn
            };
            token.end = {
                line: inputLine,
                column: token.start.column + token.value.length
            };
            if (token.type !== 'whitespace' && token.type !== 'newline') {
                tokenStream.push(token);
            }
            // Advance parser
            parsePos += token.value.length;
            inputPos = parsePos;
            // Handle newlines embedded in multi line comments
            if (token.type === 'comment.multi') {
                var lines = token.value.split('\n');
                inputLine += lines.length - 1;
                token.end.line = inputLine;
                token.end.column = lines[lines.length - 1].length;
                inputLineStart = parsePos - token.end.column;
            }
            if (token.type === 'newline') {
                inputLine++;
                inputLineStart = parsePos;
            }
            value = '';
            token = null;
            continue;
        } else {
            token = mToken;
        }
        inputPos++;
    }
    // Assign nesting levels (token.lvl) to matching open/close pairs and
    // report unbalanced tokens.
    function level(tokens, open, close, opens, closes) {
        var lvl = 0, openTokens = [];
        for(var i = 0, l = tokens.length; i < l; i++) {
            var token = tokens[i];
            if (opens.indexOf(token.type) !== -1) {
                lvl++;
            } else if (closes.indexOf(token.type) !== -1) {
                token.lvl = lvl;
                lvl--;
            }
            if (token.type === open) {
                openTokens.push(token);
                token.lvl = lvl;
            } else if (token.type === close) {
                openTokens.pop();
                if (lvl === -1) {
                    parseError('Unbalanced', token, 'missing opening token');
                }
            }
        }
        if (lvl > 0) {
            parseError('Unbalanced ', openTokens[0], 'missing closing token');
        }
    }
    // Fold each open...close span into a single token of the given type
    // whose .values holds the enclosed tokens (recursively).
    function group(tokens, open, close, type) {
        var count = tokens.length;
        for(var i = 0, l = count; i < l; i++) {
            var token = tokens[i];
            if (!token) {
                continue;
            }
            // Enter existing sub groups
            if (token.values) {
                group(token.values, open, close, type);
            }
            // Convert the token
            if (token.type === open) {
                token.type = type;
                token.values = [];
                for(var e = i + 1; e < count; e++) {
                    var otherToken = tokens[e];
                    // Remove token from stream
                    tokens.splice(e, 1);
                    e--;
                    i--;
                    count--;
                    // Check for end of group: the close token at the SAME
                    // nesting level. Fixed: compared the never-assigned
                    // '.level' property (always undefined === undefined),
                    // which closed every group at the first close token.
                    if (otherToken.type === close
                        && otherToken.lvl === token.lvl) {
                        // Recurse through sub groups
                        group(token.values, open, close, type);
                        break;
                    }
                    token.values.push(otherToken);
                }
            }
        }
    }
    var opens = ['group.curly.open', 'group.brace.open', 'group.paren.open'],
        closes = ['group.curly.close', 'group.brace.close', 'group.paren.close'];
    // Leveling
    level(tokenStream, 'group.curly.open', 'group.curly.close', opens, closes);
    level(tokenStream, 'group.brace.open', 'group.brace.close', opens, closes);
    level(tokenStream, 'group.paren.open', 'group.paren.close', opens, closes);
    // Grouping
    group(tokenStream, 'group.curly.open', 'group.curly.close', 'block.curly');
    group(tokenStream, 'group.brace.open', 'group.brace.close', 'block.brace');
    group(tokenStream, 'group.paren.open', 'group.paren.close', 'block.paren');
    // Create lists: split a block's values on commas (currently unused,
    // see the commented-out call below).
    function list(tokens, type) {
        var count = tokens.length;
        for(var i = 0, l = count; i < l; i++) {
            var token = tokens[i];
            if (!token) {
                continue;
            }
            // Enter existing sub groups
            if (token.values) {
                list(token.values, type);
            }
            if (token.type === type) {
                var subList = [], tokenList = [];
                for(var e = 0, el = token.values.length; e < el; e++) {
                    var subToken = token.values[e];
                    if (subToken.type === 'punct.comma') {
                        tokenList.push(subList);
                        subList = [];
                    } else {
                        subList.push(subToken);
                    }
                }
                if (subList.length > 0) {
                    tokenList.push(subList);
                }
                if (tokenList.length > 0) {
                    //console.log(tokenList);
                }
            }
        }
    }
    //list(tokenStream, 'block.paren');
    // Returns a tree, actually
    return tokenStream;
}
exports.getTokens = function(source) { | |
var tokenData = generateTokenTree(TOKEN_TREE); | |
return tokenize(source, tokenData); | |
}; | |
// Helpers -------------------------------------------------------------------- | |
// ---------------------------------------------------------------------------- | |
// Report a fatal parse error and terminate the process.
// message  - short description of the problem
// token    - offending token (reads .value and .start line/column)
// expected - hint about what was expected instead
function parseError(message, token, expected) {
    // Errors belong on stderr, not stdout.
    console.error(message + ': "' + token.value + '" at line ' + token.start.line + ' column ' + token.start.column + '; ' + expected);
    // Exit non-zero so shells/callers can detect the failure
    // (previously exited with 0, signalling success on error).
    process.exit(1);
}
// Flatten a nested token definition tree into a single map. Keys encode
// the token kind plus its dotted type path:
//   's<path>' -> plain string token (exact match)
//   'r<path>' -> regular expression token
//   'l<path>' -> regexp wrapped in an array (matched greedily up front)
function parseTokenTree(tokens, base, map) {
    for(var key in tokens) {
        var entry = tokens[key],
            path = base + '.' + key;
        if (typeof entry === 'string') {
            map['s' + path] = entry;
        } else if (entry instanceof RegExp) {
            map['r' + path] = entry;
        } else if (entry instanceof Array) {
            map['l' + path] = entry[0];
        } else {
            // Plain object: descend into the sub tree
            parseTokenTree(entry, path, map);
        }
    }
}
// Build the matcher data from a nested token tree.
// Returns { list, map } where list holds every token value ordered for
// greedy matching and map translates a token value back to its type key.
function generateTokenTree(tree) {
    var flat = {},
        values = [],
        valueToType = {};
    parseTokenTree(tree, '', flat);
    for(var typeKey in flat) {
        values.push(flat[typeKey]);
        valueToType[flat[typeKey]] = typeKey;
    }
    // Longest tokens first so greedy matching prefers them; plain
    // regexp ('r') tokens are pushed towards the end of the list.
    values.sort(function(a, b) {
        var aIsExp = valueToType[a][0] === 'r',
            bIsExp = valueToType[b][0] === 'r';
        if (aIsExp && !bIsExp) {
            return 1;
        }
        if (bIsExp && !aIsExp) {
            return -1;
        }
        return b.toString().length - a.toString().length;
    });
    // Second pass: move the greedy list ('l') tokens to the very end.
    values.sort(function(a, b) {
        var aIsList = valueToType[a][0] === 'l',
            bIsList = valueToType[b][0] === 'l';
        if (aIsList && !bIsList) {
            return 1;
        }
        if (bIsList && !aIsList) {
            return -1;
        }
        return b.toString().length - a.toString().length;
    });
    return {
        list: values,
        map: valueToType
    };
}
// Try each greedy ('l') regexp token against the start of the string.
// Returns { type, value } for the first match, or null when none match.
function matchExp(string, list, map) {
    var count = list.length;
    for(var index = 0; index < count; index++) {
        var candidate = list[index],
            typeKey = map[candidate.toString()];
        if (typeKey[0] !== 'l') {
            continue;
        }
        var result = string.match(candidate);
        if (result) {
            return {
                type: typeKey.substr(2),
                value: result[0]
            };
        }
    }
    return null;
}
// Match the accumulated value against the token definitions.
// Regexp ('r') tokens are tried with string.match() unless noexp is set;
// plain string tokens require an exact match; greedy list ('l') tokens
// are handled by matchExp() and never match here.
// Falls back to an "unknown" token when nothing matches.
function matchToken(string, list, map, noexp) {
    var count = list.length;
    for(var index = 0; index < count; index++) {
        var candidate = list[index],
            typeKey = map[candidate.toString()];
        if (typeKey[0] === 'r' && !noexp) {
            var result = string.match(candidate);
            if (result) {
                return {
                    type: typeKey.substr(2),
                    value: result[0]
                };
            }
        } else if (typeKey[0] !== 'l' && string === candidate) {
            return {
                type: typeKey.substr(2),
                value: candidate
            };
        }
    }
    return {
        type: 'unknown',
        value: ''
    };
}
// Tokens --------------------------------------------------------------------- | |
// ---------------------------------------------------------------------------- | |
// Token definitions. parseTokenTree() flattens this nested structure into
// dotted type paths ('operator.math.add', ...). Strings match exactly,
// regexps match the accumulated value, and [regexp] entries are matched
// greedily up front by matchExp().
var TOKEN_TREE = {
    whitespace: /^[\ \t]+$/,
    newline: /^(\n|\r)$/,
    comment: {
        line: /^\/\/.*$/,
        // Wrapped in an array so multi line comments are matched greedily
        multi: [/^\/\*(.|[\r\n])*?\*\/$/m]
    },
    operator: {
        increment: '++',
        decrement: '--',
        math: {
            add: '+',
            subtract: '-',
            multiply: '*',
            divide: '/'
        },
        assign: {
            assign: '=',
            math: {
                add: '+=',
                // NOTE(review): key misspelled ('substract'); kept as-is
                // because changing it would rename the emitted token type.
                substract: '-=',
                multiply: '*=',
                divide: '/='
            },
            bitwise: {
                and: '&=',
                or: '|=',
                negate: '~=',
                xor: '^=',
                shiftLeft: '<<=',
                shiftRight: '>>=',
                ushiftRight: '>>>='
            }
        },
        compare: {
            lt: '<',
            gt: '>',
            lte: '<=',
            gte: '>=',
            equal: '==',
            not: '!='
        },
        bitwise: {
            and: '&',
            or: '|',
            negate: '~',
            xor: '^',
            shiftLeft: '<<',
            shiftRight: '>>',
            ushiftRight: '>>>'
        },
        logic: {
            and: '&&',
            or: '||'
        },
        not: '!'
    },
    punct: {
        dot: '.',
        colon: ':',
        // NOTE(review): key misspelled ('tenary' for ternary); kept as-is
        // to avoid renaming the emitted token type.
        tenary: '?',
        comma: ',',
        semicolon: ';'
    },
    group: {
        brace: {
            open: '[',
            close: ']'
        },
        curly: {
            open: '{',
            close: '}'
        },
        paren: {
            open: '(',
            close: ')'
        }
    },
    number: {
        integer: /^(0|[1-9]([0-9]+)?)$/,
        floating: /^(0|[1-9]([0-9]+)?)\.[0-9]*$/,
        hex: /^0x[0-9a-fA-F]+$/
    },
    string: {
        single: /^'([^'\\]|\\.)*'$/,
        'double': /^"([^"\\]|\\.)*"$/
    },
    identifier: /^[a-zA-Z\$_]([a-zA-Z0-9\$_]+)?$/,
    //member: /^@[a-zA-Z\$_]([a-zA-Z0-9\$_]+)?$/,
    keyword: {
        'var': 'var',
        'this': 'this',
        'void': 'void',
        'function': 'function',
        'boolean': {
            'true': 'true',
            'false': 'false'
        },
        taken: {
            'boolean': 'boolean',
            'public': 'public',
            'private': 'private',
            'protected': 'protected',
            'const': 'const',
            'static': 'static',
            'abstract': 'abstract',
            'interface': 'interface',
            'class': 'class',
            'extends': 'extends',
            'implements': 'implements'
        },
        statement: {
            conditional: {
                'if': 'if',
                'else': 'else',
                'switch': 'switch'
            },
            'switch': {
                // Fixed: was 'case': 'switch', which duplicated the
                // 'switch' token value — the value->type map is keyed by
                // token value, so 'case' could never be tokenized and
                // 'switch' got the wrong type.
                'case': 'case',
                'default': 'default'
            },
            loop: {
                'while': 'while',
                'for': 'for',
                'do': 'do'
            },
            error: {
                'try': 'try',
                'catch': 'catch',
                'finally': 'finally',
                'raise': 'raise'
            },
            'continue': 'continue',
            'break': 'break',
            'return': 'return'
        },
        type: {
            number: 'Number',
            string: 'String',
            array: 'Array',
            object: 'Object',
            error: 'Error',
            date: 'Date',
            regexp: 'RegExp'
        }
    }
};
// Debug helper: walks a token stream/tree and prints an indented outline
// of the block structure to the console.
// tokens - token array as produced by tokenize()
// level  - current indentation depth (defaults to 0)
// Removed: unused dead helper after() and its lookahead logic.
function ast(tokens, level) {
    level = level || 0;
    var pos = 0;
    // Advance to the next token in the current list.
    function next() {
        return tokens[pos++];
    }
    // Close the current nesting level in the output.
    function end() {
        level--;
        log('--');
        log();
    }
    // console.log with indentation matching the current level.
    function log() {
        var data = Array.prototype.slice.call(arguments);
        data.unshift(new Array(level + 1).join(' '));
        console.log.apply(console, data);
    }
    var token = next();
    if (!token) {
        end();
        return;
    }
    do {
        switch(token.type) {
            case 'keyword':
                break; // keywords are not printed
            case 'block.paren':
                log('() block', token.lvl);
                ast(token.values, token.lvl);
                break;
            case 'block.brace':
                log('[] block', token.lvl);
                ast(token.values, token.lvl);
                break;
            case 'block.curly':
                log('{} block', token.lvl);
                ast(token.values, token.lvl);
                break;
            default:
                log(token.type, level);
                break;
        }
        token = next();
    } while(token);
    end();
}
// test
// Ad-hoc smoke test: tokenize 'test.js' from the working directory,
// print its block structure, and dump the raw token stream to 'tokens.json'.
var fs = require('fs'),
    source = fs.readFileSync('test.js').toString().trimRight();
var tokens = exports.getTokens(source);
ast(tokens);
// NOTE(review): 'util' is required but never used in the visible code —
// presumably a debugging leftover; confirm before removing.
var util = require('util'),
    data = JSON.stringify(tokens);
fs.writeFileSync('tokens.json', data);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment