LianSheng197/ast-parser-example.js

## ast-parser-example.js
/**
 * Splits an HTML string into a flat list of tokens.
 *
 * A token is either an opening/closing tag for one of the recognized elements
 * (title, h1, html, head, body, p) or a run of characters containing no "<".
 * Whitespace between tags is kept as text tokens. A "<" that does not begin
 * one of the recognized tags matches neither alternative and is skipped; the
 * characters after it are tokenized as text.
 *
 * @param {string} input - Markup to tokenize.
 * @returns {string[]} Tokens in source order.
 */
function lexer(input) {
    const tokenPattern = /<\/?(title|h1|html|head|body|p)>|[^<]+/g;
    const tokens = [];

    // The `g` flag makes each exec() call resume where the previous match ended.
    for (let match = tokenPattern.exec(input); match !== null; match = tokenPattern.exec(input)) {
        tokens.push(match[0]);
    }

    return tokens;
}

/**
 * Builds a tree from a flat token list.
 *
 * A token starting with "<" opens an Element node whose children are all nodes
 * up to the matching closing-tag token; any other token becomes a Text node.
 * The top-level nodes are collected under a single Document node.
 *
 * @param {string[]} tokens - Tag tokens such as "<p>" / "</p>" and text tokens.
 * @returns {{type: string, children: object[]}} The Document root node.
 * @throws {SyntaxError} If a closing tag has no matching open element, or an
 *     element is still open when the tokens run out.
 */
function parser(tokens) {
    let current = 0;

    // Consumes one node starting at `current` and leaves `current` just past it.
    function walk() {
        const token = tokens[current];

        if (!token.startsWith('<')) {
            current++;
            return {
                type: 'Text',
                value: token,
            };
        }

        // A closing tag only reaches walk() when it does not match the element
        // currently being parsed (or there is none), so it cannot open a node.
        if (token.startsWith('</')) {
            throw new SyntaxError(`Unexpected closing tag ${token} at token index ${current}`);
        }

        const tagName = token.slice(1, -1);
        const closingTag = `</${tagName}>`;
        const node = {
            type: 'Element',
            tagName,
            children: [],
        };
        current++;

        while (current < tokens.length && !tokens[current].startsWith(closingTag)) {
            node.children.push(walk());
        }

        // Running off the end means the element was never closed.
        if (current >= tokens.length) {
            throw new SyntaxError(`Missing closing tag ${closingTag}`);
        }

        current++; // step over the closing tag
        return node;
    }

    const ast = {
        type: 'Document',
        children: [],
    };

    while (current < tokens.length) {
        ast.children.push(walk());
    }

    return ast;
}

/**
 * Parses an HTML string into a tree by tokenizing it with `lexer` and handing
 * the tokens to `parser`.
 *
 * @param {string} input - Markup to parse.
 * @returns {object} The root node returned by `parser`.
 */
function parseHTML(input) {
    return parser(lexer(input));
}

// Sample document used to exercise parseHTML. The template literal's leading
// newline and indentation are part of the input, so the lexer emits them as
// text tokens and the printed tree contains whitespace-only Text nodes.
const exampleHTML = `
<html>
    <head>
        <title>Test</title>
    </head>
    <body>
        <h1>Hello</h1>
        <p>World</p>
    </body>
</html>`;
// Parse the sample and print the resulting tree as 2-space-indented JSON.
let ast = parseHTML(exampleHTML);
console.log(JSON.stringify(ast, null, 2));
	/**
	 * Splits an HTML string into a flat list of tokens.
	 *
	 * A token is either an opening/closing tag for one of the recognized elements
	 * (title, h1, html, head, body, p) or a run of characters containing no "<".
	 * Whitespace between tags is kept as text tokens.
	 *
	 * @param {string} input - Markup to tokenize.
	 * @returns {string[]} Tokens in source order.
	 */
	function lexer(input) {
	    // The bars must be unescaped: `\|` matches a literal pipe character,
	    // which would stop the pattern from matching any real tag or text.
	    const tokenPattern = /<\/?(title|h1|html|head|body|p)>|[^<]+/g;
	    const tokens = [];

	    // The `g` flag makes each exec() call resume where the previous match ended.
	    for (let match = tokenPattern.exec(input); match !== null; match = tokenPattern.exec(input)) {
	        tokens.push(match[0]);
	    }

	    return tokens;
	}

	/**
	 * Builds a tree from a flat token list.
	 *
	 * A token starting with "<" opens an Element node whose children are all nodes
	 * up to the matching closing-tag token; any other token becomes a Text node.
	 * The top-level nodes are collected under a single Document node.
	 *
	 * @param {string[]} tokens - Tag tokens such as "<p>" / "</p>" and text tokens.
	 * @returns {{type: string, children: object[]}} The Document root node.
	 * @throws {SyntaxError} If a closing tag has no matching open element, or an
	 *     element is still open when the tokens run out.
	 */
	function parser(tokens) {
	    let current = 0;

	    // Consumes one node starting at `current` and leaves `current` just past it.
	    function walk() {
	        const token = tokens[current];

	        if (!token.startsWith('<')) {
	            current++;
	            return {
	                type: 'Text',
	                value: token,
	            };
	        }

	        // A closing tag only reaches walk() when it does not match the element
	        // currently being parsed (or there is none), so it cannot open a node.
	        if (token.startsWith('</')) {
	            throw new SyntaxError(`Unexpected closing tag ${token} at token index ${current}`);
	        }

	        const tagName = token.slice(1, -1);
	        const closingTag = `</${tagName}>`;
	        const node = {
	            type: 'Element',
	            tagName,
	            children: [],
	        };
	        current++;

	        while (current < tokens.length && !tokens[current].startsWith(closingTag)) {
	            node.children.push(walk());
	        }

	        // Running off the end means the element was never closed.
	        if (current >= tokens.length) {
	            throw new SyntaxError(`Missing closing tag ${closingTag}`);
	        }

	        current++; // step over the closing tag
	        return node;
	    }

	    const ast = {
	        type: 'Document',
	        children: [],
	    };

	    while (current < tokens.length) {
	        ast.children.push(walk());
	    }

	    return ast;
	}

	/**
	 * Parses an HTML string into a tree by tokenizing it with `lexer` and handing
	 * the tokens to `parser`.
	 *
	 * @param {string} input - Markup to parse.
	 * @returns {object} The root node returned by `parser`.
	 */
	function parseHTML(input) {
	    return parser(lexer(input));
	}

	// Sample markup passed to parseHTML; the tab and newline characters inside
	// the template literal are part of the string.
	// NOTE(review): `exampleHTML` and `ast` are also declared earlier in this
	// file. Redeclaring const/let bindings in the same scope is a SyntaxError —
	// confirm whether this second copy of the script is intended.
	const exampleHTML = `
	<html>
	<head>
	<title>Test</title>
	</head>
	<body>
	<h1>Hello</h1>
	<p>World</p>
	</body>
	</html>`;
	// Parse the sample and print the resulting tree as 2-space-indented JSON.
	let ast = parseHTML(exampleHTML);
	console.log(JSON.stringify(ast, null, 2));