Skip to content

Instantly share code, notes, and snippets.

@gatopeich
Last active May 13, 2018 11:01
Show Gist options
  • Save gatopeich/3ad77fc64f29c2adcea560cd757b59fb to your computer and use it in GitHub Desktop.
Save gatopeich/3ad77fc64f29c2adcea560cd757b59fb to your computer and use it in GitHub Desktop.
gatopeich's prototype of a minimal Javascript interpreter
# gatopeich's minimal Javascript interpreter prototype
# Coding this as a prototype for a specific-purpose lightweight Javascript
# engine in C/C++.
# DONE: Tokenizer (except quote parsing)
# DONE: Expression extraction
# DONE: Pretty printer
# next: Interpreter...
# ECMAScript 5.1 punctuators, followed by the two DivPunctuators.
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.7
Punctuators = (
    '{', '}', '(', ')', '[', ']',
    '.', ';', ',', '<', '>', '<=',
    '>=', '==', '!=', '===', '!==',
    '+', '-', '*', '%', '++', '--',
    '<<', '>>', '>>>', '&', '|', '^',
    '!', '~', '&&', '||', '?', ':',
    '=', '+=', '-=', '*=', '%=', '<<=',
    '>>=', '>>>=', '&=', '|=', '^=',
    '/', '/=',  # := DivPunctuators
)
# Longest first (ties broken by reverse lexical order), so that a long
# punctuator such as '>>>=' is tried before its shorter prefixes.
Punctuators = sorted(Punctuators, key=lambda p: (len(p), p), reverse=True)

# A line whose first token is one of these continues the previous statement,
# so no semicolon is injected in front of it.
_CONTINUATION_STARTERS = ('(', '[', '/', '+', '-')


def _split_code_and_strings(line):
    """Cut one source line into ``(is_string, text)`` pieces.

    Quoted literals (single or double quotes, backslash escapes honoured)
    are returned whole, quotes included.  Scanning stops at the first ``//``
    found outside a literal, which discards the line comment.  A literal
    that is not closed on the line swallows the rest of the line.
    """
    pieces = []
    run_start = 0  # where the current run of plain code began
    pos = 0
    end_of_line = len(line)
    while pos < end_of_line:
        char = line[pos]
        if char in ('"', "'"):
            if pos > run_start:
                pieces.append((False, line[run_start:pos]))
            close = pos + 1
            while close < end_of_line and line[close] != char:
                # A backslash escapes the next character, quotes included.
                close += 2 if line[close] == '\\' else 1
            # Step past the closing quote, clamped for unterminated literals.
            close = min(close + 1, end_of_line)
            pieces.append((True, line[pos:close]))
            pos = run_start = close
        elif line.startswith('//', pos):
            break
        else:
            pos += 1
    if pos > run_start:
        pieces.append((False, line[run_start:pos]))
    return pieces


def _divide_in_tokens(word):
    """Split a whitespace-free chunk of code around the punctuators in it.

    The first punctuator of ``Punctuators`` (longest first) contained in
    *word* is split out at its first occurrence, and both sides are divided
    recursively.  Returns a list of tokens; an empty *word* gives ``[]``.
    """
    if not word:
        return []
    for punctuator in Punctuators:
        if punctuator in word:
            before, _, after = word.partition(punctuator)
            return (_divide_in_tokens(before) + [punctuator]
                    + _divide_in_tokens(after))
    return [word]


def tokenize(txt):
    """Turn Javascript source text into a flat list of string tokens.

    Processing is line based:

    1. ``//`` comments are dropped, except when the ``//`` sits inside a
       quoted string literal.
    2. Quoted string literals become a single token each (quotes kept), so
       embedded spaces and punctuators no longer break them apart.
    3. The remaining code is split on whitespace and around punctuators.
    4. A ``';'`` token is injected before every non-empty line unless that
       line starts with ``(``, ``[``, ``/``, ``+`` or ``-``, the five tokens
       that make a line a continuation of the previous statement.

    Known limitations: ``/* ... */`` comments and regular-expression
    literals are not recognised, and because ``.`` is a punctuator a
    numeric literal such as ``3.14`` is split into ``'3', '.', '14'``.

    Returns the list of tokens (empty for empty input).
    """
    tokens = []
    for line in txt.splitlines():
        as_tokens = []
        for is_string, text in _split_code_and_strings(line.strip()):
            if is_string:
                as_tokens.append(text)
            else:
                for word in text.split():
                    as_tokens.extend(_divide_in_tokens(word))
        # Inject initial semicolon where adequate (see step 4 above).
        if as_tokens and as_tokens[0] not in _CONTINUATION_STARTERS:
            tokens.append(';')
        tokens.extend(as_tokens)
    return tokens
# Opening bracket -> matching closing bracket.
PARENS = {'{': '}', '(': ')', '[': ']'}


def to_expressions(tokens, opening=None):
    """Group a flat token list into nested expressions.

    Tokens are consumed from the front of *tokens* (the list is mutated).
    A ``';'`` ends the current expression; empty expressions are skipped.
    Each opening bracket starts a nested group, parsed recursively and
    stored inside the current expression as ``[bracket, expr, expr, ...]``.

    Args:
        tokens: list of string tokens; emptied as it is parsed.
        opening: the bracket that opened this group, or None at top level.

    Returns:
        A list of expressions (each a list of tokens and nested groups).
        When *opening* is given, the list starts with that bracket.

    Raises:
        Exception: the tokens ran out before the bracket matching
            *opening* was found.
    """
    expressions = [opening] if opening else []
    closing = PARENS[opening] if opening else None
    current_exp = []
    while tokens:
        token = tokens.pop(0)
        if token == closing:
            if current_exp:
                expressions.append(current_exp)
            return expressions
        if token == ';':
            if current_exp:
                expressions.append(current_exp)
            current_exp = []
        elif token in PARENS:
            current_exp.append(to_expressions(tokens, token))
        else:
            current_exp.append(token)
    if opening:
        raise Exception('No match for "%s"' % opening)
    # End of input also terminates the last statement: without this flush a
    # final expression lacking a trailing ';' would be silently dropped.
    if current_exp:
        expressions.append(current_exp)
    return expressions
# Sample Javascript program (variable scoping example) used as input for the
# tokenizer / expression parser / pretty printer demo further below.
WikipediaExample1='''
var x = 0; // A global variable, because it is not in any function
function f() {
var z = 'foxes', r = 'birds'; // 2 local variables
m = 'fish'; // global, because it wasn't declared anywhere before
function child() {
var r = 'monkeys'; // This variable is local and does not affect the "birds" r of the parent function.
z = 'penguins'; // Closure: Child function is able to access the variables of the parent function.
}
twenty = 20; // This variable is declared on the next line, but usable anywhere in the function, even before, as here
var twenty;
child();
return x; // We can use x here, because it is global
}
f();
console.log(z); // This line will raise a ReferenceError exception, because the value of z is no longer available
'''
class LineCounter():
    """Line-number prefix generator for the pretty printer.

    Every conversion to ``str`` advances the counter and yields the new
    line number (right-aligned in 3 columns) followed by ``prefix``.  The
    prefix grows and shrinks by one ``INDENT`` unit to show nesting.
    """

    # One nesting level.  indent() and unindent() must add and remove the
    # same text, otherwise the prefix is corrupted after a nested block.
    INDENT = '  '

    def __init__(self, prefix=': ', lines=0):
        """Start with *lines* lines already printed and the given prefix."""
        self.prefix = prefix
        self.lines = lines

    def __str__(self):
        """Advance to the next line and return its number plus prefix."""
        self.lines += 1
        return '%3d%s' % (self.lines, self.prefix)

    def indent(self):
        """Deepen the prefix by one level; returns self for chaining."""
        self.prefix += self.INDENT
        return self

    def unindent(self):
        """Undo one indent(); returns self for chaining."""
        self.prefix = self.prefix[:-len(self.INDENT)]
        return self
def pretty_print(expressions, linecount = None):
    """Print nested expressions with line numbers and indentation.

    *expressions* is a list of expressions; each expression is a list whose
    items are either string tokens or nested groups of the form
    ``[opening_bracket, expression, ...]``.  Tokens are printed separated
    by spaces.  A ``(`` group stays on the current line; any other group
    prints its bracket, continues on a fresh, deeper-indented line and
    closes on a line of its own.

    *linecount* supplies the line prefixes.  When it is omitted this is the
    outermost call: a new LineCounter is created, the first expression also
    starts on a fresh numbered line, and a final blank line is printed.
    """
    top_level = not linecount
    if top_level:
        linecount = LineCounter()
    # Nested calls continue on the line their caller already started.
    needs_newline = top_level
    for expression in expressions:
        if needs_newline:
            print(end='\n%s' % linecount)
        needs_newline = True
        for item in expression:
            if type(item) is str:
                print(item, end=' ')
                continue
            opener, body = item[0], item[1:]
            if opener == '(':
                print(end='( ')
                pretty_print(body, linecount)
                print(end=') ')
                continue
            # Formatting the counter starts a new numbered line, so the
            # indent must be applied before and the unindent before closing.
            print(opener, end='\n%s' % linecount.indent())
            pretty_print(body, linecount)
            print('\n%s' % linecount.unindent(), end=PARENS[opener])
    if top_level:
        print('\n')
# Demo 1: dump every top-level expression parsed from the Wikipedia sample,
# then pretty-print the same program.
for exp in to_expressions(tokenize(WikipediaExample1)):
    print('> %s' % (exp,))
pretty_print(to_expressions(tokenize(WikipediaExample1)))

# Demo 2: statements without explicit semicolons, including a line that
# starts with '+' and therefore continues the previous statement.
CODE='''
// Sample Javascript snippet
a += 1
// The line below starts a new expression...
b = 2
// Unlike this one which is a continuation...
+ a
print( a + b )
'''
print(tokenize(CODE))
pretty_print(to_expressions(tokenize(CODE)))
@gatopeich
Copy link
Author

Current output:

> ['var', 'x', '=', '0']
> ['function', 'f', ['('], ['{', ['var', 'z', '=', "'foxes'", ',', 'r', '=', "'birds'"], ['m', '=', "'fish'"], ['function', 'child', ['('], ['{', ['var', 'r', '=', "'monkeys'"], ['z', '=', "'penguins'"]]], ['twenty', '=', '20'], ['var', 'twenty'], ['child', ['(']], ['return', 'x']]]
> ['f', ['(']]
> ['console', '.', 'log', ['(', ['z']]]

  1: var x = 0 
  2: function f ( ) {
  3:   var z = 'foxes' , r = 'birds' 
  4:   m = 'fish' 
  5:   function child ( ) {
  6:     var r = 'monkeys' 
  7:     z = 'penguins' 
  8:   }
  9:   twenty = 20 
 10:   var twenty 
 11:   child ( ) 
 12:   return x 
 13: }
 14: f ( ) 
 15: console . log ( z ) 

[';', 'a', '+=', '1', ';', 'b', '=', '2', '+', 'a', ';', 'print', '(', 'a', '+', 'b', ')']

  1: a += 1 
  2: b = 2 + a 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment