Skip to content

Instantly share code, notes, and snippets.

@aholmes
Last active December 8, 2022 00:49
Show Gist options
  • Save aholmes/d86153f62093a6f257cb792ae9a228df to your computer and use it in GitHub Desktop.
Save aholmes/d86153f62093a6f257cb792ae9a228df to your computer and use it in GitHub Desktop.
A compiler generated by ChatGPT.
# This is a tokenizer, parser, and compiler that has been mostly generated by ChatGPT.
# A handful of errors have been corrected, as well as the addition of the ";" token
# and the ability to write multiple statements in a single line. This last part needed
# to be done by hand because ChatGPT lost the context of the BNF it created somewhere
# along the way, and was not able to regenerate a tokenizer/parser/compiler that
# worked with the original BNF.
#
# 94.42% of this code was created by ChatGPT (see differences.patch for more information).
#
# The conversation can be read here:
# https://gpt.best/dUkxp2UA
#
def tokenize(code):
tokens = []
pos = 0
while pos < len(code):
if code[pos] in ["+", "-", "*", "/", "%", "=", "(", ")"]:
tokens.append(code[pos])
pos += 1
elif code[pos] == " ":
pos += 1
elif code[pos] == ";":
tokens.append(code[pos])
pos += 1
elif code[pos:pos+5].lower() == "print":
tokens.append("print")
pos += 5
else:
value = ""
while pos < len(code) and code[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")", " ", ";"]:
value += code[pos]
pos += 1
tokens.append(value)
return tokens
def is_integer(n):
try:
float(n)
except ValueError:
return False
else:
return float(n).is_integer()
def parse(tokens):
pos = 0
def parse_program():
nonlocal pos
statements = []
while pos < len(tokens):
st = parse_statement()
if st is not None:
statements.append(st)
return statements
def parse_statement():
nonlocal pos
if tokens[pos] == "print":
pos += 1
return ("print", parse_expression())
elif tokens[pos] == ";":
pos += 1
result = parse_program()
if len(result) == 0:
return None
return ("statement", result)
elif tokens[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")"]:
name = tokens[pos]
pos += 1
if tokens[pos] == "=":
pos += 1
return ("assign", name, parse_expression())
def parse_expression():
nonlocal pos
return parse_sum()
def parse_sum():
nonlocal pos
result = parse_product()
while pos < len(tokens) and tokens[pos] in ["+", "-"]:
operator = tokens[pos]
pos += 1
right = parse_product()
result = (operator, result, right)
return result
def parse_product():
nonlocal pos
result = parse_value()
while pos < len(tokens) and tokens[pos] in ["*", "/", "%"]:
operator = tokens[pos]
pos += 1
right = parse_value()
result = (operator, result, right)
return result
def parse_value():
nonlocal pos
type = tokens[pos]
if type in ["+", "-", "*", "/", "%"]:
pos += 1
return (type, parse_value())
elif type == "(":
pos += 1
result = parse_expression()
pos += 1
return result
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]:
pos += 1
return type
elif type == 'print':
pos += 1
result = ('print', parse_expression())
pos += 1
return result
elif is_integer(type):
pos += 1
return int(type)
else:
pos += 1
return type
return parse_program()
def compile(parse_tree):
def compile_program(parse_tree):
# create a dictionary to store variable values
variables = {}
results = []
for st in parse_tree:
result = compile_statement(st, variables)
if result is not None:
results.append(result)
return results
def compile_statement(parse_tree, variables):
type = parse_tree[0]
if type == "print":
result = compile_expression(parse_tree[1], variables)
print(result)
return result
elif type == "assign":
# store the value of the expression in the variable
name = parse_tree[1]
value = compile_expression(parse_tree[2], variables)
variables[name] = value
return value
elif type == "statement":
result = compile_statement(parse_tree[1][0], variables)
return result
def compile_expression(parse_tree, variables):
if isinstance(parse_tree, (list, tuple)):
if len(parse_tree) == 0:
return None
# parse_tree is subscriptable, so we can access its elements
type = parse_tree[0]
if type in ["+", "-", "*", "/", "%"]:
# perform the mathematical operation and return the result
if type == "+":
return compile_expression(parse_tree[1], variables) + compile_expression(parse_tree[2], variables)
elif type == "-":
return compile_expression(parse_tree[1], variables) - compile_expression(parse_tree[2], variables)
elif type == "*":
return compile_expression(parse_tree[1], variables) * compile_expression(parse_tree[2], variables)
elif type == "/":
return compile_expression(parse_tree[1], variables) / compile_expression(parse_tree[2], variables)
elif type == "%":
return compile_expression(parse_tree[1], variables) % compile_expression(parse_tree[2], variables)
elif type == "assign":
# extract the name of the variable and the value of the expression
name = parse_tree[1]
value = compile_expression(parse_tree[2], variables)
# return the value of the expression
return value
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]:
return type
elif type in ["num", "var"]:
if type == "num":
# return the numeric value directly
return parse_tree[1]
elif type == "var":
# return the value of the variable
name = parse_tree[1]
if name in variables:
return variables[name]
else:
print(f"Error: variable {name} is not defined")
return None
elif type == "print":
return compile_statement(parse_tree, variables)
else:
return variables[parse_tree]
else:
# parse_tree is not subscriptable, so we can't access its elements
if variables.get(parse_tree):
return variables[parse_tree]
else:
return parse_tree
return compile_program(parse_tree)
# test the compile function with a simple expression
def test_compile_expression():
# compile the expression "10 + 30"
result = compile(parse(tokenize("print 10 + 30")))
# check the result
assert result == [40]
# test the compile function with a simple assignment
def test_compile_assignment():
# compile the expression "result = 10 + 30"
result = compile(parse(tokenize("result = 10 + 30")))
# check the result
assert result == [40]
# test the compile function with multiple statements
def test_compile_statements():
# compile the expression "result = 10 + 30 print result"
result = compile(parse(tokenize("result = 10 + 30; print result;")))
# check the result
assert result == [40, 40]
# test the compile function with nested expressions
def test_compile_nested_expressions():
# compile the expression "result = (10 + 30) * 2"
result = compile(parse(tokenize("result = (10 + 30) * 2;")))
# check the result
assert result == [80]
# test the compile function with a complex expression
def test_compile_complex_expression():
# compile the expression "result = ((10 + 30) * 2) / 6"
result = compile(parse(tokenize("result = ((10 + 30) * 2) / 6")))
# check the result
assert result == [13.333333333333334]
test_compile_expression()
test_compile_assignment()
test_compile_statements()
test_compile_nested_expressions()
test_compile_complex_expression()
# This is the script assembled from the different responses ChatGPT gave
#
def tokenize(code):
tokens = []
pos = 0
while pos < len(code):
if code[pos] in ["+", "-", "*", "/", "%", "=", "(", ")"]:
tokens.append(code[pos])
pos += 1
elif code[pos] == " ":
pos += 1
elif code[pos] == ";":
tokens.append(code[pos])
pos += 1
elif code[pos:pos+5].lower() == "print":
tokens.append("print")
pos += 5
else:
value = ""
while pos < len(code) and code[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")", " ", ";"]:
value += code[pos]
pos += 1
tokens.append(value)
return tokens
def is_integer(n):
try:
float(n)
except ValueError:
return False
else:
return float(n).is_integer()
def parse(tokens):
pos = 0
def parse_program():
nonlocal pos
statements = []
while pos < len(tokens):
st = parse_statement()
if st is not None:
statements.append(st)
return statements
def parse_statement():
nonlocal pos
if tokens[pos] == "print":
pos += 1
return ("print", parse_expression())
elif tokens[pos] == ";":
pos += 1
result = parse_program()
if len(result) == 0:
return None
return ("statement", result)
elif tokens[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")"]:
name = tokens[pos]
pos += 1
if tokens[pos] == "=":
pos += 1
return ("assign", name, parse_expression())
def parse_expression():
nonlocal pos
return parse_sum()
def parse_sum():
nonlocal pos
result = parse_product()
while pos < len(tokens) and tokens[pos] in ["+", "-"]:
operator = tokens[pos]
pos += 1
right = parse_product()
result = (operator, result, right)
return result
def parse_product():
nonlocal pos
result = parse_value()
while pos < len(tokens) and tokens[pos] in ["*", "/", "%"]:
operator = tokens[pos]
pos += 1
right = parse_value()
result = (operator, result, right)
return result
def parse_value():
nonlocal pos
type = tokens[pos]
if type in ["+", "-", "*", "/", "%"]:
pos += 1
return (type, parse_value())
elif type == "(":
pos += 1
result = parse_expression()
pos += 1
return result
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]:
pos += 1
return type
elif type == 'print':
pos += 1
result = ('print', parse_expression())
pos += 1
return result
elif is_integer(type):
pos += 1
return int(type)
else:
pos += 1
return type
return parse_program()
def compile(parse_tree):
def compile_program(parse_tree):
# create a dictionary to store variable values
variables = {}
results = []
for st in parse_tree:
result = compile_statement(st, variables)
if result is not None:
results.append(result)
return results
def compile_statement(parse_tree, variables):
type = parse_tree[0]
if type == "print":
result = compile_expression(parse_tree[1], variables)
print(result)
return result
elif type == "assign":
# store the value of the expression in the variable
name = parse_tree[1]
value = compile_expression(parse_tree[2], variables)
variables[name] = value
return value
def compile_expression(parse_tree, variables):
if isinstance(parse_tree, (list, tuple)):
# parse_tree is subscriptable, so we can access its elements
type = parse_tree[0]
if type in ["+", "-", "*", "/", "%"]:
# perform the mathematical operation and return the result
if type == "+":
return compile_expression(parse_tree[1], variables) + compile_expression(parse_tree[2], variables)
elif type == "-":
return compile_expression(parse_tree[1], variables) - compile_expression(parse_tree[2], variables)
elif type == "*":
return compile_expression(parse_tree[1], variables) * compile_expression(parse_tree[2], variables)
elif type == "/":
return compile_expression(parse_tree[1], variables) / compile_expression(parse_tree[2], variables)
elif type == "%":
return compile_expression(parse_tree[1], variables) % compile_expression(parse_tree[2], variables)
elif type == "assign":
# extract the name of the variable and the value of the expression
name = parse_tree[1]
value = compile_expression(parse_tree[2], variables)
# return the value of the expression
return value
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]:
return type
elif type in ["num", "var"]:
if type == "num":
# return the numeric value directly
return parse_tree[1]
elif type == "var":
# return the value of the variable
name = parse_tree[1]
if name in variables:
return variables[name]
else:
print(f"Error: variable {name} is not defined")
return None
else:
# parse_tree is not subscriptable, so we can't access its elements
return parse_tree
return compile_program(parse_tree)
# test the compile function with a simple expression
def test_compile_expression():
# compile the expression "10 + 30"
result = compile("print 10 + 30;")
# check the result
assert result == [40]
# test the compile function with a simple assignment
def test_compile_assignment():
# compile the expression "result = 10 + 30"
result = compile("result = 10 + 30;")
# check the result
assert result == [40]
# test the compile function with multiple statements
def test_compile_statements():
# compile the expression "result = 10 + 30; print result;"
result = compile("result = 10 + 30; print result;")
# check the result
assert result == [40, 40]
# test the compile function with nested expressions
def test_compile_nested_expressions():
# compile the expression "result = (10 + 30) * 2;"
result = compile("result = (10 + 30) * 2;")
# check the result
assert result == [80]
# test the compile function with a complex expression
def test_compile_complex_expression():
# compile the expression "result = ((10 + 30) * 2) / 6;"
result = compile("result = ((10 + 30) * 2) / 6;")
# check the result
assert result == [13.333333333333334]
test_compile_expression()
test_compile_assignment()
test_compile_statements()
test_compile_nested_expressions()
test_compile_complex_expression()
These are the differences between the original and the fixed code.
Setting aside the differences in what method the tests are calling, likely caused
by a poor prompt from myself, the original and the fixed code differ by 5.57%.
This means 94.42% of this code is created by ChatGPT.
--- chatgpt_compiler_original.py 2022-12-07 16:24:38.209911500 -0800
+++ chatgpt_compiler.py 2022-12-07 16:25:15.979911500 -0800
@@ -136,9 +136,16 @@
value = compile_expression(parse_tree[2], variables)
variables[name] = value
return value
+ elif type == "statement":
+ result = compile_statement(parse_tree[1][0], variables)
+ return result
+
def compile_expression(parse_tree, variables):
if isinstance(parse_tree, (list, tuple)):
+ if len(parse_tree) == 0:
+ return None
+
# parse_tree is subscriptable, so we can access its elements
type = parse_tree[0]
if type in ["+", "-", "*", "/", "%"]:
@@ -173,9 +180,17 @@
else:
print(f"Error: variable {name} is not defined")
return None
+ elif type == "print":
+ return compile_statement(parse_tree, variables)
+ else:
+ return variables[parse_tree]
else:
# parse_tree is not subscriptable, so we can't access its elements
- return parse_tree
+ if variables.get(parse_tree):
+ return variables[parse_tree]
+ else:
+ return parse_tree
+
return compile_program(parse_tree)
@@ -197,39 +212,38 @@
# test the compile function with a simple expression
def test_compile_expression():
# compile the expression "10 + 30"
- result = compile("print 10 + 30;")
+ result = compile(parse(tokenize("print 10 + 30")))
# check the result
assert result == [40]
# test the compile function with a simple assignment
def test_compile_assignment():
# compile the expression "result = 10 + 30"
- result = compile("result = 10 + 30;")
+ result = compile(parse(tokenize("result = 10 + 30")))
# check the result
assert result == [40]
# test the compile function with multiple statements
def test_compile_statements():
- # compile the expression "result = 10 + 30; print result;"
- result = compile("result = 10 + 30; print result;")
+ # compile the expression "result = 10 + 30 print result"
+ result = compile(parse(tokenize("result = 10 + 30; print result;")))
# check the result
assert result == [40, 40]
# test the compile function with nested expressions
def test_compile_nested_expressions():
- # compile the expression "result = (10 + 30) * 2;"
- result = compile("result = (10 + 30) * 2;")
+ # compile the expression "result = (10 + 30) * 2"
+ result = compile(parse(tokenize("result = (10 + 30) * 2;")))
# check the result
assert result == [80]
# test the compile function with a complex expression
def test_compile_complex_expression():
- # compile the expression "result = ((10 + 30) * 2) / 6;"
- result = compile("result = ((10 + 30) * 2) / 6;")
+ # compile the expression "result = ((10 + 30) * 2) / 6"
+ result = compile(parse(tokenize("result = ((10 + 30) * 2) / 6")))
# check the result
assert result == [13.333333333333334]
-
test_compile_expression()
test_compile_assignment()
test_compile_statements()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment