# mimoo/ocaml_parser.py

## ocaml_parser.py
import sys
import os
import itertools

from rich.console import Console
from rich.syntax import Syntax
from rich.style import Style
from rich.progress_bar import ProgressBar
from rich.panel import Panel
from rich.console import Group
from rich.console import group
from rich.columns import Columns
from rich.layout import Layout
from rich.text import Text
from rich.table import Table

# Module-wide rich console; print_error() and main() render their panels and
# progress bars through it.
console = Console()


#
# Functions that can parse errors
#


def parse_error_inner(error, lines):
    """Classify an ``Error:`` line and build the value that describes it.

    Args:
        error: The line of compiler output that starts with ``Error:``.
        lines: Iterator over the lines following ``error``. It is handed to
            ``parse_interface_match`` for interface mismatches and is not
            consumed otherwise.

    Returns:
        A short string for type errors and for unrecognised errors, or the
        result of ``parse_interface_match`` for interface mismatches.
    """
    # Split on the first ": " only: the description may itself contain ": ",
    # and a bare "Error:" line must not raise IndexError.
    _, found, desc = error.partition(": ")
    if not found:
        desc = error.split(":", 1)[-1].strip()

    if desc.startswith("This expression has type"):
        return "type is not compatible with type"
    if desc.startswith("The implementation"):
        return parse_interface_match(error, lines)
    return f"unknown error {desc}"


def _split_suberror(frame):
    """Split one sub-error message into ``[implementation, interface]`` text."""
    separators = (
        "is not compatible with type",
        "is not compatible with the type",
        "is not included in",
    )
    for separator in separators:
        if separator in frame:
            # maxsplit=1 keeps exactly two halves even if the phrase repeats.
            impl, intf = frame.split(separator, 1)
            break
    else:
        raise ValueError("could not split the suberror in two parts")

    # Remove the introductory phrase as a whole prefix. str.lstrip() takes a
    # *set of characters*, so it would also eat the start of names like "t".
    impl = impl.strip()
    for prefix in ("Values do not match:", "The type", "Type"):
        if impl.startswith(prefix):
            impl = impl[len(prefix):]
            break
    return [impl.strip(), intf.strip()]


@group()
def parse_interface_match(error, lines):
    """Render an "implementation does not match the interface" error.

    The remaining lines are read in three phases: the rest of the description
    (skipped up to and including the first line containing ":"), the
    sub-errors, and finally the ``File ...`` location lines.

    Args:
        error: The ``Error:`` line that introduced this error (not rendered).
        lines: Iterator over the lines that follow ``error``; it is consumed.

    Yields:
        A ``Text`` headline, then a two-column ``Table`` holding one row of
        syntax-highlighted panels per sub-error. The second ``File`` location
        heads the implementation column, the first one the interface column.

    Raises:
        ValueError: If a sub-error cannot be split into two halves, or fewer
            than two ``File`` locations were found.
    """
    in_desc = True

    # sub-error ("backtrace") state
    in_frame = False
    frame_error = ""
    backtrace = []

    # file locations state
    in_files = False
    expect_actual = []

    suberror_markers = ("Values do not match:", "The type", "Type\n", "Type ")

    for line in lines:
        # still inside the description: it ends on the first line with a ":"
        if in_desc:
            if ":" in line:
                in_desc = False
            continue

        # start of files
        if "File" in line:
            if not in_files:
                # the first location closes the sub-error being collected
                backtrace.append(frame_error)
                in_files = True
            filename, start_line, _ = parse_filename(line)
            expect_actual.append((filename, start_line))

        # once locations started, everything else is ignored
        if in_files:
            continue

        if "Types for method" in line:
            continue

        # start of a suberror
        if any(marker in line for marker in suberror_markers):
            # not the first one: store the previous sub-error
            if in_frame:
                backtrace.append(frame_error)
            else:
                in_frame = True
            frame_error = line
            continue

        # continuation of the current sub-error
        frame_error += line

    # show the sub-errors in reverse order of appearance
    backtrace.reverse()
    backtrace = [_split_suberror(frame) for frame in backtrace]

    if len(expect_actual) < 2:
        raise ValueError(
            "expected two 'File' locations (interface and implementation), "
            f"found {len(expect_actual)}"
        )
    intf_name, intf_line = expect_actual[0]
    impl_name, impl_line = expect_actual[1]

    # description
    yield Text("the implementation does not match the interface\n")

    # files
    table = Table(expand=True, box=None)
    table.add_column(f"implementation: {impl_name}\n", justify="center",
                     style="cyan", no_wrap=True, ratio=1)
    table.add_column(
        f"expected by interface: {intf_name}\n", style="magenta", justify="center", ratio=1)

    # backtrace
    for impl, intf in backtrace:
        col1 = Panel(Syntax(impl, "ocaml", line_numbers=True,
                            start_line=int(impl_line)))
        col2 = Panel(Syntax(intf, "ocaml", line_numbers=True,
                            start_line=int(intf_line)))
        table.add_row(col1, col2)

    yield table
#
# The rest
#


def parse_filename(line):
    """Extract the file path and line span from a compiler location line.

    These are all valid:
        ``File "path", line 104, characters 1-3:``
        ``File "path", lines 104-105, characters 1-3:``
        ``File "path", lines 104-105:``

    Leading text before ``File "`` (such as indentation) is ignored, and the
    path may contain commas.

    Args:
        line: One location line of compiler output.

    Returns:
        A tuple ``(formatted, start_line, end_line)`` where ``formatted`` is
        ``"<path relative to the current directory>:<start_line>"`` and both
        line numbers are strings. ``end_line`` equals ``start_line`` for a
        single-line location.

    Raises:
        ValueError: If ``line`` does not have the expected shape.
    """
    prefix = 'File "'
    start = line.find(prefix)
    if start == -1:
        raise ValueError(f"not a location line: {line!r}")

    # The path ends at the closing quote that is followed by ", ".
    filepath, closed, location = line[start + len(prefix):].partition('", ')
    if not closed:
        raise ValueError(f"malformed location line: {line!r}")

    # Keep only "line N" / "lines N-M"; drop ", characters ..." and the ":".
    span = location.split(",", 1)[0].strip().rstrip(":")
    keyword, _, numbers = span.partition(" ")
    numbers = numbers.strip()
    if keyword not in ("line", "lines") or not numbers:
        raise ValueError(f"malformed location line: {line!r}")

    start_line, _, end_line = numbers.partition("-")
    if not end_line:
        end_line = start_line

    # get relative path
    relpath = os.path.relpath(filepath)

    # format and return
    return (f"{relpath}:{start_line}", start_line, end_line)


def parse_error(error_lines):
    """Parse the lines of a single compiler error.

    The first line is the ``File ...`` location. It may be followed by a code
    snippet (lines starting with a line number) and ``^`` marker lines, and
    then by the ``Error:`` line, which is handed to ``parse_error_inner``
    together with the remaining lines.

    Args:
        error_lines: The lines of one error, location line first.

    Returns:
        A dict with the keys ``"filename"`` (formatted location),
        ``"snippet"`` (list of ``[line_number, code]`` pairs), ``"marker"``
        (concatenated ``^`` lines) and ``"error"`` (the result of
        ``parse_error_inner``).

    Raises:
        ValueError: If the error is empty, contains an unexpected line before
            ``Error:``, or has no ``Error:`` line at all.
    """
    lines = iter(error_lines)

    # first line is the filename
    location = next(lines, None)
    if location is None:
        raise ValueError("cannot parse an empty error")
    filename, _, _ = parse_filename(location)

    # next might come some snippet or "Error:"
    snippet = []
    marker = ""
    description = None
    for line in lines:
        # we're out of the snippet
        if line.startswith("Error:"):
            description = parse_error_inner(line, lines)
            break

        # line number or `^^^^^^^` (highlight)
        if line[:1].isnumeric():
            # Split on the first " | " only: the code itself may contain it.
            number, _, code = line.partition(" | ")
            snippet.append([number, code])
        elif "^" in line:
            marker += line
        else:
            raise ValueError(f"unexpected line in error: {line!r}")

    if description is None:
        raise ValueError(f"no 'Error:' line found for {filename}")

    return {
        "filename": filename,
        "snippet": snippet,
        "marker": marker,
        "error": description,
    }


def print_error(idx, error):
    """Print one parsed error as a titled panel on the module console.

    Args:
        idx: Zero-based position of the error; shown one-based in the title.
        error: Dict with the keys ``"filename"``, ``"snippet"`` (list of
            ``[line_number, code]`` pairs), ``"marker"`` (string holding the
            ``^`` highlight line, possibly empty) and ``"error"`` (renderable
            description).
    """
    snippet = None
    rows = error["snippet"]
    if rows:
        print("\n")

        # start line
        start_line_str = rows[0][0]
        start_line = int(start_line_str)
        last_line = int(rows[-1][0])

        # snippet as string
        code = "".join(text for _, text in rows)

        # use library to output
        snippet = Syntax(code, "ocaml", line_numbers=True,
                         start_line=start_line, highlight_lines=[last_line])

        # highlight characters; a snippet may come without any `^` marker,
        # in which case there is nothing to stylize
        highlight_chars = [pos for pos, char in enumerate(error["marker"])
                           if char == "^"]
        if highlight_chars:
            # marker columns are shifted by the line number and the " | "
            # separator that were removed from the snippet text
            offset = len(start_line_str) + 3
            s1 = highlight_chars[0] - offset
            s2 = highlight_chars[-1] + 1 - offset
            style = Style(bgcolor='deep_pink4')
            snippet.stylize_range(style, (1, s1), (1, s2))

    if snippet is None:
        panel_group = Group(error["error"])
    else:
        panel_group = Group(snippet, error["error"])

    # error + filename
    title = f"Error #{idx + 1} in {error['filename']}"
    console.print(Panel(panel_group, title=title))


def print_errors(errors):
    """Print every parsed error in ``errors`` using ``print_error``.

    Args:
        errors: Iterable of parsed error dicts accepted by ``print_error``.
    """
    # print_error() needs the position of the error to number its title.
    for idx, error in enumerate(errors):
        print_error(idx, error)


def split_errors(compiler_output):
    """Group compiler output lines into one list of lines per error.

    Each error starts with a line beginning with ``File "``; every following
    line belongs to that error until the next such line.

    Args:
        compiler_output: Iterable of output lines.

    Returns:
        A list of errors, each a list of lines. Empty input gives ``[]``.

    Raises:
        ValueError: If the output does not start with a ``File "`` line.
    """
    errors = []
    current_error = []

    for line in compiler_output:
        if line.startswith('File "'):
            # new error: flush the one collected so far, if any
            if current_error:
                errors.append(current_error)
            current_error = []
        elif not current_error:
            # bad start
            raise ValueError("expected 'File' at start of line")

        # just add to the current error
        current_error.append(line)

    # last error pending (nothing is pending when the input was empty)
    if current_error:
        errors.append(current_error)

    return errors


def main():
    """Read compiler output from stdin and pretty-print each error found."""
    raw_lines = sys.stdin.readlines()

    # group the raw lines per error and announce how many there are
    grouped = split_errors(raw_lines)
    total = len(grouped)
    print(f"Found {total} errors")

    for position, chunk in enumerate(grouped):
        # progress bar filled up to the number of errors already shown
        console.print(ProgressBar(total=total, completed=position))

        # parse, then render
        print_error(position, parse_error(chunk))
        print("\n\n")


# Run the pretty-printer only when this file is executed as a script.
if __name__ == "__main__":
    main()
	import sys
	import os
	import itertools

	from rich.console import Console
	from rich.syntax import Syntax
	from rich.style import Style
	from rich.progress_bar import ProgressBar
	from rich.panel import Panel
	from rich.console import Group
	from rich.console import group
	from rich.columns import Columns
	from rich.layout import Layout
	from rich.text import Text
	from rich.table import Table

	# Module-wide rich console; print_error() and main() render their panels
	# and progress bars through it.
	console = Console()


	#
	# Functions that can parse errors
	#


	def parse_error_inner(error, lines):
	    """Classify an ``Error:`` line and build the value that describes it.

	    Args:
	        error: The line of compiler output that starts with ``Error:``.
	        lines: Iterator over the lines following ``error``. It is handed
	            to ``parse_interface_match`` for interface mismatches and is
	            not consumed otherwise.

	    Returns:
	        A short string for type errors and for unrecognised errors, or the
	        result of ``parse_interface_match`` for interface mismatches.
	    """
	    # Split on the first ": " only: the description may itself contain
	    # ": ", and a bare "Error:" line must not raise IndexError.
	    _, found, desc = error.partition(": ")
	    if not found:
	        desc = error.split(":", 1)[-1].strip()

	    if desc.startswith("This expression has type"):
	        return "type is not compatible with type"
	    if desc.startswith("The implementation"):
	        return parse_interface_match(error, lines)
	    return f"unknown error {desc}"


	def _split_suberror(frame):
	    """Split one sub-error message into ``[implementation, interface]`` text."""
	    separators = (
	        "is not compatible with type",
	        "is not compatible with the type",
	        "is not included in",
	    )
	    for separator in separators:
	        if separator in frame:
	            # maxsplit=1 keeps exactly two halves even if the phrase repeats.
	            impl, intf = frame.split(separator, 1)
	            break
	    else:
	        raise ValueError("could not split the suberror in two parts")

	    # Remove the introductory phrase as a whole prefix. str.lstrip() takes
	    # a *set of characters*, so it would also eat the start of names.
	    impl = impl.strip()
	    for prefix in ("Values do not match:", "The type", "Type"):
	        if impl.startswith(prefix):
	            impl = impl[len(prefix):]
	            break
	    return [impl.strip(), intf.strip()]


	@group()
	def parse_interface_match(error, lines):
	    """Render an "implementation does not match the interface" error.

	    The remaining lines are read in three phases: the rest of the
	    description (skipped up to and including the first line containing
	    ":"), the sub-errors, and finally the ``File ...`` location lines.

	    Args:
	        error: The ``Error:`` line that introduced this error (not rendered).
	        lines: Iterator over the lines that follow ``error``; it is consumed.

	    Yields:
	        A ``Text`` headline, then a two-column ``Table`` holding one row of
	        syntax-highlighted panels per sub-error. The second ``File``
	        location heads the implementation column, the first one the
	        interface column.

	    Raises:
	        ValueError: If a sub-error cannot be split into two halves, or
	            fewer than two ``File`` locations were found.
	    """
	    in_desc = True

	    # sub-error ("backtrace") state
	    in_frame = False
	    frame_error = ""
	    backtrace = []

	    # file locations state
	    in_files = False
	    expect_actual = []

	    suberror_markers = ("Values do not match:", "The type", "Type\n", "Type ")

	    for line in lines:
	        # still inside the description: it ends on the first line with a ":"
	        if in_desc:
	            if ":" in line:
	                in_desc = False
	            continue

	        # start of files
	        if "File" in line:
	            if not in_files:
	                # the first location closes the sub-error being collected
	                backtrace.append(frame_error)
	                in_files = True
	            filename, start_line, _ = parse_filename(line)
	            expect_actual.append((filename, start_line))

	        # once locations started, everything else is ignored
	        if in_files:
	            continue

	        if "Types for method" in line:
	            continue

	        # start of a suberror
	        if any(marker in line for marker in suberror_markers):
	            # not the first one: store the previous sub-error
	            if in_frame:
	                backtrace.append(frame_error)
	            else:
	                in_frame = True
	            frame_error = line
	            continue

	        # continuation of the current sub-error
	        frame_error += line

	    # show the sub-errors in reverse order of appearance
	    backtrace.reverse()
	    backtrace = [_split_suberror(frame) for frame in backtrace]

	    if len(expect_actual) < 2:
	        raise ValueError(
	            "expected two 'File' locations (interface and implementation), "
	            f"found {len(expect_actual)}"
	        )
	    intf_name, intf_line = expect_actual[0]
	    impl_name, impl_line = expect_actual[1]

	    # description
	    yield Text("the implementation does not match the interface\n")

	    # files
	    table = Table(expand=True, box=None)
	    table.add_column(f"implementation: {impl_name}\n", justify="center",
	                     style="cyan", no_wrap=True, ratio=1)
	    table.add_column(
	        f"expected by interface: {intf_name}\n", style="magenta", justify="center", ratio=1)

	    # backtrace
	    for impl, intf in backtrace:
	        col1 = Panel(Syntax(impl, "ocaml", line_numbers=True,
	                            start_line=int(impl_line)))
	        col2 = Panel(Syntax(intf, "ocaml", line_numbers=True,
	                            start_line=int(intf_line)))
	        table.add_row(col1, col2)

	    yield table
	#
	# The rest
	#


	def parse_filename(line):
	    """Extract the file path and line span from a compiler location line.

	    These are all valid:
	        ``File "path", line 104, characters 1-3:``
	        ``File "path", lines 104-105, characters 1-3:``
	        ``File "path", lines 104-105:``

	    Leading text before ``File "`` (such as indentation) is ignored, and
	    the path may contain commas.

	    Args:
	        line: One location line of compiler output.

	    Returns:
	        A tuple ``(formatted, start_line, end_line)`` where ``formatted``
	        is ``"<path relative to the current directory>:<start_line>"`` and
	        both line numbers are strings. ``end_line`` equals ``start_line``
	        for a single-line location.

	    Raises:
	        ValueError: If ``line`` does not have the expected shape.
	    """
	    prefix = 'File "'
	    start = line.find(prefix)
	    if start == -1:
	        raise ValueError(f"not a location line: {line!r}")

	    # The path ends at the closing quote that is followed by ", ".
	    filepath, closed, location = line[start + len(prefix):].partition('", ')
	    if not closed:
	        raise ValueError(f"malformed location line: {line!r}")

	    # Keep only "line N" / "lines N-M"; drop ", characters ..." and ":".
	    span = location.split(",", 1)[0].strip().rstrip(":")
	    keyword, _, numbers = span.partition(" ")
	    numbers = numbers.strip()
	    if keyword not in ("line", "lines") or not numbers:
	        raise ValueError(f"malformed location line: {line!r}")

	    start_line, _, end_line = numbers.partition("-")
	    if not end_line:
	        end_line = start_line

	    # get relative path
	    relpath = os.path.relpath(filepath)

	    # format and return
	    return (f"{relpath}:{start_line}", start_line, end_line)


	def parse_error(error_lines):
	    """Parse the lines of a single compiler error.

	    The first line is the ``File ...`` location. It may be followed by a
	    code snippet (lines starting with a line number) and ``^`` marker
	    lines, and then by the ``Error:`` line, which is handed to
	    ``parse_error_inner`` together with the remaining lines.

	    Args:
	        error_lines: The lines of one error, location line first.

	    Returns:
	        A dict with the keys ``"filename"`` (formatted location),
	        ``"snippet"`` (list of ``[line_number, code]`` pairs), ``"marker"``
	        (concatenated ``^`` lines) and ``"error"`` (the result of
	        ``parse_error_inner``).

	    Raises:
	        ValueError: If the error is empty, contains an unexpected line
	            before ``Error:``, or has no ``Error:`` line at all.
	    """
	    lines = iter(error_lines)

	    # first line is the filename
	    location = next(lines, None)
	    if location is None:
	        raise ValueError("cannot parse an empty error")
	    filename, _, _ = parse_filename(location)

	    # next might come some snippet or "Error:"
	    snippet = []
	    marker = ""
	    description = None
	    for line in lines:
	        # we're out of the snippet
	        if line.startswith("Error:"):
	            description = parse_error_inner(line, lines)
	            break

	        # line number or `^^^^^^^` (highlight)
	        if line[:1].isnumeric():
	            # Split on the first " | " only: the code itself may contain it.
	            number, _, code = line.partition(" | ")
	            snippet.append([number, code])
	        elif "^" in line:
	            marker += line
	        else:
	            raise ValueError(f"unexpected line in error: {line!r}")

	    if description is None:
	        raise ValueError(f"no 'Error:' line found for {filename}")

	    return {
	        "filename": filename,
	        "snippet": snippet,
	        "marker": marker,
	        "error": description,
	    }


	def print_error(idx, error):
	    """Print one parsed error as a titled panel on the module console.

	    Args:
	        idx: Zero-based position of the error; shown one-based in the title.
	        error: Dict with the keys ``"filename"``, ``"snippet"`` (list of
	            ``[line_number, code]`` pairs), ``"marker"`` (string holding
	            the ``^`` highlight line, possibly empty) and ``"error"``
	            (renderable description).
	    """
	    snippet = None
	    rows = error["snippet"]
	    if rows:
	        print("\n")

	        # start line
	        start_line_str = rows[0][0]
	        start_line = int(start_line_str)
	        last_line = int(rows[-1][0])

	        # snippet as string
	        code = "".join(text for _, text in rows)

	        # use library to output
	        snippet = Syntax(code, "ocaml", line_numbers=True,
	                         start_line=start_line, highlight_lines=[last_line])

	        # highlight characters; a snippet may come without any `^` marker,
	        # in which case there is nothing to stylize
	        highlight_chars = [pos for pos, char in enumerate(error["marker"])
	                           if char == "^"]
	        if highlight_chars:
	            # marker columns are shifted by the line number and the " | "
	            # separator that were removed from the snippet text
	            offset = len(start_line_str) + 3
	            s1 = highlight_chars[0] - offset
	            s2 = highlight_chars[-1] + 1 - offset
	            style = Style(bgcolor='deep_pink4')
	            snippet.stylize_range(style, (1, s1), (1, s2))

	    if snippet is None:
	        panel_group = Group(error["error"])
	    else:
	        panel_group = Group(snippet, error["error"])

	    # error + filename
	    title = f"Error #{idx + 1} in {error['filename']}"
	    console.print(Panel(panel_group, title=title))


	def print_errors(errors):
	    """Print every parsed error in ``errors`` using ``print_error``.

	    Args:
	        errors: Iterable of parsed error dicts accepted by ``print_error``.
	    """
	    # print_error() needs the position of the error to number its title.
	    for idx, error in enumerate(errors):
	        print_error(idx, error)


	def split_errors(compiler_output):
	    """Group compiler output lines into one list of lines per error.

	    Each error starts with a line beginning with ``File "``; every
	    following line belongs to that error until the next such line.

	    Args:
	        compiler_output: Iterable of output lines.

	    Returns:
	        A list of errors, each a list of lines. Empty input gives ``[]``.

	    Raises:
	        ValueError: If the output does not start with a ``File "`` line.
	    """
	    errors = []
	    current_error = []

	    for line in compiler_output:
	        if line.startswith('File "'):
	            # new error: flush the one collected so far, if any
	            if current_error:
	                errors.append(current_error)
	            current_error = []
	        elif not current_error:
	            # bad start
	            raise ValueError("expected 'File' at start of line")

	        # just add to the current error
	        current_error.append(line)

	    # last error pending (nothing is pending when the input was empty)
	    if current_error:
	        errors.append(current_error)

	    return errors


	def main():
	    """Read compiler output from stdin and pretty-print each error found."""
	    # read stdin
	    compiler_output = sys.stdin.readlines()

	    # split errors
	    errors = split_errors(compiler_output)
	    print(f"Found {len(errors)} errors")

	    # print errors
	    for idx, error in enumerate(errors):
	        # progress bar filled up to the number of errors already shown
	        progress = ProgressBar(total=len(errors), completed=idx)
	        console.print(progress)

	        # parse, then render
	        res = parse_error(error)
	        print_error(idx, res)
	        print("\n\n")


	# Run the pretty-printer only when this file is executed as a script.
	if __name__ == "__main__":
	    main()