L3viathan/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Proof-of-concept: character-based coverage tool


As of Python 3.11, code objects have a new co_positions() method that yields tuples of the form (lineno_start, lineno_end, char_start, char_end) for each bytecode instruction.
Combined with setting f_trace_opcodes to True on a frame, trace functions can theoretically track coverage on a character level.
There are a bunch of issues though, shown in part in the code:

Some instructions correctly cover a wide range of code: JUMP_* instructions from branching code basically seem to span the entire length of the jump, i.e. from the if to the end of the indented block. MAKE_FUNCTION covers the entire function definition. These issues seem easy to resolve at first glance, because you can just ignore the corresponding opcodes.
Some instructions incorrectly (at least it seems that way to me) report an overly wide range. One example is a function with default values for parameters. These cause a LOAD_CONST instruction for a tuple of the default values, but the range extends for the entirety of the function. I have not found a good workaround for this.

One more small hack I've done is to restrict almost every multiline opcode to the first line only. This allows me to not ignore some opcodes (like MAKE_FUNCTION).

Overall verdict: Doing this correctly probably requires a) fixes in CPython to more accurately record bytecode ranges, and b) a whole lot of special casing, inspecting surrounding opcodes and opcode arguments, etc. to make it mostly work.

  
## charcov.py
import sys
import inspect
import dis

from collections import defaultdict

# Maps a code object's co_filename to the set of source spans recorded by
# local_tf; each span is a (line_start, line_end, col_start, col_end) tuple.
COVERAGE = defaultdict(set)
# Cache of list(code.co_positions()) results, filled lazily by local_tf.
POSITIONS = {}


def global_tf(frame, event, arg):
    """Global trace hook: decide whether a newly entered frame is traced.

    Frames whose code file name contains "charcov.py" are left untraced
    (None is returned).  Every other frame gets per-opcode trace events
    switched on and is handed over to ``local_tf``.  ``event`` and ``arg``
    are accepted for the trace-function signature but not used.
    """
    filename = frame.f_code.co_filename
    if "charcov.py" in filename:
        return None
    frame.f_trace_opcodes = True
    return local_tf


def local_tf(frame, event, arg):
    """Local trace hook: record the source span of the current instruction.

    Looks up the instruction at ``frame.f_lasti``, skips bookkeeping
    opcodes (NOP, POP_TOP, RETURN_VALUE and every JUMP variant), and adds
    the instruction's ``(line_start, line_end, col_start, col_end)`` tuple
    to ``COVERAGE`` under the code object's file name.  Spans that cover
    several lines are cut down to the rest of their first line, except for
    tuple constants, container builds and calls.

    ``event`` and ``arg`` are accepted for the trace-function signature but
    not used.  Always returns ``local_tf`` so tracing of the frame goes on.
    """
    code = frame.f_code
    offset = frame.f_lasti
    raw = code.co_code  # read once instead of once per use
    opname = dis.opname[raw[offset]]
    if opname in {"NOP", "POP_TOP", "RETURN_VALUE"} or "JUMP" in opname:
        return local_tf

    # Cache per code object, not per frame: a frame key would keep every
    # traced frame (and its locals) alive and would recompute the table for
    # each new call of the same function.
    positions = POSITIONS.get(code)
    if positions is None:
        positions = POSITIONS[code] = list(code.co_positions())
    my_position = positions[offset // 2]

    # co_positions() may report None for lines or columns; such an entry
    # cannot be mapped to source text, and arithmetic on it would raise
    # inside the trace hook.
    if None in my_position:
        return local_tf

    line_start, line_end, col_start, _ = my_position
    if line_end != line_start:
        # heuristics to mostly trim multiline ranges to the first line
        trim_multiline = True
        if opname == "LOAD_CONST":
            # Fold in any EXTENDED_ARG prefixes so constant indexes above
            # 255 resolve to the right co_consts entry.
            oparg = raw[offset + 1]
            shift = 8
            prefix = offset - 2
            while prefix >= 0 and dis.opname[raw[prefix]] == "EXTENDED_ARG":
                oparg |= raw[prefix + 1] << shift
                shift += 8
                prefix -= 2
            if isinstance(code.co_consts[oparg], tuple):
                trim_multiline = False
        elif opname in {"BUILD_LIST", "BUILD_SET", "BUILD_CONST_KEY_MAP", "BUILD_MAP", "CALL"}:
            # this is also technically wrong, e.g.:
            # [
            #     4 or untested,
            # ]
            trim_multiline = False
        if trim_multiline:
            # sys.maxsize stands for "to the end of the line", whatever the
            # line's length.
            my_position = (line_start, line_start, col_start, sys.maxsize)

    COVERAGE[code.co_filename].add(my_position)
    return local_tf


def start():
    """Begin tracing.

    Installs ``global_tf`` through ``sys.settrace`` and then does by hand,
    for the frame that called ``start()``, what ``global_tf`` does for
    other frames: attach ``local_tf`` and enable per-opcode events.
    """
    sys.settrace(global_tf)
    caller = sys._getframe().f_back
    caller.f_trace = local_tf
    caller.f_trace_opcodes = True


def stop_and_show():
    """Stop tracing and print a colored coverage listing per traced file.

    For every file name in ``COVERAGE`` the source is read, a header is
    printed, and ``show_coverage`` renders the lines (line numbers are
    1-based, hence ``off_by=1``).  Entries whose source cannot be read,
    for example code compiled from a string, are reported on stderr and
    skipped instead of aborting the whole report.
    """
    import tokenize  # local import: only this function needs it

    sys.settrace(None)
    for file, positions in COVERAGE.items():  # TODO: user setting
        try:
            # tokenize.open decodes with the encoding the source file
            # declares rather than the locale default.
            with tokenize.open(file) as f:
                lines = [line.rstrip("\n") for line in f]
        except (OSError, SyntaxError, UnicodeError) as exc:
            print(f"charcov: skipping {file!r}: {exc}", file=sys.stderr)
            continue
        print("Coverage for", file)
        print("=" * 20)
        show_coverage(positions, lines, off_by=1)


def red(s):
    """Wrap *s* in the ANSI escape codes for red foreground text."""
    return f'\x1b[31m{s}\x1b[0m'


def green(s):
    """Wrap *s* in the ANSI escape codes for green foreground text."""
    return f'\x1b[32m{s}\x1b[0m'


def show_coverage(positions, lines, off_by=0):
    """Print *lines* with covered characters in green and the rest in red.

    Args:
        positions: iterable of ``(line_start, line_end, col_start, col_end)``
            tuples.  The start is inclusive and the end exclusive, and
            columns are UTF-8 byte offsets, as reported by
            ``code.co_positions()``.  Tuples containing ``None`` are ignored.
        lines: source lines without their trailing newline.
        off_by: number of the first line (pass 1 for 1-based positions).
    """
    spans = [p for p in positions if None not in p]
    for li, line in enumerate(lines, start=off_by):
        # Only spans touching this line can cover any of its characters.
        on_line = [p for p in spans if p[0] <= li <= p[1]]
        pieces = []
        col = 0  # UTF-8 byte offset of the current character
        for c in line:
            here = (li, col)
            covered = any(
                (ls, cs) <= here < (le, ce) for (ls, le, cs, ce) in on_line
            )
            pieces.append(green(c) if covered else red(c))
            col += len(c.encode("utf-8", "surrogatepass"))
        print("".join(pieces))

## example.py
# Demo script: trace the definition and the first call of this(), print the
# coverage listing, then call this() once more after tracing has been stopped.
import charcov

charcov.start()
def this():
    x = int("1")
    # x is truthy, so `or` short-circuits and `not_tested` is never evaluated
    if x or not_tested:
        1 or print("hello")  # short-circuits as well; print() is not called
        y = "yes" if x else "no"
        z = dict(
            x=23,
            y=72,
        )
    return 8 or False

this()
charcov.stop_and_show()
this()
	import sys
	import inspect
	import dis

	from collections import defaultdict

	# Maps a code object's co_filename to the set of source spans recorded by
	# local_tf; each span is a (line_start, line_end, col_start, col_end) tuple.
	COVERAGE = defaultdict(set)
	# Cache of list(code.co_positions()) results, filled lazily by local_tf.
	POSITIONS = {}


	def global_tf(frame, event, arg):
	    """Global trace hook: decide whether a newly entered frame is traced.

	    Frames whose code file name contains "charcov.py" are left untraced
	    (None is returned).  Every other frame gets per-opcode trace events
	    switched on and is handed over to ``local_tf``.  ``event`` and ``arg``
	    are accepted for the trace-function signature but not used.
	    """
	    filename = frame.f_code.co_filename
	    if "charcov.py" in filename:
	        return None
	    frame.f_trace_opcodes = True
	    return local_tf


	def local_tf(frame, event, arg):
	    """Local trace hook: record the source span of the current instruction.

	    Looks up the instruction at ``frame.f_lasti``, skips bookkeeping
	    opcodes (NOP, POP_TOP, RETURN_VALUE and every JUMP variant), and adds
	    the instruction's ``(line_start, line_end, col_start, col_end)`` tuple
	    to ``COVERAGE`` under the code object's file name.  Spans that cover
	    several lines are cut down to the rest of their first line, except for
	    tuple constants, container builds and calls.

	    ``event`` and ``arg`` are accepted for the trace-function signature but
	    not used.  Always returns ``local_tf`` so tracing of the frame goes on.
	    """
	    code = frame.f_code
	    offset = frame.f_lasti
	    raw = code.co_code  # read once instead of once per use
	    opname = dis.opname[raw[offset]]
	    if opname in {"NOP", "POP_TOP", "RETURN_VALUE"} or "JUMP" in opname:
	        return local_tf

	    # Cache per code object, not per frame: a frame key would keep every
	    # traced frame (and its locals) alive and would recompute the table for
	    # each new call of the same function.
	    positions = POSITIONS.get(code)
	    if positions is None:
	        positions = POSITIONS[code] = list(code.co_positions())
	    my_position = positions[offset // 2]

	    # co_positions() may report None for lines or columns; such an entry
	    # cannot be mapped to source text, and arithmetic on it would raise
	    # inside the trace hook.
	    if None in my_position:
	        return local_tf

	    line_start, line_end, col_start, _ = my_position
	    if line_end != line_start:
	        # heuristics to mostly trim multiline ranges to the first line
	        trim_multiline = True
	        if opname == "LOAD_CONST":
	            # Fold in any EXTENDED_ARG prefixes so constant indexes above
	            # 255 resolve to the right co_consts entry.
	            oparg = raw[offset + 1]
	            shift = 8
	            prefix = offset - 2
	            while prefix >= 0 and dis.opname[raw[prefix]] == "EXTENDED_ARG":
	                oparg |= raw[prefix + 1] << shift
	                shift += 8
	                prefix -= 2
	            if isinstance(code.co_consts[oparg], tuple):
	                trim_multiline = False
	        elif opname in {"BUILD_LIST", "BUILD_SET", "BUILD_CONST_KEY_MAP", "BUILD_MAP", "CALL"}:
	            # this is also technically wrong, e.g.:
	            # [
	            #     4 or untested,
	            # ]
	            trim_multiline = False
	        if trim_multiline:
	            # sys.maxsize stands for "to the end of the line", whatever the
	            # line's length.
	            my_position = (line_start, line_start, col_start, sys.maxsize)

	    COVERAGE[code.co_filename].add(my_position)
	    return local_tf


	def start():
	    """Begin tracing.

	    Installs ``global_tf`` through ``sys.settrace`` and then does by hand,
	    for the frame that called ``start()``, what ``global_tf`` does for
	    other frames: attach ``local_tf`` and enable per-opcode events.
	    """
	    sys.settrace(global_tf)
	    caller = sys._getframe().f_back
	    caller.f_trace = local_tf
	    caller.f_trace_opcodes = True


	def stop_and_show():
	    """Stop tracing and print a colored coverage listing per traced file.

	    For every file name in ``COVERAGE`` the source is read, a header is
	    printed, and ``show_coverage`` renders the lines (line numbers are
	    1-based, hence ``off_by=1``).  Entries whose source cannot be read,
	    for example code compiled from a string, are reported on stderr and
	    skipped instead of aborting the whole report.
	    """
	    import tokenize  # local import: only this function needs it

	    sys.settrace(None)
	    for file, positions in COVERAGE.items():  # TODO: user setting
	        try:
	            # tokenize.open decodes with the encoding the source file
	            # declares rather than the locale default.
	            with tokenize.open(file) as f:
	                lines = [line.rstrip("\n") for line in f]
	        except (OSError, SyntaxError, UnicodeError) as exc:
	            print(f"charcov: skipping {file!r}: {exc}", file=sys.stderr)
	            continue
	        print("Coverage for", file)
	        print("=" * 20)
	        show_coverage(positions, lines, off_by=1)


	def red(s):
	    """Wrap *s* in the ANSI escape codes for red foreground text."""
	    return f'\x1b[31m{s}\x1b[0m'


	def green(s):
	    """Wrap *s* in the ANSI escape codes for green foreground text."""
	    return f'\x1b[32m{s}\x1b[0m'


	def show_coverage(positions, lines, off_by=0):
	    """Print *lines* with covered characters in green and the rest in red.

	    Args:
	        positions: iterable of ``(line_start, line_end, col_start, col_end)``
	            tuples.  The start is inclusive and the end exclusive, and
	            columns are UTF-8 byte offsets, as reported by
	            ``code.co_positions()``.  Tuples containing ``None`` are ignored.
	        lines: source lines without their trailing newline.
	        off_by: number of the first line (pass 1 for 1-based positions).
	    """
	    spans = [p for p in positions if None not in p]
	    for li, line in enumerate(lines, start=off_by):
	        # Only spans touching this line can cover any of its characters.
	        on_line = [p for p in spans if p[0] <= li <= p[1]]
	        pieces = []
	        col = 0  # UTF-8 byte offset of the current character
	        for c in line:
	            here = (li, col)
	            covered = any(
	                (ls, cs) <= here < (le, ce) for (ls, le, cs, ce) in on_line
	            )
	            pieces.append(green(c) if covered else red(c))
	            col += len(c.encode("utf-8", "surrogatepass"))
	        print("".join(pieces))
	# Demo script: trace the definition and the first call of this(), print the
	# coverage listing, then call this() once more after tracing has been stopped.
	import charcov

	charcov.start()
	def this():
	    x = int("1")
	    # x is truthy, so `or` short-circuits and `not_tested` is never evaluated
	    if x or not_tested:
	        1 or print("hello")  # short-circuits as well; print() is not called
	        y = "yes" if x else "no"
	        z = dict(
	            x=23,
	            y=72,
	        )
	    return 8 or False

	this()
	charcov.stop_and_show()
	this()