lembergerth/create_yaml_files.py

## create_yaml_files.py
#!/usr/bin/env python3

import glob
import os
import re

CHANGE_FILE_NAME = True
"""Whether to shorten file names by removing the verdicts, if possible without collisions."""
INCLUDE_PROPERTY_AS_REQUIRED_FILE = False
"""Whether to include property files as required files in task definition."""
USE_SUFFIX_WILDCARDS = False
"""Whether to use longest-possible prefixes before wildcard. If False, directory/* is used."""
AVOID_COLLISIONS_ACROSS_DIRECTORIES = True
"""Whether to avoid same file names in different directories."""

NAME_TO_PROP_AND_SUBPROP = {
    'unreach-call': ('properties/unreach-call.prp', None),
    'termination': ('properties/termination.prp', None),
    'no-overflow': ('properties/no-overflow.prp', None),
    'valid-memcleanup': ('properties/valid-memcleanup.prp', None),
    'valid-memsafety': ('properties/valid-memsafety.prp', None),
    'valid-deref': ('properties/valid-memsafety.prp', 'valid-deref'),
    'valid-free': ('properties/valid-memsafety.prp', 'valid-free'),
    'valid-memtrack': ('properties/valid-memsafety.prp', 'valid-memtrack'),
    'def-behavior': ('properties/def-behavior.prp', None)
}

COVERAGE_CRITERIA = ('properties/coverage-branches.prp', 'properties/coverage-conditions.prp', 'properties/coverage-statements.prp')

DIRECTORY='.'
PROPERTIES_DIR = 'properties/'
CANDIDATE_REGEX = re.compile(r'.*\.(c|i)')

sets_to_tasks = dict()
all_tasks = set()

testable_tasks = set()
testable_set_files = glob.iglob('*-testable.set')
for testable_set in testable_set_files:
    sets_to_tasks[testable_set] = list()
    with open(testable_set, 'r') as inp:
        for line in (l.strip() for l in inp.readlines()):
            for l in (l for l in glob.iglob(line) if CANDIDATE_REGEX.match(l)):
                testable_tasks.add(l)
                all_tasks.add(l)
                sets_to_tasks[testable_set].append(l)

assert os.path.exists(PROPERTIES_DIR)

verification_set_files = glob.glob('*.set')
for verification_set in verification_set_files:
    sets_to_tasks[verification_set] = list()
    with open(verification_set, 'r') as inp:
        for line in (l.strip() for l in inp.readlines()):
            if not line:
                continue
            if '*' in line:
                sets_to_tasks[verification_set].append("## " + line)
            for l in sorted(l for l in glob.iglob(line) if CANDIDATE_REGEX.match(l)):
                all_tasks.add(l)
                sets_to_tasks[verification_set].append(l)

tasks_to_new_names_and_yml = dict()
for task_file in all_tasks:
    # check whether preprocessed .i file exists for current .c file
    if task_file[-1] == 'c' and \
            (task_file[:-1] + 'i' in all_tasks or task_file + '.i' in all_tasks):
        print("Redundant file: ", task_file)
        continue
    properties = list()
    name_pcs_dot = task_file.split('.')
    new_name_pcs_dot = list()
    for pd in name_pcs_dot:
        name_pcs = pd.split('_')
        new_name_pcs = list()
        for p in name_pcs:
            offset = 0
            for name, prop in NAME_TO_PROP_AND_SUBPROP.items():
                if name not in p:
                    continue  # with next name_pc p
                if p.startswith('true'):
                    expected = 'true'
                    offset += len('true-')
                elif p.startswith('false'):
                    expected = 'false'
                    offset += len('false-')
                elif p.startswith('unknown-'):
                    expected = None
                    offset += len('unknown-')
                else:
                    continue  # with next name_pc p
                properties.append((prop, expected))
                offset += len(name)
                break  # for-loop over properties once one matches, because they are distinct
            new_p = p[offset:]
            if new_p:
                new_name_pcs.append(new_p)
        new_name_pcs_dot.append("_".join(new_name_pcs))

    if not properties:
        continue  # with next C file task_file

    yml_info = (task_file, properties)

    if CHANGE_FILE_NAME:
        new_task_file = '.'.join(new_name_pcs_dot)
        if new_task_file[-4:] == ".c.i":
            new_task_file = new_task_file[:-4] + ".i"
    else:
        new_task_file = task_file
    tasks_to_new_names_and_yml[task_file] = (new_task_file, yml_info)

for old_name, (curr_task, yml_info) in tasks_to_new_names_and_yml.items():
    task_basename = os.path.basename(curr_task)
    if AVOID_COLLISIONS_ACROSS_DIRECTORIES:
        collisions = [k
                for k, v in tasks_to_new_names_and_yml.items()
                if os.path.basename(v[0]).lower()[:-1] == task_basename.lower()[:-1]
                and k != old_name]
    else:
        collisions = [k
                for k, v in tasks_to_new_names_and_yml.items()
                if v[0].lower()[:-1] == task_basename.lower()[:-1]
                and k != old_name]
        if collisions:
            curr_task = curr_task[:-2] + '-1' + curr_task[-2:]
            for idx, other in enumerate(collisions, 2):
                new_name, content = tasks_to_new_names_and_yml.pop(other)
                new_name = new_name[:-2] + "-" + str(idx) + new_name[-2:]
                tasks_to_new_names_and_yml[other] = (new_name, content)

    task_basename = os.path.basename(curr_task)
    yml_content = "format_version: '1.0'\n"
    yml_content += "\n"
    if CHANGE_FILE_NAME:
        yml_content += "# old file name: " + os.path.basename(old_name) + "\n"
    yml_content += "input_files: '" + task_basename + "'\n"
    yml_content += "\n"

    if INCLUDE_PROPERTY_AS_REQUIRED_FILE and len(yml_info[1]) > 0:
        yml_content += "required_files:\n"
        yml_content += "  - '" + os.path.dirname(yml_info[1][0]) + "/'\n"
        yml_content += "\n"

    task_dir = os.path.dirname(curr_task)
    yml_content += "properties:\n"
    for prop, expected in sorted(yml_info[1], key=lambda p: p[0][0]):
        prop_file = os.path.relpath(prop[0], task_dir)
        yml_content += "  - property_file: " + prop_file + "\n"
        if expected:
            yml_content += "    expected_verdict: " + expected + "\n"
        if prop[1]:
            yml_content += "    subproperty: " + prop[1] + "\n"
        if 'unreach-call' in prop_file and expected == 'false':
            prop_file = os.path.relpath('properties/coverage-error-call.prp', task_dir)
            yml_content += "  - property_file: " + prop_file + "\n"
    if old_name in testable_tasks:
        for prop in COVERAGE_CRITERIA:
            prop_file = os.path.relpath(prop, task_dir)
            yml_content += "  - property_file: " + prop_file + "\n"
    yml_file = curr_task[:-2] + ".yml"
    with open(yml_file, 'w+') as outp:
        outp.write(yml_content)


    if old_name != curr_task:
        os.rename(old_name, curr_task)
        if old_name[-1] == 'i':
            # *.i -> *.c
            if os.path.exists(old_name[:-1] + 'c'):
                old_c = old_name[:-1] + 'c'
            # *.c.i -> *.c
            elif os.path.exists(old_name[:-2]):
                old_c = old_name[:-2]
            # ldv-memsafety/memleaks*.i -> ldv-memsafety/memleaks-notpreprocessed/memleaks*.c
            elif old_name.startswith('ldv-memsafety/memleaks'):
                old_c = 'ldv-memsafety/memleaks-notpreprocessed/'\
                        + old_name.split('/')[-1][:-1] + 'c'
            else:
                old_c = None
            if old_c:
                assert old_c not in all_tasks
                curr_task_name = curr_task.split('/')[-1]
                new_c_name = os.path.dirname(old_c) + '/' + curr_task_name[:-1] + 'c'

                os.rename(old_c, new_c_name)
    for task_set, content in sets_to_tasks.items():
        try:
            idx = content.index(old_name)
            content[idx] = yml_file
        except ValueError:
            pass

for task_set, content in sets_to_tasks.items():
    new_content = list()
    remaining = set(content)
    glob_suffix = "*"
    for task in content:
        if task not in remaining or task.startswith('#'):
            continue
        prefix_len = task.index('/') + 1
        prefix = task[:prefix_len]
        globbed_tasks = glob.glob(prefix + glob_suffix)
        globbed_tasks = [t for t in globbed_tasks if t.endswith('.yml')]
        assert len(globbed_tasks) > 0

        if USE_SUFFIX_WILDCARDS and len(globbed_tasks) > len([t for t in content if t.startswith(prefix)]):
            while len(globbed_tasks) > len([t for t in content if t.startswith(prefix)]):
                prefix_len += 1
                prefix = task[:prefix_len]
                globbed_tasks = glob.glob(prefix + glob_suffix)
                globbed_tasks = [t for t in globbed_tasks if t.endswith('.yml')]
            assert len(globbed_tasks) > 0
            new_prefix_len = prefix_len
            new_prefix = prefix
            while len([t for t in content if t.startswith(prefix)]) == len([t for t in content if t.startswith(new_prefix)])\
                    and prefix != task:
                prefix_len = new_prefix_len
                prefix = new_prefix
                new_prefix_len += 1
                new_prefix = task[:new_prefix_len]

        globbed_tasks = glob.glob(prefix + glob_suffix)
        globbed_tasks = [t for t in globbed_tasks if t.endswith('.yml')]
        if not task_set.endswith('testable.set') and (not USE_SUFFIX_WILDCARDS or len(globbed_tasks) > 3 or prefix[-1] == '/'):
            new_content.append(prefix + '*.yml')
            remaining -= set(globbed_tasks)
        else:
            new_content.append(task)
            remaining.remove(task)
    with open(task_set, 'w+') as outp:
        outp.writelines(l + '\n' for l in new_content)
	#!/usr/bin/env python3

	import glob
	import os
	import re

	CHANGE_FILE_NAME = True
	"""Whether to shorten file names by removing the verdicts, if possible without collisions."""
	INCLUDE_PROPERTY_AS_REQUIRED_FILE = False
	"""Whether to include property files as required files in task definition."""
	USE_SUFFIX_WILDCARDS = False
	"""Whether to use longest-possible prefixes before wildcard. If False, directory/* is used."""
	AVOID_COLLISIONS_ACROSS_DIRECTORIES = True
	"""Whether to avoid same file names in different directories."""

	NAME_TO_PROP_AND_SUBPROP = {
	'unreach-call': ('properties/unreach-call.prp', None),
	'termination': ('properties/termination.prp', None),
	'no-overflow': ('properties/no-overflow.prp', None),
	'valid-memcleanup': ('properties/valid-memcleanup.prp', None),
	'valid-memsafety': ('properties/valid-memsafety.prp', None),
	'valid-deref': ('properties/valid-memsafety.prp', 'valid-deref'),
	'valid-free': ('properties/valid-memsafety.prp', 'valid-free'),
	'valid-memtrack': ('properties/valid-memsafety.prp', 'valid-memtrack'),
	'def-behavior': ('properties/def-behavior.prp', None)
	}

	COVERAGE_CRITERIA = ('properties/coverage-branches.prp', 'properties/coverage-conditions.prp', 'properties/coverage-statements.prp')

	DIRECTORY='.'
	PROPERTIES_DIR = 'properties/'
	CANDIDATE_REGEX = re.compile(r'.*\.(c\|i)')

	sets_to_tasks = dict()
	all_tasks = set()

	testable_tasks = set()
	testable_set_files = glob.iglob('*-testable.set')
	for testable_set in testable_set_files:
	sets_to_tasks[testable_set] = list()
	with open(testable_set, 'r') as inp:
	for line in (l.strip() for l in inp.readlines()):
	for l in (l for l in glob.iglob(line) if CANDIDATE_REGEX.match(l)):
	testable_tasks.add(l)
	all_tasks.add(l)
	sets_to_tasks[testable_set].append(l)

	assert os.path.exists(PROPERTIES_DIR)

	verification_set_files = glob.glob('*.set')
	for verification_set in verification_set_files:
	sets_to_tasks[verification_set] = list()
	with open(verification_set, 'r') as inp:
	for line in (l.strip() for l in inp.readlines()):
	if not line:
	continue
	if '*' in line:
	sets_to_tasks[verification_set].append("## " + line)
	for l in sorted(l for l in glob.iglob(line) if CANDIDATE_REGEX.match(l)):
	all_tasks.add(l)
	sets_to_tasks[verification_set].append(l)

	tasks_to_new_names_and_yml = dict()
	for task_file in all_tasks:
	# check whether preprocessed .i file exists for current .c file
	if task_file[-1] == 'c' and \
	(task_file[:-1] + 'i' in all_tasks or task_file + '.i' in all_tasks):
	print("Redundant file: ", task_file)
	continue
	properties = list()
	name_pcs_dot = task_file.split('.')
	new_name_pcs_dot = list()
	for pd in name_pcs_dot:
	name_pcs = pd.split('_')
	new_name_pcs = list()
	for p in name_pcs:
	offset = 0
	for name, prop in NAME_TO_PROP_AND_SUBPROP.items():
	if name not in p:
	continue # with next name_pc p
	if p.startswith('true'):
	expected = 'true'
	offset += len('true-')
	elif p.startswith('false'):
	expected = 'false'
	offset += len('false-')
	elif p.startswith('unknown-'):
	expected = None
	offset += len('unknown-')
	else:
	continue # with next name_pc p
	properties.append((prop, expected))
	offset += len(name)
	break # for-loop over properties once one matches, because they are distinct
	new_p = p[offset:]
	if new_p:
	new_name_pcs.append(new_p)
	new_name_pcs_dot.append("_".join(new_name_pcs))

	if not properties:
	continue # with next C file task_file

	yml_info = (task_file, properties)

	if CHANGE_FILE_NAME:
	new_task_file = '.'.join(new_name_pcs_dot)
	if new_task_file[-4:] == ".c.i":
	new_task_file = new_task_file[:-4] + ".i"
	else:
	new_task_file = task_file
	tasks_to_new_names_and_yml[task_file] = (new_task_file, yml_info)

	for old_name, (curr_task, yml_info) in tasks_to_new_names_and_yml.items():
	task_basename = os.path.basename(curr_task)
	if AVOID_COLLISIONS_ACROSS_DIRECTORIES:
	collisions = [k
	for k, v in tasks_to_new_names_and_yml.items()
	if os.path.basename(v[0]).lower()[:-1] == task_basename.lower()[:-1]
	and k != old_name]
	else:
	collisions = [k
	for k, v in tasks_to_new_names_and_yml.items()
	if v[0].lower()[:-1] == task_basename.lower()[:-1]
	and k != old_name]
	if collisions:
	curr_task = curr_task[:-2] + '-1' + curr_task[-2:]
	for idx, other in enumerate(collisions, 2):
	new_name, content = tasks_to_new_names_and_yml.pop(other)
	new_name = new_name[:-2] + "-" + str(idx) + new_name[-2:]
	tasks_to_new_names_and_yml[other] = (new_name, content)

	task_basename = os.path.basename(curr_task)
	yml_content = "format_version: '1.0'\n"
	yml_content += "\n"
	if CHANGE_FILE_NAME:
	yml_content += "# old file name: " + os.path.basename(old_name) + "\n"
	yml_content += "input_files: '" + task_basename + "'\n"
	yml_content += "\n"

	if INCLUDE_PROPERTY_AS_REQUIRED_FILE and len(yml_info[1]) > 0:
	yml_content += "required_files:\n"
	yml_content += " - '" + os.path.dirname(yml_info[1][0]) + "/'\n"
	yml_content += "\n"

	task_dir = os.path.dirname(curr_task)
	yml_content += "properties:\n"
	for prop, expected in sorted(yml_info[1], key=lambda p: p[0][0]):
	prop_file = os.path.relpath(prop[0], task_dir)
	yml_content += " - property_file: " + prop_file + "\n"
	if expected:
	yml_content += " expected_verdict: " + expected + "\n"
	if prop[1]:
	yml_content += " subproperty: " + prop[1] + "\n"
	if 'unreach-call' in prop_file and expected == 'false':
	prop_file = os.path.relpath('properties/coverage-error-call.prp', task_dir)
	yml_content += " - property_file: " + prop_file + "\n"
	if old_name in testable_tasks:
	for prop in COVERAGE_CRITERIA:
	prop_file = os.path.relpath(prop, task_dir)
	yml_content += " - property_file: " + prop_file + "\n"
	yml_file = curr_task[:-2] + ".yml"
	with open(yml_file, 'w+') as outp:
	outp.write(yml_content)


	if old_name != curr_task:
	os.rename(old_name, curr_task)
	if old_name[-1] == 'i':
	# .i -> .c
	if os.path.exists(old_name[:-1] + 'c'):
	old_c = old_name[:-1] + 'c'
	# .c.i -> .c
	elif os.path.exists(old_name[:-2]):
	old_c = old_name[:-2]
	# ldv-memsafety/memleaks.i -> ldv-memsafety/memleaks-notpreprocessed/memleaks.c
	elif old_name.startswith('ldv-memsafety/memleaks'):
	old_c = 'ldv-memsafety/memleaks-notpreprocessed/'\
	+ old_name.split('/')[-1][:-1] + 'c'
	else:
	old_c = None
	if old_c:
	assert old_c not in all_tasks
	curr_task_name = curr_task.split('/')[-1]
	new_c_name = os.path.dirname(old_c) + '/' + curr_task_name[:-1] + 'c'

	os.rename(old_c, new_c_name)
	for task_set, content in sets_to_tasks.items():
	try:
	idx = content.index(old_name)
	content[idx] = yml_file
	except ValueError:
	pass

	for task_set, content in sets_to_tasks.items():
	new_content = list()
	remaining = set(content)
	glob_suffix = "*"
	for task in content:
	if task not in remaining or task.startswith('#'):
	continue
	prefix_len = task.index('/') + 1
	prefix = task[:prefix_len]
	globbed_tasks = glob.glob(prefix + glob_suffix)
	globbed_tasks = [t for t in globbed_tasks if t.endswith('.yml')]
	assert len(globbed_tasks) > 0

	if USE_SUFFIX_WILDCARDS and len(globbed_tasks) > len([t for t in content if t.startswith(prefix)]):
	while len(globbed_tasks) > len([t for t in content if t.startswith(prefix)]):
	prefix_len += 1
	prefix = task[:prefix_len]
	globbed_tasks = glob.glob(prefix + glob_suffix)
	globbed_tasks = [t for t in globbed_tasks if t.endswith('.yml')]
	assert len(globbed_tasks) > 0
	new_prefix_len = prefix_len
	new_prefix = prefix
	while len([t for t in content if t.startswith(prefix)]) == len([t for t in content if t.startswith(new_prefix)])\
	and prefix != task:
	prefix_len = new_prefix_len
	prefix = new_prefix
	new_prefix_len += 1
	new_prefix = task[:new_prefix_len]

	globbed_tasks = glob.glob(prefix + glob_suffix)
	globbed_tasks = [t for t in globbed_tasks if t.endswith('.yml')]
	if not task_set.endswith('testable.set') and (not USE_SUFFIX_WILDCARDS or len(globbed_tasks) > 3 or prefix[-1] == '/'):
	new_content.append(prefix + '*.yml')
	remaining -= set(globbed_tasks)
	else:
	new_content.append(task)
	remaining.remove(task)
	with open(task_set, 'w+') as outp:
	outp.writelines(l + '\n' for l in new_content)