Created
July 9, 2020 16:33
-
-
Save tmadlener/6f97cc77aabbe7a6b821b6bd74344c27 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Schema definitions for the different parts that can occur in a yaml | |
datalayout file. | |
""" | |
from __future__ import print_function, unicode_literals, absolute_import | |
# TODO: | |
# - [x] syntactic checks of format read from yaml file | |
# - [x] basic checks of types | |
# - [ ] checks of relations between definitions | |
import re | |
from schema import ( | |
Schema, Optional, Use, And, Hook, SchemaError | |
) | |
# Type names that are accepted literally as member types. Several of them
# contain whitespace, which is why they are spelled out one by one. The order
# of the entries is the order in which they end up as regex alternatives.
BUILTIN_TYPES = [
    "int",
    "long",
    "float",
    "double",
    "unsigned int",
    "unsigned",
    "unsigned long",
    "short",
    "bool",
    "long long",
    "unsigned long long",
    "std::string",
]
class MemberVariable(object):
    """Simple class to hold information about a member variable.

    Attributes:
        name: name of the member (empty string if not given)
        full_type: the complete c++ type; for arrays this is always the
            normalized spelling ``std::array<array_type, array_size>``
        description: free text, emitted as a ``///<`` comment by ``__str__``
        is_array: True if the member is a std::array, False otherwise
        array_type: element type of the array (None for non-array members)
        array_size: number of elements of the array (None for non-array
            members); stored as passed in or as parsed from ``type``
    """

    def __init__(self, **kwargs):
        """Construct a member variable from keyword arguments.

        Keyword Args:
            name: name of the member
            type: c++ type of the member
            description: description of the member
            is_array: set to True to have ``type`` parsed as a std::array
                definition when array_type / array_size are not both given
            array_type: element type of a std::array member
            array_size: number of elements of a std::array member

        Raises:
            ValueError: if ``is_array`` is set but ``type`` is not a valid
                std::array definition, or if unknown keyword arguments are
                passed
        """
        self.name = kwargs.pop('name', '')
        self.full_type = kwargs.pop('type', '')
        self.description = kwargs.pop('description', '')
        is_array = kwargs.pop('is_array', False)
        self.array_type = kwargs.pop('array_type', None)
        self.array_size = kwargs.pop('array_size', None)

        if is_array and not (self.array_type and self.array_size):
            array_match = ValidType.array_re.match(self.full_type)
            # array_re is not anchored at the end, so only accept the match if
            # nothing but whitespace follows the array definition
            if array_match and not self.full_type[array_match.end():].strip():
                self.array_type, self.array_size = array_match.groups()
            else:
                raise ValueError("Trying to construct MemberVariable with 'is_array' but 'type' "
                                 "is not a valid array definition")

        # Always store a real bool (and not None or whatever array_size holds)
        self.is_array = bool(is_array or (self.array_type and self.array_size))
        if self.is_array:
            # Normalize the spelling of the array type
            self.full_type = r'std::array<{}, {}>'.format(self.array_type, self.array_size)

        if kwargs:
            # sorted gives a plain, deterministic list instead of a dict_keys repr
            raise ValueError("Unused kwargs in MemberVariable: {}".format(sorted(kwargs)))

    def __str__(self):
        """Return the c++ definition, followed by the description (if any)"""
        definition = r'{} {};'.format(self.full_type, self.name)
        if self.description:
            definition += r' ///< {}'.format(self.description)
        return definition
class ValidName(object):
    """Class that can be used to validate that a given string could potentially be
    used as a type name in c++"""

    # Names can be almost anything as long as they do not start with a digit
    # and do not contain anything fancy or whitespace. NOTE: the pattern is not
    # anchored, since it is also embedded into other regular expressions.
    name_re = re.compile(r'([a-zA-Z_]+\w*)')

    def validate(self, data):
        """Check if string is a valid name

        Args:
            data: the object to check

        Returns:
            str: data stripped of leading and trailing whitespace

        Raises:
            SchemaError: if data is not a string or if the stripped string is
                not completely matched by name_re
        """
        # TODO: More stringent tests
        if isinstance(data, str):
            candidate = data.strip()
            name_match = self.name_re.match(candidate)
            # match only anchors at the start; require that the whole candidate
            # has been consumed, so that e.g. 'foo-bar' or 'a b' are rejected
            if name_match and name_match.end() == len(candidate):
                return candidate

        print('{} is not a valid key'.format(data))
        raise SchemaError('{n} is not a valid name for a type'.format(n=data))
class ValidType(object):
    """Class that can be used to validate that a given string is a valid c++ type.

    Given the complexity of that decision we don't even try to get all
    possibilities, but simply try to get a subset of everything that is possible
    in c++ and simply declare the rest invalid.
    """
    # Doing this with regex is non-ideal, but we should be able to at least
    # enforce something that will yield valid c++ identifiers even if we might not
    # cover all possibilities that are admitted by the c++ standard

    # A type can either start with a double colon, or a character (types starting
    # with _ are technically allowed, but partially reserved for compilers)
    # Additionally we have to take into account the possible whitespaces in the
    # builtin types above. Currently this is done by simple brute-forcing
    type_str = r'((?:\:{{2}})?[a-zA-Z]+[a-zA-Z0-9:_]*|{builtin_re})'.format(
        builtin_re=r'|'.join((r'(?:{})'.format(t)) for t in BUILTIN_TYPES))
    type_re = re.compile(type_str)

    # std::array declaration with some whitespace distribution freedom
    array_re = re.compile(r' *std::array *< *{typ} *, *([0-9]+) *>'.format(typ=type_str))

    # The patterns above are not anchored at the end, because they are also
    # embedded into larger expressions. For validating a complete string the
    # whole input has to be consumed, otherwise 'std::array<int, 4>' is accepted
    # as the plain type 'std::array' and 'unsigned long' is cut to 'unsigned'.
    _full_type_re = re.compile(type_str + r'\Z')
    _full_array_re = re.compile(array_re.pattern + r' *\Z')

    def __init__(self, allow_arr=True):
        """Create a type validator

        Args:
            allow_arr (bool): whether std::array definitions are accepted
        """
        self.allow_arr = allow_arr

    def validate(self, data):
        """Check if the passed data (str) could be a valid type

        Args:
            data: the object to check

        Returns:
            MemberVariable: with only the type (or the array type and size)
                set

        Raises:
            SchemaError: if data is not a string, or if the complete string is
                neither a valid type nor (if allowed) a valid std::array
        """
        if not isinstance(data, str):
            raise SchemaError('{} has to be a string in order to be a valid type'.format(data))

        candidate = data.strip()

        type_match = self._full_type_re.match(candidate)
        if type_match:
            return MemberVariable(type=type_match.group(1).strip())

        if self.allow_arr:
            array_match = self._full_array_re.match(candidate)
            if array_match:
                return MemberVariable(array_type=array_match.group(1), array_size=array_match.group(2))

        raise SchemaError('{} is not a valid type name in this context'.format(data))
class ValidMember(object):
    """Class that can be used to validate a complete member definition of the
    form '<type> <name> // <description>' and to convert it into a
    MemberVariable.
    """
    # Comments can be anything after //
    # stripping of trailing whitespaces is done later as it is hard to do with regex
    comment_str = r'\/\/ *(.*)'

    type_or_array_str = r'(?:{t}|{a})'.format(t=ValidType.type_str, a=ValidType.array_re.pattern)

    # Resulting capture groups:
    #   1: simple type, 2 and 3: array type and array size,
    #   4: name of the member, 5: description
    member_re = re.compile(' *{type} +{name} *{comment}'.format(
        type=type_or_array_str, name=ValidName.name_re.pattern, comment=comment_str
    ))

    def __init__(self, allow_arr=True):
        """Create a member validator

        Args:
            allow_arr (bool): whether members of std::array type are accepted
        """
        self.allow_arr = allow_arr

    def validate(self, data):
        """Validate the definition and return a MemberVariable

        Args:
            data: the member definition to check

        Returns:
            MemberVariable: with type (or array type and size), name and
                description filled from the definition

        Raises:
            SchemaError: if data is not a string, does not match the member
                pattern, or defines an array while arrays are not allowed
        """
        # Same guard as in ValidType.validate; without it a non-string leads to
        # a TypeError from the regex engine instead of a SchemaError
        if not isinstance(data, str):
            raise SchemaError('{} does not define a valid member of a datatype'.format(data))

        member_match = self.member_re.match(data)
        if not member_match:
            raise SchemaError('{} does not define a valid member of a datatype'.format(data))

        name = member_match.group(4)
        description = member_match.group(5).strip()

        # Depending on whether group 1 or groups 2 and 3 are present, it is a
        # simple type or an array
        if member_match.group(1):
            return MemberVariable(type=member_match.group(1), name=name, description=description)

        if not self.allow_arr:
            raise SchemaError('{} defines an array type which is not allowd in this context'.format(data))

        return MemberVariable(array_type=member_match.group(2), array_size=member_match.group(3),
                              name=name, description=description)
# Layout of a component definition: member names (keys) are checked by
# ValidName and their types (values) by ValidType. An 'ExtraCode' entry is
# optional, but if present it has to contain a 'declaration' string.
COMPONENT_SCHEMA = Schema(
    {
        ValidName(): ValidType(),
        Optional('ExtraCode'): {
            'declaration': str,
        },
    }
)
# Layout of a datatype definition: 'Description', 'Author' and 'Members' are
# mandatory, everything else is optional. Array members are only accepted in
# 'Members'; the relations and vector members are validated with
# allow_arr=False.
DATATYPE_SCHEMA = Schema(
    {
        'Description': str,
        'Author': str,
        'Members': [ValidMember()],
        # All sections of the extra code are optional strings
        Optional('ExtraCode'): {
            Optional(section): str
            for section in (
                'declaration',
                'implementation',
                'const_declaration',
                'const_implementation',
                'includes',
            )
        },
        Optional('OneToOneRelations'): [ValidMember(allow_arr=False)],
        Optional('OneToManyRelations'): [ValidMember(allow_arr=False)],
        Optional('VectorMembers'): [ValidMember(allow_arr=False)],
        # TODO (are they in use anywhere?):
        # ConstExtraCode
        # TransientMembers
        # Typedefs
    }
)
if __name__ == '__main__':
    # Small demonstration: validate one component and one datatype definition
    # in the form in which they are read in from a yaml file

    # Component definition as read in from yaml file
    component_def = {
        'x': 'int',
        'y': 'int',
        'z': 'int',
        'p': 'std::array<int, 4>',
        'ExtraCode': {'declaration': ' SimpleStruct() : x(0),y(0),z(0) {} SimpleStruct( const int* v) : x(v[0]),y(v[1]),z(v[2]) {} '},
    }

    # The individual parts of the datatype definition
    members = [
        'int type //type of reconstructed particle. Check/set collection parameters ReconstructedParticleTypeNames and ReconstructedParticleTypeValues.',
        'float energy // [GeV] energy of the reconstructed particle.',
        'edm4hep::Vector3f momentum // [GeV] particle momentum',
        'edm4hep::Vector3f referencePoint // [mm] reference, i.e. where the particle has been measured',
        'float charge //charge of the reconstructed particle.',
        'float mass // [GeV] mass of the reconstructed particle, set independently from four vector',
        'float goodnessOfPID //overall goodness of the PID on a scale of [0;1]',
        'std::array<float,10> covMatrix //cvariance matrix of the reconstructed particle 4vector (10 parameters). Stored as lower triangle matrix of the four momentum (px,py,pz,E), i.e. cov(px,px), cov(py,##',
    ]
    one_to_one_relations = [
        'edm4hep::Vertex startVertex //start vertex associated to this particle',
        'edm4hep::ParticleID particleIDUsed //particle Id used for the kinematics of this particle',
    ]
    one_to_many_relations = [
        'edm4hep::Cluster clusters //clusters that have been used for this particle.',
        'edm4hep::Track tracks //tracks that have been used for this particle.',
        'edm4hep::ReconstructedParticle particles //reconstructed particles that have been combined to this particle.',
        'edm4hep::ParticleID particleIDs //particle Ids (not sorted by their likelihood)',
    ]
    extra_code = {'declaration': ' bool isCompound() { return particles_size() > 0 ;}\n //vertex where the particle decays This method actually returns the start vertex from the first daughter particle found.\n //TODO: edm4hep::Vertex getEndVertex() { return edm4hep::Vertex( (getParticles(0).isAvailable() ? getParticles(0).getStartVertex() : edm4hep::Vertex(0,0) ) ) ; }\n '}

    # Datatype definition as read in from yaml file
    datatype_def = {
        'Description': 'Reconstructed Particle',
        'Author': 'F.Gaede, DESY',
        'Members': members,
        'OneToOneRelations': one_to_one_relations,
        'OneToManyRelations': one_to_many_relations,
        'ExtraCode': extra_code,
    }

    for schema, definition in ((COMPONENT_SCHEMA, component_def),
                               (DATATYPE_SCHEMA, datatype_def)):
        print(schema.validate(definition))

    # inserting an invalid key will lead to a failure with an almost readable error message
    component_def['invalid name'] = 'this does not matter any longer'
    COMPONENT_SCHEMA.validate(component_def)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment