Skip to content

Instantly share code, notes, and snippets.

@thorwhalen
Created September 3, 2019 00:29
Show Gist options
  • Save thorwhalen/7e6a967bde2a8ae4ddf8928f1c9d8ea5 to your computer and use it in GitHub Desktop.
Save thorwhalen/7e6a967bde2a8ae4ddf8928f1c9d8ea5 to your computer and use it in GitHub Desktop.
Validate, Generate and Parse Templated Strings
import re
# Built-in validation checks: maps the wording of a check to a
# function(val, check_val) that returns True when val passes the check.
base_validation_funs = {
    "be a": isinstance,
    "be in": lambda val, check_val: val in check_val,
    "be at least": lambda val, check_val: val >= check_val,
    "be more than": lambda val, check_val: val > check_val,
    "be no more than": lambda val, check_val: val <= check_val,
    "be less than": lambda val, check_val: val < check_val,
}

# Defaults (dflt_validation_funs is the very same dict object as base_validation_funs).
dflt_validation_funs = base_validation_funs
dflt_all_kwargs_should_be_in_validation_dict = False
dflt_ignore_misunderstood_validation_instructions = False
dflt_arg_pattern = r'.+'

# strftime-style day format and a regex matching strings of that shape.
day_format = "%Y-%m-%d"
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
day_format_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')

# Regex fragments: one or more non-slash characters, bare and as a capture group.
until_slash = "[^/]+"
until_slash_capture = '(' + until_slash + ')'

# str.format templates that wrap a regex in an unnamed / named capture group.
capture_template = '({format})'
named_capture_template = '(?P<{name}>{format})'

# Matches the text found between a '{' and the next '}' (the braces themselves excluded).
names_re = re.compile(r'(?<={)[^}]+(?=})')
def validate_kwargs(kwargs_to_validate,
                    validation_dict,
                    validation_funs=None,
                    all_kwargs_should_be_in_validation_dict=False,
                    ignore_misunderstood_validation_instructions=False
                    ):
    """
    Utility to validate a dict. Its main use is to validate function arguments (expressing the validation checks
    in validation_dict) by doing validate_kwargs(locals()), usually in the beginning of the function
    (to avoid having more accumulated variables than we need in locals())

    :param kwargs_to_validate: The {name: value, ...} dict whose values should be validated.
    :param validation_dict: A dict specifying what to validate. Keys are usually name of variables (when feeding
        locals()) and values are dicts, themselves specifying check:check_val pairs where check is a string that
        points to a function (see validation_funs argument) and check_val is an object that the kwargs_to_validate
        value will be checked against.
    :param validation_funs: A dict of check:check_function(val, check_val) where check_function is a function returning
        True if val is valid (with respect to check_val). These are added to (and override) the checks of
        base_validation_funs.
    :param all_kwargs_should_be_in_validation_dict: If True, will raise an error if kwargs_to_validate contains
        keys that are not in validation_dict.
    :param ignore_misunderstood_validation_instructions: If False (the default), will raise an error if
        validation_dict contains a check that is not in the known validation functions (safer, since if you
        mistype a check in validation_dict, the function will tell you so!). If True, unknown checks are skipped.
    :return: True if all the validations passed.
    :raises AssertionError: if a value fails a check, a check is unknown (and not ignored), or a key has no
        entry in validation_dict while all_kwargs_should_be_in_validation_dict is True.

    >>> validation_dict = {
    ...     'system': {
    ...         'be in': {'darwin', 'linux'}
    ...     },
    ...     'fv_version': {
    ...         'be a': int,
    ...         'be at least': 5
    ...     }
    ... }
    >>> validate_kwargs({'system': 'darwin'}, validation_dict)
    True
    >>> try:
    ...     validate_kwargs({'system': 'windows'}, validation_dict)
    ... except AssertionError as e:
    ...     assert str(e).startswith('system must be in')  # omitting the set because inconsistent order
    >>> try:
    ...     validate_kwargs({'fv_version': 9.9}, validation_dict)
    ... except AssertionError as e:
    ...     print(e)
    fv_version must be a <class 'int'>
    >>> try:
    ...     validate_kwargs({'fv_version': 4}, validation_dict)
    ... except AssertionError as e:
    ...     print(e)
    fv_version must be at least 5
    >>> validate_kwargs({'fv_version': 6}, validation_dict)
    True
    """
    # Built-in checks first, then caller-supplied ones (which may override a built-in).
    known_checks = dict(base_validation_funs or {})
    known_checks.update(validation_funs or {})

    for var, val in kwargs_to_validate.items():
        if var not in validation_dict:
            if all_kwargs_should_be_in_validation_dict:
                raise AssertionError("{} wasn't in the validation_dict".format(var))
            continue
        for check, check_val in validation_dict[var].items():
            # Look the check up in the merged dict so that custom checks are honoured too.
            if check in known_checks:
                if not known_checks[check](val, check_val):
                    raise AssertionError("{} must {} {}".format(var, check, check_val))
            elif not ignore_misunderstood_validation_instructions:
                raise AssertionError("I don't know what to do with the validation check '{}'".format(
                    check
                ))
    return True
def get_names_from_template(template):
    """
    List the field names that appear as {name} tokens in a template string.

    :param template: a "template" string (a string with {item} items
        -- the kind that is used to mark token for str.format)
    :return: the list of token names, in order of appearance

    >>> get_names_from_template('this{is}an{example}of{a}template')
    ['is', 'example', 'a']
    """
    # names_re has no capture groups, so each match's full text is the field name.
    return [found.group(0) for found in names_re.finditer(template)]
def mk_format_mapping_dict(format_dict, required_keys, default_format=until_slash):
    """
    Return a copy of format_dict in which every key of required_keys is present.

    :param format_dict: a {name: format, ...} dict (left unmodified)
    :param required_keys: iterable of names that must end up in the result
    :param default_format: the format given to the required names missing from format_dict
    :return: the completed copy of format_dict
    """
    completed = format_dict.copy()
    for key in required_keys:
        # Only fills the gaps: formats given explicitly are kept as they are.
        completed.setdefault(key, default_format)
    return completed
def mk_capture_patterns(mapping_dict):
    """
    Wrap every value of mapping_dict in an (unnamed) capture group.

    :param mapping_dict: a {name: format, ...} dict
    :return: a new {name: '(format)', ...} dict
    """
    return {
        name: capture_template.format(format=fmt)
        for name, fmt in mapping_dict.items()
    }
def mk_named_capture_patterns(mapping_dict):
    """
    Wrap every value of mapping_dict in a capture group named after its key.

    :param mapping_dict: a {name: format, ...} dict
    :return: a new {name: '(?P<name>format)', ...} dict
    """
    return {
        name: named_capture_template.format(name=name, format=fmt)
        for name, fmt in mapping_dict.items()
    }
def template_to_pattern(mapping_dict, template):
    """
    Replace the {name} tokens of a template by the strings mapping_dict associates to these names.

    Tokens whose name is not a key of mapping_dict are left untouched.

    :param mapping_dict: a {name: replacement, ...} dict (names must be strings)
    :param template: the string containing {name} tokens
    :return: the template with the known tokens replaced

    >>> template_to_pattern({'user': '(?P<user>[^/]+)'}, '/home/{user}/{other}')
    '/home/(?P<user>[^/]+)/{other}'
    >>> template_to_pattern({}, '/home/{user}')
    '/home/{user}'
    """
    if not mapping_dict:
        # An empty alternation would compile to a regex matching the empty string everywhere.
        return template
    # Escape the whole token, braces included, so that a name such as '0' is searched for
    # literally instead of being read as the regex quantifier {0}.
    token_re = re.compile('|'.join(re.escape('{' + name + '}') for name in mapping_dict))
    # A function replacement is inserted as is (no backslash processing of the returned string).
    return token_re.sub(lambda found: mapping_dict[found.group(0)[1:-1]], template)
def mk_extract_pattern(template, format_dict, named_capture_patterns, name):
    """
    Compile a regex for template in which only the `name` field is a (named) capture group.

    :param template: the string containing {field} tokens
    :param format_dict: a {field: format_regex, ...} dict, used as is for the fields other than `name`
    :param named_capture_patterns: a {field: named_capture_regex, ...} dict; only the `name` entry is used
    :param name: the field to capture
    :return: the compiled regex
    """
    substitutions = dict(format_dict)
    substitutions[name] = named_capture_patterns[name]

    alternatives = ['{' + re.escape(field) + '}' for field in substitutions]
    token_re = re.compile('|'.join(alternatives))

    def _substitute(found):
        # Strip the surrounding braces to get the field name back.
        return substitutions[found.group(0)[1:-1]]

    return re.compile(token_re.sub(_substitute, template))
def mk_prefix_templates_dicts(template):
    """
    Make the two {name: prefix_of_template, ...} dicts describing where the template can be cut.

    The field names of the template are assumed to be distinct.

    :param template: a string with {name} tokens
    :return: a pair (prefix_template_dict_including_name, prefix_template_dict_excluding_name) where

        - prefix_template_dict_including_name[name] is the template up to (but excluding) the token that
          follows {name} (the whole template for the last name); the None key maps to what precedes the
          first token.
        - prefix_template_dict_excluding_name[name] is the template up to (but excluding) the {name} token;
          the None key maps to the whole template.

        For a template without any token, both dicts are {None: template}.
    """
    names = get_names_from_template(template)
    prefix_template_dict_including_name = dict()
    prefix_template_dict_excluding_name = dict()

    previous_name = None
    for name in names:
        # The token is a literal piece of text: look for it as such, not as a regex.
        prefix = template[:template.index('{' + name + '}')]
        # What stops right before {name} both excludes name and includes the name preceding it.
        prefix_template_dict_including_name[previous_name] = prefix
        prefix_template_dict_excluding_name[name] = prefix
        previous_name = name

    # Including the last name (or None, if there are no names at all) means the whole template.
    prefix_template_dict_including_name[previous_name] = template
    prefix_template_dict_excluding_name[None] = template
    return prefix_template_dict_including_name, prefix_template_dict_excluding_name
def mk_kwargs_trans(**trans_func_for_key):
    """ Make a dict transformer from functions that depends solely on keys (of the dict to be transformed)
    Used to easily make process_kwargs and process_info_dict arguments for LinearNaming.

    :param trans_func_for_key: key=function pairs; each function is applied to the value of its key
    :return: a function taking **kwargs and returning them as a dict whose values, for the keys that have
        a function, are transformed (the other values are passed through unchanged)
    """
    assert all(callable(func) for func in trans_func_for_key.values()), "all argument values must be callable"

    def key_based_val_trans(**kwargs):
        return {
            key: trans_func_for_key[key](value) if key in trans_func_for_key else value
            for key, value in kwargs.items()
        }

    return key_based_val_trans
class LinearNaming(object):
    """Make, validate and parse strings that follow a str.format-style template."""

    def __init__(self, template, format_dict=None,
                 process_kwargs=None, process_info_dict=None):
        r"""
        Args:
            template: The string format template
            format_dict: A {field_name: field_value_format_regex, ...} dict
            process_kwargs: A function taking the field=value pairs and producing a dict of processed
                {field: value,...} dict (where both fields and values could have been processed.
                This is useful when we need to process (format, default, etc.) fields, or their values,
                according to the other fields of values in the collection.
                A specification of {field: function_to_process_this_value,...} wouldn't allow the full powers
                we are allowing here.
            process_info_dict: A sort of converse of format_dict.
                This is a {field_name: field_conversion_func, ...} dict that is used to convert info_dict values
                before returning them. A function taking the field=value pairs and returning a dict can
                be given instead.

        >>> ln = LinearNaming('/home/{user}/fav/{num}.txt',
        ...                   format_dict={'user': '[^/]+', 'num': r'\d+'},
        ...                   process_info_dict={'num': int}
        ...                  )
        >>> ln.is_valid('/home/USER/fav/123.txt')
        True
        >>> ln.is_valid('/home/US/ER/fav/123.txt')
        False
        >>> ln.is_valid('/home/US/ER/fav/not_a_number.txt')
        False
        >>> ln.mk('USER', num=123)  # making a string (with args or kwargs)
        '/home/USER/fav/123.txt'
        >>> # Note: mk does not validate values, so the output of ln.mk('USER', num='not_a_number')
        >>> # would not be valid (as can be checked with is_valid)
        >>> ln.info_dict('/home/USER/fav/123.txt')  # note in the output, 123 is an int, not a string
        {'user': 'USER', 'num': 123}
        >>>
        >>> ####### prefix methods #######
        >>> ln.is_valid_prefix('/home/USER/fav/')
        True
        >>> ln.is_valid_prefix('/home/USER/fav/12')  # too long
        False
        >>> ln.is_valid_prefix('/home/USER/fav')  # too short
        False
        >>> ln.is_valid_prefix('/home/')  # just right
        True
        >>> ln.is_valid_prefix('/home/USER/fav/123.txt')  # full path, so output same as is_valid() method
        True
        >>>
        >>> ln.mk_prefix('ME')
        '/home/ME/fav/'
        >>> ln.mk_prefix(user='YOU', num=456)  # full specification, so output same as same as mk() method
        '/home/YOU/fav/456.txt'
        """
        if format_dict is None:
            format_dict = {}

        self.template = template

        names = get_names_from_template(template)
        # Fields without an explicit format get the default one.
        format_dict = mk_format_mapping_dict(format_dict, names)
        named_capture_patterns = mk_named_capture_patterns(format_dict)

        # '$' anchors the end; the start is anchored by the use of match() in the methods.
        pattern = re.compile(template_to_pattern(named_capture_patterns, template) + '$')

        # One regex per field, capturing that field only.
        extract_pattern = {
            name: mk_extract_pattern(template, format_dict, named_capture_patterns, name)
            for name in names
        }

        if isinstance(process_info_dict, dict):
            # Turn the {field: conversion_func} dict into a function of the field=value pairs.
            _processor_for_kw = process_info_dict

            def process_info_dict(**info_dict):
                return {k: _processor_for_kw.get(k, lambda x: x)(v) for k, v in info_dict.items()}

        # NOTE: __repr__ lists the attributes in assignment order, so keep this order stable.
        self.names = names
        self.n_names = len(names)
        self.format_dict = format_dict
        self.named_capture_patterns = named_capture_patterns
        self.pattern = pattern
        self.extract_pattern = extract_pattern
        self.process_kwargs = process_kwargs
        self.process_info_dict = process_info_dict

        self.prefix_template_including_name, self.prefix_template_excluding_name = \
            mk_prefix_templates_dicts(self.template)

        # Alternation of all the (end-anchored) prefix patterns, shortest template first.
        prefix_templates = sorted(self.prefix_template_including_name.values(), key=len)
        _prefix_pattern = '$|'.join(x.format(**self.format_dict) for x in prefix_templates)
        _prefix_pattern += '$'
        self.prefix_pattern = re.compile(_prefix_pattern)

    def __call__(self, *args, **kwargs):
        """Same as self.mk(*args, **kwargs)."""
        return self.mk(*args, **kwargs)

    def is_valid(self, sref):
        """
        Check if a string matches the template (with every field matching its format).

        :param sref: the string to check
        :return: True iff sref, as a whole, matches self.pattern
        """
        return bool(self.pattern.match(sref))

    def is_valid_prefix(self, sref):
        """
        Check if sref is a valid prefix.

        :param sref: a string (that might be a valid sref prefix)
        :return: True iff sref matches self.prefix_pattern
        """
        return bool(self.prefix_pattern.match(sref))

    def info_dict(self, sref):
        """
        Get a dict with the arguments of an sref (for example group, user, subuser, etc.)

        :param sref: the string to parse
        :return: a dict holding the argument names and values (processed by self.process_info_dict,
            if there is one), or None if sref does not match self.pattern
        """
        m = self.pattern.match(sref)
        if not m:
            return None
        info_dict = m.groupdict()
        if self.process_info_dict:
            return self.process_info_dict(**info_dict)
        return info_dict

    def info_tuple(self, sref):
        """
        Get the values of info_dict(sref) as a tuple, ordered as the names are in the template.

        :param sref: the string to parse
        :return: a tuple of the values of the fields
        :raises TypeError: if sref does not match (since info_dict then returns None)
        """
        info_dict = self.info_dict(sref)
        return tuple(info_dict[x] for x in self.names)

    def extract(self, item, sref):
        """
        Extract a single item from an sref

        :param item: name of the item to extract
        :param sref: the sref from which to extract it
        :return: the (unprocessed, string) value for item
        :raises AttributeError: if sref does not match the extraction pattern of item
        """
        # Fetch the group by name: the format regexes of the fields that precede item could contain
        # capture groups of their own, in which case group 1 would not be the one of item.
        return self.extract_pattern[item].match(sref).group(item)

    def mk_prefix(self, *args, **kwargs):
        """
        Make a prefix of an sref: the path up to the first name (in template order) that has no value.

        :param args: values for the first names of self.names, in order
        :param kwargs: name=val arguments
        :return: A string that is the prefix of a valid sref
        """
        assert len(args) + len(kwargs) <= self.n_names, "You have too many arguments"
        kwargs = dict(zip(self.names, args), **kwargs)
        if self.process_kwargs is not None:
            kwargs = self.process_kwargs(**kwargs)

        # Keep the leading run of names that do have a value.
        keep_kwargs = {}
        last_name = None
        for name in self.names:
            if name not in kwargs:
                break
            keep_kwargs[name] = kwargs[name]
            last_name = name

        return self.prefix_template_including_name[last_name].format(**keep_kwargs)

    def mk(self, *args, **kwargs):
        """
        Make a full sref with given kwargs. All required name=val must be present (or inferred by
        the self.process_kwargs function).
        The required names are in self.names.
        Does NOT check for validity of the vals.

        :param args: values for the first names of self.names, in order
        :param kwargs: The name=val arguments needed to construct a valid sref
        :return: an sref
        """
        assert len(args) + len(kwargs) == self.n_names, "You're missing, or have too many arguments"
        kwargs = dict(zip(self.names, args), **kwargs)
        if self.process_kwargs is not None:
            kwargs = self.process_kwargs(**kwargs)
        return self.template.format(**kwargs)

    def replace_sref_elements(self, sref, **elements_kwargs):
        """
        Replace specific sref argument values with others

        :param sref: the sref to replace
        :param elements_kwargs: the arguments to replace (and their values)
        :return: a new sref
        :raises TypeError: if sref does not match (since info_dict then returns None)
        """
        sref_info_dict = self.info_dict(sref)
        for k, v in elements_kwargs.items():
            sref_info_dict[k] = v
        return self.mk(**sref_info_dict)

    def __repr__(self):
        """List the main attributes, one ' * name: value' item per attribute."""
        kv = self.__dict__.copy()
        exclude = ['process_kwargs', 'extract_pattern', 'prefix_pattern',
                   'prefix_template_including_name', 'prefix_template_excluding_name']
        for f in exclude:
            kv.pop(f)

        # template and format_dict come first, then the remaining attributes.
        items = [
            " * {}: {}\n\n".format('template', kv.pop('template')),
            " * {}: {}\n\n".format('format_dict', kv.pop('format_dict')),
        ]
        for k, v in kv.items():
            if hasattr(v, 'pattern'):
                # Show compiled regexes through their pattern string.
                v = v.pattern
            items.append(" * {}: {}\n\n".format(k, v))
        return "".join(items)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment