-
-
Save pypt/94d747fe5180851196eb to your computer and use it in GitHub Desktop.
import yaml | |
from yaml.constructor import ConstructorError | |
try: | |
from yaml import CLoader as Loader | |
except ImportError: | |
from yaml import Loader | |
def no_duplicates_constructor(loader, node, deep=False):
    """Construct a YAML mapping, rejecting duplicate keys on the same level.

    Args:
        loader: The PyYAML loader instance that is constructing ``node``.
        node: Mapping node; ``node.value`` is a list of
            ``(key_node, value_node)`` pairs.
        deep: Forwarded to ``construct_object`` and ``construct_mapping``.

    Returns:
        The result of ``loader.construct_mapping(node, deep)``.

    Raises:
        ConstructorError: If a key occurs more than once in ``node``.
    """
    seen_keys = set()
    for key_node, _value_node in node.value:
        # Merge keys ("<<") are expanded by the loader's construct_mapping();
        # they have no constructor of their own, so constructing them here
        # would fail.
        if key_node.tag == "tag:yaml.org,2002:merge":
            continue
        key = loader.construct_object(key_node, deep=deep)
        if key in seen_keys:
            raise ConstructorError(
                "while constructing a mapping", node.start_mark,
                "found duplicate key (%s)" % key, key_node.start_mark)
        seen_keys.add(key)
    # Only the keys are needed for the check; the values are built once,
    # by the loader's regular mapping construction.
    return loader.construct_mapping(node, deep)
# Register the constructor on the very Loader class that is handed to
# yaml.load() below. Without an explicit Loader argument,
# yaml.add_constructor() only registers on PyYAML's default pure-Python
# loaders, which would leave CLoader (if that is what was imported) untouched.
yaml.add_constructor(
    yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
    no_duplicates_constructor,
    Loader=Loader,
)

# yaml.load() is always called with an explicit Loader: PyYAML 6 made the
# argument mandatory.

# Works fine (no duplicate keys)
yaml_data = yaml.load('''
---
foo: bar
baz: qux
''',
    Loader=Loader,
)

# Works fine (no duplicate keys on the same level)
yaml_data = yaml.load('''
---
foo:
  bar: baz
  baz: qux
bar:
  bar: baz
  baz: qux
''',
    Loader=Loader,
)

# Raises exception (has duplicate keys)
yaml_data = yaml.load('''
---
foo: bar
foo: qux
''',
    Loader=Loader,
)
Thanks for this gist, it helped me out.
For others coming here, I'd suggest creating a separate class to prevent pollution of the yaml namespace, as that may cause nasty side effects, especially in libraries. Consider something like the below:
def no_duplicates_constructor(loader, node, deep=False):
    """Construct a YAML mapping, rejecting duplicate keys on the same level.

    Args:
        loader: The PyYAML loader instance that is constructing ``node``.
        node: Mapping node; ``node.value`` is a list of
            ``(key_node, value_node)`` pairs.
        deep: Forwarded to ``construct_object`` and ``construct_mapping``.

    Returns:
        The result of ``loader.construct_mapping(node, deep)``.

    Raises:
        BadYamlFormatError: If a key occurs more than once in ``node``.
            (The exception class is not defined in this snippet; it is
            expected to be in scope.)
    """
    mapping = {}
    for key_node, value_node in node.value:
        # Merge keys ("<<") are expanded by the loader's construct_mapping();
        # they have no constructor of their own, so skip them here.
        if key_node.tag == "tag:yaml.org,2002:merge":
            continue
        key = loader.construct_object(key_node, deep=deep)
        # Build the value before the duplicate check so the error message
        # shows the constructed value rather than the raw node object.
        value = loader.construct_object(value_node, deep=deep)
        if key in mapping:
            msg = "Duplicate key {0} (overwrite existing value '{1}' with new value '{2}')"
            raise BadYamlFormatError(msg.format(key, mapping[key], value))
        mapping[key] = value
    return loader.construct_mapping(node, deep)
def construct_mapping(loader, node):
    """Build a mapping from ``node`` through ``object_pairs_hook``.

    The node's merge keys are flattened first; the ``(key, value)`` pairs
    produced by ``loader.construct_pairs`` are then handed to
    ``object_pairs_hook``, which is expected to be in scope (it is not
    defined in this snippet).
    """
    loader.flatten_mapping(node)
    pairs = loader.construct_pairs(node)
    return object_pairs_hook(pairs)
class DupCheckLoader(yaml.Loader):
    """Dedicated loader subclass, so the duplicate-key constructor is not
    registered on the global yaml.Loader."""


# NOTE(review): yaml.Loader can construct arbitrary Python objects; consider
# deriving from yaml.SafeLoader if the input is not trusted.
DupCheckLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, no_duplicates_constructor)
return yaml.load(stream, DupCheckLoader)
Good to have this gist, thanks for posting it!
I have made a few enhancements after I had added your code to my project:
-
I added the call to
loader.flatten_mapping
(like @jzohrab has it in his comment) -- this is required for correct processing of merge keys (<<:
); -
I return the
mapping
variable from no_duplicates_constructor
, since there is no point in having PyYAML's loaders construct all keys and values a second time (although that might be useful if you added other map constructors that, for instance, return OrderedDict
s and wanted to be able to mix them arbitrarily); -
I register
no_duplicates_constructor
with all the constructor classes in yaml.constructor
, e.g.: for cls in (BaseConstructor, Constructor, SafeConstructor): cls.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, no_duplicates_constructor)
which means it is also applied in the
yaml.safe_load()
method (to address @JonathonReinhart's point).
In my use case, I need simply to make all calls to yaml.load
(or yaml.safe_load
, if any) use this logic, so I added this snippet into my module's __init__.py
file. If I was writing a library, there would be more changes to be made. Would be really nice if we could get this functionality integrated in the PyYAML library directly, as per https://bitbucket.org/xi/pyyaml/issues/9/ignore-duplicate-keys-and-send-warning-or.
Why are you iterating over each Constructor class?
If the constructor is applied to BaseConstructor shouldn't the children inherit it?
And if not, then is this simply to cover every type of tag? (Why would something load using BaseConstructor?)
Following @jzohrab's approach, here is a standalone loader class that checks uniqueness for all mappings. This overrides BaseConstructor.construct_mapping()
to add a check for duplicate keys.
from yaml.constructor import ConstructorError
from yaml.nodes import MappingNode
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
class UniqueKeyLoader(Loader):
    """Loader that rejects mappings containing the same key more than once."""

    def construct_mapping(self, node, deep=False):
        """Construct a mapping after checking its keys for duplicates.

        Args:
            node: The node to construct; must be a ``MappingNode``.
            deep: Forwarded to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The mapping built by the parent class's ``construct_mapping``.

        Raises:
            ConstructorError: If ``node`` is not a mapping node, if a key is
                unhashable, or if a key occurs more than once.
        """
        if not isinstance(node, MappingNode):
            raise ConstructorError(None, None,
                    "expected a mapping node, but found %s" % node.id,
                    node.start_mark)
        seen_keys = set()
        for key_node, _value_node in node.value:
            # Merge keys ("<<") are expanded by the parent construct_mapping()
            # and have no constructor of their own, so they are not checked.
            if key_node.tag == "tag:yaml.org,2002:merge":
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)
            except TypeError as exc:
                raise ConstructorError("while constructing a mapping", node.start_mark,
                        "found unacceptable key (%s)" % exc, key_node.start_mark) from exc
            # check for duplicate keys
            if key in seen_keys:
                raise ConstructorError("while constructing a mapping", node.start_mark,
                        "found duplicate key", key_node.start_mark)
            seen_keys.add(key)
        # Delegate the actual construction so merge keys are flattened and
        # values are built by the stock implementation.
        return super().construct_mapping(node, deep=deep)
Thanks, great gist. For my limited requirements (a single call to yaml.load()
), it worked perfectly.
@ngaya-II, thanks, this worked well for me; I just added
yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, UniqueKeyLoader.construct_mapping)
below your code and imported the module
Thanks. Cool. Merged your solution with a constructor for OrderedDict rather than regular dict.
This minimal version seems to work:
import yaml
# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that rejects mappings containing the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Check the keys of ``node`` for duplicates, then construct it.

        Args:
            node: Mapping node; ``node.value`` holds ``(key_node,
                value_node)`` pairs.
            deep: Forwarded to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The mapping built by ``yaml.SafeLoader.construct_mapping``.

        Raises:
            yaml.constructor.ConstructorError: If a key occurs more than
                once. An explicit raise is used instead of ``assert`` so the
                check still runs under ``python -O``.
        """
        seen_keys = set()  # set membership is O(1); a list made this O(n^2)
        for key_node, _value_node in node.value:
            # Merge keys ("<<") are expanded by the parent construct_mapping()
            # and have no constructor of their own, so they are not checked.
            if key_node.tag == "tag:yaml.org,2002:merge":
                continue
            key = self.construct_object(key_node, deep=deep)
            if key in seen_keys:
                raise yaml.constructor.ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found duplicate key (%r)" % (key,), key_node.start_mark)
            seen_keys.add(key)
        return super().construct_mapping(node, deep)
This minimal version seems to work:
import yaml


# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
    def construct_mapping(self, node, deep=False):
        """Assert that no key of ``node`` repeats, then construct it normally."""
        mapping = []
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            # NOTE: assert statements are skipped when Python runs with -O.
            assert key not in mapping
            mapping.append(key)
        return super().construct_mapping(node, deep)
This works like a charm with the yaml.load()
when defined as the loader. Great work!
This minimal version seems to work:
import yaml


# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
    def construct_mapping(self, node, deep=False):
        """Assert that no key of ``node`` repeats, then construct it normally."""
        mapping = []
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            # NOTE: assert statements are skipped when Python runs with -O.
            assert key not in mapping
            mapping.append(key)
        return super().construct_mapping(node, deep)
This works nicely, though I'd use a ValueError to be clearer about what's wrong and avoid the check being optimized out.
Minor optimization of ^ using sets instead of lists, that throws a ValueError
instead of an AssertionError
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that raises ValueError on duplicate mapping keys."""

    def construct_mapping(self, node, deep=False):
        """Check the keys of ``node`` for duplicates, then construct it.

        Args:
            node: Mapping node; ``node.value`` holds ``(key_node,
                value_node)`` pairs.
            deep: Forwarded to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The mapping built by ``yaml.SafeLoader.construct_mapping``.

        Raises:
            ValueError: If a key occurs more than once in ``node``.
        """
        seen_keys = set()
        for key_node, _value_node in node.value:
            # Merge keys ("<<") are expanded by the parent construct_mapping()
            # and have no constructor of their own; constructing one here
            # would fail, so they are excluded from the check.
            if key_node.tag == "tag:yaml.org,2002:merge":
                continue
            key = self.construct_object(key_node, deep=deep)
            if key in seen_keys:
                raise ValueError(f"Duplicate {key!r} key found in YAML.")
            seen_keys.add(key)
        return super().construct_mapping(node, deep)
Python 3.6+ only due to f-strings
Based on pbsds's version, we can handle merge keys like this:
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that raises ValueError on duplicate mapping keys.

    Merge keys (``<<``) are excluded from the check.
    """

    def construct_mapping(self, node, deep=False):
        """Check the keys of ``node`` for duplicates, then construct it.

        Args:
            node: Mapping node; ``node.value`` holds ``(key_node,
                value_node)`` pairs.
            deep: Forwarded to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The mapping built by ``yaml.SafeLoader.construct_mapping``.

        Raises:
            ValueError: If a key occurs more than once in ``node``.
        """
        seen_keys = set()
        for key_node, _value_node in node.value:
            # Compare against the exact merge tag; a substring test would
            # also skip unrelated tags that merely contain ":merge".
            if key_node.tag == "tag:yaml.org,2002:merge":
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                is_duplicate = key in seen_keys
            except TypeError:
                # Unhashable key: it cannot be tracked in a set. Leave it to
                # the parent construct_mapping() below to reject it.
                continue
            if is_duplicate:
                raise ValueError(f"Duplicate {key!r} key found in YAML.")
            seen_keys.add(key)
        return super().construct_mapping(node, deep)


# other code
yaml_dic = yaml.load(yaml_file, Loader=UniqueKeyLoader)
Unfortunately this seems to have no effect when using
yaml.safe_load()
. I'm not yet sure why. I thought it was because
yaml.add_constructor
by default works on the Loader
class, whereas safe_load
uses the SafeLoader
class. However, I tried both,
and neither seemed to have any effect. 🤷