-
-
Save pypt/94d747fe5180851196eb to your computer and use it in GitHub Desktop.
import yaml | |
from yaml.constructor import ConstructorError | |
try: | |
from yaml import CLoader as Loader | |
except ImportError: | |
from yaml import Loader | |
def no_duplicates_constructor(loader, node, deep=False):
    """Construct a YAML mapping, rejecting duplicate keys on the same level.

    Intended to be registered as the constructor for the default mapping
    tag. Only the keys are inspected here; the mapping itself is built by
    ``loader.construct_mapping``.

    Args:
        loader: The loader instance that is constructing the document.
        node: The mapping node whose ``(key_node, value_node)`` pairs are
            checked.
        deep: Passed through to ``construct_object`` / ``construct_mapping``.

    Returns:
        Whatever ``loader.construct_mapping(node, deep)`` returns.

    Raises:
        ConstructorError: If the same key appears twice in ``node``.
    """
    seen = set()
    for key_node, _value_node in node.value:
        if key_node.tag == "tag:yaml.org,2002:merge":
            # "<<" merge keys have no constructor of their own; trying to
            # construct one aborts the load. They are expanded later by
            # construct_mapping(), so they are not checked here.
            continue
        key = loader.construct_object(key_node, deep=deep)
        try:
            is_duplicate = key in seen
        except TypeError:
            # Unhashable key: cannot be tracked in a set. Leave it to
            # construct_mapping() below to accept or reject it.
            continue
        if is_duplicate:
            raise ConstructorError(
                "while constructing a mapping", node.start_mark,
                # (key,) keeps %-formatting correct when the key is a tuple.
                "found duplicate key (%s)" % (key,), key_node.start_mark)
        seen.add(key)
    return loader.construct_mapping(node, deep)
# Register the duplicate-key check on the very Loader class that is handed to
# yaml.load() below, so registration and loading cannot drift apart.
yaml.add_constructor(
    yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
    no_duplicates_constructor,
    Loader=Loader,
)

# NOTE: yaml.load() requires an explicit Loader in current PyYAML releases.
# The documents below are trusted literals; for untrusted input use a loader
# derived from SafeLoader instead of the full Loader.

# Works fine (no duplicate keys)
yaml_data = yaml.load('''
---
foo: bar
baz: qux
''', Loader=Loader)

# Works fine (no duplicate keys on the same level)
yaml_data = yaml.load('''
---
foo:
  bar: baz
  baz: qux
bar:
  bar: baz
  baz: qux
''', Loader=Loader)

# Raises exception (has duplicate keys)
yaml_data = yaml.load('''
---
foo: bar
foo: qux
''', Loader=Loader)
Following @jzohrab's approach, here is a standalone loader class that checks uniqueness for all mappings. This overrides BaseConstructor.construct_mapping()
to add a check for duplicate keys.
from yaml.constructor import ConstructorError
from yaml.nodes import MappingNode
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
class UniqueKeyLoader(Loader):
    """Loader whose mappings must not contain the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Build a dict from ``node``, failing on unhashable or duplicate keys.

        NOTE(review): this replaces the inherited implementation rather than
        delegating to it, so any extra processing the parent class does in
        ``construct_mapping`` (e.g. "<<" merge-key expansion) is presumably
        bypassed -- confirm against the PyYAML version in use.

        Args:
            node: The node to convert; must be a ``MappingNode``.
            deep: Passed through to ``construct_object`` for keys and values.

        Returns:
            A ``dict`` mapping each constructed key to its constructed value.

        Raises:
            ConstructorError: If ``node`` is not a mapping node, a key is
                unhashable, or a key occurs more than once.
        """
        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)
        mapping = {}
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)
            except TypeError as exc:  # "as" form works on Python 2.6+ and 3
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key (%s)" % exc, key_node.start_mark)
            # check for duplicate keys
            if key in mapping:
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found duplicate key", key_node.start_mark)
            value = self.construct_object(value_node, deep=deep)
            mapping[key] = value
        return mapping
Thanks, great gist. For my limited requirements (a single call to yaml.load()
), it worked perfectly.
@ngaya-II, thanks, this worked well for me. I just added
yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, UniqueKeyLoader.construct_mapping)
below your code and imported the module
Thanks. Cool. Merged your solution with a constructor for OrderedDict rather than regular dict.
This minimal version seems to work:
import yaml
# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that refuses mappings containing the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Check ``node`` for duplicate keys, then build it as usual.

        Args:
            node: Mapping node whose ``(key_node, value_node)`` pairs are
                inspected.
            deep: Passed through to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The result of the parent class's ``construct_mapping``.

        Raises:
            AssertionError: If a key occurs more than once in ``node``.
        """
        seen = set()
        for key_node, _value_node in node.value:
            if key_node.tag == "tag:yaml.org,2002:merge":
                # "<<" merge keys cannot be constructed on their own; the
                # parent construct_mapping() expands them.
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                duplicate = key in seen
            except TypeError:
                # Unhashable key: leave the verdict to the parent class.
                continue
            if duplicate:
                # Explicit raise instead of `assert`, which "python -O" strips.
                raise AssertionError(
                    "duplicate key %r in YAML mapping" % (key,))
            seen.add(key)
        return super().construct_mapping(node, deep)
This minimal version seems to work:
import yaml


# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that refuses mappings containing the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Check ``node`` for duplicate keys, then build it as usual.

        Raises:
            AssertionError: If a key occurs more than once in ``node``.
        """
        seen = set()
        for key_node, _value_node in node.value:
            if key_node.tag == "tag:yaml.org,2002:merge":
                # "<<" merge keys cannot be constructed on their own; the
                # parent construct_mapping() expands them.
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                duplicate = key in seen
            except TypeError:
                # Unhashable key: leave the verdict to the parent class.
                continue
            if duplicate:
                # Explicit raise instead of `assert`, which "python -O" strips.
                raise AssertionError(
                    "duplicate key %r in YAML mapping" % (key,))
            seen.add(key)
        return super().construct_mapping(node, deep)
This works like a charm when passed to yaml.load() as the Loader. Great work!
This minimal version seems to work:
import yaml


# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that refuses mappings containing the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Check ``node`` for duplicate keys, then build it as usual.

        Raises:
            AssertionError: If a key occurs more than once in ``node``.
        """
        seen = set()
        for key_node, _value_node in node.value:
            if key_node.tag == "tag:yaml.org,2002:merge":
                # "<<" merge keys cannot be constructed on their own; the
                # parent construct_mapping() expands them.
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                duplicate = key in seen
            except TypeError:
                # Unhashable key: leave the verdict to the parent class.
                continue
            if duplicate:
                # Explicit raise instead of `assert`, which "python -O" strips.
                raise AssertionError(
                    "duplicate key %r in YAML mapping" % (key,))
            seen.add(key)
        return super().construct_mapping(node, deep)
This works nicely, though I'd use a ValueError to be clearer about what's wrong and avoid the check being optimized out.
Minor optimization of ^ using sets instead of lists, that throws a ValueError
instead of an AssertionError
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that raises ``ValueError`` on duplicate mapping keys."""

    def construct_mapping(self, node, deep=False):
        """Check ``node`` for duplicate keys, then build it as usual.

        Args:
            node: Mapping node whose ``(key_node, value_node)`` pairs are
                inspected.
            deep: Passed through to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The result of the parent class's ``construct_mapping``.

        Raises:
            ValueError: If a key occurs more than once in ``node``.
        """
        seen = set()
        for key_node, _value_node in node.value:
            if key_node.tag == "tag:yaml.org,2002:merge":
                # "<<" merge keys cannot be constructed on their own; the
                # parent construct_mapping() expands them.
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                duplicate = key in seen
            except TypeError:
                # Unhashable key: a set cannot hold it. Let the parent class
                # report it instead of leaking a bare TypeError from here.
                continue
            if duplicate:
                raise ValueError(f"Duplicate {key!r} key found in YAML.")
            seen.add(key)
        return super().construct_mapping(node, deep)
Python 3.6+ only due to f-strings
Based on pbsds's version, we can handle merge keys like this:
class UniqueKeyLoader(yaml.SafeLoader):
    """SafeLoader that raises ``ValueError`` on duplicate mapping keys.

    "<<" merge keys are skipped by the check, so a mapping may merge several
    anchors. Keys that only arrive through a merge are not compared against
    the mapping's own keys.
    """

    def construct_mapping(self, node, deep=False):
        """Check ``node`` for duplicate keys, then build it as usual.

        Args:
            node: Mapping node whose ``(key_node, value_node)`` pairs are
                inspected.
            deep: Passed through to ``construct_object`` and to the parent
                ``construct_mapping``.

        Returns:
            The result of the parent class's ``construct_mapping``.

        Raises:
            ValueError: If a non-merge key occurs more than once in ``node``.
        """
        seen = set()
        for key_node, _value_node in node.value:
            # Compare the full tag: a substring test would also skip custom
            # tags that merely contain ":merge".
            if key_node.tag == "tag:yaml.org,2002:merge":
                continue
            key = self.construct_object(key_node, deep=deep)
            try:
                duplicate = key in seen
            except TypeError:
                # Unhashable key: a set cannot hold it. Let the parent class
                # report it instead of leaking a bare TypeError from here.
                continue
            if duplicate:
                raise ValueError(f"Duplicate {key!r} key found in YAML.")
            seen.add(key)
        return super().construct_mapping(node, deep)
# other code
# Parse the document with the duplicate-key-checking loader.
yaml_dic = yaml.load(
    yaml_file,
    Loader=UniqueKeyLoader,
)
Nice! Thanks guys!
It does not seem to work when the duplicate key comes from an anchor:
build_template: &BUILD_TEMPLATE
  eks_container:
    cpu: 4
build_task:
  eks_container:
    cpu: 10
  env:
    DEPLOY_PULL_REQUEST: true
    BUILD_ARGUMENTS: "-DtrafficInspection=false --parallel --profile -x test -x sonar"
  <<: *BUILD_TEMPLATE # Duplicate key eks_container and CPU not detected
Any idea on how to support this case?
Thanks.
Why are you iterating over each Constructor class?
If the constructor is applied to BaseConstructor shouldn't the children inherit it?
And if not, then is this simply to cover every type of tag? (Why would something load using BaseConstructor?)