-
-
Save pypt/94d747fe5180851196eb to your computer and use it in GitHub Desktop.
import yaml | |
from yaml.constructor import ConstructorError | |
try: | |
from yaml import CLoader as Loader | |
except ImportError: | |
from yaml import Loader | |
def no_duplicates_constructor(loader, node, deep=False): | |
"""Check for duplicate keys.""" | |
mapping = {} | |
for key_node, value_node in node.value: | |
key = loader.construct_object(key_node, deep=deep) | |
value = loader.construct_object(value_node, deep=deep) | |
if key in mapping: | |
raise ConstructorError("while constructing a mapping", node.start_mark, | |
"found duplicate key (%s)" % key, key_node.start_mark) | |
mapping[key] = value | |
return loader.construct_mapping(node, deep) | |
yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, no_duplicates_constructor) | |
# Works fine (no duplicate keys) | |
yaml_data = yaml.load(''' | |
--- | |
foo: bar | |
baz: qux | |
''' | |
) | |
# Works fine (no duplicate keys on the same level) | |
yaml_data = yaml.load(''' | |
--- | |
foo: | |
bar: baz | |
baz: qux | |
bar: | |
bar: baz | |
baz: qux | |
''' | |
) | |
# Raises exception (has duplicate keys) | |
yaml_data = yaml.load(''' | |
--- | |
foo: bar | |
foo: qux | |
''' | |
) |
Thanks, great gist. For my limited requirements (a single call to yaml.load()
), it worked perfectly.
@ngaya-II, thanks, worked pretty cool for me, just've added
yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, UniqueKeyLoader.construct_mapping)
below your code and imported the module
Thanks. Cool. Merged your solution with a constructor for OrderedDict rather than regular dict.
This minimal version seems to work:
import yaml
# special loader with duplicate key checking
class UniqueKeyLoader(yaml.SafeLoader):
def construct_mapping(self, node, deep=False):
mapping = []
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
assert key not in mapping
mapping.append(key)
return super().construct_mapping(node, deep)
This minimal version seems to work:
import yaml # special loader with duplicate key checking class UniqueKeyLoader(yaml.SafeLoader): def construct_mapping(self, node, deep=False): mapping = [] for key_node, value_node in node.value: key = self.construct_object(key_node, deep=deep) assert key not in mapping mapping.append(key) return super().construct_mapping(node, deep)
This works like a charm with the yaml.load()
when defined as the loader. Great work!
This minimal version seems to work:
import yaml # special loader with duplicate key checking class UniqueKeyLoader(yaml.SafeLoader): def construct_mapping(self, node, deep=False): mapping = [] for key_node, value_node in node.value: key = self.construct_object(key_node, deep=deep) assert key not in mapping mapping.append(key) return super().construct_mapping(node, deep)
This works nicely, though I'd use a ValueError to be clearer about what's wrong and avoid the check being optimized out.
Minor optimization of ^ using sets instead of lists, that throws a ValueError
instead of an AssertionError
class UniqueKeyLoader(yaml.SafeLoader):
def construct_mapping(self, node, deep=False):
mapping = set()
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if key in mapping:
raise ValueError(f"Duplicate {key!r} key found in YAML.")
mapping.add(key)
return super().construct_mapping(node, deep)
Python 3.6+ only due to f-strings
based on pbsds,we can deal merge keys like this
class UniqueKeyLoader(yaml.SafeLoader):
def construct_mapping(self, node, deep=False):
mapping = set()
for key_node, value_node in node.value:
if ':merge' in key_node.tag:
continue
key = self.construct_object(key_node, deep=deep)
if key in mapping:
raise ValueError(f"Duplicate {key!r} key found in YAML.")
mapping.add(key)
return super().construct_mapping(node, deep)
# other code
yaml_dic=yaml.load(yaml_file,Loader=UniqueKeyLoader)
Nice! Thanks guys!
It does not seem to work when the duplicate key comes from an anchor:
build_template: &BUILD_TEMPLATE
eks_container:
cpu: 4
build_task:
eks_container:
cpu: 10
env:
DEPLOY_PULL_REQUEST: true
BUILD_ARGUMENTS: "-DtrafficInspection=false --parallel --profile -x test -x sonar"
<<: *BUILD_TEMPLATE # Duplicate key eks_container and CPU not detected
Any idea on how to support this case?
Thanks.
Following @jzohrab's approach, here is a standalone loader class that checks uniqueness for all mappings. This overrides
BaseConstructor.construct_mapping()
to add a check for duplicate keys.