Last active
January 13, 2024 09:32
-
-
Save dsoprea/fd553515aa0abc69c1d72a0ce3c50d71 to your computer and use it in GitHub Desktop.
Assert Uniqueness In YAML dictionaries using PyYAML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import yaml | |
def load_and_assert_uniqueness(x):
    """Load YAML from *x* with ``yaml.SafeLoader`` semantics, raising if any
    mapping-valued entry was silently dropped because of a duplicate key.

    PyYAML quietly keeps only the last value when a mapping repeats a key.
    We detect the dangerous common case -- duplicate keys whose values are
    themselves mappings -- by indexing every mapping (by object identity) as
    it is constructed, then checking which ones are unreachable from the
    final document: those were clobbered by a later duplicate.

    :param x: YAML text or stream, anything accepted by ``yaml.load()``.
    :returns: The loaded document.
    :raises ValueError: If one or more mapping nodes were orphaned by
        duplicate keys. The orphaned nodes are included in the message so
        the caller can search them out in the input.
    """
    # PyYAML constructs depth-first and never exposes the parent while a
    # child is being built, so clobbering can't be detected at construction
    # time. Instead, record every mapping here by object identity and later
    # prune the ones still reachable from the final hierarchy. The loader
    # class is inlined so its constructor can close over this index.
    nodes_by_id = {}

    class _UniqueCheckedLoader(yaml.SafeLoader):
        def construct_yaml_map(self, node):
            data = {}
            nodes_by_id[id(data)] = data
            # Mirror SafeLoader's two-step (generator) construction so that
            # anchored/recursive structures still resolve correctly.
            yield data
            data.update(self.construct_mapping(node))

    _UniqueCheckedLoader.add_constructor(
        'tag:yaml.org,2002:map',
        _UniqueCheckedLoader.construct_yaml_map)

    # Load
    blob = yaml.load(x, Loader=_UniqueCheckedLoader)

    # Walk the final document and drop every reachable mapping from the
    # index. Mappings can be nested under other mappings *or* sequences, so
    # both container types are traversed. The seen-set guards against
    # aliases (the same object reachable twice) and self-referential
    # structures; pop(..., None) tolerates an already-pruned node.
    stack = [blob]
    seen = set()
    while stack:
        current = stack.pop()
        if id(current) in seen:
            continue
        if type(current) is dict:
            seen.add(id(current))
            nodes_by_id.pop(id(current), None)
            stack.extend(current.values())
        elif type(current) is list:
            seen.add(id(current))
            stack.extend(current)

    # We've visited all referenced nodes. Everything still indexed must have
    # been pruned due to non-uniqueness. As mentioned above, we have no
    # hierarchical context, but the caller can search for the attributes of
    # the reported node(s) in the input to find the duplicates.
    if nodes_by_id:
        orphans = list(nodes_by_id.values())
        raise ValueError(
            "({}) nodes were duplicates:\n{}".format(len(orphans), orphans))

    return blob
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment