mjdietzx/vis_rekognition_gfs_overlapping_indices.py

## vis_rekognition_gfs_overlapping_indices.py
import collections
import json


def duplicates(path='duplicated_index_bug.json'):
    """Report which tracked-person `Index` values share identical `PersonMatch` data.

    Organizes a raw Rekognition `boto3.client('rekognition').get_face_search()`
    response for debugging the duplicated-index issue. Every pair of indices that
    has at least one `PersonMatch` in common (compared with `Index` removed) is
    printed as `k _k len(k) len(_k) len(overlap) overlap/len(k)`, and the final
    mapping is printed at the end.

    Args:
        path: JSON file holding the list of `PersonMatch` objects. Defaults to a
            local copy of
            https://s3.us-east-2.amazonaws.com/brayniac-waya-ai/duplicated_index_bug.json

    Returns:
        A `defaultdict(set)` mapping an `Index` to the later indices it overlaps
        with. An index already recorded as overlapping an earlier one never
        becomes a key itself.

    Raises:
        OSError: if `path` cannot be opened.
        ValueError: if the file does not contain valid JSON.
        KeyError: if a record has no `Person` or no `Person.Index` entry.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # list of all `PersonMatch` objects returned by Rekognition
        person_match_objects = json.load(f)

    # `indices` maps `Index` of `Person` tracked throughout video, to a set of their `PersonMatch` objects
    indices = collections.defaultdict(set)
    for person_match_object in person_match_objects:
        # `Index` popped off b/c this is the only thing that differs
        index = person_match_object['Person'].pop('Index')

        if person_match_object['Person'].get('Face'):
            # sets need hashable members, so each object is stored as JSON text; `sort_keys`
            # makes that text canonical so a different key order cannot hide a duplicate
            indices[index].add(json.dumps(person_match_object, sort_keys=True))

    # `merged` maps `Index` to the other indices it overlaps with
    merged = collections.defaultdict(set)
    # every `Index` that already sits in some `merged` value; these are not examined again as `k`
    absorbed = set()

    #
    # just see how much overlap we actually have b/w indexes
    #

    entries = list(indices.items())
    for position, (k, matches) in enumerate(entries):
        if k in absorbed:
            continue

        # only later entries are visited, so each combo is looked at once
        for _k, other_matches in entries[position + 1:]:
            overlap = matches & other_matches
            if overlap:  # any overlap at all b/w `Index`: print this info and add to `merged`
                print(k, _k, len(matches), len(other_matches), len(overlap), len(overlap) / len(matches))
                merged[k].add(_k)
                absorbed.add(_k)

    print(merged)
    return merged
	import collections
	import json


	def duplicates(path='duplicated_index_bug.json'):
	    """Report which tracked-person `Index` values share identical `PersonMatch` data.

	    Organizes a raw Rekognition `boto3.client('rekognition').get_face_search()`
	    response for debugging the duplicated-index issue. Every pair of indices that
	    has at least one `PersonMatch` in common (compared with `Index` removed) is
	    printed as `k _k len(k) len(_k) len(overlap) overlap/len(k)`, and the final
	    mapping is printed at the end.

	    Args:
	        path: JSON file holding the list of `PersonMatch` objects. Defaults to a
	            local copy of
	            https://s3.us-east-2.amazonaws.com/brayniac-waya-ai/duplicated_index_bug.json

	    Returns:
	        A `defaultdict(set)` mapping an `Index` to the later indices it overlaps
	        with. An index already recorded as overlapping an earlier one never
	        becomes a key itself.

	    Raises:
	        OSError: if `path` cannot be opened.
	        ValueError: if the file does not contain valid JSON.
	        KeyError: if a record has no `Person` or no `Person.Index` entry.
	    """
	    with open(path, 'r', encoding='utf-8') as f:
	        # list of all `PersonMatch` objects returned by Rekognition
	        person_match_objects = json.load(f)

	    # `indices` maps `Index` of `Person` tracked throughout video, to a set of their `PersonMatch` objects
	    indices = collections.defaultdict(set)
	    for person_match_object in person_match_objects:
	        # `Index` popped off b/c this is the only thing that differs
	        index = person_match_object['Person'].pop('Index')

	        if person_match_object['Person'].get('Face'):
	            # sets need hashable members, so each object is stored as JSON text; `sort_keys`
	            # makes that text canonical so a different key order cannot hide a duplicate
	            indices[index].add(json.dumps(person_match_object, sort_keys=True))

	    # `merged` maps `Index` to the other indices it overlaps with
	    merged = collections.defaultdict(set)
	    # every `Index` that already sits in some `merged` value; these are not examined again as `k`
	    absorbed = set()

	    #
	    # just see how much overlap we actually have b/w indexes
	    #

	    entries = list(indices.items())
	    for position, (k, matches) in enumerate(entries):
	        if k in absorbed:
	            continue

	        # only later entries are visited, so each combo is looked at once
	        for _k, other_matches in entries[position + 1:]:
	            overlap = matches & other_matches
	            if overlap:  # any overlap at all b/w `Index`: print this info and add to `merged`
	                print(k, _k, len(matches), len(other_matches), len(overlap), len(overlap) / len(matches))
	                merged[k].add(_k)
	                absorbed.add(_k)

	    print(merged)
	    return merged