Last active
February 2, 2024 05:54
-
-
Save deangrant/7285df8b708beab2191abdef360f615b to your computer and use it in GitHub Desktop.
Find duplicate values in a nested JSON structure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_duplicates(data, seen=None, duplicates=None):
    """
    Find leaf values that occur more than once in a nested JSON-like structure.

    Dictionaries (their values only, keys are ignored) and lists are descended
    into; every other object is treated as a leaf value. The traversal uses an
    explicit stack instead of recursion, so arbitrarily deep nesting does not
    hit Python's recursion limit.

    Leaves are compared through set membership, i.e. by hash and equality.
    Consequently ``1``, ``1.0`` and ``True`` count as the same value, and an
    unhashable leaf (for example a ``set``) raises ``TypeError``.

    Parameters:
    data (dict, list or leaf value): The structure to be checked for duplicates.
    seen (set, optional): Values already encountered. Updated in place; a new
        set is created when omitted.
    duplicates (set, optional): Accumulator for duplicate values. Updated in
        place; a new set is created when omitted.

    Returns:
    set: The ``duplicates`` set, containing each value seen more than once.
    """
    # Create fresh accumulators per call; never share mutable defaults.
    if seen is None:
        seen = set()
    if duplicates is None:
        duplicates = set()

    # Depth-first walk. Children are pushed in reverse so that pop() yields
    # them in their original order, matching a straightforward recursive walk.
    pending = [data]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            pending.extend(reversed(list(node.values())))
        elif isinstance(node, list):
            pending.extend(reversed(node))
        elif node in seen:
            duplicates.add(node)
        else:
            seen.add(node)

    return duplicates
# Example usage: the values 1 and 2 each occur twice, at different depths.
json_data = {
    "a": 1,
    "b": 2,
    "c": {
        "d": 3,
        "e": 1,  # same value as key "a"
        "f": {
            "g": 2,  # same value as key "b"
            "h": 4,
        },
    },
}

duplicates = find_duplicates(json_data)
print(duplicates)  # Output: {1, 2}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment