Skip to content

Instantly share code, notes, and snippets.

@rvndbalaji
Created June 17, 2024 18:25
Show Gist options
  • Save rvndbalaji/be1c7df1d81cb1fe0e035ca472ca6457 to your computer and use it in GitHub Desktop.
Save rvndbalaji/be1c7df1d81cb1fe0e035ca472ca6457 to your computer and use it in GitHub Desktop.
# Given an incomplete (truncated) JSON string, extract the largest leading
# portion of it that parses as valid JSON.
# This is done by removing characters one at a time from the end and
# re-checking validity after each removal.
# By default the incoming JSON is assumed to be an object surrounded by {}.
# Set is_list=True if the incoming JSON is a list surrounded by [].
# This method is inefficient for now, since it removes a single character and
# re-validates on every step.
# TODO: Future optimization -
#   Match blocks of {} for objects and remove a whole block at a time
#   Match commas (,) for list items and remove one item at a time
def is_json(myjson, debug=False):
    """Return True if *myjson* can be parsed by ``json.loads``, else False.

    Args:
        myjson: The candidate JSON text. Values that ``json.loads`` cannot
            accept at all (for example ``None`` or an ``int``) are reported
            as "not JSON" rather than raising.
        debug: When truthy, print the parser's error for invalid input.

    Returns:
        bool: True when parsing succeeds, False otherwise.
    """
    # Function-scope import: nothing else in this file brings `json` into scope.
    import json

    try:
        json.loads(myjson)
    except (ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError (malformed text);
        # TypeError is what json.loads raises for non str/bytes/bytearray input.
        if debug:
            print(e)
        return False
    return True
def get_maximum_valid_json(json_string: str, is_list=False, debug=False):
    """Parse as much of a truncated JSON document as possible.

    If *json_string* is already valid JSON it is parsed and returned as is.
    Otherwise the first character of the stripped text is dropped (it is
    assumed, not checked, to be the opening ``{`` or ``[``), and the remainder
    is shortened one character at a time from the end. Each candidate is
    wrapped in a single pair of brackets and parsed; the first candidate that
    parses is returned.

    Limitations visible in this algorithm:
      * Only one closing bracket is appended, so nested containers that were
        left open by the truncation are trimmed away rather than closed.
      * The empty container (``{}`` / ``[]``) is never tried; when no
        non-empty candidate parses, the result is ``None``.
      * Every step re-parses the whole candidate, so the cost grows
        quadratically with the length of the input.

    Args:
        json_string: The (possibly truncated) JSON text.
        is_list: Truthy to wrap candidates in ``[...]``; otherwise candidates
            are wrapped in ``{...}``.
        debug: When truthy, print the parser error and the rejected candidate
            for every failed attempt inside the trimming loop.

    Returns:
        The decoded Python value, or ``None`` when no candidate parses. A
        "No valid json found" message is printed in the ``None`` case.
    """
    # Function-scope import: nothing else in this file brings `json` into scope.
    import json

    # Fast path: the input is already complete. Parse once and return the
    # result directly instead of validating first and parsing a second time.
    try:
        return json.loads(json_string)
    except ValueError:
        pass

    # The wrapping brackets never change, so pick them once up front.
    if is_list:
        opener, closer = '[', ']'
    else:
        opener, closer = '{', '}'

    # Remove the first character ('[' or '{'); it is re-added on each attempt.
    remainder = json_string.strip()[1:]

    # Keep removing characters from the end until a candidate parses.
    while remainder:
        candidate = opener + remainder + closer
        try:
            return json.loads(candidate)
        except ValueError as err:
            if debug:
                print(err)
                print(candidate)
        # Drop the last character for the next attempt.
        remainder = remainder[:-1]

    print("No valid json found for string - " + json_string)
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment