Created
August 17, 2023 02:47
-
-
Save turnercore/37dfea8fe7b1c85bc59f9d9a5a798bca to your computer and use it in GitHub Desktop.
Parser for turning bad json into valid json, or PJTJP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def convert_to_json_like(input_str):
    """Rewrite loosely formatted, JSON-like text as strictly quoted JSON text.

    Every quote character is dropped, whitespace runs are condensed to a
    single space, and each run of text between structural characters
    (``{ } [ ] : ,``) is wrapped in double quotes.  A comma that directly
    precedes ``}`` or ``]`` is removed.

    Consequences of this approach that callers should be aware of:

    * Every key and scalar comes out as a string (``30`` -> ``"30"``,
      ``true`` -> ``"true"``).
    * Quotes are stripped before scanning, so a structural character inside
      a quoted value still splits that value (e.g. the ``:`` in a URL).
    * Backslashes are passed through untouched, so escape sequences already
      present in the input are preserved as written.

    Args:
        input_str: The text to convert.

    Returns:
        The converted text; an empty string for empty or blank input.

    Example:
        >>> convert_to_json_like("{name: John Doe, tags: [a, b,],}")
        '{"name":"John Doe","tags":["a","b"]}'
    """
    structural = frozenset('{}[]:,')

    # Quotes are discarded wholesale so unbalanced or mixed quoting in the
    # input cannot derail the scan; they are re-added around each token.
    text = re.sub(r'[\'"]', '', input_str)
    text = re.sub(r'\s+', ' ', text).strip()

    pieces = []
    # The capture group makes re.split keep the structural characters in the
    # result, interleaved with the raw text found between them.
    for chunk in re.split(r'([{}\[\]:,])', text):
        if chunk in structural:
            pieces.append(chunk)
            continue
        # Only the padding around a token is trimmed: interior spaces and
        # punctuation (signs, dots, '@', ...) belong to the value and must
        # stay inside the quotes to keep it intact and the output valid.
        token = chunk.strip()
        if token:
            pieces.append('"' + token + '"')

    result = ''.join(pieces)
    # Whitespace only survives inside quoted tokens, so a trailing comma is
    # always immediately followed by its closing bracket.
    return re.sub(r',([}\]])', r'\1', result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment