Skip to content

Instantly share code, notes, and snippets.

@turnercore
Created August 17, 2023 02:47
Show Gist options
  • Save turnercore/37dfea8fe7b1c85bc59f9d9a5a798bca to your computer and use it in GitHub Desktop.
Save turnercore/37dfea8fe7b1c85bc59f9d9a5a798bca to your computer and use it in GitHub Desktop.
Parser for turning bad json into valid json, or PJTJP
import re
def convert_to_json_like(input_str: str) -> str:
    """Rewrite loosely formatted, JSON-looking text into strict JSON syntax.

    The conversion is purely textual and works in three passes:

    1. Every single quote, double quote and whitespace character is
       removed, so existing quoting is discarded and whitespace *inside*
       a value is lost as well (``John Doe`` becomes ``JohnDoe``).
    2. Each maximal run of characters that are not one of the structural
       characters ``{ } [ ] : ,`` is wrapped in double quotes.  All keys
       and all scalar values (numbers, ``true``, ``null``, ...) therefore
       come out as JSON strings.
    3. Commas that sit directly before a closing ``}`` or ``]`` are
       dropped.

    Limitations: colons and commas are always treated as structure, so a
    value containing them (for example a URL) is split apart, and
    backslashes are passed through without escaping.

    Args:
        input_str: The malformed JSON-like text.

    Returns:
        The rewritten text.  It is not validated; structurally broken
        input (e.g. unbalanced brackets) yields equally broken output.

    >>> convert_to_json_like("{name: 'John', temps: [-1.5, 20,],}")
    '{"name":"John","temps":["-1.5","20"]}'
    """
    # Quotes and whitespace never reach the output, so strip both at once.
    stripped = re.sub(r'[\'"\s]+', '', input_str)

    # Quote whole tokens rather than only their alphanumeric core, so that
    # leading/trailing symbols stay inside the string ("-5", not -"5").
    quoted = re.sub(r'[^{}\[\]:,]+', r'"\g<0>"', stripped)

    # Tokens cannot contain commas or brackets, so this only ever touches
    # structural commas; ",+" also collapses repeated trailing commas.
    return re.sub(r',+(?=[}\]])', '', quoted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment