Skip to content

Instantly share code, notes, and snippets.

@franciscomvargas
Last active October 15, 2023 18:56
Show Gist options
  • Save franciscomvargas/59977fda1182fc3607c0c2b0fa606511 to your computer and use it in GitHub Desktop.
Save franciscomvargas/59977fda1182fc3607c0c2b0fa606511 to your computer and use it in GitHub Desktop.
JSON Quote Remover

JSON Quote Remover

Description

This Python function, json_comquotes, is a handy tool for preprocessing JSON data that contains unescaped quotes within string values. It takes a JSON string as input and escapes the double and single quotes found inside the string values with a backslash, so that the JSON data can be parsed without errors.

CLI Capability

Take a look at json_esquotes

Key Features:

  • Replaces double quotes " within string values with escaped double quotes \".
  • Replaces single quotes ' within string values with escaped single quotes \'.
  • Outputs the processed JSON as a dictionary.

Usage:

  • Pass your raw JSON string as input to the json_comquotes function.
  • The function will return:
    • On success: the processed JSON dictionary;
    • On failure: raises ValueError;

The Function:

import json, re, ast

def json_comquotes(raw_json, lone_char_searches=2, debug=False):
    """Parse JSON-like text whose strings contain unescaped quotes.

    The text is first tried unchanged with ``json.loads`` and then with
    ``ast.literal_eval``. If both fail, every quote that sits between two
    whitespace/word characters is backslash-escaped and parsing is retried,
    for at most ``lone_char_searches`` passes.

    Args:
        raw_json: Text to parse.
        lone_char_searches: Maximum number of escape-and-retry passes.
            Regex matches cannot overlap, so quotes separated by a single
            character (as in "a"a"a"a") are only escaped alternately on each
            pass; a second pass picks up the ones skipped by the first.
        debug: When true, print the working text after every pass.

    Returns:
        The object produced by ``json.loads`` or ``ast.literal_eval``.

    Raises:
        ValueError: If the text still cannot be parsed after the last pass,
            or if ``lone_char_searches`` is smaller than 1 and the text is
            neither valid JSON nor a valid Python literal.
    """
    # Fast paths: the text is already valid JSON or a valid Python literal.
    try:
        return json.loads(raw_json)
    except Exception:
        pass
    try:
        return ast.literal_eval(raw_json)
    except Exception:
        pass

    if lone_char_searches < 1:
        # Without this guard the loop below never runs and None is returned.
        raise ValueError(
            f"lone_char_searches must be >= 1, got {lone_char_searches!r}"
        )

    # Glue structural quotes to their neighbouring punctuation so that the
    # patterns below (quote between whitespace/word characters) do not match
    # them, and pad doubled quotes with a space so each one can be matched.
    for quote in ("'", '"'):
        for old, new in (
            (f": {quote}", f":{quote}"),
            (f", {quote}", f",{quote}"),
            (f"{{ {quote}", f"{{{quote}"),
            (f"[ {quote}", f"[{quote}"),
            (f"{quote} }}", f"{quote}}}"),
            (f"{quote} ]", f"{quote}]"),
            (quote * 2, f"{quote} {quote}"),
        ):
            raw_json = raw_json.replace(old, new)

    # dq / sq stand for double / single quote.
    dq_pattern = re.compile(r'([\s\w])"([\s\w])')
    sq_pattern = re.compile(r"([\s\w])'([\s\w])")

    work = raw_json
    for attempt in range(lone_char_searches):
        work = dq_pattern.sub(r'\1\"\2', work)
        work = sq_pattern.sub(r"\1\'\2", work)

        if debug:
            print(f"Iteration #{attempt + 1}:", work)

        try:
            # Check that the text parses, then remove the padding space that
            # was inserted between doubled quotes and parse the final text.
            json.loads(work)
            work = (
                work.replace('\\" "', '\\""')
                .replace('\\" \\"', '\\"\\"')
                .replace("\\' '", "\\''")
                .replace("\\' \\'", "\\'\\'")
            )
            return json.loads(work)
        except Exception as json_err:
            try:
                ast.literal_eval(work)
                work = work.replace('\\" "', '\\""').replace("\\' '", "\\''")
                return ast.literal_eval(work)
            except Exception as ast_err:
                if attempt != lone_char_searches - 1:
                    continue
                raise ValueError(
                    f"Json Parse exception: {json_err}\nAst Parse exception : {ast_err}\nProcessed Json      : {work}"
                ) from ast_err

Example Usage:

if __name__ == "__main__":
    # Demo: run the parser over malformed samples and print each outcome.
    # The ids run 1-5 so the output matches the documented result.
    req_jsons = [
        '{"na"me": "Jack O"Sullivan", "id": "1"}',
        '{"name": "Jack: The "OG" O"Sullivan"", "id": "2"}',
        '{"name": "Jack: The "OG"", "surname": \'O\'Sullivan\', "id": "3"}',
        '{"test_str": {"1singlechar": "a""a""a", "2singlechars": "a"a"a"a"a"a"a"a"a"}, "id": "4"}',
        # Missing closing quote after Sullivan: expected to fail.
        "{'name': 'Jack O'Sullivan, 'id': '5'}",
    ]
    for req_json in req_jsons:
        try:
            proc_json = json_comquotes(req_json)
            print("Raw json      :", req_json)
            print("Processed json:", json.dumps(proc_json, indent=2), "\n")
        except Exception as e:
            print("Something went wrong!")
            print("Raw json:", req_json)
            print(f"{e}\n")

Result:

Raw json      : {"na"me": "Jack O"Sullivan", "id": "1"}
Processed json: {
  "na\"me": "Jack O\"Sullivan",
  "id": "1"
}

Raw json      : {"name": "Jack: The "OG" O"Sullivan"", "id": "2"}
Processed json: {
  "name": "Jack: The \"OG\" O\"Sullivan\"",
  "id": "2"
}

Raw json      : {"name": "Jack: The "OG"", "surname": 'O'Sullivan', "id": "3"}
Processed json: {
  "name": "Jack: The \"OG\"",
  "surname": "O'Sullivan",
  "id": "3"
}

Raw json      : {"test_str": {"1singlechar": "a""a""a", "2singlechars": "a"a"a"a"a"a"a"a"a"}, "id": "4"}
Processed json: {
  "test_str": {
    "1singlechar": "a\"\"a\"\"a",
    "2singlechars": "a\"a\"a\"a\"a\"a\"a\"a\"a"
  },
  "id": "4"
}

Something went wrong!
Raw json: {'name': 'Jack O'Sullivan, 'id': '5'}
Json Parse exception: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Ast Parse exception : unterminated string literal (detected at line 1) (<unknown>, line 1)
Processed Json      : {'name':'Jack O\'Sullivan,'id':'5'}

Credits

paraffin

import json, re, ast
def json_comquotes(raw_json, lone_char_searches=2, debug=False):
    """Parse JSON-like text whose strings contain unescaped quotes.

    The text is first tried unchanged with ``json.loads`` and then with
    ``ast.literal_eval``. If both fail, every quote that sits between two
    whitespace/word characters is backslash-escaped and parsing is retried,
    for at most ``lone_char_searches`` passes.

    Args:
        raw_json: Text to parse.
        lone_char_searches: Maximum number of escape-and-retry passes.
            Regex matches cannot overlap, so quotes separated by a single
            character (as in "a"a"a"a") are only escaped alternately on each
            pass; a second pass picks up the ones skipped by the first.
        debug: When true, print the working text after every pass.

    Returns:
        The object produced by ``json.loads`` or ``ast.literal_eval``.

    Raises:
        ValueError: If the text still cannot be parsed after the last pass,
            or if ``lone_char_searches`` is smaller than 1 and the text is
            neither valid JSON nor a valid Python literal.
    """
    # Fast paths: the text is already valid JSON or a valid Python literal.
    try:
        return json.loads(raw_json)
    except Exception:
        pass
    try:
        return ast.literal_eval(raw_json)
    except Exception:
        pass

    if lone_char_searches < 1:
        # Without this guard the loop below never runs and None is returned.
        raise ValueError(
            f"lone_char_searches must be >= 1, got {lone_char_searches!r}"
        )

    # Glue structural quotes to their neighbouring punctuation so that the
    # patterns below (quote between whitespace/word characters) do not match
    # them, and pad doubled quotes with a space so each one can be matched.
    for quote in ("'", '"'):
        for old, new in (
            (f": {quote}", f":{quote}"),
            (f", {quote}", f",{quote}"),
            (f"{{ {quote}", f"{{{quote}"),
            (f"[ {quote}", f"[{quote}"),
            (f"{quote} }}", f"{quote}}}"),
            (f"{quote} ]", f"{quote}]"),
            (quote * 2, f"{quote} {quote}"),
        ):
            raw_json = raw_json.replace(old, new)

    # dq / sq stand for double / single quote.
    dq_pattern = re.compile(r'([\s\w])"([\s\w])')
    sq_pattern = re.compile(r"([\s\w])'([\s\w])")

    work = raw_json
    for attempt in range(lone_char_searches):
        work = dq_pattern.sub(r'\1\"\2', work)
        work = sq_pattern.sub(r"\1\'\2", work)

        if debug:
            print(f"Iteration #{attempt + 1}:", work)

        try:
            # Check that the text parses, then remove the padding space that
            # was inserted between doubled quotes and parse the final text.
            json.loads(work)
            work = (
                work.replace('\\" "', '\\""')
                .replace('\\" \\"', '\\"\\"')
                .replace("\\' '", "\\''")
                .replace("\\' \\'", "\\'\\'")
            )
            return json.loads(work)
        except Exception as json_err:
            try:
                ast.literal_eval(work)
                work = work.replace('\\" "', '\\""').replace("\\' '", "\\''")
                return ast.literal_eval(work)
            except Exception as ast_err:
                if attempt != lone_char_searches - 1:
                    continue
                raise ValueError(
                    f"Json Parse exception: {json_err}\nAst Parse exception : {ast_err}\nProcessed Json      : {work}"
                ) from ast_err
if __name__ == "__main__":
    # Demo: run the parser over malformed samples and print each outcome.
    req_jsons = [
        '{"na"me": "Jack O"Sullivan", "id": "1"}',
        '{"name": "Jack: The "OG" O"Sullivan"", "id": "2"}',
        '{"name": "Jack: The "OG"", "surname": \'O\'Sullivan\', "id": "3"}',
        '{"test_str": {"1singlechar": "a""a""a", "2singlechars": "a"a"a"a"a"a"a"a"a"}, "id": "4"}',
        # Missing closing quote after Sullivan: expected to fail.
        "{'name': 'Jack O'Sullivan, 'id': '5'}",
    ]
    for req_json in req_jsons:
        try:
            proc_json = json_comquotes(req_json)
            print("Raw json      :", req_json)
            print("Processed json:", json.dumps(proc_json, indent=2), "\n")
        except Exception as e:
            print("Something went wrong!")
            print("Raw json:", req_json)
            print(f"{e}\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment