Skip to content

Instantly share code, notes, and snippets.

@AlmightyOatmeal
Last active February 1, 2024 20:01
Show Gist options
  • Save AlmightyOatmeal/eadde0393f3a04c6bc7a618f93ca240e to your computer and use it in GitHub Desktop.
Save AlmightyOatmeal/eadde0393f3a04c6bc7a618f93ca240e to your computer and use it in GitHub Desktop.
Pretty-print a dictionary/list structure in JSON while having the option to use a custom JSON encoder to help with special objects that don't support serialization, such as converting 'datetime' objects to an ISO-formatted string.
import datetime
import decimal
import json
import logging
import re
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also handles dates/times, Decimals and generic iterables."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for an object ``json`` cannot encode natively.

        Conversions, in order:
        - ``datetime.datetime`` / ``datetime.date`` / ``datetime.time`` -> ``obj.isoformat()`` string.
        - ``decimal.Decimal`` -> ``int`` or ``float`` (see ``_decimal_to_number``).
        - Any other iterable (set, generator, ...) -> ``list``.
        - Everything else is handed to ``json.JSONEncoder.default()``, which raises ``TypeError``.

        :param obj: Object to serialize.
        :type obj: object
        :return: A value the JSON encoder knows how to serialize.
        :rtype: str, int, float or list
        :raises TypeError: If the object is not one of the supported types and is not iterable.
        """
        if isinstance(obj, (datetime.datetime, datetime.time, datetime.date)):
            return obj.isoformat()

        if isinstance(obj, decimal.Decimal):
            return self._decimal_to_number(obj)

        # Only the iter() probe is guarded so a TypeError raised elsewhere is not
        # mistaken for "not iterable".
        try:
            iterable = iter(obj)
        except TypeError:
            return super().default(obj)
        return list(iterable)

    @staticmethod
    def _decimal_to_number(value):
        """Convert a Decimal to ``float`` if it has a fractional notation, otherwise to ``int``.

        A Decimal written with a decimal point (``Decimal('2.0')``) stays a float, a plain
        integer (``Decimal('7')``) becomes an int. Exponent notation is handled explicitly
        because ``str(Decimal('1E+2'))`` contains no ``'.'`` yet cannot be parsed by ``int()``.

        :param value: Decimal to convert.
        :type value: decimal.Decimal
        :return: Native numeric equivalent.
        :rtype: int or float
        """
        # The is_finite() check must come first: for NaN/Infinity the tuple exponent
        # is a string and cannot be compared with 0.
        if not value.is_finite() or '.' in str(value) or value.as_tuple().exponent < 0:
            return float(value)
        return int(value)
def json_string_hook(obj):
    """JSON deserializer hook that stringifies scalar values to smooth over inconsistent data.

    Rules applied to every key/value pair:
    - A value is left untouched when its key contains 'date' (case-insensitive).
    - A value is left untouched when it exposes ``__iter__`` (strings, lists, dicts, ...).
    - Every other value (numbers, booleans, ``None``, ...) is replaced by ``str(value)``.

    :param obj: Mapping, or iterable of key/value pairs, as handed over by ``json.loads()``.
    :type obj: dict
    :return: New dictionary with the conversions applied.
    :rtype: dict
    """
    converted = {}
    for key, value in dict(obj).items():
        keep_as_is = 'date' in str(key).lower() or hasattr(value, '__iter__')
        converted[key] = value if keep_as_is else str(value)
    return converted
def json_pretty(data, encoder=CustomJSONEncoder):
    """Serialize a Python structure to a human-readable JSON string.

    The output has keys sorted, a 4-space indent, ``', '``-free item separators
    (``','`` between items, ``': '`` between key and value) and non-ASCII characters escaped.

    :param data: Python object to serialize, e.g. dict, list, set, tuple.
    :type data: dict, list, set, array, tuple
    :param encoder: (optional) ``json.JSONEncoder`` subclass used for objects ``json`` cannot
                    serialize natively. (default: CustomJSONEncoder)
    :type encoder: json.JSONEncoder
    :return: Pretty-printed JSON string.
    :rtype: str
    """
    dump_options = {
        'sort_keys': True,
        'indent': 4,
        'separators': (',', ': '),
        'ensure_ascii': True,
        'cls': encoder,
    }
    return json.dumps(data, **dump_options)
def json_min(data, encoder=CustomJSONEncoder):
    """Serialize a Python structure to the most compact JSON string.

    No whitespace is emitted after the item or key/value separators.

    :param data: Python object to serialize, e.g. dict, list, set, tuple.
    :type data: dict, list, set, array, tuple
    :param encoder: (optional) ``json.JSONEncoder`` subclass used for objects ``json`` cannot
                    serialize natively. (default: CustomJSONEncoder)
    :type encoder: json.JSONEncoder
    :return: Minified JSON string.
    :rtype: str
    """
    compact_separators = (',', ':')
    return json.dumps(data, cls=encoder, separators=compact_separators)
# Lookup data lives at module level so it is built once instead of on every call.
# Keyed by the last non-whitespace, non-quote character seen outside a string; the value
# is the set of characters that may follow a quote which genuinely closes a string.
_EXPECTED_AFTER_QUOTE = {
    "[": frozenset(",]"),
    "]": frozenset("[,"),
    "{": frozenset(":"),
    "}": frozenset(",{]"),
    ":": frozenset(",}"),
    ",": frozenset(":{}[]"),
}
_NOTHING_EXPECTED = frozenset()
_NONWHITE_RE = re.compile(r'\S')


def fix_broken_json(input_str):
    """Escape stray double quotes that appear inside JSON string values.

    The input is scanned character by character. While inside a quoted string, an unescaped
    double quote is treated as the end of the string only if the next non-whitespace
    character is one that may follow a string in the current context (or if the input ends);
    otherwise a backslash is inserted in front of it.

    :param input_str: Broken JSON string.
    :type input_str: str
    :return: JSON string with the stray quotes escaped.
    :rtype: str
    """
    double_quote = '"'
    backslash = '\\'

    pieces = []
    in_string = False
    # True when the previous character was a backslash that itself was not escaped,
    # so an escaped backslash followed by a quote still ends the string.
    escaped = False
    prev_nonwhite_nonquote = None

    for char_pos, char in enumerate(input_str):
        if char == double_quote and not escaped:
            if in_string:
                following = _NONWHITE_RE.search(input_str, pos=char_pos + 1)
                expected = _EXPECTED_AFTER_QUOTE.get(prev_nonwhite_nonquote, _NOTHING_EXPECTED)
                # Nothing but whitespace after the quote: it can only be a closing quote.
                if following is None or following.group() in expected:
                    in_string = False
                else:
                    pieces.append(backslash)
            else:
                in_string = True
        elif not in_string and char.strip():
            # Remember the structural context for the next closing-quote decision.
            prev_nonwhite_nonquote = char

        pieces.append(char)
        escaped = char == backslash and not escaped

    return ''.join(pieces)
_logger = logging.getLogger(__name__)


def _parse_json_value(key, value):
    """Return ``value`` parsed as JSON when it looks like JSON, otherwise ``value`` unchanged."""
    # Only strings containing a curly brace are considered JSON candidates.
    if not isinstance(value, str) or '{' not in value:
        return value

    # Deliberately best-effort: any failure falls through to the next strategy.
    try:
        return json.loads(value)
    except Exception:
        # The JSON might just have unescaped quotes, so retry after repairing it.
        try:
            return json.loads(fix_broken_json(value))
        except Exception:
            _logger.debug('Unable to fix broken json key=%s, value=%s', key, value)
            # If the JSON can't be fixed then keep the original value.
            return value


def sub_json_parser(obj):
    """Try to parse JSON-encoded string values of a dictionary or a list of dictionaries.

    NOTE: Dictionary values are only inspected at the root level of each dictionary (nested
    dictionaries are not descended into); this is meant for things such as JSON stored in a
    database table. Lists, sets and tuples are walked element by element.

    :param obj: Dictionary or list of dictionaries.
    :type obj: dict or list
    :return: New dict, new list (also for set/tuple input), or ``obj`` itself for any other type.
    :rtype: dict or list
    """
    if isinstance(obj, (list, set, tuple)):
        return [sub_json_parser(item) for item in obj]

    if not isinstance(obj, dict):
        return obj

    return {key: _parse_json_value(key, value) for key, value in obj.items()}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment