Skip to content

Instantly share code, notes, and snippets.

@ishaquehassan
Last active August 4, 2023 11:34
Show Gist options
  • Save ishaquehassan/476aa67be10070091eb97677f5edf17e to your computer and use it in GitHub Desktop.
Save ishaquehassan/476aa67be10070091eb97677f5edf17e to your computer and use it in GitHub Desktop.
HTML to JSON converter
import html_to_json
def get_recursively(search_dict, field):
    """
    Collect every value stored under ``field`` in a nested structure.

    Walks ``search_dict`` and any dicts nested inside it, either directly
    as values or as items of list values, and gathers the value of each
    key equal to ``field``.

    Args:
        search_dict: Dict to search; its values may be dicts or lists.
        field: Key to look for.

    Returns:
        List of the matching values in the order they are encountered.
        A value stored under ``field`` is appended as-is and is not
        searched any further, even if it is itself a dict or list.
    """
    fields_found = []
    for key, value in search_dict.items():
        if key == field:
            fields_found.append(value)
        elif isinstance(value, dict):
            fields_found.extend(get_recursively(value, field))
        elif isinstance(value, list):
            # Only dict items are searched; other list items (including
            # lists nested directly inside this list) are skipped.
            for item in value:
                if isinstance(item, dict):
                    fields_found.extend(get_recursively(item, field))
    return fields_found
def html_str_to_json(html: str):
    """
    Convert an HTML string and return its ``_value`` entries as text.

    The string is passed to ``html_to_json.convert``; the resulting
    structure is searched with ``get_recursively`` for every ``"_value"``
    key, and the values found are joined with newlines into one string.
    """
    converted = html_to_json.convert(html)
    # NOTE(review): presumably "_value" holds each element's text content;
    # confirm against the html_to_json documentation.
    values = get_recursively(converted, "_value")
    return '\n'.join(values)
# Demo: run html_str_to_json on a small HTML snippet and print the result.
sample_html = """
<head>
<title>Floyd Hightower's Projects</title>
<p>Floyd Hightower's Projects <span>HELLO</span></p>
<meta charset="UTF-8">
<meta name="description" content="Floyd Hightower&#39;s Projects">
<meta name="keywords" content="projects,fhightower,Floyd,Hightower">
</head>
"""
str_out = html_str_to_json(sample_html)
print(str_out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment