Last active
August 4, 2023 11:34
-
-
Save ishaquehassan/476aa67be10070091eb97677f5edf17e to your computer and use it in GitHub Desktop.
html to json converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html_to_json | |
def _collect_from_list(items, field):
    """Gather matches for *field* from every dict reachable through *items*.

    Lists nested directly inside *items* are walked as well, so a dict is
    found no matter how many list levels wrap it. Elements that are neither
    dicts nor lists are ignored.
    """
    found = []
    for item in items:
        if isinstance(item, dict):
            found.extend(get_recursively(item, field))
        elif isinstance(item, list):
            found.extend(_collect_from_list(item, field))
    return found


def get_recursively(search_dict, field):
    """Return every value stored under the key *field* in a nested structure.

    Takes a dict with nested lists and dicts, and searches all dicts for a
    key equal to the field provided.

    Args:
        search_dict: The dict to search. Its values may themselves be dicts
            or lists (of dicts and/or further lists) to any depth.
        field: The key to look for.

    Returns:
        A list of the matching values, in the order they are encountered
        while iterating the dicts. Empty when nothing matches.
    """
    fields_found = []
    for key, value in search_dict.items():
        if key == field:
            # A matching value is returned as-is; it is not searched further,
            # even if it is itself a dict or list.
            fields_found.append(value)
        elif isinstance(value, dict):
            fields_found.extend(get_recursively(value, field))
        elif isinstance(value, list):
            fields_found.extend(_collect_from_list(value, field))
    return fields_found
def html_str_to_json(html: str):
    """Convert an HTML string and return its ``"_value"`` entries as text.

    The markup is passed to ``html_to_json.convert``; every value stored
    under a ``"_value"`` key anywhere in the converted structure is then
    collected with ``get_recursively`` and joined with newlines.

    Note that, despite the function's name, the result is a single
    newline-separated string rather than a JSON document.
    """
    converted = html_to_json.convert(html)
    values = get_recursively(converted, "_value")
    return '\n'.join(values)
# Demo: run the converter on a small HTML snippet and print the result.
sample_html = """
<head>
<title>Floyd Hightower's Projects</title>
<p>Floyd Hightower's Projects <span>HELLO</span></p>
<meta charset="UTF-8">
<meta name="description" content="Floyd Hightower's Projects">
<meta name="keywords" content="projects,fhightower,Floyd,Hightower">
</head>
"""
str_out = html_str_to_json(sample_html)
print(str_out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment