Last active
July 5, 2024 16:56
-
-
Save vadimkantorov/b26eda3645edb13feaa62b874a3e7f6f to your computer and use it in GitHub Desktop.
Simple string-valued parser for YAML supporting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Supports only strings, dicts and lists.
# Does not support a multiline string as the first key of a list item (`- run: |`).
# Does not preserve whitespace in "|" literal string blocks as described in: https://docs.ansible.com/ansible/latest/reference_appendices/YAMLSyntax.html
def yaml_loads(content, convert_bool = True, convert_int = True, convert_dict = True): # from https://gist.github.com/vadimkantorov/b26eda3645edb13feaa62b874a3e7f6f
    """Parse a small subset of YAML into nested dicts, lists and scalars.

    Handles block mappings, block lists, one-line flow mappings
    (``{a: b, c: d}``), quoted strings, trailing ``#`` comments on unquoted
    values, and block scalars introduced by ``>``, ``|`` or ``|>``.  Every
    line of a block scalar is stripped and the lines are joined with a
    newline; blank lines and lines starting with ``#`` are dropped.

    Args:
        content: YAML text; the document root is treated as a mapping.
        convert_bool: convert unquoted ``true``/``false`` (any case) to ``bool``.
        convert_int: convert unquoted runs of decimal digits to ``int``.
        convert_dict: convert unquoted ``{key: value, ...}`` to ``dict``.

    Returns:
        The root mapping as a ``dict``.
    """
    def read_until(tail, chars):
        # Split `tail` at the first character that is in `chars`; the
        # delimiter is dropped. Without a delimiter the whole text is the head.
        for i, c in enumerate(tail):
            if c in chars:
                return tail[:i], tail[i + 1:]
        return tail, ''

    def procval(val):
        # Convert the text of one scalar (or one-line flow mapping) to a value.
        val = val.strip()
        is_quoted_string = len(val) >= 2 and ((val[0] == val[-1] == '"') or (val[0] == val[-1] == "'"))
        if is_quoted_string:
            return val[1:-1]

        # Unquoted values may carry a trailing comment.
        val = val.split('#', maxsplit = 1)[0].strip()

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() rejects with ValueError.
        if convert_int and val.isdecimal():
            return int(val)
        # Gated on convert_bool only; the int switch must not affect booleans.
        if convert_bool and val.lower() in ('true', 'false'):
            return val.lower() == 'true'
        if convert_dict and len(val) >= 2 and val[0] == '{' and val[-1] == '}':
            record = {}
            _, tail = read_until(val, '{')
            # Stop on an empty remainder or a lone closing brace so that `{}`
            # and a trailing comma do not create a bogus '}' entry.
            while tail.strip() not in ('', '}'):
                item_key, tail = read_until(tail, ':')
                item_val, tail = read_until(tail, ',}')
                record[item_key.strip()] = procval(item_val.strip())
            return record
        return val

    res = {}
    # indent -> (container, key); the value being filled at that indent is
    # container[key]. The root is wrapped so it can be addressed the same way.
    stack = {0: ({None: res}, None)}
    keyprev = ''
    indentprev = 0
    dictprev = {}
    is_multiline = False
    begin_multiline_indent = 0

    for line in content.strip().splitlines():
        line_lstrip = line.lstrip()
        line_strip = line.strip()

        # Blank and comment-only lines are skipped before any state is touched,
        # so they can neither end a block scalar nor register an indent level.
        if not line_strip or line_lstrip.startswith('#'):
            continue

        indent = len(line) - len(line_lstrip)
        head, colon, rest = line.partition(':')
        key, val = (head.strip(), rest.strip()) if colon else ('', line_strip)
        is_list_item = line_lstrip.startswith('- ') or line_lstrip.rstrip() == '-'
        list_val = line_strip.split('-', maxsplit = 1)[-1].lstrip() if is_list_item else ''
        is_dedent = indent < indentprev
        begin_multiline = val in ('>', '|', '|>')
        is_record = len(list_val) >= 2 and list_val[0] == '{' and list_val[-1] == '}'

        # A block scalar ends at the first line indented less than its content.
        if is_multiline and begin_multiline_indent and indent < begin_multiline_indent:
            is_multiline = False
            begin_multiline_indent = 0

        if not is_multiline:
            # A list written at the same indent as its parent key is treated
            # as if it were indented by two more columns.
            if is_list_item and indent in stack and isinstance(stack[indent][0][stack[indent][1]], dict):
                indent += 2
            if indent not in stack:
                # First line at a new indent: it fills either the value of the
                # previous key, or (keyprev is None) the mapping that the
                # previous list item started.
                stack[indent] = (stack[indentprev][0][stack[indentprev][1]], keyprev) if keyprev is not None else ({None: dictprev}, None)
            curdict, curkey = stack[indent]

        if is_multiline:
            # Inside a block scalar the whole line is text, even if it
            # contains ':' or starts with '- '.
            curdict[curkey] += ('\n' + line_strip) if curdict[curkey] else line_strip
            begin_multiline_indent = min(indent, begin_multiline_indent) if begin_multiline_indent else indent
        elif is_list_item:
            # The parent key was created as an empty dict; turn it into a list
            # on the first item.
            curdict[curkey] = curdict[curkey] or []
            if (list_val and not key) or is_record:
                curdict[curkey].append(procval(list_val))
            else:
                # `- key: value` starts a mapping item; deeper lines extend it
                # through dictprev.
                dictprev = {key.removeprefix('- '): procval(val)} if list_val else {}
                curdict[curkey].append(dictprev)
                key = None
        elif begin_multiline:
            curdict[curkey][key] = ''
            curdict, curkey = curdict[curkey], key
            is_multiline = True
        elif key and not val:
            # `key:` with nothing after it opens a nested container.
            curdict[curkey][key] = dictprev = {}
        else:
            curdict[curkey][key] = procval(val)

        if is_dedent:
            # Forget deeper levels so that reused indents are re-bound.
            stack = {i: v for i, v in stack.items() if i <= indent}

        indentprev = indent
        keyprev = key

    return res
if __name__ == '__main__':
    # Demo: parse a sample Jekyll config and print the result as JSON.
    import json

    # https://github.com/jekyll/minima/blob/demo-site/_config.yml
    # NOTE(review): the nesting below was restored by hand to match the linked
    # config; without indentation the `description: >` block never ends and
    # absorbs the rest of the document. Confirm against the upstream file.
    yaml_str = """
title: Your awesome title
author:
  name: GitHub User
  email: your-email@domain.com
description: >
  Write an awesome description for your new site here. You can edit this
  line in _config.yml. It will appear in your document head meta (for
  Google search results) and in your feed.xml site description.
minima:
  date_format: "%b %-d, %Y"
  social_links:
    - { platform: devto, user_url: "https://dev.to/jekyll" }
    - { platform: dribbble, user_url: "https://dribbble.com/jekyll" }
    - { platform: facebook, user_url: "https://www.facebook.com/jekyll" }
    - { platform: flickr, user_url: "https://www.flickr.com/photos/jekyll" }
    - { platform: github, user_url: "https://github.com/jekyll/minima" }
    - { platform: google_scholar, user_url: "https://scholar.google.com/citations?user=qc6CJjYAAAAJ" }
    - { platform: instagram, user_url: "https://www.instagram.com/jekyll" }
    - { platform: keybase, user_url: "https://keybase.io/jekyll" }
    - { platform: linkedin, user_url: "https://www.linkedin.com/in/jekyll" }
    - { platform: microdotblog, user_url: "https://micro.blog/jekyll" }
    - { platform: pinterest, user_url: "https://www.pinterest.com/jekyll" }
    - { platform: stackoverflow, user_url: "https://stackoverflow.com/users/1234567/jekyll" }
    - { platform: telegram, user_url: "https://t.me/jekyll" }
    - { platform: twitter, user_url: "https://twitter.com/jekyllrb" }
    - { platform: youtube, user_url: "https://www.youtube.com/jekyll" }
    - { platform: rss, user_url: "https://jekyll.github.io/minima/feed.xml" }
show_excerpts: false # set to true to show excerpts on the homepage
# Build settings
baseurl: /minima
remote_theme: jekyll/minima
plugins:
  - jekyll-feed
  - jekyll-remote-theme
  - jekyll-seo-tag
"""
    yaml_dict = yaml_loads(yaml_str)
    print(json.dumps(yaml_dict, indent = 2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment