Skip to content

Instantly share code, notes, and snippets.

@vadimkantorov
Last active July 5, 2024 16:56
Show Gist options
  • Save vadimkantorov/b26eda3645edb13feaa62b874a3e7f6f to your computer and use it in GitHub Desktop.
Save vadimkantorov/b26eda3645edb13feaa62b874a3e7f6f to your computer and use it in GitHub Desktop.
Simple string-valued parser for YAML supporting only strings, dicts and lists
# supports only strings, dicts and lists
# does not support multiline strings as the first list-item key `- run: |`
# does not preserve whitespaces in " |" literal string blocks as described in : https://docs.ansible.com/ansible/latest/reference_appendices/YAMLSyntax.html
def yaml_loads(content, convert_bool = True, convert_int = True, convert_dict = True): # from https://gist.github.com/vadimkantorov/b26eda3645edb13feaa62b874a3e7f6f
    """Parse a small, indentation-based subset of YAML into dicts, lists and scalars.

    Supported constructs: ``key: value`` mappings, nested mappings, ``- item``
    lists, one-line ``{ k: v, ... }`` inline mappings, full-line ``#`` comments,
    trailing ``#`` comments on unquoted values, and ``>`` / ``|`` block scalars
    (their lines are stripped and joined with a newline, not folded).

    Args:
        content: YAML text.
        convert_bool: turn unquoted ``true`` / ``false`` (any case) into ``bool``.
        convert_int: turn unquoted values made only of digits into ``int``.
        convert_dict: parse unquoted ``{ ... }`` values into ``dict``.

    Returns:
        A dict holding the parsed document; values that are not converted stay ``str``.
    """

    def read_until(tail, chars):
        # Split `tail` at the first character that occurs in `chars`; the
        # separator itself is dropped. Without a separator the rest is empty.
        for i, c in enumerate(tail):
            if c in chars:
                return tail[:i], tail[i + 1:]
        return tail, ''

    def procval(val):
        # Convert one scalar (or inline mapping) according to the convert_* flags.
        val = val.strip()
        is_quoted_string = len(val) >= 2 and ((val[0] == val[-1] == '"') or (val[0] == val[-1] == "'"))
        if is_quoted_string:
            # Quoted strings are returned verbatim, without any conversion.
            return val[1:-1]

        # Everything after the first '#' of an unquoted value is a comment.
        val = val.split('#', maxsplit = 1)[0].strip()
        if convert_int and val.isdigit():
            return int(val)
        # Each conversion is controlled by its own flag (bool used to depend on convert_int).
        if convert_bool and val.lower() in ('true', 'false'):
            return val.lower() == 'true'
        if convert_dict and len(val) >= 2 and val[0] == '{' and val[-1] == '}':
            res = {}
            # Drop both braces up front so that '{}' and a trailing ',' do not
            # produce a bogus '}' key.
            tail = val[1:-1]
            while tail.strip():
                key, tail = read_until(tail, ':')
                item, tail = read_until(tail, ',')
                res[key.strip()] = procval(item)
            return res
        return val

    lines = content.strip().splitlines()

    res = {}
    keyprev = ''            # key seen on the previous line; None after a list item that opened a mapping
    indentprev = 0          # (possibly shifted) indentation of the previous line
    dictprev = {}           # most recently created empty mapping / list-item mapping
    is_multiline = False    # True while collecting the lines of a block scalar
    # indentation -> (container, key) such that container[key] is the dict or
    # list that lines at this indentation are added to.
    stack = {0: ({None: res}, None)}
    begin_multiline_indent = 0

    for line in lines:
        line_lstrip = line.lstrip()
        line_strip = line.strip()
        indent = len(line) - len(line_lstrip)
        splitted_colon = line.split(':', maxsplit = 1)
        key, val = (splitted_colon[0].strip(), splitted_colon[1].strip()) if len(splitted_colon) > 1 else ('', line_strip)
        is_list_item = line_lstrip.startswith('- ') or line_lstrip.rstrip() == '-'
        list_val = line_strip.split('-', maxsplit = 1)[-1].lstrip() if is_list_item else ''
        is_comment = not line_strip or line_lstrip.startswith('#')
        is_dedent = indent < indentprev
        begin_multiline = val in ['>', '|', '|>']
        is_record = len(list_val) >= 2 and list_val[0] == '{' and list_val[-1] == '}'

        # A line indented less than the block scalar's text ends the block.
        if is_multiline and begin_multiline_indent and indent < begin_multiline_indent:
            is_multiline = False
            begin_multiline_indent = 0

        if not is_multiline:
            # A list written at the same indentation as its parent key is
            # treated as if it were indented two more columns.
            if is_list_item and indent in stack and isinstance(stack[indent][0][stack[indent][1]], dict):
                indent += 2
            if indent not in stack:
                # First line of a deeper level: attach it either under the
                # previous key or, after a list-item mapping, to that mapping.
                stack[indent] = (stack[indentprev][0][stack[indentprev][1]], keyprev) if keyprev is not None else ({None: dictprev}, None)
            curdict, curkey = stack[indent]

        if is_comment:
            continue
        elif is_list_item:
            # The parent key was first stored as an empty dict; turn it into a list.
            curdict[curkey] = curdict[curkey] or []
            if list_val and (not key) or is_record:
                curdict[curkey].append(procval(list_val))
            else:
                # `- key: value` opens a mapping; only the text after the colon
                # is the value (list_val would also contain the key).
                dictprev = {key.removeprefix('-').strip() : procval(val)} if list_val else {}
                curdict[curkey].append(dictprev)
                key = None
        elif begin_multiline:
            curdict[curkey][key] = ''
            curdict, curkey = curdict[curkey], key
            is_multiline = True
        elif is_multiline:
            # Append the whole stripped line so text containing ':' is not truncated.
            curdict[curkey] += ('\n' + line_strip) if curdict[curkey] else line_strip
            begin_multiline_indent = min(indent, begin_multiline_indent) if begin_multiline_indent else indent
        elif key and not val:
            curdict[curkey][key] = dictprev = {}
        else:
            curdict[curkey][key] = procval(val)

        if is_dedent:
            # Forget levels deeper than the current line.
            stack = {i : v for i, v in stack.items() if i <= indent}

        indentprev = indent
        keyprev = key

    return res
if __name__ == '__main__':
    import json

    # Demo: parse a Jekyll site configuration and print it as JSON.
    # https://github.com/jekyll/minima/blob/demo-site/_config.yml
    # Nested keys, block-scalar text and list items must be indented:
    # yaml_loads derives the structure from each line's leading whitespace.
    yaml_str = """
title: Your awesome title
author:
  name: GitHub User
  email: your-email@domain.com
description: >
  Write an awesome description for your new site here. You can edit this
  line in _config.yml. It will appear in your document head meta (for
  Google search results) and in your feed.xml site description.
minima:
  date_format: "%b %-d, %Y"
  social_links:
    - { platform: devto, user_url: "https://dev.to/jekyll" }
    - { platform: dribbble, user_url: "https://dribbble.com/jekyll" }
    - { platform: facebook, user_url: "https://www.facebook.com/jekyll" }
    - { platform: flickr, user_url: "https://www.flickr.com/photos/jekyll" }
    - { platform: github, user_url: "https://github.com/jekyll/minima" }
    - { platform: google_scholar, user_url: "https://scholar.google.com/citations?user=qc6CJjYAAAAJ" }
    - { platform: instagram, user_url: "https://www.instagram.com/jekyll" }
    - { platform: keybase, user_url: "https://keybase.io/jekyll" }
    - { platform: linkedin, user_url: "https://www.linkedin.com/in/jekyll" }
    - { platform: microdotblog, user_url: "https://micro.blog/jekyll" }
    - { platform: pinterest, user_url: "https://www.pinterest.com/jekyll" }
    - { platform: stackoverflow, user_url: "https://stackoverflow.com/users/1234567/jekyll" }
    - { platform: telegram, user_url: "https://t.me/jekyll" }
    - { platform: twitter, user_url: "https://twitter.com/jekyllrb" }
    - { platform: youtube, user_url: "https://www.youtube.com/jekyll" }
    - { platform: rss, user_url: "https://jekyll.github.io/minima/feed.xml" }
show_excerpts: false # set to true to show excerpts on the homepage
# Build settings
baseurl: /minima
remote_theme: jekyll/minima
plugins:
  - jekyll-feed
  - jekyll-remote-theme
  - jekyll-seo-tag
"""
    yaml_dict = yaml_loads(yaml_str)
    print(json.dumps(yaml_dict, indent = 2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment