Created
September 11, 2022 22:49
-
-
Save apainintheneck/25f43b16219258d6be3178ec38453316 to your computer and use it in GitHub Desktop.
A program that packs yaml docs into a more greppable format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env awk -f | |
# A program that packs YAML into strings that spell out the location
# of each element, which makes YAML more greppable. Inspired by Gron,
# which does something similar with JSON (https://github.com/tomnomnom/gron).
# This currently only supports a subset of YAML. | |
# - Simple keys | |
# - (letters + dash + underscore; digits are not matched) + (optional spaces) + colon
# - [A-Za-z_-]+[ ]*: | |
# - Colon must be followed by a space | |
# - Arrays | |
# - Can start on same line or be indented | |
# - Objects | |
# - See simple keys above | |
# - Multiline strings | |
# - Plain strings and | |+ |- > >+ >- are all supported | |
# - Comments | |
# - Are ignored | |
# - Begin and end of a document | |
# - Are ignored | |
# Example | |
# ------- | |
# Homestead.yaml: | |
# --- | |
# ip: "192.168.10.10" | |
# memory: 2048 | |
# cpus: 2 | |
# provider: virtualbox | |
# | |
# authorize: ~/.ssh/id_rsa.pub | |
# | |
# keys: | |
# - ~/.ssh/id_rsa | |
# | |
# folders: | |
# - map: ~/code | |
# to: /home/vagrant/code | |
# | |
# sites: | |
# - map: homestead.test | |
# to: /home/vagrant/code/public | |
# | |
# databases: | |
# - homestead | |
# | |
# features: | |
# - mariadb: false | |
# - ohmyzsh: false | |
# - webdriver: false | |
# Output: | |
# $ yamlpack.awk Homestead.yaml | |
# ip: "192.168.10.10" | |
# memory: 2048 | |
# cpus: 2 | |
# provider: virtualbox | |
# authorize: ~/.ssh/id_rsa.pub | |
# keys:1: ~/.ssh/id_rsa | |
# folders:1:map: ~/code | |
# folders:1:to: /home/vagrant/code | |
# sites:1:map: homestead.test | |
# sites:1:to: /home/vagrant/code/public | |
# databases:1: homestead | |
# features:1:mariadb: false | |
# features:2:ohmyzsh: false | |
# features:3:webdriver: false | |
# Before any input is read, build a set whose members are the YAML
# block-scalar indicators, so later rules can test `$2 in STRING_TYPES`.
BEGIN {
    split("| |+ |- > >+ >-", types_array)
    # split() numbers its pieces from 1, so walk them in order until the
    # index runs off the end of the array.
    for (i = 1; i in types_array; i++)
        STRING_TYPES[types_array[i]]   # a bare reference creates the member
}
# Ignore blank lines and comment lines.
# NF == 0 covers empty and whitespace-only records. The comment test looks
# at the first character of the first field, so both "# comment" and
# "#comment" are skipped (comparing the whole field to "#" would only catch
# the form where "#" is followed by whitespace).
NF == 0 || $1 ~ /^#/ { next }
# Ignore start ("---") and end ("...") of document markers, and reset all
# per-document parser state so nothing from the previous document leaks
# into the next one (for example, a leftover top-level label being printed
# in front of the items of a top-level list in the following document).
$1 == "---" || $1 == "..." {
    # split("", arr) is the portable way to empty an entire awk array.
    split("", list)
    split("", label)
    # Any multiline string being collected ends with the document.
    string_type = ""
    string_indent = 0
    is_multiline_string = 0
    # The next document may use a different indent width; re-detect it.
    INDENT_SIZE = 0
    next
}
# Measure the leading-space indent of the current record. The first record
# with a non-zero indent also fixes INDENT_SIZE for the rest of the run.
{
    indent = get_indent()
    if (INDENT_SIZE == 0)
        INDENT_SIZE = indent
}
# Detect a list item ("- ..."), blank out its dash, and advance the counter
# for the list at this position.
{
    is_list_item = ($1 == "-")
    if(is_list_item) {
        # A list item starts a new value, so any multiline string that was
        # being collected is over. Clearing the flag keeps print_labels()
        # from adding a leftover string-type marker to this item, and
        # clearing string_indent lets a continuation line after this item
        # establish its own indent instead of being clamped to an old one.
        is_multiline_string = 0
        string_indent = 0
        # Replace the dash with a space so the rest of the line can be
        # handled like ordinary (deeper-indented) content.
        sub("-", " ", $0)
        # The content of the item sits two columns past the dash.
        indent += 2
        list[indent]++
    }
}
# Detect a "key:" line, record its label at the current indent, and discard
# state that belonged to earlier, deeper structures.
{
    is_key = ($0 ~ /^[ ]*[A-Za-z_-]+[ ]*:/)
    if(is_key) {
        # A key always terminates any multiline string that was being
        # collected. Without this reset, a string_indent left over from an
        # earlier string clamps the indent of a later, more deeply nested
        # one (dropping that string's own label), and a leftover
        # is_multiline_string adds a bogus string-type marker to list items
        # that follow a bare key.
        string_indent = 0
        is_multiline_string = 0
        # Normalize key by deleting any spaces between key and colon
        sub(/[ ]*:/, ":", $0)
        # Save key
        label[indent] = $1
        # Clean up old values one level below this key
        delete list[indent + 2]
        delete label[indent + 2]
        if(INDENT_SIZE) {
            delete list[indent + INDENT_SIZE + 2]
            delete label[indent + INDENT_SIZE]
            delete label[indent + INDENT_SIZE + 2]
        }
        # If key is by itself, the next line might be a plain string
        if(NF > 1) string_type = ""
        else {
            string_type = "%s"
            next
        }
    }
}
# A line that is neither a key nor a list item, seen while a string type is
# active, may continue a multiline string.
!is_key && !is_list_item && string_type {
    # The first candidate line fixes the indent the string is measured from.
    if (string_indent == 0)
        string_indent = indent

    if (indent >= string_indent) {
        # Still inside the string: report it at the string's own indent.
        is_multiline_string = 1
        indent = string_indent
    } else {
        # Dedented past the string: it has ended.
        is_multiline_string = 0
        string_indent = 0
    }
}
# A key that has something after the colon: either a block-scalar indicator
# (the string itself starts on the following lines) or an inline value.
is_key && NF > 1 {
    if (NF == 2 && ($2 in STRING_TYPES)) {
        # "key: |" and friends: remember the indicator, print nothing yet.
        is_multiline_string = 1
        string_type = $2
    } else {
        # Ordinary "key: value" line: emit the full path and the value.
        is_multiline_string = 0
        print_labels()
        print_value()
    }
    next
}
# Emit a non-key value line: a non-empty list item or a line of a multiline
# string.
(is_list_item && NF > 0) || is_multiline_string {
    # This line carries no key of its own, so drop any label recorded at
    # this indent before printing the path.
    delete label[indent]
    print_labels()
    print substr($0, indent)
    next
}
# Return the number of leading space characters in the current record ($0),
# or 0 when the record does not start with a space. Tabs are not counted.
function get_indent() {
    # match() sets RLENGTH to the length of the matched run of spaces.
    if (match($0, /^[ ]+/) > 0)
        return RLENGTH
    return 0
}
# Print (without a trailing newline) the path to the current value: for each
# column from 0 up to the current indent, the list position recorded there
# (as "N:") followed by the key label recorded there. When a multiline
# string is being emitted, the string type marker is appended as "TYPE:".
#
# Reads the globals indent, list, label, is_multiline_string, string_type.
# The extra parameter `i` is awk's way of declaring a local variable, so the
# loop counter no longer overwrites the global `i` used elsewhere in the
# script; callers keep calling print_labels() with no arguments.
function print_labels(    i) {
    for(i = 0; i <= indent; ++i) {
        # The `in` guards avoid creating empty array elements as a side
        # effect of merely looking at positions that hold nothing.
        if((i in list) && list[i] > 0)
            printf("%d:", list[i])
        if(i in label)
            printf("%s", label[i])
    }
    if(is_multiline_string)
        printf("%s:", string_type)
}
# Print the remainder of the current record that follows the key: everything
# after the leading indent and the first field (the "key:" token). Whatever
# separates the key from its value is kept as part of the output.
function print_value() {
    print substr($0, length($1) + indent + 1)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment