Last active
November 7, 2024 22:41
-
-
Save crazygolem/e68207b516fe5f1d13eddaab9a91b280 to your computer and use it in GitHub Desktop.
Utility functions for jq, to copy-paste into your scripts or extract and include as module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dynamically parses a string into a PATHS array, typically to use with the | |
# builtins `getpath/1`, `setpath/2` and `delpaths/1`. | |
# | |
# This is similar to the native function `path/1` for exact match path | |
# expressions, but takes the path as a string instead of an actual path, which | |
# allows parsing it at runtime. This can be useful for working with paths that
# you get from your data, or passing paths from a shell script safely (until a | |
# more powerful `eval` builtin gets implemented, see [jq#384]), | |
# e.g.: | |
# | |
# jq --arg path "$path" ' | |
# def topath: ...; | |
# getpath($path | topath) | |
# ' data.json | |
# | |
# The function is carefully written without any single quote, to allow you to | |
# copy-paste it in your shell scripts as-is. Alternatively you might want to | |
# consider using `include` or `import` in your jq script so you can place it in | |
# a separate file. | |
# | |
# This function supports a subset of the syntax supported by the native `path/1` | |
# function, and specifically only exact match path expressions[^slices] with | |
# extra restrictions (e.g. the comma is not supported, and extraneous whitespace | |
# is not allowed). Syntax that works with this function but not with the native | |
# `path/1` should be considered a bug.
# | |
# | |
# [^slices]: Slices are currently supported, but the whole thing is quite weird | |
# (and the builtin `path/1`'s behavior is a bit whacky around them), and it's | |
# not even clear if they should count as exact match path expressions in the | |
# first place; anyway their implementation was not too difficult (and in | |
# particular, unlike commas, they don't produce multiple outputs) so they're in. | |
# | |
# [jq#384]: https://github.com/stedolan/jq/issues/384 | |
# | |
# ALTERNATIVES | |
# | |
# If your use-case is simple enough, you might want to consider:
# | |
# def topath: split(".")[1:]; | |
# | |
# If you don't mind bulky JSON arrays in your shell scripts, you can pass them | |
# using jq's `--argjson` argument, and then use them in place of calling | |
# `topath`. | |
# | |
# EXAMPLES | |
# | |
# Filter: | |
# getpath(".foo.bar" | topath) | |
# Input: | |
# {"foo":{"bar":"hello","baz":"world"}} | |
# Output: | |
# "hello" | |
def topath:
  # Input: a path expression given as a string, e.g. ".foo[2].bar".
  # Output: the corresponding PATHS array, e.g. ["foo", 2, "bar"].
  # Errors: when the string does not start with a dot, or when it contains
  # text that none of the patterns below recognizes.
  if test("^[^.]") then error("Paths must always start with a dot") else . end
  | [scan(
      [
        # Top-level parsing, for complex cases we rely on native methods (e.g.
        # `fromjson`) to perform extra validation in the next step.
        # We rely on priority: the alternation order is usually important
        # between top-level ones, and sometimes also within them.
        # Note: Each output has always exactly one matching top-level
        # alternation. The whole pattern is wrapped in one more group, so in
        # every scan result index 0 holds the full matched text and indices
        # 1 to 9 hold the groups numbered below.
        # 1: identity
        "^(\\.)$",
        # 2: simple identifier
        "\\.([_A-Za-z][_A-Za-z0-9]*)",
        # 3: quoted identifier (possibly empty, as in `.""`)
        "\\.(\"(?:\\\\\"|[^\"])*\")",
        # 4: generic object index (quoted identifier in brackets)
        "(?:\\.)?\\[(\"(?:\\\\\"|[^\"])*\")\\]",
        # 5: array index (integer in brackets)
        "(?:\\.)?\\[(-?[0-9]+)\\]",
        # 6,7,8: array/string slice
        "(?:\\.)?\\[(?:(-?[0-9]+):(-?[0-9]+)?|:(-?[0-9]+))\\]",
        # 9: syntax error. A character class is used rather than a bare dot
        # because the dot does not match line breaks, which would then be
        # skipped by the scan without being reported.
        "([\\s\\S])"
      ] | "(" + join("|") + ")"
    )]
  | map(
      # Extra validation and conversion to the expected type.
      # Note: using `fromjson` on a JSON string "unescapes" it, that is why we
      # kept the quotes around quoted identifiers in the previous parsing step.
      if .[1] then
        # Identity contributes no path element.
        { key: null, src: .[0] }
      elif .[2] then
        { key: .[2], src: .[0] }
      elif .[3] then
        { key: (.[3] | fromjson), src: .[0] }
      elif .[4] then
        { key: (.[4] | fromjson), src: .[0] }
      elif .[5] then
        { key: (.[5] | tonumber), src: .[0] }
      elif .[6:9] | any then
        # Group 6 is the slice start; the end is in group 7 when a start was
        # given and in group 8 otherwise.
        .[6] as $s | (.[7] // .[8]) as $e
        | {
            key: {
              start: (if $s then $s | tonumber else null end),
              end: (if $e then $e | tonumber else null end)
            },
            src: .[0]
          }
      elif .[9] then
        { err: .[9], src: .[0] }
      else
        error("program error (missing case?)")
      end
    )
  | reduce .[] as $tok ({ path: [], parsed: "", error: null, resynced: false };
      if .resynced then
        # A valid token was seen after the first run of unexpected
        # characters: everything from there on is ignored, including any
        # later unexpected characters, so the message only reports the first
        # problem.
        .
      elif $tok.err then
        # We continue processing on error, to try and give more context.
        # (null + string yields the string, so the first character works.)
        .error += $tok.err
      elif .error then
        # `break` cannot be used as we would lose the state needed for the
        # error message, and we cannot use `error` here because this branch
        # will not be reached if there is nothing valid after the error.
        .resynced = true
      else
        .path += [$tok.key // empty]
        | .parsed += $tok.src
      end
    )
  | if .error then
      error("syntax error, unexpected `\(.error)` after `\(.parsed)`")
    else
      .path
    end
;
# Converts a PATHS array into a path expression. | |
# | |
# No slice support nor validation, and the output looks encumbered. In exchange | |
# the code is trivial. | |
# | |
# This should be suitable for most shell scripting situations where you need to | |
# pass paths to other tools but don't need to look at them. | |
# | |
# EXAMPLES | |
# | |
# Filter: | |
# path(.foo[2,4].bar) | topathexpr | |
# Input: | |
# null | |
# Output: | |
# ".[\"foo\"][2][\"bar\"]" | |
# ".[\"foo\"][4][\"bar\"]" | |
def topathexpr:
  # Fold the PATHS array into one string: start from the leading dot and
  # append every element as a bracketed JSON literal.
  reduce .[] as $step ("."; . + "[" + ($step | tojson) + "]")
;
# Converts a PATHS array into a path expression. | |
# | |
# No slice support nor validation. In exchange the code is somewhat concise. | |
# | |
# This should be suitable for most shell scripting needs e.g. together with | |
# `path/1`, as slices seem to be somewhat niche there. | |
# | |
# EXAMPLES | |
# | |
# Filter: | |
# path(.foo[2,4].bar) | topathexpr | |
# Input: | |
# null | |
# Output: | |
# ".foo[2].bar" | |
# ".foo[4].bar" | |
def topathexpr:
  # Input: a PATHS array. Output: a path expression string.
  # Keys that look like plain identifiers use the short `.name` form, every
  # other element is written as a bracketed JSON literal.
  map(
    # The pattern ends with \z (absolute end of string) instead of `$`, which
    # also matches just before a trailing line break: a key such as "foo\n"
    # must use the bracket form, otherwise the output would denote `.foo`.
    if type == "string" and test("^[_A-Za-z][_A-Za-z0-9]*\\z") then ".\(.)"
    else "[\(tojson)]"
    end
  )
  # A path whose first element is bracketed (or an empty path) still needs
  # the leading dot.
  | join("") | if startswith(".") then . else ".\(.)" end
;
# Converts a PATHS array into a path expression. | |
# | |
# When the input argument is a valid PATHS array the output is always a valid | |
# path expression (i.e. it would be accepted by the builtin `path/1`), however | |
# it might not always match exactly the path expression from which the PATHS | |
# array was initially created. | |
# | |
# EXAMPLES | |
# | |
# Filter: | |
# path(.foo[2,4].bar) | topathexpr | |
# Input: | |
# null | |
# Output: | |
# ".foo[2].bar" | |
# ".foo[4].bar" | |
def topathexpr:
  # Input: a PATHS array whose elements are strings, numbers or slice objects
  # of the form {"start": ..., "end": ...}.
  # Output: a path expression string.
  # Errors: "input error: ..." for any element of another shape.
  map(
    # \z (absolute end of string) is used instead of `$`, which also matches
    # just before a trailing line break: a key such as "foo\n" must take the
    # bracket form below.
    if type == "string" and test("^[_A-Za-z][_A-Za-z0-9]*\\z") then
      ".\(.)"
    elif type == "string" or type == "number" then
      "[\(tojson)]"
    elif type == "object"
      # `has` checks for the exact keys; `contains` on the key list would do
      # substring matching and accept e.g. a "restart" key as "start".
      and has("start") and has("end")
      # At least one bound must be set.
      and (.start or .end)
      # Every value in the object must be a number or null.
      and (map(type) | inside(["number", "null"]))
    then
      # `join` renders a null bound as an empty string, giving `[1:]`/`[:3]`.
      "[\([.start, .end] | join(":"))]"
    else
      error("input error: \(.)")
    end
  )
  # A path whose first element is bracketed (or an empty path) still needs
  # the leading dot.
  | join("") | if startswith(".") then . else ".\(.)" end
;
# Returns whether the given path can be found in the input object or array. | |
# | |
# This is similar to the builtin `has/1` but for an entire path, given as a | |
# PATHS array or a filter that produces one (e.g. the builtin `path/1`). | |
# | |
# EXAMPLES | |
# | |
# Filter: | |
# haspath(["foo", "bar"]) | |
# Input: | |
# {"foo":{"bar":2}} | |
# Output: | |
# true | |
# | |
# Filter: | |
# haspath(path(.foo.bar, .foo.bar.baz.qux)) | |
# Input: | |
# {"foo":{"bar":2}} | |
# Output: | |
# true | |
# false | |
def haspath(PATHS):
  # PATHS is evaluated against null, so it can be a literal PATHS array or a
  # filter such as `path(...)`; one boolean is produced per path it yields.
  (null | PATHS) as $wanted
  # Walk the paths of the input and stop at the first one equal to the
  # requested path; fall back to false when none matches.
  | first((paths | select(. == $wanted) | true), false)
;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment