Last active
June 15, 2024 20:58
-
-
Save knbknb/56ce2feae2095ce95d329aa61f583989 to your computer and use it in GitHub Desktop.
jq basics (oneliners)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#% jq, unixtools, knb | |
# manual: https://jqlang.github.io/jq/manual/ | |
# wiki jq: | |
# https://github.com/jqlang/jq/wiki/jq-Language-Description#lexical-symbol-bindings-function-definitions-and-data-symbol-bindings | |
# man jq | |
# jq exit codes in $?: (default: 0 on success null on error.) | |
# jq -e : jq exit codes $?: (default: 0 on success 1 on error. | |
# convert non-json to json | |
echo "1 2 3 4 5" | sed "s/ /\n/g" |jq -R . | jq -s . | |
# similar | |
jq -n "[71, null, true, false, -2.3]" | |
# jq 1.7. only: | |
# new pick() function - (transform objects on the fly), | |
# returns object with only the specified keys | |
jq -n '{"a": 1, "b": {"c": 2, "d": 3}, "e": 4} ' \ | |
| jq 'pick(.a, .b.c, .x)' | |
# get all public projects from gitlab | |
gl_url="https://git.gfz-potsdam.de/api/v4/projects?sort=desc&visibility=public&page=1&per_page=200&simple=false&statistics=false" | |
# display name, namespace, star_count, with line numbers, in csv format | |
curl -sL "$gl_url" | jq 'map(select(.star_count > 0) | [.name, .namespace.name, .star_count] ) ' | |
# To append paged JSON results from a web API into a single file, | |
# use a loop to fetch each page and append the results to the file. | |
# Loop over the pages in bash | |
# Initialize an empty JSON array in the file | |
echo '[]' > data.json | |
for page in {1..10}; do # Replace 10 with the number of pages | |
# Fetch the page and append the results to the array in the file | |
gl_url="https://git.gfz-potsdam.de/api/v4/projects?sort=desc&visibility=public&per_page=200&simple=false&statistics=false" | |
curl -sL "$gl_url&page=$page" | jq --slurpfile data data.json '. + $data[0]' > tmp.json && mv tmp.json data.json | |
done | |
# last activity, name, namespace, star_count as a giant table / csv | |
< data.json jq 'map([.last_activity_at[0:10], .name, .namespace.name, .star_count]) ' | jq -r ".[] | @csv" | csvsort -c 1 -H | csvlook -l | |
# sort json by key, even nested objects' keys - good for comparing or diff'ing JSON data | |
echo '{"b":2,"a":1, "c": {"yy": 2, "xx": 1}}' | jq -S . | |
## Arrays | |
# jq range indexing: slice (default empty start: 0, default empty end: end of array) | |
echo "[0,1,2,3,4,5]" | jq '.[3:]' # => [3,4,5] | |
echo "[0,1,2,3,4,5]" | jq '.[:3]' # => [0,1,2] | |
echo "[1,2,3,4,5]" | jq '.[2:4]' # => [3,4] | |
# jq range indexing: one-sided | |
echo "[1,2,3,4,5]" | jq '.[2:]' # => [3,4,5] | |
# jq range indexing: use negatives to select from the end: | |
echo "[1,2,3,4,5]" | jq '.[-2:]' # => [4,5] | |
# jq: Putting Elements Into an Object | |
echo '["Adam","Gordon","Bell"]' | jq -r '{ "first_name":.[0], "last_name": .[2]}' | |
# jq: sort simple array | |
jq -n '["3","2","1"]' | jq 'sort' | |
# in-place edit | |
jq -n '["33","22","11"]' | jq 'sort' | jq ".[1] = \"99\"" | |
# add element to array | |
jq -n '["33","22","11"]' | jq 'sort' | jq ". + [\"99\"]" | |
# same as | |
jq -n '["33","22","11"]' | jq 'sort' | jq ". += [\"99\"]" | |
#same as | |
jq -n '["33","22","11"]' | jq 'sort' | jq '.[.|length]=99' | |
## create file "rock_types.json" - minimal example | |
## (helper datastructer for later examples) | |
rock_types='{"success":{"v":2,"license":"CC-BY 4.0","data":[{"lith_id":1,"name":"siliciclastic","type":"siliciclastic","group":"","class":"sedimentary","color":"#FFF400","fill":670,"t_units":198},{"lith_id":2,"name":"gravel","type":"siliciclastic","group":"unconsolidated","class":"sedimentary","color":"#FFAB00","fill":601,"t_units":908},{"lith_id":3,"name":"sand","type":"siliciclastic","group":"unconsolidated","class":"sedimentary","color":"#FFCA00","fill":607,"t_units":3931},{"lith_id":4,"name":"silt","type":"siliciclastic","group":"unconsolidated","class":"sedimentary","color":"#919AA3","fill":619,"t_units":2357}]}}' | |
## extract original license label, for coparison | |
jq -n "$rock_types" | jq .success.license # CC-BY 4.0 | |
# write access: change key 'license' | |
jq -n "$rock_types" | jq .success.license = "All rights reserved" | |
jq -n "$rock_types" | jq '.success.license = "All rights reserved"' | jq '.success.license' # All rights reserved | |
# same as | |
jq -n "$rock_types" | jq '.success += {"license" : "All rights reserved" }' | more | |
##--raw-output / -r: from the manual: | |
## With this option, if the filter´s result is a string then it will be written directly to standard output | |
## rather than being formatted as a JSON string with quotes. | |
## This can be useful for making jq filters talk to non-JSON-based systems. | |
# Array of objects: Get property names of first object/entry | |
jq -n "$rock_types" | jq -r ' .success.data[0] | keys_unsorted' | jq -c | |
# the @csv construct needs an array of arrays as input. | |
# JSON to csv: simplest way | |
jq -n "$rock_types" | jq -r ' .success.data[] | map(.) | @csv' | |
## Use "csvlook" at data, Use "-h option" when no header names are given | |
# json 2 csvlook: numeric column names | |
jq -n "$rock_types" | jq -r ' .success.data[] | map(.) | @csv' | csvlook -H | |
# json2csv Step 1/3: Create header row for CSV-datatable (prepend later) | |
jq -n "$rock_types" | jq -r -j ' [.success.data[0]] | map((keys_unsorted))[0]| join(", ")' | |
# json2csv Step 2/3: Create data-body for CSV-datatable | |
jq -n "$rock_types" jq -r ' .success.data[] | map(.) | @csv' > data.csv; | |
# json2csv: Steps: 3/3: concatenate headerrow and data into a single file | |
cat header.csv data.csv >> data_full.csv; csvlook < data_full.csv | |
# test for array membership | |
jq -n "$rock_types" | jq '.[]| select( .data |index( "lith_id") ) ' | |
# jq select(): get objects where the "name" property is "updated_on" | |
curl -sL -X GET "http://localhost:8983/solr/search_twitter/schema/fields" \ | |
| jq '.fields[] | select(.name=="updated_on")' | |
## rock_types 'stream.json' represents twitter data , serialized tweets | |
# create 'stream.json' Testdata: 1 serialised tweet | |
tweets='{"created_at":"Mon Jul 18 21:07:15 +0000 2016","id":755146942586482689,"id_str":"755146942586482689","text":"Detect security threats with behavioral analytics #BigData \u007buse case\u007d\nhttps:\/\/t.co\/rKaSPcY8eh https:\/\/t.co\/ygKIv5vlZE","source":"\u003ca href=\"http:\/\/bufferapp.com\" rel=\"nofollow\"\u003eBuffer\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":108471290,"id_str":"108471290","name":"Datameer","screen_name":"datameer","location":"San Francisco","url":"http:\/\/www.datameer.com","description":"We make big data analytics easy. Self-service data integration, preparation, analytics and visualization. Request a demo: http:\/\/bit.ly\/1Uh0igg","protected":false,"verified":false,"followers_count":11196,"friends_count":1255,"listed_count":814,"favourites_count":629,"statuses_count":9656,"created_at":"Tue Jan 26 01:47:19 +0000 2010","utc_offset":-25200,"time_zone":"Pacific Time (US & Canada)","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"2E7EA3","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/528225690340970496\/EcnywzLj.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/528225690340970496\/EcnywzLj.jpeg","profile_background_tile":false,"profile_link_color":"0091D0","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1562714971\/256x256_wBG_normal.png","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1562714971\/256x256_wBG_normal.png","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/108471290\/1421278000","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"BigData","indices":[50,58]}],"urls":[{"url":"https:\/\/t.co\/rKaSPcY8eh","expanded_url":"http:\/\/www.datameer.com\/wp-content\/uploads\/pdf\/use_cases\/Detect-Security-Threats-With-Behavioral-Analytics.pdf?ls=Social%20Media&lsd=Twitter&c=Social%20Media&cd=Twitter","display_url":"datameer.com\/wp-content\/upl\u2026","indices":[70,93]}],"user_mentions":[],"symbols":[],"media":[{"id":755146940006993920,"id_str":"755146940006993920","indices":[94,117],"media_url":"http:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","url":"https:\/\/t.co\/ygKIv5vlZE","display_url":"pic.twitter.com\/ygKIv5vlZE","expanded_url":"http:\/\/twitter.com\/datameer\/status\/755146942586482689\/photo\/1","type":"photo","sizes":{"medium":{"w":1200,"h":341,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":193,"resize":"fit"},"large":{"w":1540,"h":438,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":755146940006993920,"id_str":"755146940006993920","indices":[94,117],"media_url":"http:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","url":"https:\/\/t.co\/ygKIv5vlZE","display_url":"pic.twitter.com\/ygKIv5vlZE","expanded_url":"http:\/\/twitter.com\/datameer\/status\/755146942586482689\/photo\/1","type":"photo","sizes":{"medium":{"w":1200,"h":341,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":193,"resize":"fit"},"large":{"w":1540,"h":438,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1468876035721"}' | |
# Get first tweet (if input is an array of entries) | |
jq -n "$tweets" | jq -s ".[0]" | |
# Get keynames, but only from first element of JSON object stream, | |
# 1 key per line | |
jq -n "$tweets" | jq -s ".[0]| keys_unsorted" | |
# max_by function (similar for min_by, sort_by, group_by, unique_by, etc.) | |
jq -n "$tweets| .entities.media[0].sizes| map(.)|max_by(.w)" | |
## (perfect for importing in R, renaming columns of a data frame) | |
## -s means "slurp into array", -r means "raw" | |
# Get key names as 1 line of csv => | |
jq -n "$tweets" | jq -s ".[0]| (keys_unsorted) " | jq -r @csv | |
# get all _values_ from the first key in each element of the JSON stream | |
# often contains id, number or as here: tweet creation date in humanreadable form | |
jq -n "$tweets" | jq "[.[]][0]" | |
# unique users (-C means Colorize JSON) | |
jq -n "$tweets" | jq -C -s ".[]| {(.user.name?): .user.description}" | sort -u | |
# username, screen-name, description, with string-interpolation | |
jq -n "$tweets" | jq -C -s ".[]| { \"https://twitter.com/\(.user.screen_name) \(.user.name)\":.user.description}" | sort -u | |
# username, screen-name, description, with string-concatenation | |
jq -n "$tweets" | jq -C -s '.[]| {(.user.screen_name? + " --- " + .user.name): .user.description}' | sort -u | |
# jq: map and select: filter issue-numbers >2500 | |
curl -sL https://api.github.com/repos/stedolan/jq/issues?per_page=4 | \ | |
jq 'map({ title: .title, number: .number, labels: .labels | length }) | | |
map(select(.number >= 2500))' | |
## select english tweets | |
## bad example | |
## https://stackoverflow.com/questions/48224065/jq-convert-json-file-with-arrays-to-csv-transposed | |
# jq: convert JSON file with arrays to CSV - transposed? | |
jq -n "$tweets" | jq -s '[.[] | {(.user.screen_name):.extended_entities.media[]?.indices? }] ' \ | |
| jq -r '.[]| [( keys_unsorted, ([.[]]|transpose)[] )] ' \ | |
| jq -s | |
# delete "null" _strings_ anywhere in the structure | |
del(..|select(. == "null")) | |
# If your values are true nulls, rather than the string "null", | |
# you can use the nulls builtin function in place of the whole select: | |
del(..|nulls). | |
# Use environment variables in jq | |
jq -n "env.ENV_VAR_NAME" | |
jq -n "[env.USER, env.HOME]" | |
# custom variable with bash-export | |
# use the --arg key value option | |
jq -n --arg myName "${USER}" --arg currentDir "${PWD}" '$myName, $currentDir' | |
jq -n --arg myName "$(date)" '$myName' | |
jq -n "range(1;10)" | |
jq -n '{"evens": [range(0;11;2)]} ' | |
jq -n '["A", "V", "C", "Z"] | map(. | ascii_downcase)' | |
# type detection and conversion | |
jq -n "true | type" | |
jq -n '"My " + (2 | tostring) + " cents" | tostring' | |
jq -n '("23 " | tonumber) + ("42" | tonumber)' | |
# tonumber is required to convert the string to a number | |
export reqIndex=${1} | |
jq '.[env.reqIndex | tonumber ]' arrayOfObjects.json | |
# map_values() is better than map() for objects: incr "age" by 1 | |
echo '[ | |
{"name": "Alice", "age": 30}, | |
{"name": "Bob", "age": 25}, | |
{"name": "Carol", "age": 35} | |
]' | jq 'map(map_values(if type == "number" then . + 1 else . end))' | |
# combines both |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment