jq basics (oneliners)
#% jq, unixtools, knb
# manual: https://jqlang.github.io/jq/manual/
# wiki jq:
# https://github.com/jqlang/jq/wiki/jq-Language-Description#lexical-symbol-bindings-function-definitions-and-data-symbol-bindings
# man jq
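# e.g. (a minimal sketch of the bindings described on the wiki page above):
# define a function with 'def', bind a lexical variable with 'as'
jq -n 'def double: . * 2; 21 as $x | $x | double'   # => 42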
# jq exit status in $?: 0 if the program ran, 2 on usage or system error, 3 on compile error
# jq -e / --exit-status: sets $? to 1 if the last output value was false or null, 0 otherwise
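# quick check (sketch): '.b' yields null here, so with -e the exit status is 1
echo '{"a":1}' | jq -e '.b'; echo "exit: $?"   # prints null, then "exit: 1"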
# convert non-json to json
echo "1 2 3 4 5" | sed "s/ /\n/g" |jq -R . | jq -s .
# similar
jq -n "[71, null, true, false, -2.3]"
# jq 1.7+ only:
# new pick() function (transform objects on the fly):
# returns an object containing only the specified paths
jq -n '{"a": 1, "b": {"c": 2, "d": 3}, "e": 4} ' \
| jq 'pick(.a, .b.c, .x)'
# get all public projects from gitlab
gl_url="https://git.gfz-potsdam.de/api/v4/projects?sort=desc&visibility=public&page=1&per_page=200&simple=false&statistics=false"
# keep projects with at least one star; emit [name, namespace, star_count] arrays
curl -sL "$gl_url" | jq 'map(select(.star_count > 0) | [.name, .namespace.name, .star_count] ) '
# To append paged JSON results from a web API into a single file,
# use a loop to fetch each page and append the results to the file.
# Loop over the pages in bash
# Initialize an empty JSON array in the file
echo '[]' > data.json
for page in {1..10}; do   # replace 10 with the actual number of pages
    # fetch one page and append its results to the array in the file
    gl_url="https://git.gfz-potsdam.de/api/v4/projects?sort=desc&visibility=public&per_page=200&simple=false&statistics=false"
    curl -sL "$gl_url&page=$page" | jq --slurpfile data data.json '. + $data[0]' > tmp.json && mv tmp.json data.json
done
# last activity, name, namespace, star_count as a giant table / csv
< data.json jq 'map([.last_activity_at[0:10], .name, .namespace.name, .star_count]) ' | jq -r ".[] | @csv" | csvsort -c 1 -H | csvlook -l
# sort json by key, even nested objects' keys - good for comparing or diff'ing JSON data
echo '{"b":2,"a":1, "c": {"yy": 2, "xx": 1}}' | jq -S .
## Arrays
# jq range indexing: slice (default empty start: 0, default empty end: end of array)
echo "[0,1,2,3,4,5]" | jq '.[3:]' # => [3,4,5]
echo "[0,1,2,3,4,5]" | jq '.[:3]' # => [0,1,2]
echo "[1,2,3,4,5]" | jq '.[2:4]' # => [3,4]
# jq range indexing: one-sided
echo "[1,2,3,4,5]" | jq '.[2:]' # => [3,4,5]
# jq range indexing: use negatives to select from the end:
echo "[1,2,3,4,5]" | jq '.[-2:]' # => [4,5]
# jq: Putting Elements Into an Object
echo '["Adam","Gordon","Bell"]' | jq -r '{ "first_name":.[0], "last_name": .[2]}'
# jq: sort simple array
jq -n '["3","2","1"]' | jq 'sort'
# update an array element by index
jq -n '["33","22","11"]' | jq 'sort' | jq ".[1] = \"99\""
# add element to array
jq -n '["33","22","11"]' | jq 'sort' | jq ". + [\"99\"]"
# same as
jq -n '["33","22","11"]' | jq 'sort' | jq ". += [\"99\"]"
# same as
jq -n '["33","22","11"]' | jq 'sort' | jq '.[.|length] = "99"'
## minimal example: store "rock_types" JSON in a shell variable
## (helper data structure for later examples)
rock_types='{"success":{"v":2,"license":"CC-BY 4.0","data":[{"lith_id":1,"name":"siliciclastic","type":"siliciclastic","group":"","class":"sedimentary","color":"#FFF400","fill":670,"t_units":198},{"lith_id":2,"name":"gravel","type":"siliciclastic","group":"unconsolidated","class":"sedimentary","color":"#FFAB00","fill":601,"t_units":908},{"lith_id":3,"name":"sand","type":"siliciclastic","group":"unconsolidated","class":"sedimentary","color":"#FFCA00","fill":607,"t_units":3931},{"lith_id":4,"name":"silt","type":"siliciclastic","group":"unconsolidated","class":"sedimentary","color":"#919AA3","fill":619,"t_units":2357}]}}'
## extract original license label, for comparison
jq -n "$rock_types" | jq .success.license # CC-BY 4.0
# write access: change key 'license'
jq -n "$rock_types" | jq .success.license = "All rights reserved"
jq -n "$rock_types" | jq '.success.license = "All rights reserved"' | jq '.success.license' # All rights reserved
# same as
jq -n "$rock_types" | jq '.success += {"license" : "All rights reserved" }' | more
## --raw-output / -r: from the manual:
## With this option, if the filter's result is a string then it will be written directly to standard output
## rather than being formatted as a JSON string with quotes.
## This can be useful for making jq filters talk to non-JSON-based systems.
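# compare (sketch): default output is a quoted JSON string, -r emits the raw string
jq -n '"hello"'    # => "hello"
jq -nr '"hello"'   # => hello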
# Array of objects: Get property names of first object/entry
jq -n "$rock_types" | jq -r ' .success.data[0] | keys_unsorted' | jq -c
# the @csv construct needs an array of arrays as input.
# JSON to csv: simplest way
jq -n "$rock_types" | jq -r ' .success.data[] | map(.) | @csv'
## Use "csvlook" at data, Use "-h option" when no header names are given
# json 2 csvlook: numeric column names
jq -n "$rock_types" | jq -r ' .success.data[] | map(.) | @csv' | csvlook -H
# json2csv Step 1/3: create header row for the CSV table (prepended in step 3)
jq -n "$rock_types" | jq -r '.success.data[0] | keys_unsorted | join(",")' > header.csv
# json2csv Step 2/3: create the data body for the CSV table
jq -n "$rock_types" | jq -r '.success.data[] | map(.) | @csv' > data.csv
# json2csv Step 3/3: concatenate header row and data into a single file
cat header.csv data.csv > data_full.csv; csvlook < data_full.csv
# test for array membership: is there a lith named "sand"?
jq -n "$rock_types" | jq '.success.data | map(.name) | index("sand") != null'
# jq select(): get objects where the "name" property is "updated_on"
curl -sL -X GET "http://localhost:8983/solr/search_twitter/schema/fields" \
| jq '.fields[] | select(.name=="updated_on")'
## 'tweets' below represents twitter data, serialized tweets (a la 'stream.json')
# test data: 1 serialized tweet, stored in a shell variable
tweets='{"created_at":"Mon Jul 18 21:07:15 +0000 2016","id":755146942586482689,"id_str":"755146942586482689","text":"Detect security threats with behavioral analytics #BigData \u007buse case\u007d\nhttps:\/\/t.co\/rKaSPcY8eh https:\/\/t.co\/ygKIv5vlZE","source":"\u003ca href=\"http:\/\/bufferapp.com\" rel=\"nofollow\"\u003eBuffer\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":108471290,"id_str":"108471290","name":"Datameer","screen_name":"datameer","location":"San Francisco","url":"http:\/\/www.datameer.com","description":"We make big data analytics easy. Self-service data integration, preparation, analytics and visualization. Request a demo: http:\/\/bit.ly\/1Uh0igg","protected":false,"verified":false,"followers_count":11196,"friends_count":1255,"listed_count":814,"favourites_count":629,"statuses_count":9656,"created_at":"Tue Jan 26 01:47:19 +0000 2010","utc_offset":-25200,"time_zone":"Pacific Time (US & Canada)","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"2E7EA3","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/528225690340970496\/EcnywzLj.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/528225690340970496\/EcnywzLj.jpeg","profile_background_tile":false,"profile_link_color":"0091D0","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1562714971\/256x256_wBG_normal.png","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1562714971\/256x256_wBG_normal.png","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/108471290\/1421278000","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"BigData","indices":[50,58]}],"urls":[{"url":"https:\/\/t.co\/rKaSPcY8eh","expanded_url":"http:\/\/www.datameer.com\/wp-content\/uploads\/pdf\/use_cases\/Detect-Security-Threats-With-Behavioral-Analytics.pdf?ls=Social%20Media&lsd=Twitter&c=Social%20Media&cd=Twitter","display_url":"datameer.com\/wp-content\/upl\u2026","indices":[70,93]}],"user_mentions":[],"symbols":[],"media":[{"id":755146940006993920,"id_str":"755146940006993920","indices":[94,117],"media_url":"http:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","url":"https:\/\/t.co\/ygKIv5vlZE","display_url":"pic.twitter.com\/ygKIv5vlZE","expanded_url":"http:\/\/twitter.com\/datameer\/status\/755146942586482689\/photo\/1","type":"photo","sizes":{"medium":{"w":1200,"h":341,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":193,"resize":"fit"},"large":{"w":1540,"h":438,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":755146940006993920,"id_str":"755146940006993920","indices":[94,117],"media_url":"http:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/CnrSJBhXEAAVUDc.jpg","url":"https:\/\/t.co\/ygKIv5vlZE","display_url":"pic.twitter.com\/ygKIv5vlZE","expanded_url":"http:\/\/twitter.com\/datameer\/status\/755146942586482
689\/photo\/1","type":"photo","sizes":{"medium":{"w":1200,"h":341,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":193,"resize":"fit"},"large":{"w":1540,"h":438,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1468876035721"}'
# Get first tweet (if input is an array of entries)
jq -n "$tweets" | jq -s ".[0]"
# Get keynames, but only from first element of JSON object stream,
# 1 key per line
jq -n "$tweets" | jq -s ".[0]| keys_unsorted"
# max_by function (similar for min_by, sort_by, group_by, unique_by, etc.)
jq -n "$tweets| .entities.media[0].sizes| map(.)|max_by(.w)"
## (perfect for importing in R, renaming columns of a data frame)
## -s means "slurp into array", -r means "raw"
# Get key names as 1 line of csv =>
jq -n "$tweets" | jq -s ".[0]| (keys_unsorted) " | jq -r @csv
# get the _value_ of the first key in each element of the JSON stream
# often an id, a number, or, as here: the tweet creation date in human-readable form
jq -n "$tweets" | jq "[.[]][0]"
# unique users (-C means Colorize JSON)
jq -n "$tweets" | jq -C -s ".[]| {(.user.name?): .user.description}" | sort -u
# username, screen-name, description, with string-interpolation
jq -n "$tweets" | jq -C -s ".[]| { \"https://twitter.com/\(.user.screen_name) \(.user.name)\":.user.description}" | sort -u
# username, screen-name, description, with string-concatenation
jq -n "$tweets" | jq -C -s '.[]| {(.user.screen_name? + " --- " + .user.name): .user.description}' | sort -u
# jq: map and select: filter issue-numbers >2500
curl -sL "https://api.github.com/repos/jqlang/jq/issues?per_page=4" | \
jq 'map({ title: .title, number: .number, labels: .labels | length }) |
map(select(.number >= 2500))'
## select english tweets
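# a minimal sketch: keep tweets whose "lang" field is "en" (the sample tweet qualifies)
jq -n "$tweets" | jq -s 'map(select(.lang == "en"))'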
## bad example
## https://stackoverflow.com/questions/48224065/jq-convert-json-file-with-arrays-to-csv-transposed
# jq: convert JSON file with arrays to CSV - transposed?
jq -n "$tweets" | jq -s '[.[] | {(.user.screen_name):.extended_entities.media[]?.indices? }] ' \
| jq -r '.[]| [( keys_unsorted, ([.[]]|transpose)[] )] ' \
| jq -s .
# delete "null" _strings_ anywhere in the structure
del(..|select(. == "null"))
# If your values are true nulls, rather than the string "null",
# you can use the nulls builtin function in place of the whole select:
del(..|nulls).
# Use environment variables in jq
jq -n "env.ENV_VAR_NAME"
jq -n "[env.USER, env.HOME]"
# custom variable with bash-export
# use the --arg key value option
jq -n --arg myName "${USER}" --arg currentDir "${PWD}" '$myName, $currentDir'
jq -n --arg myName "$(date)" '$myName'
jq -n "range(1;10)"
jq -n '{"evens": [range(0;11;2)]} '
jq -n '["A", "V", "C", "Z"] | map(. | ascii_downcase)'
# type detection and conversion
jq -n "true | type"
jq -n '"My " + (2 | tostring) + " cents" | tostring'
jq -n '("23 " | tonumber) + ("42" | tonumber)'
# tonumber is required to convert the string to a number
# pass the script's first argument to jq via an environment variable
export reqIndex=${1}
jq '.[env.reqIndex | tonumber ]' arrayOfObjects.json
# map_values() is better than map() for objects: incr "age" by 1
echo '[
{"name": "Alice", "age": 30},
{"name": "Bob", "age": 25},
{"name": "Carol", "age": 35}
]' | jq 'map(map_values(if type == "number" then . + 1 else . end))'
# (combines map() over the whole array with map_values() over each object)