nijave/json_parse.sh

## json_parse.sh
#!/bin/sh

# --line-buffer to prevent json from accidentally getting split in the middle
# -n 128 cat-ing large quantities of files is faster since the programs aren't re-invoked as often (too high and you risk not using all the threads in the final batches)
# -P 125% slightly over subscribe the CPU to lessen the chance of wasting cycles if something becomes blocked
# -q -m 4M make a small file buffer to smooth out I/O (don't make it too large as a single cat process could starve the other threads)
# -rc for single line per json object and "raw" output
# final jq is just to format and colorize the results

# zcat can be used instead of cat to read json.gz files. This example reads from ZFS with zstd compression
# takes ~1m15s to run with 3.4GiB of logs (~370 Mbit/s) on a Ryzen 2700X (CPU appears to be the bottleneck)

# Find every *.json below the current directory, extract the CloudTrail
# records whose eventSource is cloudtrail.amazonaws.com in parallel, and
# pretty-print the matches.
# File names are passed NUL-delimited (-print0 / -0) so paths containing
# newlines or other unusual characters reach parallel as single arguments.
# The trailing jq gets an explicit identity filter (.) so it does not depend
# on jq's behaviour when no program is given.
find . -name "*.json" -print0 \
  | parallel -0 --line-buffer -n 128 -P 125% \
  "cat {} | mbuffer -q -m 4M | jq -rc '.Records[] | select(.eventSource == \"cloudtrail.amazonaws.com\")'" \
  | jq .
	#!/bin/sh

	# --line-buffer to prevent json from accidentally getting split in the middle
	# -n 128 cat-ing large quantities of files is faster since the programs aren't re-invoked as often (too high and you risk not using all the threads in the final batches)
	# -P 125% slightly over subscribe the CPU to lessen the chance of wasting cycles if something becomes blocked
	# -q -m 4M make a small file buffer to smooth out I/O (don't make it too large as a single cat process could starve the other threads)
	# -rc for single line per json object and "raw" output
	# final jq is just to format and colorize the results

	# zcat can be used instead of cat to read json.gz files. This example reads from ZFS with zstd compression
	# takes ~1m15s to run with 3.4GiB of logs (~370 Mbit/s) on a Ryzen 2700X (CPU appears to be the bottleneck)

	# Find every *.json below the current directory, extract the CloudTrail
	# records whose eventSource is cloudtrail.amazonaws.com in parallel, and
	# pretty-print the matches.
	# The pipes must be real, unescaped | characters: a backslash-escaped pipe
	# is handed to find as a literal argument and no pipeline is formed.
	# File names are passed NUL-delimited (-print0 / -0) so paths containing
	# newlines or other unusual characters reach parallel as single arguments.
	find . -name "*.json" -print0 \
	| parallel -0 --line-buffer -n 128 -P 125% \
	"cat {} | mbuffer -q -m 4M | jq -rc '.Records[] | select(.eventSource == \"cloudtrail.amazonaws.com\")'" \
	| jq .