Skip to content

Instantly share code, notes, and snippets.

@johndstein
Created October 28, 2020 12:19
Show Gist options
  • Save johndstein/47b7123df44e5b75a7721a79ff538b93 to your computer and use it in GitHub Desktop.
Save johndstein/47b7123df44e5b75a7721a79ff538b93 to your computer and use it in GitHub Desktop.
parallel s3 select
#!/usr/bin/env bash
# Print the usage example for this script on stdout, then terminate the
# script with exit status 0. Never returns to the caller.
help() {
  # Quoted delimiter: the text below is emitted literally, including the
  # backslash line-continuations and the leading/trailing blank lines.
  cat <<'USAGE'

Usage:
test/s3select.sh -v \
--bucket some-bucket \
--prefix foo/2020/10/20/20/57 \
--expression "select * from s3object o where o.rawmsghostname = 'ynthtest-Oct20T20-filler-5b45a8d7'"

USAGE
  exit 0
}
# Abort with a usage error (exit 2) unless the option named in $1 is followed
# by a value. Call as: require_value "$@"
require_value() {
  if [[ $# -lt 2 ]]; then
    printf 'error: option %s requires a value\n' "$1" >&2
    exit 2
  fi
}

# With no arguments at all, just show the usage text (help exits the script).
if [[ $# -eq 0 ]]; then
  help
fi

# Parse the command line into the globals read by the rest of the script:
#   bucket     - S3 bucket name            (-b | --bucket)
#   prefix     - key prefix to list under  (-p | --prefix)
#   expression - S3 Select SQL expression  (-e | --expression)
#   verbose    - set to "verbose" by       (-v | --verbose)
while [[ $# -gt 0 ]]; do
  case "$1" in
    -b|--bucket)
      # Without this check a trailing "--bucket" makes `shift 2` fail without
      # shifting anything, and the loop would spin forever.
      require_value "$@"
      bucket=$2
      shift 2
      ;;
    -p|--prefix)
      require_value "$@"
      prefix=$2
      shift 2
      ;;
    -e|--expression)
      require_value "$@"
      expression=$2
      shift 2
      ;;
    -v|--verbose)
      verbose=verbose
      shift
      ;;
    -h|--help)
      help
      ;;
    *)
      printf 'error: unrecognized argument: %s\n' "$1" >&2
      # help exits 0 on its own; run it in a subshell so the usage text goes
      # to stderr and this script can still report the failure.
      (help) >&2
      exit 2
      ;;
  esac
done

# The bucket and the expression are needed by every later AWS call; the prefix
# may legitimately be empty (list the whole bucket).
if [[ -z "${bucket:-}" || -z "${expression:-}" ]]; then
  printf 'error: --bucket and --expression are required\n' >&2
  exit 2
fi
# Run the S3 Select expression against every object listed under
# "$bucket/$prefix": one `aws s3api select-object-content` call per key,
# fanned out by parallel.
# Reads:   bucket, prefix, expression, verbose
# Outputs: query results on stdout; when verbose is "verbose", each key is
#          written to stderr before it is queried.
# Status:  that of the listing/parallel pipeline (no longer masked by cleanup).

# Print the object key from each `aws s3 ls --recursive` line read on stdin.
# Only the first three columns (date, time, size) are stripped, so keys that
# contain spaces are kept whole; lines that do not look like a listing entry
# are skipped.
s3_listing_keys() {
  awk '{ if (sub(/^[^ ]+ +[^ ]+ +[^ ]+ /, "")) print }'
}

# Private, unpredictable temp file for the per-object worker script; the trap
# removes it on every exit path, including interruption.
worker=$(mktemp "${TMPDIR:-/tmp}/s3select.XXXXXX") || {
  echo "error: could not create temporary worker script" >&2
  exit 1
}
trap 'rm -f -- "$worker"' EXIT

# The delimiter is quoted so nothing is expanded while the worker is written.
# User-supplied values reach the worker through the environment instead of
# being pasted into shell source, so quotes, `$` or backticks in the SQL
# expression cannot break or inject into the generated script.
cat > "$worker" <<'EOF'
#!/usr/bin/env bash
# $1 - object key to query
if [[ "${S3SELECT_VERBOSE:-}" == "verbose" ]]; then printf '%s\n' "$1" >&2; fi
aws s3api select-object-content \
  --bucket "$S3SELECT_BUCKET" \
  --key "$1" \
  --expression "$S3SELECT_EXPRESSION" \
  --expression-type 'SQL' \
  --input-serialization 'JSON={Type=LINES},CompressionType=GZIP' \
  --output-serialization 'JSON={}' \
  /dev/stdout
EOF

export S3SELECT_BUCKET="${bucket:-}"
export S3SELECT_EXPRESSION="${expression:-}"
export S3SELECT_VERBOSE="${verbose:-}"

# Surface a failed listing as well as failed jobs in the exit status.
set -o pipefail

# parallel re-parses its command line through a shell, so the worker path is
# shell-quoted in case TMPDIR contains unusual characters. Running the worker
# via `bash` avoids depending on the temp directory allowing execution.
aws s3 ls --recursive "${bucket:-}/${prefix:-}" \
  | s3_listing_keys \
  | parallel bash "$(printf '%q' "$worker")" {}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment