Created
February 7, 2019 15:39
-
-
Save eidosam/c406f788864eafd95fb230ff66fabb32 to your computer and use it in GitHub Desktop.
Read the first few rows of a Parquet file stored in S3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Print the first rows of a Parquet object stored in S3 as JSON records,
# using `aws s3api select-object-content`.
#
# Usage: <script> [--profile NAME] [--limit N] [s3://]bucket/key
#   --profile NAME  AWS CLI profile to use (default: "default")
#   --limit N       maximum number of rows to select (default: 100)
#
# The "s3://" scheme prefix on the path is optional. Selected records are
# written to stdout; diagnostics go to stderr. Exits 1 on invalid arguments.

set -euo pipefail

# Print an error message (surrounded by blank lines) to stderr and exit 1.
die() {
  printf '\n%s\n\n' "$*" >&2
  exit 1
}

profile=default
limit=100
inputfile=

# Loop on the argument count rather than on "$1" being non-empty, so that an
# empty argument does not silently terminate option parsing.
while (( $# > 0 )); do
  case "$1" in
    --profile)
      (( $# >= 2 )) || die "Option --profile requires a value"
      profile=$2
      shift
      ;;
    --limit)
      (( $# >= 2 )) || die "Option --limit requires a value"
      limit=$2
      shift
      ;;
    *)
      # Any other argument is taken as the S3 path; the last one wins.
      inputfile=$1
      ;;
  esac
  shift
done

# The limit is interpolated into the SQL expression below, so accept only
# plain decimal digits.
[[ "$limit" =~ ^[0-9]+$ ]] || die "Invalid limit: ${limit}"

# Strip the scheme only when it is a leading prefix, then split on the first
# slash: everything before it is the bucket, everything after it is the key.
path=${inputfile#s3://}
bucket=${path%%/*}
key=
if [[ "$path" == */* ]]; then
  # Without this guard a path with no slash would yield key == bucket.
  key=${path#*/}
fi

if [[ -z "$key" || -z "$bucket" ]]; then
  die "Invalid S3 file path"
fi

# All expansions are quoted so keys containing spaces or glob characters
# reach the AWS CLI as a single argument.
aws --profile "$profile" s3api select-object-content \
  --bucket "$bucket" \
  --key "$key" \
  --expression "SELECT * FROM S3Object LIMIT ${limit}" \
  --expression-type SQL \
  --input-serialization 'Parquet={}' \
  --output-serialization 'JSON={}' \
  /dev/stdout
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment