Created
July 14, 2016 18:18
-
-
Save vpipkt/08effc1a586321883eb0a0e5635b1b67 to your computer and use it in GitHub Desktop.
zipped geojsons to gz's quasi-jsons
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Repack GBDX vector zip archives into gzip-compressed batches of JSON and
# upload every batch to S3.
#
# For each *.zip found under the source directory:
#   1. the archive is unpacked into a scratch directory,
#   2. every top-level *.json file it contains is appended to the current
#      stage file, each followed by a newline,
#   3. once the stage file holds more than the configured number of lines it
#      is gzipped, copied to S3 and the next numbered stage file is started.
# Whatever is still staged after the last archive is flushed at the end.
#
# Uploaded objects are named gbdx_vectors_<N>.json.gz with N = 0, 1, 2, ...
# (plain gzip files, not tarballs).
#
# Optional environment overrides; the defaults are the original fixed values:
#   GBDX_MAX_LINES   line threshold that triggers a flush   (700000)
#   GBDX_SRC_DIR     directory searched for *.zip            (/ebs/vectors/)
#   GBDX_WORK_DIR    scratch directory, wiped per archive    (/ebs/workdir)
#   GBDX_STAGE_DIR   directory holding the stage file        (/tmp)
#   GBDX_S3_DEST     destination passed to `aws s3 cp`
#                    (s3://geoint-data/object-detection/)

maxLinesStage=${GBDX_MAX_LINES:-700000}
outFileNum=0
workDir=${GBDX_WORK_DIR:-/ebs/workdir}
srcDir=${GBDX_SRC_DIR:-/ebs/vectors/}
stageDir=${GBDX_STAGE_DIR:-/tmp}
s3Dest=${GBDX_S3_DEST:-s3://geoint-data/object-detection/}

#######################################
# Print the path of the stage file for the current file number.
# Globals: stageDir (read), outFileNum (read)
# Outputs: the path on stdout, without a trailing newline
#######################################
stage_path() {
  printf '%s/gbdx_vectors_%s.json' "$stageDir" "$outFileNum"
}

#######################################
# Unpack one archive and append its top-level *.json files to the stage file.
# Globals:   workDir (read; the directory is deleted and recreated)
# Arguments: $1 - path of the zip archive
# Returns:   non-zero if the scratch directory could not be prepared or the
#            JSON files could not be appended
#######################################
append_archive() {
  local archive=$1
  local stage rc=0
  stage=$(stage_path)

  # Start from an empty scratch directory so files left behind by an aborted
  # run are never mixed into this archive's output.
  rm -rf -- "${workDir:?}" || return
  mkdir -p -- "$workDir" || return

  # A failing unzip is reported but whatever it managed to extract is still
  # appended below.
  unzip -qq "$archive" -d "$workDir" \
    || printf 'warning: unzip reported a problem with %s\n' "$archive" >&2

  # File names reach the inner shell as positional parameters, never as part
  # of the command text, so unusual names cannot be interpreted as code.
  find "$workDir" -maxdepth 1 -name '*.json' -type f \
    -exec sh -c 'for f; do cat -- "$f"; echo; done' sh {} + >> "$stage" || rc=$?

  rm -rf -- "${workDir:?}"
  return "$rc"
}

#######################################
# Gzip the current stage file, upload it and advance to the next file number.
# Does nothing when the stage file is missing or empty.
# Globals: s3Dest (read), outFileNum (incremented after a successful upload)
# Returns: non-zero if gzip or the upload fails; the file number is then left
#          unchanged and the compressed file is kept on disk
#######################################
flush_stage() {
  local stage
  stage=$(stage_path)
  [[ -s "$stage" ]] || return 0

  # -f replaces a stale .gz of the same name instead of refusing to run.
  gzip -f -- "$stage" || return
  aws s3 cp "${stage}.gz" "$s3Dest" || return

  # Only this batch's file is removed, and only once it has been uploaded.
  rm -f -- "${stage}.gz"
  outFileNum=$(( outFileNum + 1 ))
}

#######################################
# Process every archive under srcDir and upload the resulting batches.
# Returns: non-zero if srcDir is missing or a batch could not be uploaded
#######################################
main() {
  local archive lines

  if [[ ! -d "$srcDir" ]]; then
    printf 'error: source directory %s not found\n' "$srcDir" >&2
    return 1
  fi

  # The archive list arrives on fd 3 through process substitution: the loop
  # runs in this shell, so the file number survives for the final flush, and
  # unzip/aws cannot swallow the list from stdin.
  while IFS= read -r -d '' archive <&3; do
    printf '%s\n' "$archive"

    if ! append_archive "$archive"; then
      printf 'warning: skipping %s\n' "$archive" >&2
      continue
    fi

    lines=$(wc -l < "$(stage_path)") || return 1
    if (( lines > maxLinesStage )); then
      # Stop on a failed upload rather than overwrite the batch that is
      # still waiting on disk.
      flush_stage || return 1
    fi
  done 3< <(find "$srcDir" -name '*.zip' -print0)

  # Upload whatever is left below the threshold.
  flush_stage
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment