Last active
December 19, 2015 15:58
-
-
Save chrismeserole/5979932 to your computer and use it in GitHub Desktop.
This script downloads files from GDELT and then transfers them to S3. It is best run on an EC2 instance, where it can rely on Amazon's internal bandwidth for the upload to S3. To run the script, enter something like 'sh gdelt-to-s3.sh 2000 2005', which uploads the CSV file for each year from 2000 through 2005 to S3. For data after 2005, enter s…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
## NOTE: This script relies on Tim Kay's `aws` script. If it is not
## installed, uncomment the following lines and replace the
## <AWS_ACCESS_KEY> / <AWS_SECRET_KEY> placeholders with your credentials:
# curl https://raw.github.com/timkay/aws/master/aws -o aws
# export EC2_ACCESS_KEY=<AWS_ACCESS_KEY>
# export EC2_SECRET_KEY=<AWS_SECRET_KEY>
# chmod +x aws
# perl aws --install
# Script configuration.
# BREAK_YEAR: start years below this value take the per-year loop; start
#             years at or above it take the per-month loop.
BREAK_YEAR=2006
# THIS_YEAR: first command-line argument (the start year).
THIS_YEAR="${1}"
# END_YEAR: optional second command-line argument (the last year of the range).
END_YEAR="${2}"
# MY_BUCKET: destination handed to s3put; replace the placeholders before use.
MY_BUCKET='<my bucket>/<gdelt-folder>/'
#######################################
# Download one GDELT backfile archive, unzip it, upload the extracted
# .csv with s3put, and remove both local files.
# Globals:
#   THIS_FILE - fallback archive base name when no argument is passed
#   MY_BUCKET - destination passed as the first argument to s3put
# Arguments:
#   $1 - archive base name without extension (optional; defaults to
#        $THIS_FILE so callers that only set the global keep working)
# Outputs:
#   progress messages on stdout, failure messages on stderr
# Returns:
#   0 on success; 1 if no name was given or if the download, the
#   extraction, or the upload reported failure
#######################################
transfer_gdelt() {
  # Keep the chosen name in $1 so no extra global variable is needed
  # (POSIX sh has no 'local').
  set -- "${1:-${THIS_FILE:-}}"
  if [ -z "$1" ]; then
    echo 'transfer_gdelt: no file name given' >&2
    return 1
  fi

  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page under the .zip name.
  if ! curl -f -O "http://gdelt.utdallas.edu/data/backfiles/$1.zip"; then
    echo "failed to download $1.zip" >&2
    rm -f -- "$1.zip"
    return 1
  fi

  if ! unzip "$1.zip"; then
    echo "failed to unzip $1.zip" >&2
    rm -f -- "$1.zip" "$1.csv"
    return 1
  fi

  echo "uploading $1.csv to s3 ... "
  # NOTE(review): this trusts s3put's exit status to reflect whether the
  # upload succeeded -- confirm for the installed aws script.
  if ! s3put "$MY_BUCKET" "$1.csv"; then
    echo "failed to upload $1.csv to s3" >&2
    # Local copies are removed even on failure so repeated runs do not
    # accumulate files on disk.
    rm -f -- "$1.zip" "$1.csv"
    return 1
  fi
  echo 'uploaded to s3 ...'

  rm -f -- "$1.zip"
  echo 'deleted .zip file ...'
  rm -f -- "$1.csv"
  echo 'deleted .csv file ...'
  echo "done with $1.zip"
}
# Validate the command-line arguments and default END_YEAR.
# A missing or non-numeric start year would otherwise make the numeric
# tests below fail and send the script down the wrong branch, so stop early.
case "$THIS_YEAR" in
  '' | *[!0-9]*)
    echo "usage: $0 START_YEAR [END_YEAR]" >&2
    exit 2
    ;;
esac
case "$END_YEAR" in
  *[!0-9]*)
    echo "usage: $0 START_YEAR [END_YEAR]" >&2
    exit 2
    ;;
esac

# Check to see if a 2nd parameter was passed. If not, and the start year
# is below BREAK_YEAR, process just that one year. When a 2nd parameter
# was passed, END_YEAR already holds it.
if [ -z "$END_YEAR" ] && [ "$THIS_YEAR" -lt "$BREAK_YEAR" ]; then
  END_YEAR=$THIS_YEAR
fi
# Loop through all relevant files.
# Start years below BREAK_YEAR: one transfer per year from THIS_YEAR
# through END_YEAR. Otherwise: twelve transfers, one per month of
# THIS_YEAR (END_YEAR is not consulted on this path).
if [ "$THIS_YEAR" -lt "$BREAK_YEAR" ]; then
  for THIS_FILE in $(seq "$THIS_YEAR" "$END_YEAR"); do
    transfer_gdelt "$THIS_FILE"
  done
else
  for THIS_MONTH in $(seq 1 12); do
    # Build the archive name for this month; previously THIS_FILE was
    # never set on this path, so every iteration requested ".zip".
    # NOTE(review): assumes monthly backfiles are named YYYYMM (year plus
    # zero-padded month) -- confirm against the backfile listing.
    THIS_FILE=$(printf '%s%02d' "$THIS_YEAR" "$THIS_MONTH")
    transfer_gdelt "$THIS_FILE"
  done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment