Skip to content

Instantly share code, notes, and snippets.

@eidosam
Created March 7, 2019 14:03
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eidosam/a5ec15d193da266b68bdfc62986ebcb9 to your computer and use it in GitHub Desktop.
Start a Google Cloud Storage Transfer job from command-line
#!/usr/bin/env bash
### ------ Preparation ------ ###
# Acquire new user credentials to use for Application Default Credentials
# Run: gcloud auth application-default login
### ------ -------------- --- ###
# Print command-line usage help to stdout.
# The leading \r (carriage return) at the start of each line resets the
# cursor so indentation from the source text does not leak into the output.
function printUsage() {
echo -e "
\rUsage: bash ./create-transfer-job.sh [options] <s3-source-data> <gcs-bucket-name>
\rSource: <s3-source-data> # Amazon S3 file/directory url used as data source
\rDestination: <gcs-bucket-name> # Google Cloud Storage bucket used as data sink
\rOptions:
\r\t--project-id | --project | -p <GCP-PROJECT-ID> # Google Cloud project to create the job in
\r\t--profile | -P <AWS-PROFILE> # AWS profile to use credentials from
\r\t--min-age | -g <MIN-AGE-IN-SECONDS> # Minimum time elapsed since last modification
\r\t--max-age | -l <MAX-AGE-IN-SECONDS> # Maximum time elapsed since last modification
\r\t--dry-run | -d # Print job configuration and expected files to transfer without running the job
\r"
}
# ---- Defaults (overridable via command-line options) ----
projectId=""    # resolved from the active gcloud config later if left empty
profile=default # AWS CLI profile that supplies the S3 credentials
dryRun=false    # when true, only preview the job instead of creating it
# Today's date rendered as the JSON date object the Transfer API expects.
# The same value is used for both start and end date, so the job runs once.
scheduleStartDate=$(date "+{\"day\":\"%d\",\"month\":\"%m\",\"year\":\"%Y\"}")
# Object-age filters: by default match everything. Max age defaults to the
# seconds elapsed since the Unix epoch (suffixed "s"), which is necessarily
# older than any object's last modification.
minTimeElapsedSinceLastModification="0s"
maxTimeElapsedSinceLastModification=$(date +%ss)
# ---- Parse command-line arguments ----
# Options may appear in any order. The first two non-option arguments are
# taken as the S3 source and the GCS destination bucket, respectively.
# NOTE: [ $# -gt 0 ] (not [ -n "$1" ]) so an empty-string argument does not
# silently stop parsing, and the loop stays safe under `set -u`.
while [ $# -gt 0 ]; do
case "$1" in
--project-id | --project | -p )
shift
projectId=$1
;;
--profile | -P )
shift
profile=$1
;;
--dry-run | -d )
dryRun=true
;;
--min-age | -g )
shift
minTimeElapsedSinceLastModification=$1
;;
--max-age | -l )
shift
maxTimeElapsedSinceLastModification=$1
;;
* )
if [ -z "${awsSource}" ];
then
awsSource=$1
else
gcsBucket=$1
fi
;;
esac
shift
done
# Read the AWS access keys for the selected profile from the local AWS CLI
# configuration; the Transfer Service needs them to pull objects from S3.
accessKeyId=$(aws configure get aws_access_key_id --profile "${profile}")
secretAccessKey=$(aws configure get aws_secret_access_key --profile "${profile}")
# Split an "s3://bucket/key/prefix" URL into the global variables
# awsSource (bucket/prefix), includePrefixes (everything after the first
# slash) and awsBucket (the bucket name alone).
parseS3Source() {
# Strip the scheme only when it is an actual prefix: the original
# unanchored substitution ${var/s3:\/\//} would also remove an "s3://"
# occurring in the middle of the string.
local stripped=${1#s3://}
awsSource=$stripped
includePrefixes=${stripped#*/}
awsBucket=${stripped%%/*}
}
parseS3Source "${awsSource-}"
# Both buckets are mandatory; abort with usage help. Diagnostics go to
# stderr so a captured stdout is not polluted by the error text.
if [[ -z "$awsBucket" ]] || [[ -z "$gcsBucket" ]]
then
echo "Error: Missing S3 or GCS bucket" >&2
printUsage >&2
exit 1
fi
# Fall back to the currently active gcloud project when none was supplied
# via --project-id.
if [[ -z "$projectId" ]]
then
projectId=$(gcloud config get-value project)
fi
# Human-readable job description shown in the Transfer Service console.
description="Transfer ${awsSource} to ${gcsBucket}"
# Assemble the one-shot transfer job request body. The heredoc keeps the
# JSON readable (no escaped quotes); identical start and end schedule dates
# make the job run exactly once. The leading blank line reproduces the
# leading newline of the original string value.
# NOTE(review): values are interpolated without JSON escaping — a double
# quote inside any variable would break the document; acceptable here since
# bucket names and credentials cannot contain quotes.
transferJobConf=$(cat <<EOF

{
"projectId": "${projectId}",
"description": "${description}",
"status": "ENABLED",
"schedule": {
"scheduleStartDate": ${scheduleStartDate},
"scheduleEndDate": ${scheduleStartDate}
},
"transferSpec": {
"objectConditions": {
"minTimeElapsedSinceLastModification": "${minTimeElapsedSinceLastModification}",
"maxTimeElapsedSinceLastModification": "${maxTimeElapsedSinceLastModification}",
"includePrefixes": [ "${includePrefixes}" ]
},
"awsS3DataSource": {
"bucketName": "${awsBucket}",
"awsAccessKey": {
"accessKeyId": "${accessKeyId}",
"secretAccessKey": "${secretAccessKey}"
}
},
"gcsDataSink": { "bucketName": "${gcsBucket}" }
}
}
EOF
)
# Either preview the job (dry run) or submit it to the Storage Transfer API.
if ${dryRun}
then
echo "Transfer job configuration:"
# Quote the expansion so the JSON keeps its multi-line structure
# (unquoted echo would collapse it onto a single line).
printf '%s\n' "${transferJobConf}"
echo "--------------------------------------------------"
echo "Objects to transfer:"
aws s3 ls \
--profile "${profile}" \
--recursive \
--summarize \
--human-readable \
"s3://${awsSource}"
else
# Feed the JSON to curl on stdin (--data @-) so the AWS secret access
# key embedded in it never appears in the process argument list.
printf '%s' "${transferJobConf}" | curl \
--silent \
--url https://storagetransfer.googleapis.com/v1/transferJobs \
--header "Content-Type: application/json" \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--request POST \
--data @-
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment