Last active
May 29, 2018 11:35
-
-
Save jousby/3f8c09f8f57704c809aa02dfa2f0f6b7 to your computer and use it in GitHub Desktop.
Happy-path Scala code for transferring files from Google Storage (Google Cloud Object Storage) to S3 (AWS Object Storage).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example | |
import java.io.ByteArrayInputStream | |
import java.util.concurrent.atomic.AtomicInteger | |
import com.amazonaws.services.s3.AmazonS3ClientBuilder | |
import com.amazonaws.services.s3.model.ObjectMetadata | |
import com.google.cloud.storage.Storage.BlobListOption | |
import com.google.cloud.storage.{Blob, Bucket, StorageOptions} | |
import scala.collection.JavaConverters._ | |
/**
 * Copies every blob under a given prefix of a Google Storage bucket into an S3 bucket.
 *
 * You will need the following libraries:
 *   lazy val awsS3 = "com.amazonaws" % "aws-java-sdk-s3" % awsVersion
 *   lazy val googleStorage = "com.google.cloud" % "google-cloud-storage" % googleStorageVersion
 *
 * The code to create the google and aws clients will expect you to have some credentials injected
 * into your environment (out of scope).
 *
 * This is happy-path code: each blob is read fully into memory before it is uploaded, and no
 * retry or per-file error handling is attempted.
 */
object GoogleStorageToS3 {

  /**
   * Lists the blobs under the configured prefix and uploads them to S3 in parallel, printing a
   * progress line after each successful upload.
   *
   * @param args command-line arguments (unused)
   * @throws IllegalStateException if the Google Storage bucket cannot be found
   */
  def main(args: Array[String]): Unit = {
    // Google Storage client
    val googleStorage = StorageOptions.getDefaultInstance.getService

    // S3 client
    val s3 = AmazonS3ClientBuilder.standard().build()

    // Google Bucket handle. The lookup yields null when the bucket does not exist, so fail
    // here with a clear message instead of a NullPointerException on the list call below.
    val bucketOrNull: Bucket = googleStorage.get("<bucketname>")
    val googleBucket: Bucket = Option(bucketOrNull).getOrElse(
      throw new IllegalStateException("Google Storage bucket not found: <bucketname>")
    )

    // List of files in my Google bucket
    val files: List[Blob] =
      googleBucket.list(BlobListOption.prefix("<folder/prefix name>")).iterateAll.asScala.toList

    // Shared progress counter; the reference itself never changes, only its atomic value.
    val remaining = new AtomicInteger(files.size)

    // Parallel copy of Google Storage files to S3
    files.par.foreach { file =>
      val fileName = file.getName

      // Read blob from Google Storage
      val content: Array[Byte] = googleStorage.readAllBytes(file.getBlobId)

      // Stream our byte array from Google Storage into S3 as a new object
      val metadata = new ObjectMetadata()
      metadata.setContentLength(content.length.toLong)
      val contentStream = new ByteArrayInputStream(content)
      try s3.putObject("<s3bucketname>", "<prefix>" + fileName, contentStream, metadata)
      finally contentStream.close()

      // Use the value returned by the atomic decrement: a separate get() could observe
      // decrements made by other parallel workers and report a misleading count.
      val left = remaining.decrementAndGet()
      println(s"Finished S3 put on file: <prefix>/$fileName ($left files remaining)")
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment