Skip to content

Instantly share code, notes, and snippets.

@ogrodnek
Created June 15, 2012 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ogrodnek/2933771 to your computer and use it in GitHub Desktop.
Save ogrodnek/2933771 to your computer and use it in GitHub Desktop.
Move AWS billing data to Hive-partitioned paths
package com.bizo.aws.billing
import com.amazonaws.services.s3._
import com.bizo.util.aws.SystemPropertyOrEnvAWSCredentials
import com.amazonaws.services.s3.model.ListObjectsRequest
import scala.collection.JavaConversions._
/**
 * Moves AWS billing CSV reports found at the root of `bucket` to Hive-style
 * partitioned keys of the form `partitioned/year=YYYY/month=MM/<original key>`.
 *
 * Every page of the bucket listing is processed, not just the first one.
 */
object BillingMover {
  // Name of the S3 bucket that holds the billing reports. The value was elided
  // in the original source; fill it in before compiling.
  val bucket = ...

  // Matches keys ending in "-YYYY-MM.csv", capturing the year and the month.
  private val billPattern = """.*-(\d{4})-(\d{2})\.csv""".r

  /**
   * Entry point: lists the bucket root and relocates every key that matches
   * the billing-report naming pattern.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val s3 = new AmazonS3Client(new SystemPropertyOrEnvAWSCredentials)

    // With "/" as delimiter, keys containing a "/" are rolled up into common
    // prefixes, so objects already moved under "partitioned/" are not returned
    // as object summaries again.
    var listing = s3.listObjects(new ListObjectsRequest().withBucketName(bucket).withDelimiter("/"))

    // A single listing response may be truncated; keep fetching follow-up
    // pages until S3 reports that the listing is complete.
    var morePages = true
    while (morePages) {
      for (o <- listing.getObjectSummaries) {
        moveIfBill(s3, o.getKey)
      }
      if (listing.isTruncated) {
        listing = s3.listNextBatchOfObjects(listing)
      } else {
        morePages = false
      }
    }
  }

  /**
   * Copies `key` to its partitioned location and deletes the original when
   * the key looks like a billing report; any other key is left untouched.
   *
   * The delete is issued only after the copy call has returned, so a failed
   * copy (which throws) never removes the source object.
   *
   * @param s3  client used for the copy and delete requests
   * @param key object key at the bucket root
   */
  private def moveIfBill(s3: AmazonS3, key: String): Unit = {
    key match {
      case billPattern(year, month) =>
        println(key + ", " + year + ", " + month)
        val partitioned = "partitioned/year=%s/month=%s/%s".format(year, month, key)
        s3.copyObject(bucket, key, bucket, partitioned)
        s3.deleteObject(bucket, key)
      case _ => // not a billing report: ignore
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment