Skip to content

Instantly share code, notes, and snippets.

@gxercavins
Created October 28, 2019 19:46
Show Gist options
  • Save gxercavins/3187e37c9799bdaee58cf1ceae4e808c to your computer and use it in GitHub Desktop.
Save gxercavins/3187e37c9799bdaee58cf1ceae4e808c to your computer and use it in GitHub Desktop.
SO question 58589470
package com.dataflow.samples;
import java.util.ArrayList;
import java.util.List;
import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.BucketInfo;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Storage.BlobListOption;
import com.google.cloud.storage.StorageOptions;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.io.fs.EmptyMatchTreatment;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
/**
 * Beam pipeline that reads the files located in each first-level subdirectory of a GCS prefix.
 *
 * <p>The subdirectories are discovered at pipeline-construction time with the Cloud Storage
 * client; each one is turned into a {@code gs://bucket/subdir/*} glob that is then expanded and
 * read by {@link FileIO}.
 */
public class RecursiveGCS {

  /**
   * Builds and runs the pipeline, blocking until it finishes.
   *
   * @param args command-line arguments, parsed into Beam {@link PipelineOptions}
   */
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
    Pipeline p = Pipeline.create(options);

    Storage storage = StorageOptions.getDefaultInstance().getService();

    // NOTE: the original code also called options.getPath() here, but plain PipelineOptions
    // declares no such getter (the line did not compile) and the value was never used.
    // Bucket and prefix therefore remain hard-coded placeholders.
    String bucketName = "BUCKET_NAME";
    String directory = "temp/";

    List<String> subdirectories = listSubdirectoryGlobs(storage, bucketName, directory);

    p.apply("Read subdirectories", Create.of(subdirectories))
        // ALLOW keeps the pipeline from failing when a subdirectory glob matches no files.
        .apply("Match files", FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW))
        .apply("Read files", FileIO.readMatches());

    p.run().waitUntilFinish();
  }

  /**
   * Lists the entries directly under {@code directory} and returns one glob per subdirectory.
   *
   * @param storage Cloud Storage client used for the listing
   * @param bucketName name of the bucket to list, without the {@code gs://} scheme
   * @param directory object-name prefix to list under
   * @return globs of the form {@code gs://<bucket>/<subdirectory>*}, in listing order; empty if
   *     the listing contains no directory entries
   */
  private static List<String> listSubdirectoryGlobs(
      Storage storage, String bucketName, String directory) {
    List<String> globs = new ArrayList<>();
    // currentDirectory() requests a directory-style (non-recursive) listing of the prefix.
    Page<Blob> blobs =
        storage.list(
            bucketName, BlobListOption.currentDirectory(), BlobListOption.prefix(directory));
    for (Blob blob : blobs.iterateAll()) {
      if (blob.isDirectory()) {
        globs.add("gs://" + bucketName + "/" + blob.getName() + "*");
      }
    }
    return globs;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment