Skip to content

Instantly share code, notes, and snippets.

@iolalla
Last active August 11, 2022 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save iolalla/c1978ae1c40348ceac0bd7018eeba1a8 to your computer and use it in GitHub Desktop.
Save iolalla/c1978ae1c40348ceac0bd7018eeba1a8 to your computer and use it in GitHub Desktop.
I used this code to split a big file into chunks and upload it to Google Cloud Storage. The use case is: "How to upload a huge file to GCS in parts, so you can overcome the 5 TB per-object limit in GCS, and then join it back with cat or gcloud cat."
package main
import (
"bufio"
"cloud.google.com/go/storage"
"context"
"flag"
"fmt"
"os"
"strings"
)
/**
 * I used this code to split a big file into chunks and upload it to Google Cloud Storage.
 * The use case is: "How to upload a huge file to GCS in parts, so you can overcome the
 * 5 TB per-object limit in GCS, and then join it back with cat or gcloud cat."
 * @Author: iolalla@gmail.com
 */
// filez is the path of the input file to split (-file flag).
var filez string

// chunkSize is the size of each part in bytes (-size flag).
var chunkSize int

// destination is where parts go; a "gs://" prefix selects Google Cloud
// Storage, anything else means local files (-dest flag).
var destination string
// main splits the file named by -file into parts of -size bytes each and
// hands every part to writeGCS (when -dest starts with "gs://") or to
// writeFile otherwise.
func main() {
	flag.StringVar(&filez, "file", "tradeinn.tar.gz", "File to split")
	flag.IntVar(&chunkSize, "size", 500000, "Size in Byte of the parts")
	flag.StringVar(&destination, "dest", ".", "Where to write the file, if it starts with gs, will save it in Google Cloud Storage")
	flag.Parse()

	file, err := os.Open(filez)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	info, err := file.Stat()
	if err != nil {
		panic(err) // the original silently ignored Stat errors
	}
	size := info.Size()

	// Ceiling division: a trailing partial chunk needs a part of its own.
	// The original used floor division plus one unconditional extra
	// iteration, which panicked on io.EOF whenever the size was an exact
	// multiple of chunkSize (and for empty files).
	chunks := int((size + int64(chunkSize) - 1) / int64(chunkSize))

	toGCS := strings.HasPrefix(destination, "gs://")
	bufR := bufio.NewReaderSize(file, chunkSize)
	var read int64
	for i := 0; i < chunks; i++ {
		// NOTE(review): like the original, this assumes each Read returns
		// a full chunk except at end of file — true in practice for a
		// regular file read through this bufio.Reader.
		part := make([]byte, chunkSize)
		rlen, err := bufR.Read(part)
		read += int64(rlen)
		if rlen > 0 {
			if toGCS {
				writeGCS(i, rlen, &part)
			} else {
				writeFile(i, rlen, &part)
			}
		}
		if err != nil {
			if read < size {
				panic(err) // real read failure before the file was consumed
			}
			break // normal end of file
		}
	}
}
// writeGCS uploads the first rlen bytes of *bufW to the bucket named by
// the -dest flag (minus its "gs://" prefix) as an object called
// "<filez>/file_<i>", then prints the command that appends the part back
// onto the original file.
//
// NOTE(review): a new Storage client is created per chunk because the
// signature offers nowhere to share one; for many chunks, hoisting the
// client to the caller would be much cheaper.
func writeGCS(i int, rlen int, bufW *[]byte) {
	ctx := context.Background()
	client, err := storage.NewClient(ctx)
	if err != nil {
		panic(err)
	}
	// The original leaked the client; release its connections when done.
	defer client.Close()

	fname := fmt.Sprintf("%v/file_%v", filez, i)
	// Strip only the leading scheme, not any accidental inner match.
	bucket := strings.TrimPrefix(destination, "gs://")
	wc := client.Bucket(bucket).Object(fname).NewWriter(ctx)
	data := *bufW
	if _, err := wc.Write(data[:rlen]); err != nil {
		panic(err)
	}
	// Close finalizes the upload; most write failures surface here.
	if err := wc.Close(); err != nil {
		panic(err)
	}
	// Print a copy-pasteable command. The original fmt.Println inserted
	// spaces around "/" and broke the object path.
	fmt.Printf("gcloud cat %s/%s >> %s\n", destination, fname, filez)
}
func writeFile(i int, rlen int, bufW *[]byte) {
fname := fmt.Sprintf("file_%v", i)
f, err := os.Create(fname)
defer f.Close()
w := bufio.NewWriterSize(f, rlen)
wbytes := *(bufW)
_, err = w.Write(wbytes[:rlen])
if err != nil {
panic(err)
}
fmt.Println("cat ", fname, " >> ", filez)
w.Flush()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment