Last active
August 11, 2022 08:22
-
-
Save iolalla/c1978ae1c40348ceac0bd7018eeba1a8 to your computer and use it in GitHub Desktop.
I used this code to split a big file into chunks and upload it to Google Cloud Storage. The use case is: "How to upload a huge file to GCS in parts, so you can overcome the 5 TB per-object limit in GCS, and then join it back with cat or gcloud cat."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"bufio"
	"context"
	"flag"
	"fmt"
	"io"
	"os"
	"strings"

	"cloud.google.com/go/storage"
)
/**
 * I used this code to split a big file into chunks and upload it to Google Cloud Storage.
 * The use case is: "How to upload a huge file to GCS in parts, so you can overcome the
 * 5 TB per-object limit in GCS, and then join it back with cat or gcloud cat."
 * @Author: iolalla@gmail.com
 */
// Command-line configuration, populated by flag.Parse in main.
var (
	filez       string // path of the file to split
	chunkSize   int    // size of each part, in bytes
	destination string // output directory, or a gs://bucket target
)
func main() { | |
flag.StringVar(&filez, "file", "tradeinn.tar.gz", "File to split") | |
flag.IntVar(&chunkSize, "size", 500000, "Size in Byte of the parts") | |
flag.StringVar(&destination, "dest", ".", "Where to write the file, if it starts with gs, will save it in Google Cloud Storage") | |
flag.Parse() | |
file, err := os.Open(filez) | |
if err != nil { | |
panic(err) | |
} | |
defer file.Close() | |
// to divide file in chunks of chunkSize in Bytes | |
info, _ := file.Stat() | |
chunks := int(info.Size()) / chunkSize | |
// reader of chunk size | |
bufR := bufio.NewReaderSize(file, chunkSize) | |
// We need to add a final slice for the final bytes | |
var slice = make([]int, chunks+1) | |
if strings.HasPrefix(destination, "gs://") { | |
for i := range slice { | |
reader := make([]byte, chunkSize) | |
rlen, err := bufR.Read(reader) | |
// fmt.Println("Read: ", rlen) | |
if err != nil { | |
panic(err) | |
} | |
writeGCS(i, rlen, &reader) | |
} | |
} else { | |
for i := range slice { | |
reader := make([]byte, chunkSize) | |
rlen, err := bufR.Read(reader) | |
if err != nil { | |
panic(err) | |
} | |
writeFile(i, rlen, &reader) | |
} | |
} | |
} | |
func writeGCS(i int, rlen int, bufW *[]byte) { | |
ctx := context.Background() | |
client, err := storage.NewClient(ctx) | |
if err != nil { | |
panic(err) | |
} | |
fname := fmt.Sprintf("%v/file_%v", filez, i) | |
path := strings.ReplaceAll(destination, "gs://", "") | |
wc := client.Bucket(path).Object(fname).NewWriter(ctx) | |
wbytes := *(bufW) | |
if _, err := wc.Write(wbytes[:rlen]); err != nil { | |
panic(err) | |
} | |
if err := wc.Close(); err != nil { | |
panic(err) | |
} | |
fmt.Println("gcloud cat ", destination, "/", fname, " >> ", filez) | |
} | |
func writeFile(i int, rlen int, bufW *[]byte) { | |
fname := fmt.Sprintf("file_%v", i) | |
f, err := os.Create(fname) | |
defer f.Close() | |
w := bufio.NewWriterSize(f, rlen) | |
wbytes := *(bufW) | |
_, err = w.Write(wbytes[:rlen]) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Println("cat ", fname, " >> ", filez) | |
w.Flush() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment