Skip to content

Instantly share code, notes, and snippets.

@mark-kubacki
Last active May 4, 2020 23:02
Show Gist options
  • Save mark-kubacki/0e8e88767131f76869779d9ccf453f60 to your computer and use it in GitHub Desktop.
Save mark-kubacki/0e8e88767131f76869779d9ccf453f60 to your computer and use it in GitHub Desktop.
count bytes per storage class
package main
import (
"context"
"io"
"log"
"net/url"
"os"
"strings"
"cloud.google.com/go/storage"
"github.com/aws/aws-sdk-go/service/s3"
"gocloud.dev/blob"
_ "gocloud.dev/blob/gcsblob" // Registers scheme "gs://".
_ "gocloud.dev/blob/s3blob" // Registers scheme "s3://".
"golang.org/x/sync/errgroup"
)
// main tallies, for every bucket URL given on the command line, the bytes
// stored per storage class and logs one tab-separated line per class:
// bucket URL, storage class, bytes.
//
// Buckets are processed concurrently, one goroutine each. The goroutines share
// a context derived from the errgroup, which is cancelled as soon as the first
// of them returns an error; listings of other buckets that honour that context
// may therefore end early as well.
//
// Exit status is 1 when no bucket URL was given, and 2 when at least one
// bucket could not be processed.
func main() {
	if len(os.Args) <= 1 {
		log.Printf("CRIT: Start this with bucket URLs as arguments. Example:\n %s %s %s …\n",
			os.Args[0], "scw://mkv?region=fr-par", "scw://packages?region=nl-ams",
		)
		os.Exit(1)
	}

	g, ctx := errgroup.WithContext(context.Background())
	for _, bucketURL := range os.Args[1:] {
		bucketURL := bucketURL // Per-iteration copy for the closure below (needed before Go 1.22).
		g.Go(func() error {
			storageClasses, err := CountBytesPerStorageClass(ctx, bucketURL)
			// Report whatever has been tallied, even if counting ended with an error.
			for storageClass, bytesUsed := range storageClasses {
				log.Printf("%s\t%s\t%d\n", bucketURL, storageClass, bytesUsed)
			}
			return err
		})
	}

	if err := g.Wait(); err != nil {
		log.Println("ERR: This failed for at least one bucket.")
		os.Exit(2)
	}
	log.Println("Done.")
}
// CountBytesPerStorageClass lists every object in the bucket at bucketURL and
// sums the object sizes, keyed by storage class (storage class → bytes).
//
// Only the apparent object size is counted; space taken by metadata is not.
// Entries flagged as directories are skipped.
//
// The returned map is never nil. If the bucket cannot be opened, or listing
// stops with an error other than io.EOF, that error is logged and returned
// together with whatever has been tallied up to that point.
func CountBytesPerStorageClass(ctx context.Context, bucketURL string) (map[string]uint64, error) {
	// Sized for four entries: GS has the most classes, four of them.
	tally := make(map[string]uint64, 4)

	bucket, openErr := OpenBucket(ctx, bucketURL)
	if openErr != nil {
		log.Printf("WARN: Failed to open bucket: %s\n -- %v\n", bucketURL, openErr)
		return tally, openErr
	}
	defer bucket.Close()

	listing := bucket.List(nil)
	for {
		entry, nextErr := listing.Next(ctx)
		switch {
		case nextErr == io.EOF:
			// Regular end of the listing.
			return tally, nil
		case nextErr != nil:
			log.Printf("WARN: Iteration has ended prematurely: %s\n -- %v\n", bucketURL, nextErr)
			return tally, nextErr
		case entry == nil || entry.IsDir:
			continue
		}
		tally[GetStorageClass(entry)] += uint64(entry.Size)
	}
}
// GetStorageClass is a driver-agnostic way to obtain that attribute.
//
// It asks obj for its driver-specific representation: first Google Cloud
// Storage object attributes, then an S3 object. It returns "UNKNOWN" when obj
// is nil, when neither representation is available, or when the S3 object
// carries no storage class.
func GetStorageClass(obj *blob.ListObject) string {
	if obj == nil {
		return "UNKNOWN"
	}

	var oa storage.ObjectAttrs
	if obj.As(&oa) {
		return oa.StorageClass
	}

	var s3o s3.Object
	// StorageClass is an optional *string; an S3-compatible backend may leave
	// it unset, so it must not be dereferenced blindly.
	if obj.As(&s3o) && s3o.StorageClass != nil {
		return *s3o.StorageClass
	}

	return "UNKNOWN"
}
// OpenBucket wraps blob.OpenBucket, but knows more schemes.
//
// In addition to what blob.OpenBucket accepts, it understands "scw://" URLs,
// which are rewritten to "s3://" URLs before opening. Depending on the scheme
// it also logs hints about environment variables that have not been set.
//
// It returns an error if a "scw://" URL cannot be parsed, or whatever
// blob.OpenBucket returns.
func OpenBucket(ctx context.Context, src string) (*blob.Bucket, error) {
	switch {
	case strings.HasPrefix(src, "gs://"):
		log.Println("For gs:// buckets prefer reading Stackdriver Metrics over counting.")
		if _, beenSet := os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS"); !beenSet {
			log.Println("WARN: For Google Storage have this envvar point to your credential file:",
				"GOOGLE_APPLICATION_CREDENTIALS",
			)
		}
	case strings.HasPrefix(src, "s3://"), strings.HasPrefix(src, "scw://"):
		if _, beenSet := os.LookupEnv("AWS_ACCESS_KEY"); !beenSet {
			log.Println("WARN: For s3-compatible storage, such as AWS or Scaleway, remember to set these:",
				"AWS_ACCESS_KEY", "AWS_SECRET_KEY",
			)
		}
	}

	if strings.HasPrefix(src, "scw://") {
		s3URL, err := scwToS3URL(src)
		if err != nil {
			// The *url.Error already names the operation and the offending URL.
			return nil, err
		}
		src = s3URL
	}
	return blob.OpenBucket(ctx, src)
}

// scwToS3URL rewrites a "scw://" bucket URL into the equivalent "s3://" URL,
// filling in the query parameters "region" (default "fr-par") and "endpoint"
// (default "s3.<region>.scw.cloud") where the caller has not set them.
func scwToS3URL(src string) (string, error) {
	u, err := url.Parse(src)
	if err != nil {
		return "", err
	}
	q := u.Query()
	if q.Get("region") == "" {
		q.Set("region", "fr-par")
	}
	if q.Get("endpoint") == "" {
		q.Set("endpoint", "s3."+q.Get("region")+".scw.cloud")
	}
	u.RawQuery = q.Encode()
	u.Scheme = "s3"
	return u.String(), nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment