Skip to content

Instantly share code, notes, and snippets.

@brasic
Last active December 26, 2015 04:38
Show Gist options
  • Save brasic/7094330 to your computer and use it in GitHub Desktop.
Save brasic/7094330 to your computer and use it in GitHub Desktop.
Quick and dirty S3 checker script
package main
import (
"bufio"
"fmt"
"launchpad.net/goamz/aws"
"launchpad.net/goamz/s3"
"os"
"sort"
"strconv"
"strings"
)
// Package-level state shared by the functions in this file. main assigns all
// three before any other function reads them.
var (
	// bucket is the S3 bucket handle that key listings are issued against.
	bucket *s3.Bucket
	// bucketName is taken from the second command-line argument.
	bucketName string
	// employerId is taken from the first command-line argument; it is the
	// leading path component of every key built by format.
	employerId string
)
// main takes an employer id and a bucket name as its two command-line
// arguments and the list of ids to check on standard input. It reads the ids,
// checks them against the bucket and hands the result to printResult.
//
// AWS credentials are obtained from the environment via aws.EnvAuth. Missing
// arguments or failed authentication abort the program with a panic.
func main() {
	if len(os.Args) < 3 {
		panic("missing employer id and bucket name")
	}
	employerId = os.Args[1]
	bucketName = os.Args[2]

	auth, err := aws.EnvAuth()
	if err != nil {
		// Surface the underlying error instead of discarding it, so the
		// reason the credentials could not be loaded is visible.
		panic("S3 connect failed due to auth issues, exiting! " + err.Error())
	}

	// NOTE(review): the endpoint is plain HTTP, so listings travel
	// unencrypted; consider an https endpoint if that matters here.
	usEast := aws.Region{S3Endpoint: "http://s3.amazonaws.com"}
	bucket = s3.New(auth, usEast).Bucket(bucketName)

	ids := readIds()
	found := checkFiles(ids)
	printResult(found)
}
// readIds reads one id per line from standard input and returns the ids
// sorted lexicographically.
//
// Surrounding whitespace (including a trailing carriage return) is stripped
// from each line and blank lines are skipped, so a trailing empty line or
// CRLF line endings do not abort the run. Every remaining line must parse as
// a decimal integer; otherwise readIds panics with an error that names the
// offending line. It also panics if reading standard input fails.
func readIds() (ids []string) {
	scanner := bufio.NewScanner(os.Stdin)
	for line := 1; scanner.Scan(); line++ {
		text := strings.TrimSpace(scanner.Text())
		if text == "" {
			continue
		}
		// The parsed value is not needed; Atoi is used purely as validation.
		if _, err := strconv.Atoi(text); err != nil {
			panic(fmt.Errorf("stdin line %d: %v", line, err))
		}
		ids = append(ids, text)
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	sort.Strings(ids)
	return ids
}
// checkFiles reports which of the given ids have a matching key in the
// bucket.
//
// ids is expected to be sorted lexicographically (as readIds returns it): its
// first and last elements bound the range of keys that gets listed. The
// returned map has one entry per id, keyed by the S3 key built by format; the
// value is true when that key appeared in a listing and false otherwise. Keys
// returned by S3 that were not asked for are not added to the map. An empty
// ids slice yields an empty map without issuing any request.
//
// checkFiles panics if a List request fails.
func checkFiles(ids []string) (found map[string]bool) {
	found = make(map[string]bool, len(ids))
	if len(ids) == 0 {
		// Nothing to look for; indexing ids[0] below would panic.
		return found
	}
	for _, id := range ids {
		found[format(id)] = false
	}

	first, last := ids[0], ids[len(ids)-1]
	debug("lex. earliest key is", first)
	debug("lex. last key is", last)

	path := fmt.Sprintf("%s/docs/", employerId)
	// Begin the listing at a key that sorts before the first wanted one.
	nextMarker := fmt.Sprintf("%s%s.pdf", path, predecessor(first))

	// Only the running count is reported, so the listed keys themselves are
	// not retained.
	total := 0
	for {
		debug("querying for prefix", path, "starting with", nextMarker)
		resp, err := bucket.List(path, "/", nextMarker, 1000)
		if err != nil {
			panic(err)
		}
		if len(resp.Contents) < 1 {
			debug("got no responses.")
			break
		}
		for i := range resp.Contents {
			key := resp.Contents[i].Key
			if _, wanted := found[key]; wanted {
				found[key] = true
			}
		}
		total += len(resp.Contents)
		// Continue the next page from the last key of this one.
		nextMarker = resp.Contents[len(resp.Contents)-1].Key
		debug("got", len(resp.Contents), "keys, ending with", nextMarker, ". Currently have", total, "total")
		if gt(nextMarker, last) {
			debug("ending early because the last key received:", nextMarker, "is lexographically greater than the last one we're looking for,", last)
			break
		}
	}
	debug("Done getting", total, "keys")
	return found
}
// printResult writes to standard output, one per line and in ascending
// numeric order, the id of every entry in found whose value is false.
//
// Each such key is reduced to its id with deformat and parsed as an integer;
// printResult panics if that parse fails.
func printResult(found map[string]bool) {
	missing := make([]int, 0)
	for key, present := range found {
		if present {
			continue
		}
		id, err := strconv.Atoi(deformat(key))
		if err != nil {
			panic(err)
		}
		missing = append(missing, id)
	}

	sort.Ints(missing)
	for _, id := range missing {
		fmt.Println(id)
	}
}
// debug writes its arguments to standard error in fmt.Fprintln format, but
// only when the DEBUG environment variable is set to a non-empty value.
func debug(args ...interface{}) {
	if os.Getenv("DEBUG") == "" {
		return
	}
	fmt.Fprintln(os.Stderr, args...)
}
// gt reports whether the S3 key found sorts lexicographically after the key
// that format builds for the id expecting.
func gt(found, expecting string) bool {
	wantedKey := format(expecting)
	return wantedKey < found
}
// predecessor returns a string that sorts lexicographically before num, for
// use as a starting point: num with its final byte removed, which is a proper
// prefix of num.
//
// The empty string has nothing that sorts before it, so it is returned
// unchanged instead of panicking on an out-of-range slice.
func predecessor(num string) (prev string) {
	if num == "" {
		return ""
	}
	return num[:len(num)-1]
}
// format converts an id into its S3 key: the employer id, the "docs"
// directory, then the id with a ".pdf" extension.
func format(num string) string {
	return fmt.Sprintf("%s/docs/%s.pdf", employerId, num)
}
// deformat converts an S3 key back into an id: it takes the part of key after
// the last "/" and returns everything before that part's first ".".
func deformat(key string) string {
	item := key[strings.LastIndex(key, "/")+1:]
	if dot := strings.Index(item, "."); dot >= 0 {
		return item[:dot]
	}
	return item
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment