Last active
December 26, 2015 04:38
-
-
Save brasic/7094330 to your computer and use it in GitHub Desktop.
Quick and dirty S3 checker script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"launchpad.net/goamz/aws" | |
"launchpad.net/goamz/s3" | |
"os" | |
"sort" | |
"strconv" | |
"strings" | |
) | |
var bucket *s3.Bucket | |
var bucketName string | |
var employerId string | |
// Give an employer id and bucket as args, pass the list of ids to check on standard input. | |
func main() { | |
if len(os.Args) < 3 { | |
panic("missing employer id and bucket name") | |
} | |
employerId = os.Args[1] | |
bucketName = os.Args[2] | |
auth, err := aws.EnvAuth() | |
if err != nil { | |
panic("S3 connect failed due to auth issues, exiting!") | |
} | |
USEast := aws.Region{S3Endpoint: "http://s3.amazonaws.com"} | |
bucket = s3.New(auth, USEast).Bucket(bucketName) | |
ids := readIds() | |
found := checkFiles(ids) | |
printResult(found) | |
} | |
// readIds reads one id per line from standard input and returns the ids sorted
// lexicographically (string order, not numeric order).
//
// Surrounding whitespace is trimmed and blank lines are skipped, so a trailing
// empty line or stray spaces do not abort the run. Every remaining line must be
// accepted by strconv.Atoi; the ids are nevertheless returned in their textual
// form.
//
// It panics if a line is not an integer or if reading standard input fails.
func readIds() (ids []string) {
	ids = make([]string, 0)
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		text := strings.TrimSpace(scanner.Text())
		if text == "" {
			continue
		}
		// Atoi is used for validation only; the parsed number is discarded.
		if _, err := strconv.Atoi(text); err != nil {
			panic(err)
		}
		ids = append(ids, text)
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	sort.Strings(ids)
	return ids
}
func checkFiles(ids []string) (found map[string]bool) { | |
found = make(map[string]bool) | |
for i, _ := range ids { | |
found[format(ids[i])] = false | |
} | |
allFiles := make([]s3.Key, 0) | |
debug("lex. earliest key is", ids[0]) | |
debug("lex. last key is", ids[len(ids)-1]) | |
firstSearchKey := predecessor(ids[0]) | |
lastSearchKey := ids[len(ids)-1] | |
path := fmt.Sprintf("%s/docs/", employerId) | |
nextMarker := fmt.Sprintf("%s%s.pdf", path, firstSearchKey) | |
for { | |
debug("querying for prefix", path, "starting with", nextMarker) | |
resp, err := bucket.List(path, "/", nextMarker, 1000) | |
if err != nil { | |
panic(err) | |
} | |
if len(resp.Contents) < 1 { | |
debug("got no responses.") | |
break | |
} | |
for i, _ := range resp.Contents { | |
found[resp.Contents[i].Key] = true | |
} | |
allFiles = append(allFiles, resp.Contents...) | |
nextMarker = resp.Contents[len(resp.Contents)-1].Key | |
debug("got", len(resp.Contents), "keys, ending with", nextMarker, ". Currently have", len(allFiles), "total") | |
if gt(nextMarker, lastSearchKey) { | |
debug("ending early because the last key received:", nextMarker, "is lexographically greater than the last one we're looking for,", lastSearchKey) | |
break | |
} | |
} | |
debug("Done getting", len(allFiles), "keys") | |
return found | |
} | |
// Print the ids that were not found. | |
func printResult(found map[string]bool) { | |
idsAsInts := make([]int, 0) | |
for key, present := range found { | |
if !present { | |
parsed, err := strconv.Atoi(deformat(key)) | |
if err != nil { | |
panic(err) | |
} | |
idsAsInts = append(idsAsInts, parsed) | |
} | |
} | |
sort.Sort(sort.IntSlice(idsAsInts)) | |
for i, _ := range idsAsInts { | |
fmt.Println(idsAsInts[i]) | |
} | |
} | |
// debug prints args to standard error, space-separated and newline-terminated,
// but only when the DEBUG environment variable is set to a non-empty value.
func debug(args ...interface{}) {
	if os.Getenv("DEBUG") == "" {
		return
	}
	fmt.Fprintln(os.Stderr, args...)
}
// Is the found S3 key lex. greater than the equivalent id key? | |
func gt(found, expecting string) bool { | |
return found > format(expecting) | |
} | |
// predecessor returns num with its last byte removed, for use as a listing
// starting point. For a non-empty num the result is a proper prefix of num and
// therefore sorts lexicographically before it.
//
// An empty num has no smaller string, so it is returned unchanged instead of
// panicking on an out-of-range slice.
func predecessor(num string) (prev string) {
	if num == "" {
		return ""
	}
	return num[:len(num)-1]
}
// Transform an id-formatted string into key-format. | |
func format(num string) string { | |
return employerId + "/docs/" + num + ".pdf" | |
} | |
// deformat extracts the id from a key: it takes the text after the last "/"
// and cuts it at the first ".", so "acme/docs/42.pdf" becomes "42".
func deformat(key string) string {
	name := key
	if slash := strings.LastIndex(key, "/"); slash >= 0 {
		name = key[slash+1:]
	}
	if dot := strings.Index(name, "."); dot >= 0 {
		name = name[:dot]
	}
	return name
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment