Last active
January 19, 2018 17:15
-
-
Save robertknight/3bed5b33cc3255e6dddcdcf20ba06e8f to your computer and use it in GitHub Desktop.
Script to check for presence of annotations in an h Elasticsearch index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"encoding/base64" | |
"encoding/hex" | |
"encoding/json" | |
"fmt" | |
"net/http" | |
"os" | |
"strings" | |
) | |
// result is the outcome of checking a single annotation ID against
// Elasticsearch.
type result struct {
	// id is the annotation ID exactly as it was handed to the worker.
	id string
	// exists is true when the search found exactly one matching document.
	exists bool
}
// toURLSafe converts a hexadecimal UUID string (dashes optional, either
// letter case) into the unpadded URL-safe base64 form used as the document
// _id in the search query.
//
// After dashes are removed, a value whose hex digit at index 12 is 'e' and at
// index 16 is '5' is treated as a flake ID: those two marker digits are
// dropped and the remaining 30 hex digits (15 bytes) are encoded. Any other
// value is encoded as the full 16 bytes.
//
// It returns the empty string when the input is not exactly 32 hexadecimal
// digits once dashes are removed, instead of panicking on short input or
// encoding a partially decoded value.
func toURLSafe(uuid string) string {
	// Hex digits are case-insensitive; normalise so the marker comparison
	// below also matches upper-case input.
	hexID := strings.ToLower(strings.Replace(uuid, "-", "", -1))
	if len(hexID) != 32 {
		return ""
	}

	isFlakeID := hexID[12] == 'e' && hexID[16] == '5'
	if isFlakeID {
		// Strip the two marker digits, leaving 30 hex digits.
		hexID = hexID[0:12] + hexID[13:16] + hexID[17:32]
	}

	data, err := hex.DecodeString(hexID)
	if err != nil {
		return ""
	}
	// Both forms must use the URL-safe alphabet ('-' and '_'), never the
	// standard one ('+' and '/'). 15 bytes encode without padding, and the
	// padding of the 16-byte form is omitted by the Raw encoding.
	return base64.RawURLEncoding.EncodeToString(data)
}
func annExistsInElasticsearch(esHost string, indexName string, id string) bool { | |
type SearchQuery struct { | |
Query struct { | |
Term struct { | |
Id string `json:"_id"` | |
} `json:"term"` | |
} `json:"query"` | |
} | |
var q SearchQuery | |
q.Query.Term.Id = toURLSafe(id) | |
jsonQuery, _ := json.Marshal(q) | |
searchUrl := fmt.Sprintf("%v/%v/_search", esHost, indexName) | |
resp, err := http.Post(searchUrl, "application/json", bytes.NewReader(jsonQuery)) | |
if err != nil { | |
panic(err) | |
} | |
type SearchResult struct { | |
Hits struct { | |
Total int `json:"total"` | |
} `json:"hits"` | |
} | |
var sr SearchResult | |
dec := json.NewDecoder(resp.Body) | |
err = dec.Decode(&sr) | |
if err != nil { | |
panic(err) | |
} | |
if sr.Hits.Total > 1 { | |
panic("Too many hits") | |
} | |
return sr.Hits.Total == 1 | |
} | |
func checkIndexStatus(esHost string, indexName string, ids chan string, results chan result) { | |
for id := range(ids) { | |
exists := annExistsInElasticsearch(esHost, indexName, id) | |
results<-result{id:id, exists:exists} | |
} | |
} | |
func main() { | |
esHost, _ := os.LookupEnv("ES_HOST") | |
indexName := "hypothesis" | |
maxConcurrentRequests := 10 | |
idQueue := []string{} | |
scanner := bufio.NewScanner(os.Stdin) | |
for scanner.Scan() { | |
idQueue = append(idQueue, scanner.Text()) | |
} | |
idChan := make(chan string) | |
resultChan := make(chan result) | |
for i := 0; i < maxConcurrentRequests; i++ { | |
go checkIndexStatus(esHost, indexName, idChan, resultChan) | |
} | |
go func() { | |
for _, id := range(idQueue) { | |
idChan<-id | |
} | |
}() | |
count := 0 | |
missing := 0 | |
for result := range(resultChan) { | |
count++ | |
if !result.exists { | |
missing++ | |
fmt.Fprintf(os.Stdout, "%v / %v is not in Elasticsearch\n", result.id, toURLSafe(result.id)) | |
} | |
if count % 10 == 0 { | |
fmt.Fprintf(os.Stdout, "Checked %v ids, found %v missing in Elasticsearch\n", count, missing) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment