Skip to content

Instantly share code, notes, and snippets.

@robertknight
Last active January 19, 2018 17:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robertknight/3bed5b33cc3255e6dddcdcf20ba06e8f to your computer and use it in GitHub Desktop.
Save robertknight/3bed5b33cc3255e6dddcdcf20ba06e8f to your computer and use it in GitHub Desktop.
Script to check for presence of annotations in an h Elasticsearch index
package main
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/hex"
"encoding/json"
"fmt"
"net/http"
"os"
"strings"
)
// result is the outcome of checking a single annotation ID against the
// Elasticsearch index.
type result struct {
	// id is the annotation ID exactly as it was handed to the worker.
	id string
	// exists is true when the search for id returned a hit.
	exists bool
}
// toURLSafe converts a hex-encoded UUID (dashes optional, any letter case)
// into an unpadded, URL-safe base64 string.
//
// After dashes are removed the value must be exactly 32 hex digits. Two
// layouts are handled:
//
// Flake IDs are recognised by the marker nibbles 'e' at offset 12 and '5' at
// offset 16. The two markers are dropped and the remaining 15 bytes are
// encoded, giving 20 characters.
//
// Every other UUID has all 16 bytes encoded, giving 22 characters.
//
// Input that is not 32 valid hex digits is returned unchanged rather than
// panicking or encoding a truncated prefix, so callers can still report it.
func toURLSafe(uuid string) string {
	const hexLen = 32

	// Normalise case so the marker check does not depend on how the UUID was
	// printed; hex digits are case-insensitive.
	hexStr := strings.ToLower(strings.Replace(uuid, "-", "", -1))
	if len(hexStr) != hexLen {
		return uuid
	}

	payload := hexStr
	if hexStr[12] == 'e' && hexStr[16] == '5' {
		// Flake ID: strip the two marker nibbles before decoding.
		payload = hexStr[0:12] + hexStr[13:16] + hexStr[17:32]
	}

	data, err := hex.DecodeString(payload)
	if err != nil {
		return uuid
	}

	// Both layouts use the URL-safe alphabet ('-' and '_'). RawURLEncoding
	// omits '=' padding; the 15-byte flake payload never needs any.
	return base64.RawURLEncoding.EncodeToString(data)
}
// annExistsInElasticsearch reports whether exactly one document whose _id
// equals toURLSafe(id) is found by a term query against
// "<esHost>/<indexName>/_search".
//
// It returns true for one hit and false for zero hits. It panics when the
// query cannot be encoded, the HTTP request fails, the server answers with a
// status other than 200 OK, the response body cannot be decoded, or more than
// one hit is reported.
func annExistsInElasticsearch(esHost string, indexName string, id string) bool {
	type searchQuery struct {
		Query struct {
			Term struct {
				ID string `json:"_id"`
			} `json:"term"`
		} `json:"query"`
	}
	var q searchQuery
	q.Query.Term.ID = toURLSafe(id)
	jsonQuery, err := json.Marshal(q)
	if err != nil {
		panic(err)
	}

	searchURL := fmt.Sprintf("%v/%v/_search", esHost, indexName)
	resp, err := http.Post(searchURL, "application/json", bytes.NewReader(jsonQuery))
	if err != nil {
		panic(err)
	}
	// Always release the connection; without this every request leaks one.
	defer resp.Body.Close()

	// An error response would otherwise decode as zero hits and be
	// misreported as a missing annotation.
	if resp.StatusCode != http.StatusOK {
		panic(fmt.Sprintf("search request to %v returned status %v", searchURL, resp.Status))
	}

	type searchResult struct {
		Hits struct {
			Total int `json:"total"`
		} `json:"hits"`
	}
	var sr searchResult
	if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
		panic(err)
	}

	if sr.Hits.Total > 1 {
		panic("Too many hits")
	}
	return sr.Hits.Total == 1
}
// checkIndexStatus is a worker loop. It receives annotation IDs from ids,
// looks each one up with annExistsInElasticsearch, and sends one result per ID
// on results. It returns once ids has been closed and drained.
func checkIndexStatus(esHost string, indexName string, ids chan string, results chan result) {
	for {
		next, open := <-ids
		if !open {
			return
		}
		results <- result{
			id:     next,
			exists: annExistsInElasticsearch(esHost, indexName, next),
		}
	}
}
// main reads annotation IDs from stdin, one per line, and checks each one
// against the "hypothesis" index on the Elasticsearch host named by the
// ES_HOST environment variable, using up to 10 concurrent workers.
//
// Every ID that is not found is printed together with its URL-safe form. A
// progress line is printed after every 10 checked IDs, and a final summary is
// printed when the total is not a multiple of 10 (or is zero). The program
// exits with status 1 if ES_HOST is unset or stdin cannot be read.
func main() {
	esHost, ok := os.LookupEnv("ES_HOST")
	if !ok || esHost == "" {
		fmt.Fprintln(os.Stderr, "ES_HOST environment variable must be set")
		os.Exit(1)
	}
	indexName := "hypothesis"
	maxConcurrentRequests := 10

	var idQueue []string
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		// Skip blank lines so they are not looked up as IDs.
		if id := strings.TrimSpace(scanner.Text()); id != "" {
			idQueue = append(idQueue, id)
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "reading ids from stdin: %v\n", err)
		os.Exit(1)
	}

	idChan := make(chan string)
	resultChan := make(chan result)
	for i := 0; i < maxConcurrentRequests; i++ {
		go checkIndexStatus(esHost, indexName, idChan, resultChan)
	}
	go func() {
		for _, id := range idQueue {
			idChan <- id
		}
		// Closing the channel lets the workers' range loops terminate.
		close(idChan)
	}()

	count := 0
	missing := 0
	// Each queued ID yields exactly one result, so receive exactly that many.
	// Ranging over resultChan would block forever because nothing closes it.
	for count < len(idQueue) {
		res := <-resultChan
		count++
		if !res.exists {
			missing++
			fmt.Fprintf(os.Stdout, "%v / %v is not in Elasticsearch\n", res.id, toURLSafe(res.id))
		}
		if count%10 == 0 {
			fmt.Fprintf(os.Stdout, "Checked %v ids, found %v missing in Elasticsearch\n", count, missing)
		}
	}

	// Print the final totals when the last progress line did not cover them.
	if count == 0 || count%10 != 0 {
		fmt.Fprintf(os.Stdout, "Checked %v ids, found %v missing in Elasticsearch\n", count, missing)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment