Skip to content

Instantly share code, notes, and snippets.

@steveyen
Created April 9, 2018 16:27
Show Gist options
  • Save steveyen/875760b85f841241615da36aa8a03c38 to your computer and use it in GitHub Desktop.
Save steveyen/875760b85f841241615da36aa8a03c38 to your computer and use it in GitHub Desktop.
Steves-MacBook-Pro-2:bleve-blast steveyen$ git diff
diff --git a/cmd/bleve-blast/main.go b/cmd/bleve-blast/main.go
index c8b826f..2a42b4b 100644
--- a/cmd/bleve-blast/main.go
+++ b/cmd/bleve-blast/main.go
@@ -25,8 +25,6 @@ import (
_ "github.com/blevesearch/bleve/config"
_ "github.com/blevesearch/bleve/index/store/metrics"
_ "github.com/blevesearch/bleve/index/store/null"
-
- _ "github.com/couchbase/fuego"
)
var config = flag.String("config", "", "configuration file to use")
@@ -44,6 +42,7 @@ var printTime = flag.Duration("printTime", 5*time.Second, "print stats every pri
var bindHttp = flag.String("bindHttp", ":1234", "http bind port")
var statsFile = flag.String("statsFile", "", "<stdout>")
var waitPersist = flag.Bool("waitPersist", false, "wait for all data to be persisted before closing")
+var waitTime = flag.Int("waitTime", 0, "wait millisecs between batches")
var traceprofile = flag.String("traceprofile", "", "write trace profile to file")
var totalIndexed uint64
@@ -220,6 +219,34 @@ func printLine() {
lastTotalPlainTextIndexed = nowTotalPlainTextIndexed
}
+// ArticleReader is the source of articles consumed by readingWorker. Both the
+// plain wiki reader and *cachingWikiReader are assigned to a variable of this
+// type there.
+type ArticleReader interface {
+ // Next returns the next article, or an error. The loops in readingWorker
+ // stop as soon as the article is nil or the error is non-nil.
+ Next() (*blevebench.Article, error)
+}
+
+// cachingWikiReader wraps a WikiReader and, instead of reading a new article
+// on every call, re-serves the most recently read article under a generated
+// title (see its Next method).
+type cachingWikiReader struct {
+ r *blevebench.WikiReader // underlying reader that supplies the real articles
+ n int // number of articles handed out so far by Next
+
+ lastArticle *blevebench.Article // article most recently read from r
+ lastErr error // error returned by r together with lastArticle
+}
+
+// Next returns a synthetic article built from the most recently read real
+// article: the text is reused as-is and the title gets a "_<n>" suffix, where
+// n is the running count of articles handed out.
+//
+// A real article is read from the underlying reader on the first call, after
+// every 10000 articles handed out, and whenever no article is cached.
+//
+// If the underlying reader returns an error, Next returns (nil, err). If it
+// returns no article and no error, Next returns (nil, nil) rather than
+// dereferencing the missing article; the loops in readingWorker already treat
+// a nil article as the signal to stop.
+func (r *cachingWikiReader) Next() (*blevebench.Article, error) {
+	if r.lastArticle == nil || r.n%10000 == 0 {
+		r.lastArticle, r.lastErr = r.r.Next()
+	}
+	if r.lastErr != nil {
+		return nil, r.lastErr
+	}
+	if r.lastArticle == nil {
+		// Nothing was read and nothing failed: pass the "no more
+		// articles" result through instead of panicking below.
+		return nil, nil
+	}
+
+	r.n++
+
+	return &blevebench.Article{
+		Title: r.lastArticle.Title + "_" + strconv.Itoa(r.n),
+		Text:  r.lastArticle.Text,
+	}, nil
+}
+
func readingWorker(index bleve.Index, work chan *Work) {
wikiReader, err := blevebench.NewWikiReader(*source)
if err != nil {
@@ -227,12 +254,17 @@ func readingWorker(index bleve.Index, work chan *Work) {
}
defer wikiReader.Close()
+ var articleReader ArticleReader = wikiReader
+ if false {
+ articleReader = &cachingWikiReader{r: wikiReader}
+ }
+
i := 0
if *batchSize > 1 {
batch := index.NewBatch()
bytesInBatch := uint64(0)
- a, err := wikiReader.Next()
+ a, err := articleReader.Next()
for a != nil && err == nil && i < *count {
if *maxTextSize > 0 && len(a.Text) > *maxTextSize {
a.Text = a.Text[0:*maxTextSize]
@@ -254,7 +286,7 @@ func readingWorker(index bleve.Index, work chan *Work) {
bytesInBatch = 0
}
- a, err = wikiReader.Next()
+ a, err = articleReader.Next()
}
if err != nil {
log.Fatalf("reading worker fatal: %v", err)
@@ -268,7 +300,7 @@ func readingWorker(index bleve.Index, work chan *Work) {
}
} else {
- a, err := wikiReader.Next()
+ a, err := articleReader.Next()
for a != nil && err == nil && i <= *count {
if *maxTextSize > 0 && len(a.Text) > *maxTextSize {
a.Text = a.Text[0:*maxTextSize]
@@ -280,7 +312,7 @@ func readingWorker(index bleve.Index, work chan *Work) {
id: strconv.Itoa(i),
plainTextBytes: uint64(len(a.Title) + len(a.Text)),
}
- a, err = wikiReader.Next()
+ a, err = articleReader.Next()
}
if err != nil {
log.Fatalf("reading worker fatal: %v", err)
@@ -317,5 +349,9 @@ func batchIndexingWorker(index bleve.Index, workChan chan *Work, timeStart time.
}
atomic.AddUint64(&totalIndexed, uint64(workSize))
atomic.AddUint64(&totalPlainTextIndexed, work.plainTextBytes)
+
+ if *waitTime > 0 {
+ time.Sleep(time.Duration(*waitTime) * time.Millisecond)
+ }
}
}
diff --git a/cmd/bleve-query/main.go b/cmd/bleve-query/main.go
index a6650e2..5aa3e48 100644
--- a/cmd/bleve-query/main.go
+++ b/cmd/bleve-query/main.go
@@ -7,6 +7,7 @@ import (
"log"
"math/rand"
"net/http"
+ _ "net/http/pprof"
"os"
"path"
"runtime/pprof"
@@ -20,10 +21,10 @@ import (
"github.com/blevesearch/bleve"
_ "github.com/blevesearch/bleve/config"
- "github.com/blevesearch/bleve/search"
+ // "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query"
- _ "github.com/couchbase/fuego"
+ // _ "github.com/couchbase/fuego"
)
var target = flag.String("index", "bench.bleve", "index filename")
@@ -157,8 +158,9 @@ type ctxReq struct {
var poolCtxReq = sync.Pool{
New: func() interface{} {
ctx := context.Background()
- dmp := search.NewDocumentMatchPool(12, 1)
- ctx = search.WithDocumentMatchPool(ctx, dmp)
+
+ // dmp := search.NewDocumentMatchPool(12, 1)
+ // ctx = search.WithDocumentMatchPool(ctx, dmp)
return &ctxReq{
ctx: ctx,
@@ -182,6 +184,7 @@ func queryClient(index bleve.Index, queries []query.Query, closeChan chan struct
q := queries[p]
atomic.AddUint64(&queriesStarted, 1)
ctxReq := poolCtxReq.Get().(*ctxReq)
+ ctxReq.req.IncludeLocations = false
ctxReq.req.Query = q
res, err := index.SearchInContext(ctxReq.ctx, ctxReq.req)
if err != nil {
diff --git a/mapping.go b/mapping.go
index 43d0619..c090fb4 100644
--- a/mapping.go
+++ b/mapping.go
@@ -16,6 +16,12 @@ func BuildArticleMapping() mapping.IndexMapping {
standardJustIndexed.IncludeTermVectors = false
standardJustIndexed.Analyzer = "standard"
+ if true {
+ standardJustIndexed.Store = true
+ standardJustIndexed.IncludeInAll = true
+ standardJustIndexed.IncludeTermVectors = true
+ }
+
keywordJustIndexed := bleve.NewTextFieldMapping()
keywordJustIndexed.Store = false
keywordJustIndexed.IncludeInAll = false
Steves-MacBook-Pro-2:bleve-blast steveyen$
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment