Skip to content

Instantly share code, notes, and snippets.

@markharwood
Last active November 27, 2019 17:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markharwood/3b5519338045a12115a41fc5b72355fa to your computer and use it in GitHub Desktop.
Save markharwood/3b5519338045a12115a41fc5b72355fa to your computer and use it in GitHub Desktop.
Benchmark: numeric IDs mapped as `long` vs. `keyword` for exact-match searches
# Index a million random IDs.
from elasticsearch import helpers
from elasticsearch.client import Elasticsearch
from random import randint
import time
# Benchmark setup: (re)create a single-shard index whose "my_id" field is
# mapped both as a long and as a keyword sub-field, then bulk-index random IDs
# and report the indexing rate.
indexName = "test"
maxIds = 1000000
es = Elasticsearch()

indexSettings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            "my_id": {
                "type": "long",
                # Adds a third to index size - 43mb vs 32mb
                "fields": {
                    "asKeyword": {
                        "type": "keyword",
                    },
                },
            },
        },
    },
}

# Drop any previous copy of the index; 400/404 responses are ignored so the
# delete does not raise when there is nothing to remove.
es.indices.delete(index=indexName, ignore=[400, 404])
es.indices.create(index=indexName, body=indexSettings)

# Lazily generated documents, one random ID each, streamed to the bulk helper.
docs = ({'my_id': randint(0, maxIds)} for _ in range(maxIds))

start = time.time()
helpers.bulk(es, docs, index=indexName)
now = time.time()

# Items indexed per second over the whole bulk run.
elapsed = now - start
qps = maxIds / elapsed
print(maxIds, "(", qps, "ips)")
print("took ", elapsed, "seconds")
# Search for a million random ids.
# keyword-based searches are about 4 times faster than long-based searches
from elasticsearch.client import Elasticsearch
from random import randint
import time
# Benchmark run: issue `terms` queries of `termsPerQuery` random IDs each until
# `maxIds` IDs have been looked up, then report the lookup rate.
indexName = "test"
es = Elasticsearch()
numItems = 0
maxIds = 1000000
termsPerQuery = 100
qTerms = []
start = time.time()
while numItems < maxIds:
    numItems += 1
    qTerms.append(randint(0, maxIds))
    # Send the batch once it is full. The final batch is also sent when it is
    # only partially filled, so no generated ID is skipped if maxIds is not a
    # multiple of termsPerQuery.
    if len(qTerms) >= termsPerQuery or numItems == maxIds:
        q = {
            "query": {
                "terms": {
                    # "my_id": qTerms  # Takes 78 seconds for all queries (uses range queries under the covers)
                    "my_id.asKeyword": qTerms  # Takes 20 seconds for all queries
                }
            }
        }
        es.search(index=indexName, body=q)
        qTerms = []
        # NOTE(review): the original indentation was lost; this progress print
        # is assumed to belong to the per-batch branch - confirm.
        print(numItems)
now = time.time()

# IDs looked up per second over the whole run.
elapsed = now - start
qps = numItems / elapsed
print(numItems, "(", qps, "ips)")
print("took ", elapsed, "seconds")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment