-
-
Save markharwood/3b5519338045a12115a41fc5b72355fa to your computer and use it in GitHub Desktop.
Benchmark longs mapped as long vs keyword for exact-match searches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Index a million random IDs.
#
# Recreates the "test" index with `my_id` mapped as a long that also has an
# `asKeyword` keyword sub-field, bulk-indexes `maxIds` documents that each
# carry one random ID, and prints the indexing throughput.
from elasticsearch import helpers
from elasticsearch.client import Elasticsearch
from random import randint
import time

indexName = "test"
es = Elasticsearch()
indexSettings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "my_id": {
                "type": "long",
                # Adds a third to index size - 43mb vs 32mb
                "fields": {
                    "asKeyword": {
                        "type": "keyword"
                    }
                }
            }
        }
    }
}

# Start from an empty index. The 400/404 statuses are passed as `ignore` so
# that deleting an index that does not exist yet does not abort the run.
es.indices.delete(index=indexName, ignore=[400, 404])
es.indices.create(index=indexName, body=indexSettings)

maxIds = 1000000
start = time.time()
# The generator streams documents to the bulk helper, so the million
# documents are never all held in memory at once. randint is inclusive at
# both ends, so IDs fall in [0, maxIds].
helpers.bulk(es, ({'my_id': randint(0, maxIds)} for _ in range(maxIds)), index=indexName)
now = time.time()
elapsed = now - start
qps = maxIds / elapsed  # documents indexed per second
print(maxIds, "(", qps, "ips)")
print("took ", elapsed, "seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search for a million random ids.
# keyword-based searches are about 4 times faster than long-based searches
#
# Draws `maxIds` random IDs, groups them into `terms` queries of
# `termsPerQuery` values each, runs every query against the "test" index and
# prints the overall throughput.
from elasticsearch.client import Elasticsearch
from random import randint
import time

indexName = "test"
es = Elasticsearch()
numItems = 0
maxIds = 1000000
termsPerQuery = 100
qTerms = []


def _search_terms(terms):
    """Run one `terms` query for the given list of IDs against the index."""
    q = {
        "query": {
            "terms": {
                # Alternative field for comparison:
                # "my_id": terms  # Takes 78 seconds for all queries (uses range queries under covers)
                "my_id.asKeyword": terms  # Takes 20 seconds for all queries
            }
        }
    }
    es.search(index=indexName, body=q)


start = time.time()
while numItems < maxIds:
    numItems += 1
    qTerms.append(randint(0, maxIds))
    if len(qTerms) >= termsPerQuery:
        _search_terms(qTerms)
        qTerms = []
        # Progress output after each batch.
        # NOTE(review): the original indentation of this print was lost;
        # per-batch placement is assumed - confirm against the source gist.
        print(numItems)

# Flush a final partial batch so no generated ID goes unqueried when maxIds
# is not a multiple of termsPerQuery (a no-op with the values above).
if qTerms:
    _search_terms(qTerms)
    qTerms = []

now = time.time()
elapsed = now - start
qps = numItems / elapsed  # looked-up IDs per second
print(numItems, "(", qps, "ips)")
print("took ", elapsed, "seconds")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment