Skip to content

Instantly share code, notes, and snippets.

@markharwood
Last active March 11, 2020 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markharwood/bd74538560f6a2be2ad0772a747e41be to your computer and use it in GitHub Desktop.
Save markharwood/bd74538560f6a2be2ad0772a747e41be to your computer and use it in GitHub Desktop.
# Index a million random IDs.
from elasticsearch import helpers
from elasticsearch.client import Elasticsearch
from random import randint
import time
# Target index and client. The client is constructed with no arguments, so
# the library's default connection settings apply.
indexName = "test"
es = Elasticsearch()

# The server's major version decides whether a mapping type name is used.
version = es.info()["version"]["number"]
majorVersion = int(version.split(".")[0])

# Single shard, no replicas.
indexSettings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
}

# "my_id" is indexed as a long, with a keyword sub-field reachable as
# "my_id.asKeyword" so both representations can be queried.
mapping = {
    "properties": {
        "my_id": {
            "type": "long",
            "fields": {
                "asKeyword": {
                    "type": "keyword",
                },
            },
        },
    },
}

# Version 7 and later take the mapping directly; older servers get it nested
# under the "doc" type name.
indexSettings["mappings"] = mapping if majorVersion >= 7 else {"doc": mapping}

# Start from a fresh index; 400/404 responses from the delete are ignored.
es.indices.delete(index=indexName, ignore=[400, 404])
es.indices.create(index=indexName, body=indexSettings)

numDocs = 1000000
maxIds = 5

start = time.time()

# Only pre-7 servers are given a document type for the bulk request.
bulkOptions = {"index": indexName}
if majorVersion < 7:
    bulkOptions["doc_type"] = "doc"

# randint is inclusive at both ends, so my_id takes values 0..maxIds.
# The generator streams documents to the bulk helper instead of building a list.
helpers.bulk(
    es,
    ({"my_id": randint(0, maxIds)} for _ in range(numDocs)),
    **bulkOptions
)

now = time.time()
elapsed = now - start
dps = numDocs / elapsed
print(numDocs, "(", dps, "ips)")
print("took ", elapsed, "seconds")
# Run batched random-ID terms queries against the million-document index.
# Keyword-based searches are faster than long-based searches.
from elasticsearch.client import Elasticsearch
from random import randint
import time
# Target index and client. The client is constructed with no arguments, so
# the library's default connection settings apply.
indexName = "test"
es = Elasticsearch()

# The server's major version decides whether track_total_hits is requested.
version = es.info()["version"]["number"]
majorVersion = int(version.split(".")[0])

numItems = 0          # number of random ids generated so far
numSearches = 1000    # total number of random ids to look up
maxIds = 5            # ids are drawn from 0..maxIds inclusive
termsPerQuery = 2     # ids batched into each terms query

# Field to query. Timings recorded for this script:
#   "my_id"           : v5.6.0 = 3.8s, v7.1 = 2.3s, v7.6.1 = 2.3s
#   "my_id.asKeyword" : v5.6.0 = 3.8s, v7.1 = 1.9s, v7.6.1 = 2.0s
searchField = "my_id.asKeyword"

start = time.time()
qTerms = []
for numItems in range(1, numSearches + 1):
    qTerms.append(randint(0, maxIds))
    if len(qTerms) < termsPerQuery:
        continue

    # A full batch of ids is ready: issue one terms query for it.
    q = {
        "query": {
            "terms": {
                searchField: qTerms,
            },
        },
    }
    if majorVersion >= 7:
        q["track_total_hits"] = True
    # request_cache=False is passed on every search.
    results = es.search(index=indexName, body=q, request_cache=False)
    # print(results["hits"]["total"])
    qTerms = []

# NOTE(review): the original indentation was lost; this print is assumed to
# run once after the loop rather than on every iteration -- confirm.
print(numItems)

now = time.time()
elapsed = now - start
# Throughput is counted in ids looked up, not in search requests.
qps = numItems / elapsed
print(numItems, "(", qps, "ips)")
print("took ", elapsed, "seconds")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment