Skip to content

Instantly share code, notes, and snippets.

@cdahlqvist
Last active April 5, 2023 06:27
Show Gist options
  • Save cdahlqvist/2f368e8a874259b5cf4ca28b8a75d454 to your computer and use it in GitHub Desktop.
Save cdahlqvist/2f368e8a874259b5cf4ca28b8a75d454 to your computer and use it in GitHub Desktop.
rally-bulk-rejections-track

Bulk Rejections Test

This Rally track is used to test the relationship between bulk indexing rejections and the following parameters:

  • Number of concurrent clients indexing into Elasticsearch
  • Number of shards actively being indexed into
  • Number of data nodes in the cluster
  • Size of bulk requests

The track contains a number of challenges, each indexing into an index with a set number of shards using an increasing number of concurrent client connections and two different bulk sizes.

For these benchmarks we have used clusters with one, two and three data nodes in Elastic Cloud, each data node with 8GB of RAM allocated (4GB heap, 4GB native memory). Rally has been invoked multiple times as follows (challenge and user-tag parameters have been updated for each run to go through all challenges for each cluster):

esrally --track-path=$DIR/track.json --user-tag="data_nodes:1" --challenge=bulk-index-8_shards --target-hosts=$EC_CLUSTER:9243 --pipeline=benchmark-only --cluster-health=yellow --client-options="use_ssl:true,verify_certs:true,basic_auth_user:'$EC_USER',basic_auth_password:'$EC_PASSWORD'"

Here $DIR is the directory where the files from this gist reside and $EC_CLUSTER, $EC_USER and $EC_PASSWORD are specific to the cluster being benchmarked. A separate Elastic Cloud cluster has been configured as the metrics store, which allows us to analyze the raw data using Kibana.

{
"type": {
"properties": {
"surcharge": {
"scaling_factor": 100,
"type": "scaled_float"
},
"dropoff_datetime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"trip_type": {
"type": "keyword"
},
"mta_tax": {
"scaling_factor": 100,
"type": "scaled_float"
},
"rate_code_id": {
"type": "keyword"
},
"passenger_count": {
"type": "integer"
},
"pickup_datetime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"tolls_amount": {
"scaling_factor": 100,
"type": "scaled_float"
},
"tip_amount": {
"scaling_factor": 100,
"type": "scaled_float"
},
"payment_type": {
"type": "keyword"
},
"extra": {
"scaling_factor": 100,
"type": "scaled_float"
},
"vendor_id": {
"type": "keyword"
},
"store_and_fwd_flag": {
"type": "keyword"
},
"improvement_surcharge": {
"scaling_factor": 100,
"type": "scaled_float"
},
"fare_amount": {
"scaling_factor": 100,
"type": "scaled_float"
},
"ehail_fee": {
"scaling_factor": 100,
"type": "scaled_float"
},
"cab_color": {
"type": "keyword"
},
"dropoff_location": {
"type": "geo_point"
},
"vendor_name": {
"type": "text"
},
"total_amount": {
"scaling_factor": 100,
"type": "scaled_float"
},
"trip_distance": {
"scaling_factor": 100,
"type": "scaled_float"
},
"pickup_location": {
"type": "geo_point"
}
},
"_all": {
"enabled": false
},
"dynamic": "strict"
}
}
{
"short-description": "Trip records completed in yellow and green taxis in New York in 2015",
"description": "This track contains challenges for evaluating the relationship between connections, shards and bulk rejections.",
"data-url": "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/nyc_taxis",
"indices": [
{
"name": "nyc_taxis",
"types": [
{
"name": "type",
"mapping": "mappings.json",
"documents": "documents.json.bz2",
"document-count": 165346692,
"compressed-bytes": 4812721501,
"uncompressed-bytes": 79802445255
}
]
}
],
"operations": [
{
"name": "index-100",
"operation-type": "index",
"bulk-size": 100
},
{
"name": "index-200",
"operation-type": "index",
"bulk-size": 200
}
],
"challenges": [
{
"name": "bulk-index-2_shards",
"description": "Indexes groups of 6400000 taxi trips at different concurrency levels against an index with 2 primary shards and 1 replica. All index operations use auto-generated ids to avoid conflicts.",
"default": true,
"index-settings": {
"index.number_of_shards": 2,
"index.number_of_replicas": 1,
"index.refresh_interval": "30s",
"index.translog.flush_threshold_size": "4g"
},
"schedule": [
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 2,
"client_shard_concurrency": 16
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 2,
"client_shard_concurrency": 32
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 2,
"client_shard_concurrency": 48
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 2,
"client_shard_concurrency": 64
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 2,
"client_shard_concurrency": 96
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 2,
"client_shard_concurrency": 128
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 2,
"client_shard_concurrency": 16
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 2,
"client_shard_concurrency": 32
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 2,
"client_shard_concurrency": 48
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 2,
"client_shard_concurrency": 64
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 2,
"client_shard_concurrency": 96
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 2,
"client_shard_concurrency": 128
}
}
]
},
{
"name": "bulk-index-4_shards",
"description": "Indexes groups of 6400000 taxi trips at different concurrency levels against an index with 4 primary shards and 1 replica. All index operations use auto-generated ids to avoid conflicts.",
"index-settings": {
"index.number_of_shards": 4,
"index.number_of_replicas": 1,
"index.refresh_interval": "30s",
"index.translog.flush_threshold_size": "4g"
},
"schedule": [
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 4,
"client_shard_concurrency": 32
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 4,
"client_shard_concurrency": 64
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 4,
"client_shard_concurrency": 96
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 4,
"client_shard_concurrency": 128
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 4,
"client_shard_concurrency": 192
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 4,
"client_shard_concurrency": 256
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 4,
"client_shard_concurrency": 32
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 4,
"client_shard_concurrency": 64
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 4,
"client_shard_concurrency": 96
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 4,
"client_shard_concurrency": 128
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 4,
"client_shard_concurrency": 192
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 4,
"client_shard_concurrency": 256
}
}
]
},
{
"name": "bulk-index-8_shards",
"description": "Indexes groups of 6400000 taxi trips at different concurrency levels against an index with 8 primary shards and 1 replica. All index operations use auto-generated ids to avoid conflicts.",
"index-settings": {
"index.number_of_shards": 8,
"index.number_of_replicas": 1,
"index.refresh_interval": "30s",
"index.translog.flush_threshold_size": "4g"
},
"schedule": [
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 8,
"client_shard_concurrency": 64
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 8,
"client_shard_concurrency": 128
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 8,
"client_shard_concurrency": 192
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 8,
"client_shard_concurrency": 256
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 8,
"client_shard_concurrency": 384
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 8,
"client_shard_concurrency": 512
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 8,
"client_shard_concurrency": 64
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 8,
"client_shard_concurrency": 128
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 8,
"client_shard_concurrency": 192
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 8,
"client_shard_concurrency": 256
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 8,
"client_shard_concurrency": 384
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 8,
"client_shard_concurrency": 512
}
}
]
},
{
"name": "bulk-index-16_shards",
"description": "Indexes groups of 6400000 taxi trips at different concurrency levels against an index with 16 primary shards and 1 replica. All index operations use auto-generated ids to avoid conflicts.",
"index-settings": {
"index.number_of_shards": 16,
"index.number_of_replicas": 1,
"index.refresh_interval": "30s",
"index.translog.flush_threshold_size": "4g"
},
"schedule": [
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 16,
"client_shard_concurrency": 128
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 16,
"client_shard_concurrency": 256
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 16,
"client_shard_concurrency": 384
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 16,
"client_shard_concurrency": 512
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 16,
"client_shard_concurrency": 768
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 16,
"client_shard_concurrency": 1024
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 16,
"client_shard_concurrency": 128
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 16,
"client_shard_concurrency": 256
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 16,
"client_shard_concurrency": 384
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 16,
"client_shard_concurrency": 512
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 16,
"client_shard_concurrency": 768
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 16,
"client_shard_concurrency": 1024
}
}
]
},
{
"name": "bulk-index-32_shards",
"description": "Indexes groups of 6400000 taxi trips at different concurrency levels against an index with 32 primary shards and 1 replica. All index operations use auto-generated ids to avoid conflicts.",
"index-settings": {
"index.number_of_shards": 32,
"index.number_of_replicas": 1,
"index.refresh_interval": "30s",
"index.translog.flush_threshold_size": "4g"
},
"schedule": [
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 32,
"client_shard_concurrency": 256
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 32,
"client_shard_concurrency": 512
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 32,
"client_shard_concurrency": 768
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 32,
"client_shard_concurrency": 1024
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 32,
"client_shard_concurrency": 1536
}
},
{
"operation": "index-200",
"warmup-iterations": 0,
"iterations": 32000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 32,
"client_shard_concurrency": 2048
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 8,
"meta": {
"client_count": 8,
"shard_count": 32,
"client_shard_concurrency": 256
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 16,
"meta": {
"client_count": 16,
"shard_count": 32,
"client_shard_concurrency": 512
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 24,
"meta": {
"client_count": 24,
"shard_count": 32,
"client_shard_concurrency": 768
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 32,
"meta": {
"client_count": 32,
"shard_count": 32,
"client_shard_concurrency": 1024
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 48,
"meta": {
"client_count": 48,
"shard_count": 32,
"client_shard_concurrency": 1536
}
},
{
"operation": "index-100",
"warmup-iterations": 0,
"iterations": 64000,
"clients": 64,
"meta": {
"client_count": 64,
"shard_count": 32,
"client_shard_concurrency": 2048
}
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment