Skip to content

Instantly share code, notes, and snippets.

@ambud
Created March 20, 2017 16:10
Show Gist options
  • Save ambud/9ce4705c4c8f40820f49ebfa35b7bbb7 to your computer and use it in GitHub Desktop.
Save ambud/9ce4705c4c8f40820f49ebfa35b7bbb7 to your computer and use it in GitHub Desktop.
Kafka Benchmark
#!/bin/bash
#
# Author: Ambud Sharma
#
# Purpose: To run the Kafka Producer / Consumer benchmark and create results
#
# ---------------------------------------------------------------------------
# Benchmark defaults. Several of these can be overridden interactively by
# load_options; all of them are marked for export below.
# ---------------------------------------------------------------------------

# Number of records passed to the producer perf test (--num-records).
NUM_RECORDS=10000000
# Prefix of every benchmark topic name.
TOPIC_NAME_PREFIX="perf"
# Cluster size and drive count are both factors of the partition count.
CLUSTER_SIZE=3
DRIVE_COUNT=1
# Connection endpoints for the Kafka tools.
BROKER_ADDRESS="localhost:9092"
ZOOKEEPER_ADDRESS="localhost:2181"

# Sweep dimensions. The main loop iterates over the partition and
# replication multiples.
# NOTE(review): THREAD_MULTIPLES and RECORD_SIZES are not referenced by the
# visible benchmark loop -- presumably reserved for a future sweep.
PARTITION_MULTIPLES=(1 2 3 4 5)
REPLICATION_MULTIPLES=(1)
THREAD_MULTIPLES=(1 2 4 8 16 24)
RECORD_SIZES=(100 200 500 1000)

# Directory containing the Kafka command-line scripts.
KAFKA_BIN="/usr/hdp/current/kafka-server/bin/"
# Results are grouped by the day the benchmark was started (yy-mm-dd).
DATE=$(date +%y-%m-%d)
OUTPUT_DIR="/tmp/kafka-benchmarks-$DATE"

export NUM_RECORDS TOPIC_NAME_PREFIX CLUSTER_SIZE DRIVE_COUNT
export BROKER_ADDRESS ZOOKEEPER_ADDRESS
export PARTITION_MULTIPLES REPLICATION_MULTIPLES THREAD_MULTIPLES RECORD_SIZES
export KAFKA_BIN DATE OUTPUT_DIR
#######################################
# Print the banner and a short description of what the benchmark does,
# then block until a line is read from stdin.
# Outputs: banner and description on stdout.
# Returns: the exit status of the final `read`.
#######################################
print_intro() {
  local banner_rule="**********************************************************************"

  # Banner: each banner line is followed by one blank line.
  printf '%s\n\n' \
    "$banner_rule" \
    " Kafka Benchmark " \
    " Author: Ambud Sharma " \
    " License: Apache 2.0 " \
    "$banner_rule"

  # Description of what the run will do.
  printf '%s\n' \
    "This script will benchmark your Kafka cluster using multiple configurations and store benchmark results" \
    "Each benchmark result will also contain the configurations it was run with." \
    "As a part of benchmarking the cluster, several topics will be created with different configurations."

  # Prerequisite notice, surrounded by blank lines, then two trailing blanks.
  printf '\n%s\n\n' \
    "Make sure delete.topic.enable property is set to true in the broker configuration"
  printf '\n\n'

  read -p "Press Enter to continue?"
}
#######################################
# Interactively override the benchmark settings.
#
# For each setting the user is shown the current value as the default; an
# empty answer (or end of input) keeps that value, anything else replaces it
# and the variable is (re-)exported.
#
# Globals (read and possibly rewritten):
#   KAFKA_BIN, OUTPUT_DIR, BROKER_ADDRESS, ZOOKEEPER_ADDRESS,
#   TOPIC_NAME_PREFIX, CLUSTER_SIZE, DRIVE_COUNT
# Inputs:  one line per setting, read from stdin.
# Returns: 0 always.
#######################################
load_options() {
  # Each entry is "<VARIABLE>:<prompt label>", asked in this order.
  local -a option_specs=(
    "KAFKA_BIN:Kafka bin directory"
    "OUTPUT_DIR:Output directory"
    "BROKER_ADDRESS:Broker Address"
    "ZOOKEEPER_ADDRESS:Zookeeper Address"
    "TOPIC_NAME_PREFIX:Topic Name Prefix"
    "CLUSTER_SIZE:Cluster size"
    "DRIVE_COUNT:Data drives per node"
  )
  local spec var_name label answer

  for spec in "${option_specs[@]}"; do
    var_name=${spec%%:*}
    label=${spec#*:}
    answer=""
    # -r keeps backslashes literal; ${!var_name} shows the current value.
    read -r -p "${label} (default:${!var_name})" answer
    # Quoted test: an answer containing spaces (e.g. a path) must not be
    # word-split, which previously made the check fail and dropped the value.
    if [[ -n "$answer" ]]; then
      export "${var_name}=${answer}"
    fi
  done
  return 0
}
# ---------------------------------------------------------------------------
# Main flow: show the intro, collect settings, then for every
# (partition multiple, replication factor) pair create a topic, run the
# producer perf test against it, store the output, and delete the topic.
# All expansions are quoted so paths and names containing spaces or glob
# characters are passed through intact.
# ---------------------------------------------------------------------------
print_intro
load_options
echo "Broker Address:$BROKER_ADDRESS, Zookeeper Address:$ZOOKEEPER_ADDRESS, Cluster Size:$CLUSTER_SIZE, Drive Count:$DRIVE_COUNT"

# Nothing below can store results without the output directory and the
# producer config, so stop early if either cannot be created.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo "ERROR: cannot create output directory: $OUTPUT_DIR" >&2
  exit 1
fi
if ! echo "bootstrap.servers=$BROKER_ADDRESS" > "$OUTPUT_DIR/producer.conf"; then
  echo "ERROR: cannot write producer config: $OUTPUT_DIR/producer.conf" >&2
  exit 1
fi

for i in "${PARTITION_MULTIPLES[@]}"
do
  for k in "${REPLICATION_MULTIPLES[@]}"
  do
    # Partitions scale with the multiple, the drives per node and the node count.
    PARTITION_COUNT=$(( i * DRIVE_COUNT * CLUSTER_SIZE ))
    TOPIC_NAME="$TOPIC_NAME_PREFIX-$PARTITION_COUNT-$k"

    echo "Creating Benchmark Topic: $TOPIC_NAME, Partitions:$PARTITION_COUNT, Replication: $k"
    "$KAFKA_BIN/kafka-topics.sh" --zookeeper "$ZOOKEEPER_ADDRESS" --create --topic "$TOPIC_NAME" --replication-factor "$k" --partitions "$PARTITION_COUNT" \
      || echo "WARNING: topic creation reported a failure for $TOPIC_NAME" >&2

    echo -e "Running Benchmarks: Topic: $TOPIC_NAME, Partitions:$PARTITION_COUNT, Replication: $k\nResults will be stored in $OUTPUT_DIR/$TOPIC_NAME"
    mkdir -p -- "$OUTPUT_DIR/$TOPIC_NAME" \
      || echo "WARNING: cannot create result directory $OUTPUT_DIR/$TOPIC_NAME" >&2
    "$KAFKA_BIN/kafka-producer-perf-test.sh" --topic "$TOPIC_NAME" --num-records "$NUM_RECORDS" --record-size 100 --producer.config "$OUTPUT_DIR/producer.conf" --throughput 100000000 > "$OUTPUT_DIR/$TOPIC_NAME/thread-1.txt" \
      || echo "WARNING: producer benchmark reported a failure for $TOPIC_NAME" >&2

    # Placeholder left by the author, presumably for a record-size sweep;
    # the record size is currently fixed at 100 above.
    #for rs in ${RECORD_SIZES[@]}
    #do
    #done

    # Delete the topic whatever the benchmark outcome.
    "$KAFKA_BIN/kafka-topics.sh" --zookeeper "$ZOOKEEPER_ADDRESS" --delete --topic "$TOPIC_NAME" \
      || echo "WARNING: topic deletion reported a failure for $TOPIC_NAME" >&2
  done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment