Skip to content

Instantly share code, notes, and snippets.

@ellimilial
Last active December 23, 2015 14:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ellimilial/8cccdae0cd9ad3744b8b to your computer and use it in GitHub Desktop.
Save ellimilial/8cccdae0cd9ad3744b8b to your computer and use it in GitHub Desktop.
Run hadoop terasort benchmark, average the run time for all stages. SSH friendly.
#!/bin/bash
# Script to run the Hadoop terasort benchmark a specified number of times and report the average total run time of its 3 stages (teragen, terasort, teravalidate).
#
# To run via ssh, say on Jenkins, wrap in:
# ssh namenode.server.com <<'ENDSSH'
# (... code ...)
# ENDSSH
# Path to the Hadoop MapReduce examples jar; "(...)" is a placeholder to be
# replaced with the real directory on the target cluster.
readonly EXAMPLES_JAR="(...)/hadoop-mapreduce-examples.jar"
# HDFS base directory; its input/, output/ and validate/ subdirectories are
# deleted before every run and rewritten by the benchmark stages.
readonly BASE_DIR="/benchmarks/terasort"
# Number of rows handed to teragen.
readonly ITEMS=10000000000 # each row is 100 bytes (about 1 TB in total here); use a smaller value for a test run
# Number of complete gen/sort/validate iterations to time and average.
readonly RUNS=2
# Passed to teragen as -Dmapred.map.tasks.
readonly GEN_MAPPER_COUNT=1000
# Passed to terasort as -Dmapred.reduce.tasks.
readonly SORT_REDUCER_COUNT=1000
# Join the given values into one string with a single-character separator.
# Note: inside this script the function shadows the coreutils `join` command.
# Arguments: $1 - separator (only its first character is used, as it is
#                 assigned to IFS); $2.. - values to join
# Outputs:   the joined string followed by a newline, on stdout
join() {
  local IFS="$1"
  shift
  # printf instead of echo: a lone value such as "-n" or "-e" would be
  # swallowed by echo as an option instead of being printed.
  printf '%s\n' "$*"
}
# Print the integer average (bash integer division) of the given integers.
# Arguments: $@ - one or more integers
# Outputs:   the average on stdout
# Returns:   0 on success; 1 with a message on stderr when called without
#            arguments (instead of a raw division-by-zero error)
avg() {
  # Accumulators are local and start at zero, so repeated calls in the same
  # shell (or pre-existing globals named total/count) cannot skew the result.
  local value total=0

  if (( $# == 0 )); then
    echo "avg: at least one value is required" >&2
    return 1
  fi

  for value in "$@"; do
    total=$(( total + value ))
  done
  echo "$(( total / $# ))"
}
# Run one program from the Hadoop examples jar via `yarn jar` and print what
# it wrote to stdout once it has finished.
# Globals:   EXAMPLES_JAR (read)
# Arguments: $1 - example program name (e.g. teragen); $2.. - its arguments
# Outputs:   the captured stdout of the yarn command, line breaks preserved
# Returns:   the exit status of the yarn command
run_task() {
  local output status=0
  # Quoted expansions keep the jar path and every argument as a single word
  # even when they contain spaces or glob characters.
  output=$(yarn jar "${EXAMPLES_JAR}" "$@") || status=$?
  # Quoted so the output is not word-split, globbed or squashed onto one line.
  printf '%s\n' "${output}"
  return "${status}"
}
# Remove the HDFS directories left behind by a previous benchmark run.
# Only the three known subdirectories are deleted, never BASE_DIR itself, so
# the set of removed paths stays explicit.
# Globals: BASE_DIR (read)
clear_workspace() {
  local subdir
  for subdir in input output validate; do
    # ${BASE_DIR:?} aborts instead of deleting "/input" etc. if BASE_DIR is
    # ever empty or unset; -f keeps a not-yet-existing directory (e.g. on the
    # very first run) from being reported as an error.
    hdfs dfs -rm -R -f "${BASE_DIR:?BASE_DIR must be set}/${subdir}"
  done
}
# Run the full benchmark (teragen -> terasort -> teravalidate) RUNS times,
# timing each iteration, then print the average and the individual durations.
# The workspace cleanup happens before the clock starts, so it is not part
# of the measured time.
# Globals: RUNS, ITEMS, BASE_DIR, GEN_MAPPER_COUNT, SORT_REDUCER_COUNT (read)
# Outputs: progress banners, task output and the final summary on stdout
main() {
  # Everything is local so that running main does not leak or clobber
  # variables in the calling shell.
  local -a results=()
  local k start end average results_joined

  for ((k = 1; k <= RUNS; k++)); do
    clear_workspace
    start=$(date +%s)
    echo "--- Run ${k} - gen ---"
    run_task teragen "-Dmapred.map.tasks=${GEN_MAPPER_COUNT}" "${ITEMS}" "${BASE_DIR}/input"
    echo "--- Run ${k} - sort ---"
    run_task terasort "-Dmapred.reduce.tasks=${SORT_REDUCER_COUNT}" "${BASE_DIR}/input" "${BASE_DIR}/output"
    echo "--- Run ${k} - validate ---"
    run_task teravalidate "${BASE_DIR}/output" "${BASE_DIR}/validate"
    end=$(date +%s)
    # Wall-clock seconds spent on the three stages of this iteration.
    results+=("$((end - start))")
  done

  average=$(avg "${results[@]}")
  results_joined=$(join , "${results[@]}")
  echo "--- Results after ${RUNS} runs ---"
  echo "Completed in avg: ${average}s, all (s): ${results_joined}"
}
# Entry point: run the benchmark when the script is executed.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment