Last active
December 23, 2015 14:48
-
-
Save ellimilial/8cccdae0cd9ad3744b8b to your computer and use it in GitHub Desktop.
Run hadoop terasort benchmark, average the run time for all stages. SSH friendly.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Runs the Hadoop terasort benchmark a specified number of times and reports
# the average total runtime of its 3 stages (gen, sort, validate) per run.
#
# To run via ssh, say on Jenkins, wrap in:
# ssh namenode.server.com <<'ENDSSH'
# (... code ...)
# ENDSSH
# Benchmark settings. Each one may be overridden from the environment; the
# value after ":-" is the default used when the variable is unset or empty.

# Path to the Hadoop MapReduce examples jar; replace the "(...)" placeholder.
readonly EXAMPLES_JAR="${EXAMPLES_JAR:-(...)/hadoop-mapreduce-examples.jar}"
# HDFS directory under which the input/, output/ and validate/ dirs are created.
readonly BASE_DIR="${BASE_DIR:-/benchmarks/terasort}"
# Number of rows to generate. Each row is 100 bytes, so the default is about
# 1 TB; use a smaller value for a trial run.
readonly ITEMS="${ITEMS:-10000000000}"
# How many times the full gen/sort/validate sequence is executed.
readonly RUNS="${RUNS:-2}"
# Passed to teragen as -Dmapred.map.tasks.
readonly GEN_MAPPER_COUNT="${GEN_MAPPER_COUNT:-1000}"
# Passed to terasort as -Dmapred.reduce.tasks.
readonly SORT_REDUCER_COUNT="${SORT_REDUCER_COUNT:-1000}"
# Join the items into a single string, separated by the first character of the
# separator argument (an empty separator concatenates them directly).
# Arguments: $1 - separator; $2... - items to join
# Outputs:   the joined string followed by a newline on stdout
# NOTE: this function shadows the external `join` utility within the script.
join() {
  local IFS="$1"
  shift
  # printf instead of echo: a lone item such as "-n" or "-e" must be printed
  # literally, not consumed as an echo option.
  printf '%s\n' "$*"
}
# Print the arithmetic mean of the given integers, using integer division.
# Arguments: $@ - integer values
# Outputs:   the mean followed by a newline on stdout
# Returns:   0 on success; 1 (with a message on stderr) when no values are given
avg() {
  local value total=0

  # Without this guard an empty argument list would divide by zero.
  if (( $# == 0 )); then
    printf 'avg: no values given\n' >&2
    return 1
  fi

  # The accumulator is local and starts at zero, so same-named globals and
  # earlier calls in the same shell cannot skew the result.
  for value in "$@"; do
    total=$(( total + value ))
  done

  printf '%s\n' "$(( total / $# ))"
}
# Run one program from the Hadoop examples jar via `yarn jar` and print its
# captured standard output.
# Globals:   EXAMPLES_JAR (read)
# Arguments: $1 - example program name (e.g. teragen); $2... - its arguments
# Outputs:   the program's stdout, written after the program has finished
# Returns:   the exit status of `yarn jar`
run_task() {
  local output status=0

  # Everything is quoted so that paths or options containing spaces or glob
  # characters reach yarn as single, unmodified arguments.
  output=$(yarn jar "$EXAMPLES_JAR" "$@") || status=$?

  # printf with a quoted value keeps the output's line breaks intact.
  printf '%s\n' "$output"
  return "$status"
}
# Remove the input, output and validate directories under BASE_DIR so that a
# run starts from an empty workspace.
# Globals:   BASE_DIR (read)
# Returns:   the status of the last `hdfs dfs -rm`; the shell aborts if BASE_DIR
#            is unset or empty
clear_workspace() {
  local stage

  # Each directory is named explicitly rather than removing BASE_DIR wholesale.
  for stage in input output validate; do
    # -f: a directory that does not exist yet (e.g. on the very first run) is
    #     not treated as an error.
    # :?: refuse to continue with an empty BASE_DIR, which would otherwise
    #     target /input, /output and /validate at the filesystem root.
    hdfs dfs -rm -f -R "${BASE_DIR:?BASE_DIR must be set}/${stage}"
  done
}
# Run the teragen -> terasort -> teravalidate sequence RUNS times, timing each
# full sequence, then print the average and the individual durations.
# Globals:   RUNS, ITEMS, BASE_DIR, GEN_MAPPER_COUNT, SORT_REDUCER_COUNT (read)
# Outputs:   progress banners, task output and the final summary on stdout;
#            failure messages on stderr
# Returns:   0 on success; non-zero if a stage reports failure or no run was
#            timed
main() {
  local -a durations=()
  local run started finished mean durations_joined

  for (( run = 1; run <= RUNS; run++ )); do
    clear_workspace

    # The clock starts after cleanup, so only the three stages are timed.
    started=$(date +%s)

    echo "--- Run ${run} - gen ---"
    if ! run_task teragen "-Dmapred.map.tasks=${GEN_MAPPER_COUNT}" "$ITEMS" "${BASE_DIR}/input"; then
      printf 'teragen failed in run %s\n' "$run" >&2
      return 1
    fi

    echo "--- Run ${run} - sort ---"
    if ! run_task terasort "-Dmapred.reduce.tasks=${SORT_REDUCER_COUNT}" "${BASE_DIR}/input" "${BASE_DIR}/output"; then
      printf 'terasort failed in run %s\n' "$run" >&2
      return 1
    fi

    echo "--- Run ${run} - validate ---"
    if ! run_task teravalidate "${BASE_DIR}/output" "${BASE_DIR}/validate"; then
      printf 'teravalidate failed in run %s\n' "$run" >&2
      return 1
    fi

    finished=$(date +%s)
    durations+=( "$(( finished - started ))" )
  done

  # With RUNS < 1 nothing was timed; averaging would divide by zero.
  if (( ${#durations[@]} == 0 )); then
    printf 'No runs were executed (RUNS=%s)\n' "$RUNS" >&2
    return 1
  fi

  mean=$(avg "${durations[@]}") || return
  durations_joined=$(join , "${durations[@]}")
  echo "--- Results after ${RUNS} runs ---"
  echo "Completed in avg: ${mean}s, all (s): ${durations_joined}"
}
# Entry point: run the benchmark, forwarding any command-line arguments.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment