Skip to content

Instantly share code, notes, and snippets.

@busbey
Created December 14, 2015 15:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save busbey/9404f0f399d4edd5c86c to your computer and use it in GitHub Desktop.
Save busbey/9404f0f399d4edd5c86c to your computer and use it in GitHub Desktop.
ycsb helper with htraced tracing.
#!/bin/bash
# Copyright (c) 2015 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License. See accompanying
# LICENSE file.
#
#
# Resolve the absolute path of the directory that holds this script; the
# YCSB dist, the offset file and all result files are located relative to it.
bin=$(dirname "$0")
# Chain with && so that a failed cd cannot silently leave us with the
# caller's working directory; cd output (e.g. from CDPATH) is discarded.
bin=$(cd -- "$bin" >/dev/null && pwd) || {
  echo "unable to resolve the directory of '$0'" >&2
  exit 1
}
#
#
# Run through all of the recommended YCSB workloads
#
# assumptions
# general: YCSB has YCSB-529 applied.
# hbase : client configs are in /etc/hbase/conf
# accumulo: client configs are in /etc/accumulo/conf, zk quorum in env ZK_QUORUM, password in ACCUMULO_PASSWORD
# ---- Settings you will most likely want to edit ----

# Address passed to ycsb as htrace.htraced.receiver.address.
HTRACED='htraced.example.com:9075'

# Name of the unpacked YCSB dist artifact, relative to the directory that
# holds this script. Keep exactly one line active.
#DIST='ycsb-hbase-binding-0.2.0-RC2'
#DIST='ycsb-hbase10-binding-0.6.0-SNAPSHOT'
DIST='ycsb-hbase098-binding-0.6.0-SNAPSHOT'
#DIST='ycsb-0.2.0-RC3'

# By default each phase is limited to a fixed time period, given in seconds.
MAX_TIME='120'

# Data store binding to benchmark. Keep exactly one line active.
#BINDING='hbase10'
BINDING='hbase098'
#BINDING='accumulo'

# Measurements exporter class, i.e. the format results are written in.
EXPORTER='com.yahoo.ycsb.measurements.exporter.JSONArrayMeasurementsExporter'
#EXPORTER='com.yahoo.ycsb.measurements.exporter.JSONMeasurementsExporter'
# ---- Settings you can usually leave alone ----

# Echo every command as it runs and abort the script on the first failure.
set -o xtrace
set -o errexit

# Pinned at MAX_INT so that the time limit (MAX_TIME), not the record
# count, is what ends a phase.
RECORD_COUNT=2147483647

# The only count an individual client looks at when limiting itself.
# For very fast datastores that can get through this in under an hour,
# raise it, up to MAX_INT.
INSERT_COUNT=429496729

# File mapping hostnames (as printed by `hostname`) to insert offsets,
# one mapping per line, e.g.
#   somehost.example.com:429496729
# If the file does not exist the offset is presumed to be 0.
OFFSET_SOURCE="$bin/host_offset.txt"

# ---- Derived paths; do not edit ----
YCSB="$bin/$DIST/bin/ycsb"
WORKLOADS="$bin/$DIST/workloads"
# Hostname of this client: it selects this host's line in the offset file
# and is embedded in the names of the result files.
HERE=$(hostname)

#######################################
# Print the offset mapped to a host in a "hostname:offset" mapping file.
# The host field must equal the hostname exactly; a plain grep would treat
# the name as an unanchored regex, so "node1" would also hit "node10".
# Arguments: $1 - hostname to look up
#            $2 - path of the mapping file
# Outputs:   the second ':'-separated field of the first matching line
# Returns:   0 if a line for the host was found, 1 otherwise
#######################################
lookup_offset() {
  local host=$1 source=$2 name value rest
  # "|| [[ -n ... ]]" also picks up a final line without a trailing newline.
  while IFS=: read -r name value rest || [[ -n ${name} ]]; do
    if [[ ${name} == "${host}" ]]; then
      printf '%s\n' "${value}"
      return 0
    fi
  done <"${source}"
  return 1
}

if [[ -f "${OFFSET_SOURCE}" ]]; then
  # A missing or malformed entry is fatal (as it already was under set -e),
  # but now says why instead of just stopping.
  if ! OFFSET=$(lookup_offset "${HERE}" "${OFFSET_SOURCE}"); then
    echo "no offset entry for host '${HERE}' in ${OFFSET_SOURCE}" >&2
    exit 1
  fi
  if [[ ! ${OFFSET} =~ ^[0-9]+$ ]]; then
    echo "offset for host '${HERE}' in ${OFFSET_SOURCE} is not a non-negative integer: '${OFFSET}'" >&2
    exit 1
  fi
else
  # No mapping file at all: start inserting at 0.
  OFFSET=0
fi
# Arguments shared by every ycsb invocation: the time limit, client thread
# count, JVM heap size and the HTrace span receiver / sampler properties.
#
# The final -cp entry points to the HTraced jar that ships in Cloudera Labs.
# It's not required so long as you point to a jar that has the htraced span
# receiver.
#
# http://blog.cloudera.com/blog/2015/12/new-in-cloudera-labs-apache-htrace-incubating/
#
# NOTE(review): htrace.sampler.fraction is presumably only consulted by a
# probability-based sampler; with AlwaysSampler selected it likely has no
# effect - confirm.
#
# Expansions are quoted so that a value containing whitespace or glob
# characters (e.g. a bracketed IPv6 address) stays a single argument.
EXEC_ARGS=(
  -s
  -p "maxexecutiontime=${MAX_TIME}"
  -threads 3
  -jvm-args=-Xmx1024m
  -p htrace.span.receiver.classes=org.apache.htrace.impl.HTracedSpanReceiver
  -p "htrace.htraced.receiver.address=${HTRACED}"
  -p htrace.htraced.error.log.period.ms=10000
  -p htrace.sampler.classes=AlwaysSampler
  -p htrace.sampler.fraction=0.001
  -cp /opt/cloudera/parcels/CLABS_HTRACE/lib/htrace/lib/htrace-htraced-cdh5.jar
)
#######################################
# Fill BINDING_ARGS with the datastore-specific ycsb arguments.
# Globals:   BINDING_ARGS (written)
#            ZK_QUORUM, ACCUMULO_PASSWORD (read, accumulo only)
# Arguments: $1 - binding name (hbase*, accumulo, anything else)
#######################################
set_binding_args() {
  case "$1" in
    hbase*)
      BINDING_ARGS=(-cp /etc/hbase/conf -p columnfamily=family)
      ;;
    accumulo)
      # The password comes from ACCUMULO_PASSWORD. The misspelt
      # ACCUMULO_PASSOWRD that this script used to read is still honoured
      # as a fallback so existing setups keep working.
      # NOTE: the password is passed on the command line, so it is visible
      # in the process list and in the `set -x` trace of this script.
      BINDING_ARGS=(
        -cp /etc/accumulo/conf
        -p accumulo.columnFamily=family
        -p accumulo.instanceName=dedicated
        -p "accumulo.zooKeepers=${ZK_QUORUM:-localhost}"
        -p accumulo.username=ycsb
        -p "accumulo.password=${ACCUMULO_PASSWORD:-${ACCUMULO_PASSOWRD:-protectyaneck}}"
      )
      ;;
    *)
      # Unknown binding: no extra arguments.
      BINDING_ARGS=()
      ;;
  esac
}
set_binding_args "${BINDING}"
# Per-table argument lists. Both carry the same datastore and execution
# arguments; only the target table differs (workload e gets its own table).
WORKLOAD_ARGS=(-p table=ycsb)
WORKLOADE_ARGS=(-p table=ycsb_workloade)
WORKLOAD_ARGS+=("${BINDING_ARGS[@]}" "${EXEC_ARGS[@]}")
WORKLOADE_ARGS+=("${BINDING_ARGS[@]}" "${EXEC_ARGS[@]}")
#######################################
# Run one timed ycsb phase and capture its output next to this script.
# Globals:   YCSB, BINDING, WORKLOADS, EXPORTER, bin, HERE (read)
# Arguments: $1 - ycsb command ("load" or "run")
#            $2 - workload file name under ${WORKLOADS}
#            $3 - label used in the result file names
#            $4... - further ycsb arguments, passed through in order
# Outputs:   ${bin}/ycsb-<label>-<host>.out / .err (stdout / stderr) and
#            ${bin}/ycsb-<label>-<host>-measurements.json (export file);
#            the `time` report goes to this script's stderr.
# Returns:   exit status of the ycsb invocation
#######################################
ycsb_phase() {
  local action=$1 workload=$2 label=$3
  shift 3
  local prefix="${bin}/ycsb-${label}-${HERE}"
  time python2.7 "${YCSB}" "${action}" "${BINDING}" -P "${WORKLOADS}/${workload}" "$@" \
    -p exportfile="${prefix}-measurements.json" -p exporter="${EXPORTER}" \
    >"${prefix}.out" 2>"${prefix}.err"
}

# Initial load into the shared table, starting at this host's offset.
echo "loading"
ycsb_phase load workloada load "${WORKLOAD_ARGS[@]}" \
  -p "recordcount=${RECORD_COUNT}" -p "insertstart=${OFFSET}" -p "insertcount=${INSERT_COUNT}"

# Run phases against the shared table, in the order a, b, c, f, d.
# NOTE(review): presumably the order recommended by YCSB for its core
# workloads - confirm before reordering.
for letter in a b c f d; do
  echo "workload ${letter}"
  ycsb_phase run "workload${letter}" "workload${letter}" "${WORKLOAD_ARGS[@]}" \
    -p recordcount=0 -p "operationcount=${INSERT_COUNT}"
done

# Workload e is loaded into, and run against, its own table.
echo "load workload e"
ycsb_phase load workloade load_workloade "${WORKLOADE_ARGS[@]}" \
  -p "recordcount=${RECORD_COUNT}" -p "insertstart=${OFFSET}" -p "insertcount=${INSERT_COUNT}"

# work around YCSB-384
# NOTE(review): presumably the reason recordcount is lowered below
# RECORD_COUNT for this run - confirm against the issue.
echo "workload e"
ycsb_phase run workloade workloade "${WORKLOADE_ARGS[@]}" \
  -p "recordcount=$((RECORD_COUNT - INSERT_COUNT * 2 / 20))" -p "operationcount=${INSERT_COUNT}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment