Skip to content

Instantly share code, notes, and snippets.

@jbenninghoff
Created December 3, 2015 21:18
Show Gist options
  • Save jbenninghoff/4fc25786f8f2d9353f14 to your computer and use it in GitHub Desktop.
Save jbenninghoff/4fc25786f8f2d9353f14 to your computer and use it in GitHub Desktop.
YCSB test run script
#!/bin/bash
# jbenninghoff 2013-Sep-13 vi: set ai et sw=3 tabstop=3:
# Assumes MapR YCSB branch to handle large tables: https://github.com/mapr/YCSB
# Assumes MapR HBase client software installed. Can be an edge/gateway node
# Benchmark configuration. Every variable below is read by the commands later
# in this script.
# Jar paths are relative (presumably to the YCSB checkout this script is run
# from -- confirm before launching from another directory).
export HBASE_CLASSPATH=core/lib/core-0.1.4.jar:hbase-binding/lib/hbase-binding-0.1.4.jar
table=/benchmarks/usertable #YCSB uses table named 'usertable' by default
thrds=4
count=$((100 * 1000 * 1000)) #table row count

#######################################
# Print a short magnitude label for a non-negative integer, using the largest
# unit that fits and truncating the remainder:
#   1999 -> 1K, 100000000 -> 100M, 3000000000 -> 3B
# Values below 1000 are printed unchanged so the label is never empty.
# Arguments: $1 - integer to label
# Outputs:   the label on stdout
#######################################
magnitude_label() {
  local n=$1
  if ((n >= 1000 * 1000 * 1000)); then
    printf '%dB\n' "$((n / (1000 * 1000 * 1000)))"
  elif ((n >= 1000 * 1000)); then
    printf '%dM\n' "$((n / (1000 * 1000)))"
  elif ((n >= 1000)); then
    printf '%dK\n' "$((n / 1000))"
  else
    printf '%d\n' "$n"
  fi
}
mag=$(magnitude_label "$count") # used in the log file names

# Terminal width, used later to truncate wide report lines. 'stty size' prints
# "<rows> <columns>"; it fails without a tty (cron, nohup, pipes), so fall back
# to $COLUMNS and finally to 80 rather than leaving the width empty or zero.
columns=$(stty size 2>/dev/null | awk '{print $2}')
[[ $columns =~ ^[1-9][0-9]*$ ]] || columns=${COLUMNS:-80}
# Start from an empty table: feed the HBase shell a disable/drop/create
# sequence for $table on stdin. The new table has a single column family
# named 'family' with compression off and IN_MEMORY enabled.
{
  printf "disable '%s'\n" "$table"
  printf "drop '%s'\n" "$table"
  printf "create '%s', {NAME => 'family', COMPRESSION => 'none', IN_MEMORY => 'true'}\n" "$table"
} | hbase shell
# 4 YCSB workloads using threads on a single client machine to generate load

#######################################
# Run one YCSB workload against the benchmark table: first the -load phase
# (recordcount=$count), then the -t phase (operationcount=$count). The output
# of each phase is shown on stdout and saved to its own log file.
# Globals:   thrds, count, table, mag (read)
# Arguments: $1 - lowercase workload letter (a, b, c or d); selects the
#                 property file workloads/workload<letter>
# Outputs:   ycsb-bigKey-wkld<LETTER>-<mag>-load.log and ...-run.log
#######################################
run_workload() {
  local letter=$1 tag
  # Log names carry the upper-case letter (wkldA), property files the lower.
  tag=$(printf '%s' "$letter" | tr '[:lower:]' '[:upper:]')
  # Arguments shared by both phases, kept in an array so each stays one word.
  local -a ycsb=(
    hbase com.yahoo.ycsb.Client -db com.yahoo.ycsb.db.HBaseClient
    -P "workloads/workload${letter}" -threads "$thrds"
    -p columnfamily=family
  )
  "${ycsb[@]}" -p "recordcount=$count" -p "table=$table" -load \
    | tee "ycsb-bigKey-wkld${tag}-${mag}-load.log"
  "${ycsb[@]}" -p "operationcount=$count" -p "table=$table" -t \
    | tee "ycsb-bigKey-wkld${tag}-${mag}-run.log"
}

run_workload a
exit #comment out exit once workloadA runs well on your DB cluster
for workload in b c d; do
  run_workload "$workload"
done
# Region reports for the benchmark table. Each report is shown on stdout and
# saved to a file in the current directory.

# The region list is truncated to the terminal width held in $columns. That
# value is empty when the script runs without a tty (and can be 0 if the stty
# output was not in the expected layout), which makes 'cut -c' reject the
# range and the report come out empty -- so validate it and default to 80.
region_width=$columns
[[ $region_width =~ ^[1-9][0-9]*$ ]] || region_width=80

echo usertable region list
/opt/mapr/bin/maprcli table region list -path "$table" \
  | cut -c "-${region_width}" \
  | tee usertable-region.list
echo usertable regions over Storage Pools
# regionsp.py is expected in the current directory (relative path).
./regionsp.py "$table" | tee usertable-region-sp.list
#######################################
# Summarize YCSB log files: for each file, print a banner line with its name
# followed by every line that mentions operations, latency or throughput
# (case-insensitive).
# Arguments: $@ - log file paths; names that are not existing files are
#                 skipped (an unmatched glob is passed through literally)
# Outputs:   the summary on stdout
#######################################
summarize_logs() {
  local log
  for log in "$@"; do
    [[ -f "$log" ]] || continue
    printf '%s ====================\n' "$log"
    grep -i -e operations -e latency -e Throughput -- "$log"
  done
}

summarize_logs ycsb-big*load.log | tee ycsb-load-summary.txt
summarize_logs ycsb-big*run.log | tee ycsb-run-summary.txt
# Options for HBase table creation
# hbase> create 't1', {NAME => 'f1', VERSIONS => 5}
# hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
# hbase> # The above in shorthand would be the following:
# hbase> create 't1', 'f1', 'f2', 'f3'
# hbase> create 't1', 'f1', {SPLITS => ['10', '20', '30', '40']}
# hbase> create 't1', 'f1', {SPLITS_FILE => 'splits.txt'}
# hbase> # Optionally pre-split the table into NUMREGIONS, using
# hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname)
# hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'}
#
#create '$table','family'
#for i in 'a'..'z' do for j in 'a'..'z' do put 'usertable', "row-#{i}#{j}", "family:#{j}", "#{j}" end end
#scan '$table', {LIMIT=>10}
#create '$table', {NAME => 'family', COMPRESSION => 'none', IN_MEMORY => 'true'}, {NUMREGIONS => 24, SPLITALGO => 'HexStringSplit'}
#maprcli volume create -name tables -path /tables -topology /data/default-rack -replication 3 -replicationtype low_latency
#hadoop mfs -setcompression off /tables
#HBase to M7 namespace mapping configuration in /opt/mapr/hadoop/hadoop.*/conf/core-site.xml
#<property>
# <name>hbase.table.namespace.mappings</name>
# <value>table1:/user/${user.name}/tables,*:/tables</value>
#</property>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment