Skip to content

Instantly share code, notes, and snippets.

@keith-turner
Last active January 10, 2018 15:29
Show Gist options
  • Save keith-turner/e28ee6cd4941210f34e5cd0e6a6b3106 to your computer and use it in GitHub Desktop.
Settings used for Fluo stress test run for 1.2.0 release.
###############################
# configuration for all scripts
###############################
# Fluo Home
# FIX: placeholder previously read /path/to/accumulo, which contradicts the
# "Fluo Home" comment and the use of $FLUO_HOME/bin/fluo below.
test -z "$FLUO_HOME" && FLUO_HOME=/path/to/fluo
# Fluo application name
FLUO_APP_NAME=stresso

###############################
# configuration for run-test.sh
###############################
# Place where logs from test are placed
# NOTE(review): BIN_DIR is expected to be set by the script that sources this
# file (see the setup script) -- confirm before running standalone.
LOG_DIR=$BIN_DIR/../logs
# Maximum number to generate
MAX=$((10**12))
# The number of splits to create in table
SPLITS=40
# Number of mappers to run for data generation, which determines how many files
# generation outputs. The number of files determines how many mappers loading
# data will run.
MAPS=10
# Number of reduce tasks
REDUCES=10
# Number of random numbers to generate initially
GEN_INIT=$((10**9))
# Number of random numbers to generate for each incremental step.
GEN_INCR=$((10**7))
# Load incremental data sets until this number of seconds passes (24 hours)
LOAD_TIME=$((60 * 60 * 24))
# Seconds to sleep between incremental steps.
SLEEP=180
# Compact levels with less than the following possible nodes after loads
COMPACT_CUTOFF=$((256**3 + 1))
# The fluo wait command is executed after this many incremental load steps.
WAIT_PERIOD=10

# To run map reduce jobs, a shaded jar is built. The following properties
# determine what versions of Fluo and Accumulo client libs end up in the shaded
# jar.
# NOTE(review): both commands below must be runnable ('accumulo' on PATH) --
# failures leave the variables empty rather than aborting.
FLUO_VERSION=$($FLUO_HOME/bin/fluo version)
ACCUMULO_VERSION=$(accumulo version)

# Determine a good stop level for the trie based on how many numbers will be
# generated: smaller data sets can afford a deeper (higher) stop level.
if (("$MAX" <= $((10**9)))); then
  STOP=6
elif (("$MAX" <= $((10**12)))); then
  STOP=5
else
  STOP=4
fi
# fluo-yarn launcher properties for the stresso run.
# NOTE(review): presumably consumed by the fluo-yarn tool (Java properties
# format, hence key=value with no spaces) -- confirm against its docs.
# ZooKeeper connect string and chroot used by the launcher
fluo.yarn.zookeepers=leader1/fluo-yarn
# YARN resource manager host
fluo.yarn.resource.manager=leader1
# HDFS root the launcher writes under
fluo.yarn.dfs.root=hdfs://leader1:8020/
# Number of worker containers to request
fluo.yarn.worker.instances=10
# Per-worker container memory ceiling (MB)
fluo.yarn.worker.max.memory.mb=5120
# Memory reserved out of the container for non-heap use (MB)
fluo.yarn.worker.reserved.memory.mb=1024
# muchos properties used to launch the cluster; used Muchos commit 1ff3952
# Read by Muchos via Python configparser; spacing normalized to "key = value"
# throughout (configparser strips whitespace around '=', so this is cosmetic).

# Cluster-wide settings: provider, login, software versions and checksums.
[general]
cloud_provider = ec2
cluster_user = centos
cluster_basedir = /home/centos
proxy_hostname = leader1
network_interface = eth0
proxy_socks_port = 38585
accumulo_instance = muchos
hadoop_version = 2.7.5
zookeeper_version = 3.4.11
spark_version = 1.6.3
fluo_version = 1.2.0-SNAPSHOT
fluo_yarn_version = 1.0.0-SNAPSHOT
accumulo_version = 1.8.1
hadoop_sha256 = 0bfc4d9b04be919be2fdf36f67fa3b4526cdbd406c512a7a1f5f1b715661f831
zookeeper_sha256 = f6bd68a1c8f7c13ea4c2c99f13082d0d71ac464ffaf3bf7a365879ab6ad10e84
spark_sha256 = d13358a2d45e78d7c8cf22656d63e5715a5900fab33b3340df9e11ce3747e314
fluo_sha256 = 9765f48ac2a075129905b1bac96e7ca6c735264ddb9aeeee15282ed5f8bc1062
# NOTE(review): "tbd" is a placeholder -- the SNAPSHOT tarball checksum was
# presumably not known at launch time; verify Muchos tolerates this.
fluo_yarn_sha256 = tbd
accumulo_sha256 = eba3bfe823935ca7901ea7c2bd59c84a68b9381361699c7e260bbd9191f237f4

# EC2 instance types and volume setup.
[ec2]
default_instance_type = m3.xlarge
worker_instance_type = i3.2xlarge
fstype = xfs
force_format = no
associate_public_ip = true

# Selects which tuning profile section below is applied.
[performance]
profile = perf-large

# Memory/thread tuning applied to Accumulo, Fluo, and YARN.
[perf-large]
accumulo_tserv_mem = 16G
accumulo_dcache_size = 10G
accumulo_icache_size = 2G
accumulo_imap_size = 512M
fluo_worker_mem_mb = 4096
twill_reserve_mem_mb = 512
fluo_worker_threads = 256
fluo_worker_instances_multiplier = 1
yarn_nm_mem_mb = 16384

# hostname = comma-separated service roles for that node.
[nodes]
leader1 = namenode,zookeeper,fluo,fluo_yarn,resourcemanager,accumulomaster
metrics = metrics
worker0 = worker
worker1 = worker
worker2 = worker
worker3 = worker
worker4 = worker
worker5 = worker
worker6 = worker
worker7 = worker
worker8 = worker
worker9 = worker
#!/bin/bash
# stresso setup script which was modified some prior to starting the test
#
# Builds the stresso jar plus its shaded dependencies, writes the Fluo
# application config, initializes the Fluo app, and tunes the Accumulo table
# and tservers for the stress test.

# Stop if any command fails.
# FIX: moved above the env sourcing and cd -- previously a failed cd or
# load-env.sh would be ignored and mvn would run from the wrong directory.
set -e

# Directory containing this script
BIN_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

# TODO maybe have a single skip checks var
SKIP_JAR_CHECKS=1
SKIP_FLUO_PROPS_CHECK=1

# NOTE(review): load-env.sh presumably defines FLUO_HOME, FLUO_APP_NAME,
# FLUO_VERSION, ACCUMULO_VERSION, SPLITS, STOP, and SED -- confirm.
. "$BIN_DIR/load-env.sh"
cd "$BIN_DIR/.."

# Build jar and shaded jar
mvn clean package dependency:copy-dependencies \
  -DincludeArtifactIds=fluo-recipes-core \
  -Dfluo.version="$FLUO_VERSION" \
  -Daccumulo.version="$ACCUMULO_VERSION"
# FIX: -p so a re-run does not abort (set -e) when target/lib already exists
mkdir -p target/lib
cp target/stresso-0.0.1-SNAPSHOT.jar target/dependency/*.jar target/lib

# Create config file used for fluo initialization
cp "$FLUO_HOME/conf/fluo-app.properties" ./conf/fluo-app.properties
# Drop any existing worker-thread setting so the value appended below wins.
# NOTE(review): $SED is presumably an in-place sed wrapper (e.g. "sed -i")
# defined by load-env.sh -- confirm, otherwise this edits nothing.
$SED '/fluo.worker.num.threads.*/d' ./conf/fluo-app.properties
cat << EOF >> ./conf/fluo-app.properties
fluo.observer.init.dir=$(pwd)/target/lib
fluo.observer.0=stresso.trie.NodeObserver
fluo.worker.num.threads=256
fluo.loader.num.threads=128
fluo.loader.queue.size=128
fluo.app.trie.nodeSize=8
fluo.app.trie.stopLevel=$STOP
EOF

# Create config file used for connection
cp "$FLUO_HOME/conf/fluo-conn.properties" ./conf/fluo-conn.properties
echo "fluo.connection.application.name=$FLUO_APP_NAME" >> ./conf/fluo-conn.properties

# Initialize Stresso
fluo init -a "$FLUO_APP_NAME" -p conf/fluo-app.properties -f

# Optimize Accumulo table used by Fluo Stresso Application
# TODO check if accumulo on path
accumulo shell -u root -p secret <<EOF
config -t $FLUO_APP_NAME -s table.custom.balancer.group.regex.pattern=(\\\\d\\\\d).*
config -t $FLUO_APP_NAME -s table.custom.balancer.group.regex.default=none
config -t $FLUO_APP_NAME -s table.balancer=org.apache.accumulo.server.master.balancer.RegexGroupBalancer
config -t $FLUO_APP_NAME -s table.compaction.major.ratio=1.5
config -t $FLUO_APP_NAME -s table.file.compress.blocksize.index=256K
config -t $FLUO_APP_NAME -s table.file.compress.blocksize=8K
config -t $FLUO_APP_NAME -s table.bloom.enabled=false
config -t $FLUO_APP_NAME -s table.bloom.error.rate=5%
config -s table.durability=flush
config -t accumulo.metadata -d table.durability
config -t accumulo.root -d table.durability
config -s tserver.readahead.concurrent.max=256
config -s tserver.server.threads.minimum=256
config -s tserver.scan.files.open.max=1000
config -s tserver.wal.replication=2
config -s table.file.replication=2
EOF

# Add initial splits to the table used by Fluo Stresso Application
fluo exec "$FLUO_APP_NAME" stresso.trie.Split "$SPLITS"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment