Skip to content

Instantly share code, notes, and snippets.

@keith-turner
Last active January 10, 2018 15:29
Show Gist options
  • Save keith-turner/e28ee6cd4941210f34e5cd0e6a6b3106 to your computer and use it in GitHub Desktop.
Settings used for Fluo stress test run for 1.2.0 release.
###############################
# configuration for all scripts
###############################
# Fluo Home
# FIX: placeholder previously read /path/to/accumulo, which contradicts the
# "Fluo Home" comment and the use of $FLUO_HOME/bin/fluo below.
test -z "$FLUO_HOME" && FLUO_HOME=/path/to/fluo
# Fluo application name
FLUO_APP_NAME=stresso

###############################
# configuration for run-test.sh
###############################
# Place where logs from test are placed
# NOTE(review): BIN_DIR is expected to be set by the script that sources this
# file (see the setup script) -- confirm before running standalone.
LOG_DIR=$BIN_DIR/../logs
# Maximum number to generate
MAX=$((10**12))
# The number of splits to create in table
SPLITS=40
# Number of mappers to run for data generation, which determines how many files
# generation outputs. The number of files determines how many mappers loading
# data will run.
MAPS=10
# Number of reduce tasks
REDUCES=10
# Number of random numbers to generate initially
GEN_INIT=$((10**9))
# Number of random numbers to generate for each incremental step.
GEN_INCR=$((10**7))
# Load incremental data sets until this number of seconds passes (24 hours)
LOAD_TIME=$((60 * 60 * 24))
# Seconds to sleep between incremental steps.
SLEEP=180
# Compact levels with less than the following possible nodes after loads
COMPACT_CUTOFF=$((256**3 + 1))
# The fluo wait command is executed after this many incremental load steps.
WAIT_PERIOD=10

# To run map reduce jobs, a shaded jar is built. The following properties
# determine what versions of Fluo and Accumulo client libs end up in the shaded
# jar.
# NOTE(review): both commands below must be runnable ('accumulo' on PATH) --
# failures leave the variables empty rather than aborting.
FLUO_VERSION=$($FLUO_HOME/bin/fluo version)
ACCUMULO_VERSION=$(accumulo version)

# Determine a good stop level for the trie based on how many numbers will be
# generated: smaller data sets can afford a deeper (higher) stop level.
if (("$MAX" <= $((10**9)))); then
  STOP=6
elif (("$MAX" <= $((10**12)))); then
  STOP=5
else
  STOP=4
fi
# fluo-yarn launcher properties for the stresso run.
# NOTE(review): presumably consumed by the fluo-yarn tool (Java properties
# format, hence key=value with no spaces) -- confirm against its docs.
# ZooKeeper connect string and chroot used by the launcher
fluo.yarn.zookeepers=leader1/fluo-yarn
# YARN resource manager host
fluo.yarn.resource.manager=leader1
# HDFS root the launcher writes under
fluo.yarn.dfs.root=hdfs://leader1:8020/
# Number of worker containers to request
fluo.yarn.worker.instances=10
# Per-worker container memory ceiling (MB)
fluo.yarn.worker.max.memory.mb=5120
# Memory reserved out of the container for non-heap use (MB)
fluo.yarn.worker.reserved.memory.mb=1024
# muchos properties used to launch the cluster; used Muchos commit 1ff3952
# Read by Muchos via Python configparser; spacing normalized to "key = value"
# throughout (configparser strips whitespace around '=', so this is cosmetic).

# Cluster-wide settings: provider, login, software versions and checksums.
[general]
cloud_provider = ec2
cluster_user = centos
cluster_basedir = /home/centos
proxy_hostname = leader1
network_interface = eth0
proxy_socks_port = 38585
accumulo_instance = muchos
hadoop_version = 2.7.5
zookeeper_version = 3.4.11
spark_version = 1.6.3
fluo_version = 1.2.0-SNAPSHOT
fluo_yarn_version = 1.0.0-SNAPSHOT
accumulo_version = 1.8.1
hadoop_sha256 = 0bfc4d9b04be919be2fdf36f67fa3b4526cdbd406c512a7a1f5f1b715661f831
zookeeper_sha256 = f6bd68a1c8f7c13ea4c2c99f13082d0d71ac464ffaf3bf7a365879ab6ad10e84
spark_sha256 = d13358a2d45e78d7c8cf22656d63e5715a5900fab33b3340df9e11ce3747e314
fluo_sha256 = 9765f48ac2a075129905b1bac96e7ca6c735264ddb9aeeee15282ed5f8bc1062
# NOTE(review): "tbd" is a placeholder -- the SNAPSHOT tarball checksum was
# presumably not known at launch time; verify Muchos tolerates this.
fluo_yarn_sha256 = tbd
accumulo_sha256 = eba3bfe823935ca7901ea7c2bd59c84a68b9381361699c7e260bbd9191f237f4

# EC2 instance types and volume setup.
[ec2]
default_instance_type = m3.xlarge
worker_instance_type = i3.2xlarge
fstype = xfs
force_format = no
associate_public_ip = true

# Selects which tuning profile section below is applied.
[performance]
profile = perf-large

# Memory/thread tuning applied to Accumulo, Fluo, and YARN.
[perf-large]
accumulo_tserv_mem = 16G
accumulo_dcache_size = 10G
accumulo_icache_size = 2G
accumulo_imap_size = 512M
fluo_worker_mem_mb = 4096
twill_reserve_mem_mb = 512
fluo_worker_threads = 256
fluo_worker_instances_multiplier = 1
yarn_nm_mem_mb = 16384

# hostname = comma-separated service roles for that node.
[nodes]
leader1 = namenode,zookeeper,fluo,fluo_yarn,resourcemanager,accumulomaster
metrics = metrics
worker0 = worker
worker1 = worker
worker2 = worker
worker3 = worker
worker4 = worker
worker5 = worker
worker6 = worker
worker7 = worker
worker8 = worker
worker9 = worker
#!/bin/bash
# stresso setup script which was modified some prior to starting the test
#
# Builds the stresso jar plus its shaded dependencies, writes the Fluo
# application config, initializes the Fluo app, and tunes the Accumulo table
# and tservers for the stress test.

# Stop if any command fails.
# FIX: moved above the env sourcing and cd -- previously a failed cd or
# load-env.sh would be ignored and mvn would run from the wrong directory.
set -e

# Directory containing this script
BIN_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

# TODO maybe have a single skip checks var
SKIP_JAR_CHECKS=1
SKIP_FLUO_PROPS_CHECK=1

# NOTE(review): load-env.sh presumably defines FLUO_HOME, FLUO_APP_NAME,
# FLUO_VERSION, ACCUMULO_VERSION, SPLITS, STOP, and SED -- confirm.
. "$BIN_DIR/load-env.sh"
cd "$BIN_DIR/.."

# Build jar and shaded jar
mvn clean package dependency:copy-dependencies \
  -DincludeArtifactIds=fluo-recipes-core \
  -Dfluo.version="$FLUO_VERSION" \
  -Daccumulo.version="$ACCUMULO_VERSION"
# FIX: -p so a re-run does not abort (set -e) when target/lib already exists
mkdir -p target/lib
cp target/stresso-0.0.1-SNAPSHOT.jar target/dependency/*.jar target/lib

# Create config file used for fluo initialization
cp "$FLUO_HOME/conf/fluo-app.properties" ./conf/fluo-app.properties
# Drop any existing worker-thread setting so the value appended below wins.
# NOTE(review): $SED is presumably an in-place sed wrapper (e.g. "sed -i")
# defined by load-env.sh -- confirm, otherwise this edits nothing.
$SED '/fluo.worker.num.threads.*/d' ./conf/fluo-app.properties
cat << EOF >> ./conf/fluo-app.properties
fluo.observer.init.dir=$(pwd)/target/lib
fluo.observer.0=stresso.trie.NodeObserver
fluo.worker.num.threads=256
fluo.loader.num.threads=128
fluo.loader.queue.size=128
fluo.app.trie.nodeSize=8
fluo.app.trie.stopLevel=$STOP
EOF

# Create config file used for connection
cp "$FLUO_HOME/conf/fluo-conn.properties" ./conf/fluo-conn.properties
echo "fluo.connection.application.name=$FLUO_APP_NAME" >> ./conf/fluo-conn.properties

# Initialize Stresso
fluo init -a "$FLUO_APP_NAME" -p conf/fluo-app.properties -f

# Optimize Accumulo table used by Fluo Stresso Application
# TODO check if accumulo on path
accumulo shell -u root -p secret <<EOF
config -t $FLUO_APP_NAME -s table.custom.balancer.group.regex.pattern=(\\\\d\\\\d).*
config -t $FLUO_APP_NAME -s table.custom.balancer.group.regex.default=none
config -t $FLUO_APP_NAME -s table.balancer=org.apache.accumulo.server.master.balancer.RegexGroupBalancer
config -t $FLUO_APP_NAME -s table.compaction.major.ratio=1.5
config -t $FLUO_APP_NAME -s table.file.compress.blocksize.index=256K
config -t $FLUO_APP_NAME -s table.file.compress.blocksize=8K
config -t $FLUO_APP_NAME -s table.bloom.enabled=false
config -t $FLUO_APP_NAME -s table.bloom.error.rate=5%
config -s table.durability=flush
config -t accumulo.metadata -d table.durability
config -t accumulo.root -d table.durability
config -s tserver.readahead.concurrent.max=256
config -s tserver.server.threads.minimum=256
config -s tserver.scan.files.open.max=1000
config -s tserver.wal.replication=2
config -s table.file.replication=2
EOF

# Add initial splits to the table used by Fluo Stresso Application
fluo exec "$FLUO_APP_NAME" stresso.trie.Split "$SPLITS"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment