Skip to content

Instantly share code, notes, and snippets.

@sanmiguel
Last active August 25, 2016 12:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sanmiguel/aa06f1e6026601677af2ced87c6a35f9 to your computer and use it in GitHub Desktop.
Save sanmiguel/aa06f1e6026601677af2ced87c6a35f9 to your computer and use it in GitHub Desktop.
#!/bin/bash
# check_loaded.sh - report keys that cannot be fetched from a local Riak node.
#
# Usage: check_loaded.sh <port> <loaded-keys.out>
#   <port>             HTTP port on localhost to query
#   <loaded-keys.out>  file listing the keys to check, one "<bucket>/<key>"
#                      entry per line
#
# For every entry, GETs http://localhost:<port>/riak/<entry> and prints
# "NOT FOUND: <entry>" when the reply body is exactly "not found".
# Prints nothing when no entry produces that reply.
# Exits 1 when an argument is missing or the key list is unreadable.

# Test each argument on its own so an omitted key list is caught as well as
# an omitted port.
if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  echo "Usage: $0 <port> <loaded-keys.out>" >&2
  exit 1
fi
PORT=$1
LIST=$2

if [[ ! -r "$LIST" ]]; then
  echo "Cannot read key list: ${LIST}" >&2
  exit 1
fi

# Read line by line (no word splitting / globbing of the entries); the
# '|| [[ -n ... ]]' clause keeps a final line that lacks a trailing newline.
while IFS= read -r key || [[ -n "$key" ]]; do
  # Blank lines carry no key.
  [[ -n "$key" ]] || continue
  # stderr is discarded to hide curl's progress meter and error chatter;
  # only the response body is compared.
  reply=$(curl -q -XGET "http://localhost:${PORT}/riak/${key}" 2>/dev/null)
  if [[ "$reply" == "not found" ]]; then
    echo "NOT FOUND: ${key}"
  fi
done < "$LIST"
vagrant@ubuntu:~$ ls -1 gh83/*
gh83/riak-2.1.4-ubuntu-14.04.tar.gz
gh83/riak_erlpmd_patches-1.4.0-mesos-0.28.1-ubuntu-14.04.tar.gz
gh83/riak_explorer-1.2.0.patch-ubuntu-14.04.tar.gz
gh83/riak_mesos_executor-1.4.0-mesos-0.28.1-ubuntu-14.04.tar.gz
gh83/new:
config.json
riak_mesos_scheduler-1.6.0-mesos-0.28.1-ubuntu-14.04.tar.gz
gh83/old:
config.json
riak_mesos_scheduler-1.5.0-mesos-1.0.0-ubuntu-14.04.tar.gz
# Runbook: move existing clusters from the old scheduler to the new scheduler
# (1.6.0) and then replace every old node with a new one, one at a time.
#
# This is a sequence of commands to be run by hand, not an unattended script:
# $name, $newnode and $oldnode1 are placeholders that are not assigned anywhere
# in this file - substitute the cluster / node names reported by the 'list'
# commands.
#
# As per proof-procedure.sh, we start with an old cluster: if you're reading this because you need to,
# you have already done the steps up to L9 of that script.
ln -nsf /home/vagrant/gh83/old/config.json /etc/riak-mesos/config.json
riak-mesos framework install
riak-mesos cluster create
riak-mesos node add --nodes 3
riak-mesos node transfers wait-for-service --timeout 600 # repeat if necessary, until complete
PORT=$(riak-mesos node info | jq .[].location.http_port)
# For safety, remember to back up your config - we'll need this shortly
riak-mesos cluster list # Verify you have all the clusters you expect, then
# for each cluster, do
# NOTE(review): here --cluster precedes the 'config' subcommand, whereas the
# later 'cluster config --cluster $name --file ...' call puts it after -
# confirm which order the CLI accepts.
riak-mesos cluster --cluster $name config | tee ${name}-riak.conf.new
$EDITOR *-riak.conf.new # Find 'platform_data_dir' and set its value to ../../data in all conf files
# Switch to the new scheduler, 1.6.0
# In this example, the new config file is /home/vagrant/gh83/new/config.json, but most likely you'll
# edit your /etc/riak-mesos/config.json file in place.
ln -nsf /home/vagrant/gh83/new/config.json /etc/riak-mesos/config.json
riak-mesos config | jq .riak.scheduler.url # Verify that this is now pointed at scheduler 1.6.0 tgz
# Now we update the scheduler:
riak-mesos framework uninstall # Remove your old scheduler
riak-mesos framework install # Install the new one
riak-mesos cluster list # verify you have the same list of clusters, then:
# for each cluster, do
# (this uploads the edited conf file saved by the 'tee' step earlier)
riak-mesos cluster config --cluster $name --file ${name}-riak.conf.new
# Now you have a new scheduler with the appropriate config for every cluster
# We can start replacing nodes *safely*!
# The important principle here is to only remove an old node when:
# - a new node has been added AND
# - transfers have completed around the cluster
# For each old node:
# Take note of which nodes you have running:
riak-mesos node list --cluster $name
riak-mesos node add --cluster $name
riak-mesos node list --cluster $name # Take note of the new node's name
# Now we wait for transfers to complete:
# NOTE(review): --node is given before 'wait-for-service' here but after it in
# the second wait further down - confirm both forms are accepted.
riak-mesos node transfers --node $newnode wait-for-service --timeout 600
# This can take a while, depending on how much data you have
# Once transfers are complete, the 'wait-for-service' command above will print something like:
# ```
# {"transfers":{"down":[],"waiting_to_handoff":[],"stopped":[],"active":[]}}
# Node riak-default-6 transfers complete.
# ```
# Now you have 1 new node in your cluster, using the fixed version,
# so you can remove 1 old node.
riak-mesos node remove --node $oldnode1
# Again, wait for transfers to complete
riak-mesos node transfers wait-for-service --node $newnode --timeout 600
# Repeat the "For each old node" steps above (L32-49 of this file) for every
# old node, until you've replaced them all.
# As a shortcut, if you have the capacity, you can add all the new nodes at once,
# but you should still wait for transfers to settle before removing each old node.
#!/bin/bash
# load_manpages.sh - store the local man pages in Riak as test data.
#
# Usage: load_manpages.sh <port>
#   <port>  HTTP port on localhost to PUT to
#
# Every regular file found up to two levels below MAN_DIR is PUT to
# http://localhost:<port>/riak/<bucket>/<key>, where <bucket> is the name of
# the file's parent directory and <key> is the file name.
# One "<bucket>/<key>" line is written to stdout per file, whether or not the
# PUT succeeded, so the output can be saved as the list of expected keys.
# Exits 1 when <port> is missing.
MAN_DIR=/usr/share/man
PORT=$1
if [[ -z "${PORT:-}" ]]; then
  # Usage goes to stderr so it never ends up in a captured key list.
  echo "Usage: $0 <port>" >&2
  exit 1
fi

# NUL-delimited find output keeps paths containing whitespace or glob
# characters intact.
while IFS= read -r -d '' f; do
  dir=${f%/*}
  bucket=${dir##*/}
  key=${f##*/}
  # --data-binary uploads the file byte-for-byte; plain -d would strip
  # carriage returns and newlines from the file contents.
  curl -q -XPUT "http://localhost:${PORT}/riak/${bucket}/${key}" --data-binary "@${f}"
  echo "${bucket}/${key}"
done < <(find "$MAN_DIR" -maxdepth 2 -type f -print0)
{
"riak": {
"framework-name": "riak",
"hostname": "riak.marathon.mesos",
"marathon": "marathon.mesos:8080",
"master": "leader.mesos:5050",
"zk": "leader.mesos:2181",
"user": "root",
"role": "riak",
"auth-principal": "riak",
"auth-provider": "",
"auth-secret-file": "",
"instances": 1,
"failover-timeout": 10000.0,
"healthcheck-grace-period-seconds": 300,
"healthcheck-interval-seconds": 60,
"healthcheck-timeout-seconds": 20,
"healthcheck-max-consecutive-failures": 5,
"constraints": [],
"scheduler": {
"url": "/home/vagrant/gh83/new/riak_mesos_scheduler-1.6.0-mesos-0.28.1-ubuntu-14.04.tar.gz",
"cpus": 0.5,
"mem": 256.0,
"constraints": []
},
"executor": {
"url": "/home/vagrant/gh83/riak_mesos_executor-1.4.0-mesos-0.28.1-ubuntu-14.04.tar.gz",
"cpus": 0.1,
"mem": 512.0
},
"node": {
"url": "/home/vagrant/gh83/riak-2.1.4-ubuntu-14.04.tar.gz",
"patches-url": "/home/vagrant/gh83/riak_erlpmd_patches-1.4.0-mesos-0.28.1-ubuntu-14.04.tar.gz",
"explorer-url": "/home/vagrant/gh83/riak_explorer-1.2.0.patch-ubuntu-14.04.tar.gz",
"cpus": 0.25,
"mem": 512.0,
"disk": 1000.0
},
"director": {
"url": "/vagrant/framework/riak-mesos-director/packages/riak_mesos_director-1.0.1-ubuntu-14.04.tar.gz",
"use-public": false,
"cpus": 0.5,
"mem": 256.0
}
}
}
{
"riak": {
"framework-name": "riak",
"hostname": "riak.marathon.mesos",
"marathon": "marathon.mesos:8080",
"master": "leader.mesos:5050",
"zk": "leader.mesos:2181",
"user": "root",
"role": "riak",
"auth-principal": "riak",
"auth-provider": "",
"auth-secret-file": "",
"instances": 1,
"failover-timeout": 10000.0,
"healthcheck-grace-period-seconds": 300,
"healthcheck-interval-seconds": 60,
"healthcheck-timeout-seconds": 20,
"healthcheck-max-consecutive-failures": 5,
"constraints": [],
"scheduler": {
"url": "/home/vagrant/gh83/old/riak_mesos_scheduler-1.5.0-mesos-1.0.0-ubuntu-14.04.tar.gz",
"cpus": 0.5,
"mem": 256.0,
"constraints": []
},
"executor": {
"url": "/home/vagrant/gh83/riak_mesos_executor-1.4.0-mesos-0.28.1-ubuntu-14.04.tar.gz",
"cpus": 0.1,
"mem": 512.0
},
"node": {
"url": "/home/vagrant/gh83/riak-2.1.4-ubuntu-14.04.tar.gz",
"patches-url": "/home/vagrant/gh83/riak_erlpmd_patches-1.4.0-mesos-0.28.1-ubuntu-14.04.tar.gz",
"explorer-url": "/home/vagrant/gh83/riak_explorer-1.2.0.patch-ubuntu-14.04.tar.gz",
"cpus": 0.25,
"mem": 512.0,
"disk": 1000.0
},
"director": {
"url": "/vagrant/framework/riak-mesos-director/packages/riak_mesos_director-1.0.1-ubuntu-14.04.tar.gz",
"use-public": false,
"cpus": 0.5,
"mem": 256.0
}
}
}
# proof-procedure.sh - manual reproduction of data loss across a cluster
# restart, run once with the old config and once with the new one.
#
# Both passes perform the same steps: install the framework, create a cluster
# with 3 nodes, load keys with load_manpages.sh, verify them with
# check_loaded.sh, restart the cluster and verify again.
# Intended to be stepped through by hand (note the "repeat" / "wait" remarks).

# --- Pass 1: old config - keys are expected to be missing after the restart ---
ln -nsf /home/vagrant/gh83/old/config.json /etc/riak-mesos/config.json
riak-mesos framework install
riak-mesos cluster create
riak-mesos node add --nodes 3
riak-mesos node transfers wait-for-service # repeat until complete
PORT=$(riak-mesos node info | jq .[].location.http_port)
./load_manpages.sh $PORT | tee loaded-keys.out # Or you could just > loaded-keys.out (shrug)
./check_loaded.sh $PORT loaded-keys.out # Should print nothing - this only prints if a key is missing
riak-mesos cluster restart # wait for riak-default-1 to restart...
PORT=$(riak-mesos node info | jq .[].location.http_port) # port changes on restart
riak-mesos node transfers wait-for-service # repeat, or just don't bother, because:
./check_loaded.sh $PORT loaded-keys.out # Will print all the keys. ALL OF THEM.
# Blow the cluster away - we know it's broken
riak-mesos cluster destroy
riak-mesos framework uninstall

# --- Pass 2: new config - same steps, keys are expected to survive the restart ---
ln -nsf /home/vagrant/gh83/new/config.json /etc/riak-mesos/config.json
riak-mesos framework install
riak-mesos cluster create
riak-mesos node add --nodes 3
riak-mesos node transfers wait-for-service # repeat until complete
PORT=$(riak-mesos node info | jq .[].location.http_port)
./load_manpages.sh $PORT | tee loaded-keys.out # Or you could just > loaded-keys.out (shrug)
./check_loaded.sh $PORT loaded-keys.out # Should print nothing - this only prints if a key is missing
riak-mesos cluster restart # wait for riak-default-1 to restart...
PORT=$(riak-mesos node info | jq .[].location.http_port) # port changes on restart
riak-mesos node transfers wait-for-service # repeat, or just don't bother, because:
./check_loaded.sh $PORT loaded-keys.out # This time it will print nothing! 1.6.0 fixes the problem.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment