Skip to content

Instantly share code, notes, and snippets.

@dlecocq
Created September 6, 2012 22:09
Show Gist options
  • Save dlecocq/3660749 to your computer and use it in GitHub Desktop.
Save dlecocq/3660749 to your computer and use it in GitHub Desktop.
Custom Crawl Cassandra Bootstrap
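# Bootstraps a Cassandra node for the custom-crawl cluster.
# Two variables must be exported before running this script
# (see http://wiki.apache.org/cassandra/GettingStarted):
#   INITIAL_TOKEN -> this node's position in the token ring
#   SEED_IP       -> IP address of the seed node
#
# For example, this is what I used on the first node:
#   export INITIAL_TOKEN=0
#   export SEED_IP=<IP address of the seed node>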
# We need a few tools to make this tick: mdadm for the software RAID, git, and
# a compiler toolchain plus -devel packages (presumably what the thrift source
# build below requires -- confirm before trimming the list).
sudo yum install -y mdadm git boost{,-devel} automake libtool flex bison pkgconfig gcc-c++ libevent-devel zlib-devel python-devel ruby-devel openssl{,-devel} make
# Cassandra's log4j appender writes to /var/log/cassandra/system.log and the
# server is started as the current user, so give that user a dedicated log
# directory instead of making all of /var/log world-writable.
sudo mkdir -p /var/log/cassandra
sudo chown "$(id -un):$(id -gn)" /var/log/cassandra
# Unmount the default-mounted ephemeral drive
sudo umount /media/ephemeral0/
# Stripe the four devices into one software RAID array at /dev/md0 (`yes`
# answers any confirmation prompt from mdadm), then append the array
# definition to /etc/mdadm.conf.
yes | sudo mdadm --create --verbose /dev/md0 --level=stripe --raid-devices=4 /dev/xvd{f,g,h,i}
sudo mdadm --detail --scan | sudo tee -a /etc/mdadm.conf
# Replace the ephemeral-drive entry in /etc/fstab with the RAID mount.
# The entry is matched by its mount point rather than assumed to be the last
# line, so an unrelated entry is never deleted; the RAID line is only appended
# when it is not already present.
sudo sed -i '\|/media/ephemeral0|d' /etc/fstab
grep -q '^/dev/md0[[:space:]]' /etc/fstab ||
  echo '/dev/md0 /media/raid ext4 defaults 0 0' | sudo tee -a /etc/fstab
# Now make an ext4 on /dev/md0
sudo mkfs.ext4 /dev/md0
# And load it; the device and options come from the fstab entry above
sudo mkdir -p /media/raid
sudo mount /media/raid
# Download thrift 0.8.0 into the home directory and unpack it.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# tarball; -L follows redirects.
cd && curl -fLO https://dist.apache.org/repos/dist/release/thrift/0.8.0/thrift-0.8.0.tar.gz
tar xf thrift-0.8.0.tar.gz
cd thrift-0.8.0
# Configure, build, install
./configure
# The parallel build took three tries for me for whatever reason, so retry
# up to three times and stop as soon as one attempt succeeds.
for attempt in 1 2 3; do
  make -j8 && break
done
sudo make install
# Download the Cassandra 0.8.10 binary tarball into the home directory and
# unpack it. -f makes curl fail on an HTTP error instead of saving the error
# page as the tarball.
cd && curl -fOL http://www.eng.lsu.edu/mirrors/apache/cassandra/0.8.10/apache-cassandra-0.8.10-bin.tar.gz
tar xf apache-cassandra-0.8.10-bin.tar.gz
# Move it somewhere appropriate and hand ownership to ec2-user
sudo mv apache-cassandra-0.8.10 /usr/local
sudo chown -R ec2-user:ec2-user /usr/local/apache-cassandra-0.8.10
# Point the version-independent "latest" path at this install. -f replaces an
# existing link and -n stops ln from following it, which would otherwise
# create a nested link inside the directory the old link points to.
sudo ln -sfn /usr/local/apache-cassandra-0.8.10 /usr/local/apache-cassandra-latest
# Everything Cassandra stores on disk lives under the RAID mount
export CASS_DIR=/media/raid
export DATA_DIR="${CASS_DIR}/data"
export COMMIT_DIR="${CASS_DIR}/commit"
export CACHES_DIR="${CASS_DIR}/caches"
# Network ports and cluster identity written into cassandra.yaml
export STORAGE_PORT=7000 RPC_PORT=9160
export CLUSTER_NAME="custom-crawl"
# Create the data, commit log and cache directories and open up access
sudo mkdir -p "${DATA_DIR}" "${COMMIT_DIR}" "${CACHES_DIR}"
sudo chmod a+rw "${DATA_DIR}" "${COMMIT_DIR}" "${CACHES_DIR}"
# Configuration path
export CONF_PATH=/usr/local/apache-cassandra-latest/conf/cassandra.yaml
# An unset or empty SEED_IP would be written into the config as seeds: "",
# so stop here rather than start a node with no seed.
: "${SEED_IP:?SEED_IP must be set to the IP address of the seed node}"
# Every edit below rewrites cassandra.yaml in place, replacing a stock default
# with our value. -i and -e are deliberately separate flags: GNU sed reads the
# combined form -ie as "-i with backup suffix e", which leaves a stray
# cassandra.yamle copy next to the config.
# Set up the data directory, commit log, caches
sed -i -e "s;- /var/lib/cassandra/data;- $DATA_DIR;" "$CONF_PATH"
sed -i -e "s;/var/lib/cassandra/commitlog;$COMMIT_DIR;" "$CONF_PATH"
sed -i -e "s;/var/lib/cassandra/saved_caches;$CACHES_DIR;" "$CONF_PATH"
# Ports
sed -i -e "s;storage_port: 7000;storage_port: $STORAGE_PORT;" "$CONF_PATH"
sed -i -e "s;rpc_port: 9160;rpc_port: $RPC_PORT;" "$CONF_PATH"
# Cluster name and token
sed -i -e "s;cluster_name: 'Test Cluster';cluster_name: '$CLUSTER_NAME';" "$CONF_PATH"
sed -i -e "s;initial_token:;initial_token: $INITIAL_TOKEN;" "$CONF_PATH"
# Log4J Configuration; We don't need to change this, but here it is...
# Edit the next line to point to your logs directory
# log4j.appender.R.File=/var/log/cassandra/system.log
# And also for our IP address: take the value after "inet addr:" in the
# ifconfig output for eth0. Assignment and export are separate statements so a
# failing pipeline is not masked by export's own exit status.
LOCAL_IP=$(/sbin/ifconfig eth0 | grep "inet addr" | awk -F: '{print $2}' | awk '{print $1}')
export LOCAL_IP
sed -i -e "s;listen_address: localhost;listen_address: $LOCAL_IP;" "$CONF_PATH"
sed -i -e "s;rpc_address: localhost;rpc_address: 0.0.0.0;" "$CONF_PATH"
sed -i -e "s;- seeds: \"127.0.0.1\";- seeds: \"$SEED_IP\";" "$CONF_PATH"
# Announce it when this node is itself the seed
if [[ "$SEED_IP" == "$LOCAL_IP" ]]; then
  echo 'I am the seed'
fi
# Configuration is done, so launch the Cassandra server
/usr/local/apache-cassandra-latest/bin/cassandra
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment