Skip to content

Instantly share code, notes, and snippets.

@seut
Last active March 13, 2023 16:40
Show Gist options
  • Save seut/497ef886db8755f9c8f27959e197149f to your computer and use it in GitHub Desktop.
Save seut/497ef886db8755f9c8f27959e197149f to your computer and use it in GitHub Desktop.
Crate Multi-Node Docker Demo
#!/bin/bash
# --- Demo configuration -----------------------------------------------------
NUM_NODES=3   # number of containers to start (crate_1 .. crate_<NUM_NODES>)
NUM_SHARDS=20 # shard count interpolated into the CREATE TABLE statements

# Absolute path of the directory holding this script; the per-node data
# directories and the Python venv are created beneath it.
DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Container / node names: crate_1, crate_2, ...
NODE_NAMES=()
for ((i = 0; i < NUM_NODES; ++i)); do
  NODE_NAMES+=("crate_$((i + 1))")
done

# join_by SEP ITEM...
#   Prints all ITEMs on one line, joined by the first character of SEP.
function join_by {
  local IFS="$1"
  shift
  printf '%s\n' "$*"
}

# Comma-separated list of *all* node names, used for the discovery and
# initial-master settings of every node.
# The array must be expanded as "${NODE_NAMES[@]}": a bare $NODE_NAMES is
# only element 0 and would yield just "crate_1".
SEED_HOSTS=$(join_by , "${NODE_NAMES[@]}")
# --- Start the cluster nodes --------------------------------------------------
# Launches one detached container per entry in NODE_NAMES.
# Reads: NODE_NAMES, DIR, SEED_HOSTS, NUM_NODES.

# Every container is attached to the Docker network "crate"; `docker run`
# fails if that network does not exist, so create it when it is missing.
docker network inspect crate > /dev/null 2>&1 || docker network create crate

for ((i = 0; i < ${#NODE_NAMES[@]}; ++i)); do
  # Host ports 4201, 4202, ... are mapped to container port 4200; the crash
  # calls later in this script connect to localhost:4201 (the first node).
  port=$((4201 + i))
  node_name=${NODE_NAMES[$i]}

  # Per-node data directory next to the script, mounted at /data.
  # Quoted throughout so a script location containing spaces still works.
  data_dir="${DIR}/data_${i}"
  # Uncomment to start from an empty data directory:
  #rm -rf -- "${data_dir:?}"
  mkdir -p -- "$data_dir"

  # Tip: use `docker run --rm -d` instead to have the container removed
  # automatically when it stops.
  docker run -d \
    --name="${node_name}" \
    --net=crate \
    -p "${port}:4200" \
    -v "${data_dir}:/data" \
    --env CRATE_HEAP_SIZE=4g \
    crate -Cnetwork.host=_site_ \
    -Cnode.name="${node_name}" \
    -Cdiscovery.seed_hosts="${SEED_HOSTS}" \
    -Ccluster.initial_master_nodes="${SEED_HOSTS}" \
    -Cgateway.expected_data_nodes="${NUM_NODES}" \
    -Cgateway.recover_after_data_nodes="${NUM_NODES}"
done
# Give the nodes a moment to discover each other and form a cluster.
# NOTE(review): a fixed delay; nothing here verifies the cluster is actually
# up before the SQL statements below are sent.
sleep 10

# Install the `crash` CLI into a virtualenv next to the script and activate
# it for the rest of this run. Paths are quoted so a script directory that
# contains whitespace does not split into several arguments.
python -m venv "${DIR}/venv"
# shellcheck disable=SC1091 # activate script is generated at runtime
source "${DIR}/venv/bin/activate"
pip install crash
# Cluster-wide settings applied before any table is created: switches
# "cluster.routing.allocation.disk.threshold_enabled" off.
# `read -d ''` slurps the whole heredoc; it reports end-of-input with a
# non-zero status, which is expected and ignored here.
read -r -d '' SETTINGS_STMT <<SQL
SET GLOBAL
"cluster.routing.allocation.disk.threshold_enabled"=false
;
SQL

# Deliberately unquoted: word splitting folds the statement onto a single
# line before it is piped to crash.
echo $SETTINGS_STMT | crash --hosts localhost:4201
# --- Table definitions --------------------------------------------------------
# Both tables are clustered into NUM_SHARDS shards and start with zero
# replicas. The heredocs are unquoted so ${NUM_SHARDS} is expanded.
# `read -d ''` slurps each heredoc; its non-zero end-of-input status is
# expected and ignored.

read -r -d '' NYC_TAXI_TABLE <<SQL
CREATE TABLE IF NOT EXISTS "nyc_taxi" (
"congestion_surcharge" REAL,
"dolocationid" INTEGER,
"extra" REAL,
"fare_amount" REAL,
"improvement_surcharge" REAL,
"mta_tax" REAL,
"passenger_count" INTEGER,
"payment_type" INTEGER,
"pickup_datetime" TIMESTAMP WITH TIME ZONE,
"pulocationid" INTEGER,
"ratecodeid" INTEGER,
"store_and_fwd_flag" TEXT,
"tip_amount" REAL,
"tolls_amount" REAL,
"total_amount" REAL,
"trip_distance" REAL,
"vendorid" INTEGER)
CLUSTERED INTO ${NUM_SHARDS} SHARDS
WITH (number_of_replicas=0, column_policy='dynamic', "unassigned.node_left.delayed_timeout"=0);
SQL

read -r -d '' POWER_CONSUMPTION_TABLE <<SQL
CREATE TABLE IF NOT EXISTS power_consumption (
"ts" TIMESTAMP WITH TIME ZONE,
"Global_active_power" REAL,
"Global_reactive_power" REAL,
"Voltage" REAL,
"Global_intensity" REAL,
"Sub_metering_1" REAL,
"Sub_metering_2" REAL,
"Sub_metering_3" REAL,
"meter_id" TEXT,
"location" GEO_POINT,
"city" TEXT
) CLUSTERED INTO ${NUM_SHARDS} SHARDS
WITH (number_of_replicas=0, "unassigned.node_left.delayed_timeout"=0);
SQL

# Create the tables via the first node. The expansion is deliberately
# unquoted so each statement reaches crash folded onto a single line.
for table_ddl in "$NYC_TAXI_TABLE" "$POWER_CONSUMPTION_TABLE"; do
  echo $table_ddl | crash --hosts localhost:4201
done
# --- Load sample data ----------------------------------------------------------
# One COPY FROM statement per table; each asks for a summary of the import.
# `read -d ''` slurps each heredoc; its non-zero end-of-input status is
# expected and ignored.

read -r -d '' NYC_TAXI_DATA_STMT <<SQL
COPY "nyc_taxi"
FROM 'https://s3.amazonaws.com/crate.sampledata/nyc.yellowcab/yc.2019.07.gz'
WITH (compression = 'gzip')
RETURN SUMMARY;
SQL

read -r -d '' POWER_DATA_STMT <<SQL
COPY power_consumption
FROM 'https://srv.demo.crate.io/datasets/power_consumption.json'
RETURN SUMMARY;
SQL

# Run the imports in order (taxi data first). The expansion is deliberately
# unquoted so each statement reaches crash folded onto a single line.
for copy_stmt in "$NYC_TAXI_DATA_STMT" "$POWER_DATA_STMT"; do
  echo $copy_stmt | crash --hosts localhost:4201
done
# Enable replication: raise number_of_replicas from 0 to 1 on both demo
# tables now that the data has been loaded.
for replicated_table in nyc_taxi power_consumption; do
  crash --hosts localhost:4201 -c "ALTER TABLE ${replicated_table} SET (number_of_replicas=1)"
done
## Disconnect a node
#
# docker network disconnect crate crate_3
## Reconnect a node
#
# docker network connect crate crate_3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment