@Slach · April 16, 2024
clickhouse-backup CronJob examples
# backup-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: clickhouse-backup-cron
spec:
  # every day at 00:00
  # schedule: "0 0 * * *"
  # every minute, for demonstration only
  schedule: "* * * * *"
  concurrencyPolicy: "Forbid"
  jobTemplate:
    spec:
      backoffLimit: 1
      completions: 1
      parallelism: 1
      template:
        metadata:
          labels:
            app: clickhouse-backup-cron
        spec:
          restartPolicy: Never
          containers:
            - name: run-backup-cron
              image: clickhouse/clickhouse-client:latest
              imagePullPolicy: IfNotPresent
              env:
                # use the first replica in each shard; see `kubectl get svc | grep test-backups`
                - name: CLICKHOUSE_SERVICES
                  value: chi-test-backups-default-0-0,chi-test-backups-default-1-0
                - name: CLICKHOUSE_PORT
                  value: "9000"
                - name: BACKUP_USER
                  value: backup
                - name: BACKUP_PASSWORD
                  value: "backup_password"
                # set to 1 to make incremental backups, with a full backup only on $FULL_BACKUP_WEEKDAY (1 - Mon, 7 - Sun)
                - name: MAKE_INCREMENT_BACKUP
                  value: "1"
                - name: FULL_BACKUP_WEEKDAY
                  value: "1"
              command:
                - bash
                - -ec
                - CLICKHOUSE_SERVICES=$(echo $CLICKHOUSE_SERVICES | tr "," " ");
                  BACKUP_DATE=$(date +%Y-%m-%d-%H-%M-%S);
                  declare -A BACKUP_NAMES;
                  declare -A DIFF_FROM;
                  if [[ "" != "$BACKUP_PASSWORD" ]]; then
                    BACKUP_PASSWORD="--password=$BACKUP_PASSWORD";
                  fi;
                  for SERVER in $CLICKHOUSE_SERVICES; do
                    if [[ "1" == "$MAKE_INCREMENT_BACKUP" ]]; then
                      LAST_FULL_BACKUP=$(clickhouse-client -q "SELECT name FROM system.backup_list WHERE location='remote' AND name LIKE '%full%' AND desc NOT LIKE 'broken%' ORDER BY created DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD);
                      PREV_BACKUP_NAME=$(clickhouse-client -q "SELECT name FROM system.backup_list WHERE location='remote' AND desc NOT LIKE 'broken%' ORDER BY created DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD);
                      DIFF_FROM[$SERVER]="";
                      if [[ "$FULL_BACKUP_WEEKDAY" == "$(date +%u)" || "" == "$PREV_BACKUP_NAME" || "" == "$LAST_FULL_BACKUP" ]]; then
                        BACKUP_NAMES[$SERVER]="full-$BACKUP_DATE";
                      else
                        BACKUP_NAMES[$SERVER]="increment-$BACKUP_DATE";
                        DIFF_FROM[$SERVER]="--diff-from-remote=$PREV_BACKUP_NAME";
                      fi
                    else
                      BACKUP_NAMES[$SERVER]="full-$BACKUP_DATE";
                    fi;
                    echo "set backup name on $SERVER = ${BACKUP_NAMES[$SERVER]}";
                  done;
                  for SERVER in $CLICKHOUSE_SERVICES; do
                    echo "create ${BACKUP_NAMES[$SERVER]} on $SERVER";
                    clickhouse-client --echo -mn -q "INSERT INTO system.backup_actions(command) VALUES('create ${SERVER}-${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                  done;
                  for SERVER in $CLICKHOUSE_SERVICES; do
                    while [[ "in progress" == $(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='create ${SERVER}-${BACKUP_NAMES[$SERVER]}' FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD) ]]; do
                      echo "still in progress ${BACKUP_NAMES[$SERVER]} on $SERVER";
                      sleep 1;
                    done;
                    if [[ "success" != $(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='create ${SERVER}-${BACKUP_NAMES[$SERVER]}' FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD) ]]; then
                      echo "error create ${BACKUP_NAMES[$SERVER]} on $SERVER";
                      clickhouse-client -mn --echo -q "SELECT status,error FROM system.backup_actions WHERE command='create ${SERVER}-${BACKUP_NAMES[$SERVER]}'" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                      exit 1;
                    fi;
                  done;
                  for SERVER in $CLICKHOUSE_SERVICES; do
                    echo "upload ${DIFF_FROM[$SERVER]} ${BACKUP_NAMES[$SERVER]} on $SERVER";
                    clickhouse-client --echo -mn -q "INSERT INTO system.backup_actions(command) VALUES('upload ${DIFF_FROM[$SERVER]} ${SERVER}-${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                  done;
                  for SERVER in $CLICKHOUSE_SERVICES; do
                    while [[ "in progress" == $(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='upload ${DIFF_FROM[$SERVER]} ${SERVER}-${BACKUP_NAMES[$SERVER]}'" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD) ]]; do
                      echo "upload still in progress ${BACKUP_NAMES[$SERVER]} on $SERVER";
                      sleep 5;
                    done;
                    if [[ "success" != $(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='upload ${DIFF_FROM[$SERVER]} ${SERVER}-${BACKUP_NAMES[$SERVER]}'" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD) ]]; then
                      echo "error ${BACKUP_NAMES[$SERVER]} on $SERVER";
                      clickhouse-client -mn --echo -q "SELECT status,error FROM system.backup_actions WHERE command='upload ${DIFF_FROM[$SERVER]} ${SERVER}-${BACKUP_NAMES[$SERVER]}'" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                      exit 1;
                    fi;
                    clickhouse-client --echo -mn -q "INSERT INTO system.backup_actions(command) VALUES('delete local ${SERVER}-${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                  done;
                  echo "BACKUP CREATED"
apiVersion: "clickhouse.altinity.com/v1"
kind: "ClickHouseInstallation"
metadata:
name: test-backups
spec:
defaults:
templates:
podTemplate: clickhouse-backup
dataVolumeClaimTemplate: data-volume
configuration:
users:
# use cluster Pod CIDR for more security
backup/networks/ip: 0.0.0.0/0
# PASSWORD=backup_password; echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
backup/password_sha256_hex: eb94c11d77f46a0290ba8c4fca1a7fd315b72e1e6c83146e42117c568cc3ea4d
settings:
# to allow scrape metrics via embedded prometheus protocol
prometheus/endpoint: /metrics
prometheus/port: 8888
prometheus/metrics: true
prometheus/events: true
prometheus/asynchronous_metrics: true
# need install zookeeper separately, look to https://github.com/Altinity/clickhouse-operator/tree/master/deploy/zookeeper/ for details
zookeeper:
nodes:
- host: zookeeper
port: 2181
session_timeout_ms: 5000
operation_timeout_ms: 5000
clusters:
- name: default
layout:
# 2 shards one replica in each
shardsCount: 2
replicasCount: 2
templates:
volumeClaimTemplates:
- name: data-volume
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
podTemplates:
- name: clickhouse-backup
metadata:
annotations:
prometheus.io/scrape: 'true'
prometheus.io/port: '8888'
prometheus.io/path: '/metrics'
# need separate prometheus scrape config, look to https://github.com/prometheus/prometheus/issues/3756
clickhouse.backup/scrape: 'true'
clickhouse.backup/port: '7171'
clickhouse.backup/path: '/metrics'
spec:
securityContext:
runAsUser: 101
runAsGroup: 101
fsGroup: 101
containers:
- name: clickhouse-pod
image: clickhouse/clickhouse-server:22.8
command:
- clickhouse-server
- --config-file=/etc/clickhouse-server/config.xml
- name: clickhouse-backup
image: altinity/clickhouse-backup:master
imagePullPolicy: Always
command:
- bash
- -xc
- "/bin/clickhouse-backup server"
env:
- name: LOG_LEVEL
value: "debug"
- name: ALLOW_EMPTY_BACKUPS
value: "true"
- name: API_LISTEN
value: "0.0.0.0:7171"
# INSERT INTO system.backup_actions to execute backup
- name: API_CREATE_INTEGRATION_TABLES
value: "true"
- name: BACKUPS_TO_KEEP_REMOTE
value: "3"
# change it for production S3
- name: REMOTE_STORAGE
value: "s3"
- name: S3_ACL
value: "private"
- name: S3_ENDPOINT
value: http://s3-backup-minio:9000
- name: S3_BUCKET
value: clickhouse
# {shard} macro defined by clickhouse-operator
- name: S3_PATH
value: backup/shard-{shard}
- name: S3_ACCESS_KEY
value: backup-access-key
- name: S3_SECRET_KEY
value: backup-secret-key
- name: S3_FORCE_PATH_STYLE
value: "true"
# remove it for production S3
- name: S3_DISABLE_SSL
value: "true"
- name: S3_DEBUG
value: "true"
# require to avoid double scraping clickhouse and clickhouse-backup containers
ports:
- name: backup-rest
containerPort: 7171
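
For production, the plaintext S3 credentials above are better sourced from a Kubernetes Secret. A minimal sketch of the env entries that would replace the two literal values (the Secret name clickhouse-backup-s3 and its keys are hypothetical, not part of this gist):

# kubectl create secret generic clickhouse-backup-s3 \
#   --from-literal=access-key=... --from-literal=secret-key=...
- name: S3_ACCESS_KEY
  valueFrom:
    secretKeyRef:
      name: clickhouse-backup-s3   # hypothetical Secret name
      key: access-key
- name: S3_SECRET_KEY
  valueFrom:
    secretKeyRef:
      name: clickhouse-backup-s3
      key: secret-key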
# minio.yaml
apiVersion: "apps/v1"
kind: Deployment
metadata:
  name: s3-backup-minio
spec:
  replicas: 1
  selector:
    matchLabels:
      app: s3-backup-minio
  template:
    metadata:
      labels:
        app: s3-backup-minio
    spec:
      containers:
        - name: minio
          image: minio/minio:latest
          env:
            # legacy credential variables; newer MinIO releases use MINIO_ROOT_USER / MINIO_ROOT_PASSWORD
            - name: MINIO_ACCESS_KEY
              value: backup-access-key
            - name: MINIO_SECRET_KEY
              value: backup-secret-key
          command:
            - sh
            - -xc
            # pre-create the `clickhouse` bucket as a directory before starting the server
            - mkdir -p doc_gen_minio/export/clickhouse && minio server doc_gen_minio/export
          ports:
            - name: minio
              containerPort: 9000
---
apiVersion: v1
kind: Service
metadata:
  name: s3-backup-minio
spec:
  type: ClusterIP
  selector:
    app: s3-backup-minio
  ports:
    - name: s3
      port: 9000
      protocol: TCP
      targetPort: minio
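
Because this MinIO instance stores buckets as plain directories under its data path, uploaded backups can be inspected directly in the pod once the CronJob has run (a small sketch using the deployment defined above):

kubectl exec -n test-backups deploy/s3-backup-minio -- ls -lR doc_gen_minio/export/clickhouse/backup/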
# end-to-end test: install clickhouse-operator, deploy MinIO, ZooKeeper and the
# ClickHouseInstallation, run the backup CronJob, then restore the latest backup
set -x
kubectl delete chi -n test-backups test-backups
kubectl delete ns test-backups
kubectl create ns test-backups
curl -sL https://github.com/Altinity/clickhouse-operator/raw/master/deploy/operator-web-installer/clickhouse-operator-install.sh | OPERATOR_NAMESPACE=test-backups bash
kubectl apply -n test-backups -f ./minio.yaml
kubectl apply -n test-backups -f ./zookeeper.yaml
kubectl apply -n test-backups -f ./chi.yaml
while [[ "0" == $(kubectl get chi -n test-backups | grep test-backups | grep -c Completed) ]]; do
echo "Wait CHI Complete..."
sleep 1
done
kubectl apply -n test-backups -f ./backup-cronjob.yaml
while [[ "0" == $(kubectl get pod -n test-backups -l app=clickhouse-backup-cron | grep -c Completed) ]]; do
echo "Wait BACKUP Complete..."
sleep 1
done
kubectl logs -n test-backups -l app=clickhouse-backup-cron --since=24h
kubectl delete -n test-backups -f ./backup-cronjob.yaml
kubectl apply -n test-backups -f ./restore-job.yaml
while [[ "0" == $(kubectl get pod -n test-backups -l app=clickhouse-backup-restore | grep -c Completed) ]]; do
echo "Wait RESTORE Complete..."
sleep 1
done
kubectl logs -n test-backups -l app=clickhouse-backup-restore --since=24h
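
The script backs up whatever happens to be in the cluster, so to make the round trip meaningful, create a small replicated table after the CHI reaches Completed and before applying backup-cronjob.yaml. A hedged sketch: the table default.backup_test is invented for illustration, while the 'default' cluster name and the {shard}/{replica} macros come from the operator-managed configuration; it assumes the default user is reachable from inside the clickhouse-pod container.

kubectl exec -n test-backups chi-test-backups-default-0-0-0 -c clickhouse-pod -- clickhouse-client -mn -q "
CREATE TABLE default.backup_test ON CLUSTER 'default' (d Date, v UInt64)
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/backup_test','{replica}')
ORDER BY d;
INSERT INTO default.backup_test SELECT today(), number FROM numbers(1000);"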
# restore-job.yaml
# example: restore the latest remote backup
apiVersion: batch/v1
kind: Job
metadata:
  name: clickhouse-backup-restore
spec:
  backoffLimit: 0
  template:
    metadata:
      name: clickhouse-backup-restore
      labels:
        app: clickhouse-backup-restore
    spec:
      restartPolicy: Never
      containers:
        - name: clickhouse-backup-restore
          image: clickhouse/clickhouse-client:latest
          imagePullPolicy: IfNotPresent
          env:
            # use all replicas in each shard to restore the schema
            - name: CLICKHOUSE_SCHEMA_RESTORE_SERVICES
              value: chi-test-backups-default-0-0,chi-test-backups-default-0-1,chi-test-backups-default-1-0,chi-test-backups-default-1-1
            # use only the first replica in each shard to restore data
            - name: CLICKHOUSE_DATA_RESTORE_SERVICES
              value: chi-test-backups-default-0-0,chi-test-backups-default-1-0
            - name: CLICKHOUSE_PORT
              value: "9000"
            - name: BACKUP_USER
              value: backup
            - name: BACKUP_PASSWORD
              value: "backup_password"
          command:
            - bash
            - -ec
            - if [[ "" != "$BACKUP_PASSWORD" ]]; then
                BACKUP_PASSWORD="--password=$BACKUP_PASSWORD";
              fi;
              declare -A BACKUP_NAMES;
              CLICKHOUSE_SCHEMA_RESTORE_SERVICES=$(echo $CLICKHOUSE_SCHEMA_RESTORE_SERVICES | tr "," " ");
              CLICKHOUSE_DATA_RESTORE_SERVICES=$(echo $CLICKHOUSE_DATA_RESTORE_SERVICES | tr "," " ");
              for SERVER in $CLICKHOUSE_SCHEMA_RESTORE_SERVICES; do
                LATEST_BACKUP_NAME=$(clickhouse-client -q "SELECT name FROM system.backup_list WHERE location='remote' AND desc NOT LIKE 'broken%' ORDER BY created DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD);
                if [[ "" == "$LATEST_BACKUP_NAME" ]]; then
                  echo "Remote backup not found for $SERVER";
                  exit 1;
                fi;
                BACKUP_NAMES[$SERVER]="$LATEST_BACKUP_NAME";
                clickhouse-client -mn --echo -q "INSERT INTO system.backup_actions(command) VALUES('restore_remote --schema --rm ${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
              done;
              for SERVER in $CLICKHOUSE_SCHEMA_RESTORE_SERVICES; do
                while [[ "in progress" == $(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='restore_remote --schema --rm ${BACKUP_NAMES[$SERVER]}' ORDER BY start DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD) ]]; do
                  echo "still in progress ${BACKUP_NAMES[$SERVER]} on $SERVER";
                  sleep 1;
                done;
                RESTORE_STATUS=$(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='restore_remote --schema --rm ${BACKUP_NAMES[$SERVER]}' ORDER BY start DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD);
                if [[ "success" != "${RESTORE_STATUS}" ]]; then
                  echo "error restore_remote --schema --rm ${BACKUP_NAMES[$SERVER]} on $SERVER";
                  clickhouse-client -mn --echo -q "SELECT start,finish,status,error FROM system.backup_actions WHERE command='restore_remote --schema --rm ${BACKUP_NAMES[$SERVER]}'" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                  exit 1;
                fi;
                if [[ "success" == "${RESTORE_STATUS}" ]]; then
                  echo "schema ${BACKUP_NAMES[$SERVER]} on $SERVER RESTORED";
                  clickhouse-client -q "INSERT INTO system.backup_actions(command) VALUES('delete local ${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                fi;
              done;
              for SERVER in $CLICKHOUSE_DATA_RESTORE_SERVICES; do
                clickhouse-client -mn --echo -q "INSERT INTO system.backup_actions(command) VALUES('restore_remote --data ${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
              done;
              for SERVER in $CLICKHOUSE_DATA_RESTORE_SERVICES; do
                while [[ "in progress" == $(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='restore_remote --data ${BACKUP_NAMES[$SERVER]}' ORDER BY start DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD) ]]; do
                  echo "still in progress ${BACKUP_NAMES[$SERVER]} on $SERVER";
                  sleep 1;
                done;
                RESTORE_STATUS=$(clickhouse-client -mn -q "SELECT status FROM system.backup_actions WHERE command='restore_remote --data ${BACKUP_NAMES[$SERVER]}' ORDER BY start DESC LIMIT 1 FORMAT TabSeparatedRaw" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD);
                if [[ "success" != "${RESTORE_STATUS}" ]]; then
                  echo "error restore_remote --data ${BACKUP_NAMES[$SERVER]} on $SERVER";
                  clickhouse-client -mn --echo -q "SELECT start,finish,status,error FROM system.backup_actions WHERE command='restore_remote --data ${BACKUP_NAMES[$SERVER]}'" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                  exit 1;
                fi;
                echo "data ${BACKUP_NAMES[$SERVER]} on $SERVER RESTORED";
                if [[ "success" == "${RESTORE_STATUS}" ]]; then
                  clickhouse-client -q "INSERT INTO system.backup_actions(command) VALUES('delete local ${BACKUP_NAMES[$SERVER]}')" --host="$SERVER" --port="$CLICKHOUSE_PORT" --user="$BACKUP_USER" $BACKUP_PASSWORD;
                fi;
              done
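
To see which remote backups are available before running this Job, the same system.backup_list table it queries can be read directly (a hedged sketch, with the same pod-naming and credential assumptions as the earlier smoke test):

kubectl exec -n test-backups chi-test-backups-default-0-0-0 -c clickhouse-pod -- \
  clickhouse-client --user=backup --password=backup_password \
  -q "SELECT name,created,location,desc FROM system.backup_list WHERE location='remote' ORDER BY created DESC"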
# zookeeper.yaml
# Service that provides client access to ZooKeeper
apiVersion: v1
kind: Service
metadata:
  # DNS name will be like zookeeper.<namespace>
  name: zookeeper
  labels:
    app: zookeeper
spec:
  ports:
    - port: 2181
      name: client
    - port: 7000
      name: prometheus
  selector:
    app: zookeeper
    what: node
---
# Headless Service for the StatefulSet
apiVersion: v1
kind: Service
metadata:
  # DNS names will be like zookeeper-0.zookeepers.<namespace>
  name: zookeepers
  labels:
    app: zookeeper
spec:
  ports:
    - port: 2888
      name: server
    - port: 3888
      name: leader-election
  clusterIP: None
  selector:
    app: zookeeper
    what: node
---
# Limit the number of unavailable pods in the StatefulSet
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: zookeeper-pod-disruption-budget
spec:
  selector:
    matchLabels:
      app: zookeeper
  maxUnavailable: 1
---
# ZooKeeper StatefulSet
# Tunable parameters:
# 1. replicas
# 2. memory
# 3. cpu
# 4. storage
# 5. storageClassName
# 6. user to run the app
apiVersion: apps/v1
kind: StatefulSet
metadata:
  # pods will be named zookeeper-0, zookeeper-1, zookeeper-2
  name: zookeeper
  labels:
    app: zookeeper
spec:
  selector:
    matchLabels:
      app: zookeeper
  serviceName: zookeepers
  replicas: 1
  updateStrategy:
    type: RollingUpdate
  podManagementPolicy: OrderedReady
  template:
    metadata:
      labels:
        app: zookeeper
        what: node
      annotations:
        prometheus.io/port: '7000'
        prometheus.io/scrape: 'true'
    spec:
      containers:
        - name: kubernetes-zookeeper
          imagePullPolicy: IfNotPresent
          image: "docker.io/zookeeper:3.8.1"
          ports:
            - containerPort: 2181
              name: client
            - containerPort: 2888
              name: server
            - containerPort: 3888
              name: leader-election
            - containerPort: 7000
              name: prometheus
          env:
            - name: SERVERS
              value: "1"
          # See these links for proper startup settings:
          # https://github.com/kow3ns/kubernetes-zookeeper/blob/master/docker/scripts/start-zookeeper
          # https://clickhouse.yandex/docs/en/operations/tips/#zookeeper
          # https://github.com/ClickHouse/ClickHouse/issues/11781
          command:
            - bash
            - -x
            - -c
            - |
              HOST=`hostname -s` &&
              DOMAIN=`hostname -d` &&
              CLIENT_PORT=2181 &&
              SERVER_PORT=2888 &&
              ELECTION_PORT=3888 &&
              PROMETHEUS_PORT=7000 &&
              ZOO_DATA_DIR=/var/lib/zookeeper/data &&
              ZOO_DATA_LOG_DIR=/var/lib/zookeeper/datalog &&
              {
                echo "clientPort=${CLIENT_PORT}"
                echo 'tickTime=2000'
                echo 'initLimit=300'
                echo 'syncLimit=10'
                echo 'maxClientCnxns=2000'
                echo 'maxTimeToWaitForEpoch=2000'
                echo 'maxSessionTimeout=60000000'
                echo "dataDir=${ZOO_DATA_DIR}"
                echo "dataLogDir=${ZOO_DATA_LOG_DIR}"
                echo 'autopurge.snapRetainCount=10'
                echo 'autopurge.purgeInterval=1'
                echo 'preAllocSize=131072'
                echo 'snapCount=3000000'
                echo 'leaderServes=yes'
                echo 'standaloneEnabled=false'
                echo '4lw.commands.whitelist=*'
                echo 'metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider'
                echo "metricsProvider.httpPort=${PROMETHEUS_PORT}"
                echo "skipACL=true"
                echo "fastleader.maxNotificationInterval=10000"
              } > /conf/zoo.cfg &&
              {
                echo "zookeeper.root.logger=CONSOLE"
                echo "zookeeper.console.threshold=INFO"
                echo "log4j.rootLogger=\${zookeeper.root.logger}"
                echo "log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender"
                echo "log4j.appender.CONSOLE.Threshold=\${zookeeper.console.threshold}"
                echo "log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout"
                echo "log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n"
              } > /conf/log4j.properties &&
              echo 'JVMFLAGS="-Xms128M -Xmx1G -XX:ActiveProcessorCount=8 -XX:+AlwaysPreTouch -Djute.maxbuffer=8388608 -XX:MaxGCPauseMillis=50"' > /conf/java.env &&
              if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
                NAME=${BASH_REMATCH[1]} &&
                ORD=${BASH_REMATCH[2]};
              else
                echo "Failed to parse name and ordinal of Pod" &&
                exit 1;
              fi &&
              mkdir -pv ${ZOO_DATA_DIR} &&
              mkdir -pv ${ZOO_DATA_LOG_DIR} &&
              whoami &&
              chown -Rv zookeeper "$ZOO_DATA_DIR" "$ZOO_DATA_LOG_DIR" &&
              export MY_ID=$((ORD+1)) &&
              echo $MY_ID > $ZOO_DATA_DIR/myid &&
              for (( i=1; i<=$SERVERS; i++ )); do
                echo "server.$i=$NAME-$((i-1)).$DOMAIN:$SERVER_PORT:$ELECTION_PORT" >> /conf/zoo.cfg;
              done &&
              if [[ $SERVERS -eq 1 ]]; then
                echo "group.1=1" >> /conf/zoo.cfg;
              else
                echo "group.1=1:2:3" >> /conf/zoo.cfg;
              fi &&
              for (( i=1; i<=$SERVERS; i++ )); do
                WEIGHT=1
                if [[ $i == 1 ]]; then
                  WEIGHT=10
                fi
                echo "weight.$i=$WEIGHT" >> /conf/zoo.cfg;
              done &&
              zkServer.sh start-foreground
          readinessProbe:
            exec:
              command:
                - bash
                - -c
                - '
                  IFS=;
                  MNTR=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "mntr" >&3 ; tee <&3; exec 3<&- ;);
                  while [[ "$MNTR" == "This ZooKeeper instance is not currently serving requests" ]];
                  do
                    echo "wait mntr works";
                    sleep 1;
                    MNTR=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "mntr" >&3 ; tee <&3; exec 3<&- ;);
                  done;
                  STATE=$(echo -e $MNTR | grep zk_server_state | cut -d " " -f 2);
                  if [[ "$STATE" =~ "leader" ]]; then
                    echo "check leader state";
                    SYNCED_FOLLOWERS=$(echo -e $MNTR | grep zk_synced_followers | awk -F"[[:space:]]+" "{print \$2}" | cut -d "." -f 1);
                    if [[ "$SYNCED_FOLLOWERS" != "0" ]]; then
                      ./bin/zkCli.sh ls /;
                      exit $?;
                    else
                      exit 0;
                    fi;
                  elif [[ "$STATE" =~ "follower" ]]; then
                    echo "check follower state";
                    PEER_STATE=$(echo -e $MNTR | grep zk_peer_state);
                    if [[ "$PEER_STATE" =~ "following - broadcast" ]]; then
                      ./bin/zkCli.sh ls /;
                      exit $?;
                    else
                      exit 1;
                    fi;
                  else
                    exit 1;
                  fi
                  '
            initialDelaySeconds: 10
            periodSeconds: 60
            timeoutSeconds: 60
          livenessProbe:
            exec:
              command:
                - bash
                - -xc
                - 'date && OK=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;); if [[ "$OK" == "imok" ]]; then exit 0; else exit 1; fi'
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
          volumeMounts:
            - name: datadir-volume
              mountPath: /var/lib/zookeeper
      # run as a non-privileged user
      securityContext:
        runAsUser: 1000
        fsGroup: 1000
  volumeClaimTemplates:
    - metadata:
        name: datadir-volume
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 1Gi
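
Scaling the ensemble to three nodes only requires changing two values in step, because the startup script derives the server.N entries of zoo.cfg from $SERVERS (a sketch, not applied by the manifests above):

# in the StatefulSet spec:
replicas: 3
# and in the container env:
- name: SERVERS
  value: "3"

The group.1=1:2:3 branch of the startup script already covers the three-node layout.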