# ovnkube-master-dns.yaml
# The ovnkube control-plane components
# The pod disruption budget ensures that we keep a raft quorum
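# With replicas: 3 and minAvailable: 2, a voluntary disruption can take down at
# most one DB pod at a time, so a raft majority (2 of 3) always survives.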
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
  name: ovn-raft-quorum-guard
  namespace: hypershift-ovn
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: ovnkube-master-guest
---
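# Headless Service (clusterIP: None): each StatefulSet pod gets a stable DNS
# name of the form <pod>.<service>.<namespace>.svc.cluster.local, e.g.
# ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local,
# which ovn-northd and the raft members use to address the databases.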
apiVersion: v1
kind: Service
metadata:
  name: ovnkube-master-guest
  namespace: hypershift-ovn
  labels:
    app: ovnkube-master-guest
spec:
  ports:
  - port: 9641
    name: north
    protocol: TCP
    targetPort: 9641
  - port: 9642
    name: south
    protocol: TCP
    targetPort: 9642
  clusterIP: None
  sessionAffinity: None
  selector:
    app: ovnkube-master-guest
---
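# A StatefulSet (rather than a Deployment) gives each DB replica a stable
# identity, so ovnkube-master-guest-{0,1,2} keep their DNS names across
# restarts and the raft cluster membership stays addressable.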
kind: StatefulSet
apiVersion: apps/v1
metadata:
  name: ovnkube-master-guest
  namespace: hypershift-ovn
  annotations:
    kubernetes.io/description: |
      This StatefulSet launches the ovn-kubernetes controller (master) networking components.
    release.openshift.io/version: "4.11"
spec:
  selector:
    matchLabels:
      app: ovnkube-master-guest
  serviceName: ovnkube-master-guest
  replicas: 3
  # StatefulSets terminate the old pod before starting its replacement (no
  # surge), which is what we need here because ovsdb holds the lock.
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      annotations:
        target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}'
      labels:
        app: ovnkube-master-guest
        ovn-db-pod: "true"
        component: network
        type: infra
        openshift.io/component: network
        kubernetes.io/os: "linux"
    spec:
      serviceAccountName: ovn-kubernetes-controller-guest
      priorityClassName: "system-cluster-critical"
      containers:
      # ovn-northd: convert network objects in nbdb to flows in sbdb
      - name: northd
        image: "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:eaa4904326c4e2ca5ee926a84696e6c5bb6f40205a2c893eab4f326747ec2478"
        command:
        - /bin/bash
        - -c
        - |
          set -xem
          if [[ -f /env/_master ]]; then
            set -o allexport
            source /env/_master
            set +o allexport
          fi
          quit() {
            echo "$(date -Iseconds) - stopping ovn-northd"
            OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd
            echo "$(date -Iseconds) - ovn-northd stopped"
            rm -f /var/run/ovn/ovn-northd.pid
            exit 0
          }
          # end of quit
          trap quit TERM INT
          echo "$(date -Iseconds) - starting ovn-northd"
          # namespace hosting guest ovnkube-master
          ovn_kubernetes_namespace="hypershift-ovn"
          # statefulset pod names for ovnkube-master
          master_0_hostname="ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
          master_1_hostname="ovnkube-master-guest-1.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
          master_2_hostname="ovnkube-master-guest-2.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
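          # ovn-northd is started in the background and wait-ed on (instead of
          # running in the foreground) so that bash can run the TERM/INT trap
          # above while the daemon runs; `wait` is interruptible by signals.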
          exec ovn-northd \
            --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
            --ovnnb-db "ssl:${master_0_hostname}:9641,ssl:${master_1_hostname}:9641,ssl:${master_2_hostname}:9641" \
            --ovnsb-db "ssl:${master_0_hostname}:9642,ssl:${master_1_hostname}:9642,ssl:${master_2_hostname}:9642" \
            --pidfile /var/run/ovn/ovn-northd.pid \
            -p /ovn-cert/tls.key \
            -c /ovn-cert/tls.crt \
            -C /ovn-ca/ca-bundle.crt &
          wait $!
        lifecycle:
          preStop:
            exec:
              # exec does not go through a shell, so the env assignment must be
              # wrapped in `bash -c` rather than passed as the first argv element.
              command:
              - /bin/bash
              - -c
              - OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd
        # privileged securityContext to access persistent volumes of type hostPath;
        # may not be needed for other types of persistent volumes
        securityContext:
          privileged: true
        env:
        - name: OVN_LOG_LEVEL
          value: dbg
        # referenced by the -vPATTERN flag above; same pattern as nbdb/sbdb below
        - name: OVN_LOG_PATTERN_CONSOLE
          value: "%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m"
        volumeMounts:
        - name: datadir
          mountPath: /var/run/ovn
        - mountPath: /env
          name: env-overrides
        - mountPath: /ovn-cert # used by the ssl connections to nbdb/sbdb above
          name: ovn-cert
        - mountPath: /ovn-ca
          name: ovn-ca
        resources:
          requests:
            cpu: 10m
            memory: 300Mi
        terminationMessagePolicy: FallbackToLogsOnError
      # nbdb: the northbound, or logical network object DB. In raft mode
      - name: nbdb
        image: "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:eaa4904326c4e2ca5ee926a84696e6c5bb6f40205a2c893eab4f326747ec2478"
        command:
        - /bin/bash
        - -c
        - |
          set -xem
          if [[ -f /env/_master ]]; then
            set -o allexport
            source /env/_master
            set +o allexport
          fi
          INDEX=${HOSTNAME##*-}
          quit() {
            echo "$(date -Iseconds) - stopping nbdb"
            /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb
            echo "$(date -Iseconds) - nbdb stopped"
            rm -f /var/run/ovn/ovnnb_db.pid
            exit 0
          }
          # end of quit
          trap quit TERM INT
          bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac }
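          # e.g. bracketify fd00::1 -> [fd00::1]; IPv4 addresses and hostnames
          # pass through unchanged. ovsdb/raft URLs need IPv6 literals bracketed.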
          # initialize variables
          ovn_kubernetes_namespace=hypershift-ovn
          ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt"
          transport="ssl"
          ovn_raft_conn_ip_url_suffix=""
          if [[ "${K8S_POD_IP}" == *":"* ]]; then
            ovn_raft_conn_ip_url_suffix=":[::]"
          fi
          db="nb"
          db_port="9641"
          ovn_db_file="/etc/ovn/ovn${db}_db.db"
          # checks if a db pod is part of a current cluster
          db_part_of_cluster() {
            local pod=${1}
            local db=${2}
            local port=${3}
            echo "Checking if ${pod} is part of cluster"
            # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
            init_ip=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} ${pod} -o=jsonpath='{.status.podIP}')
            if [[ $? != 0 ]]; then
              echo "Unable to get ${pod} ip"
              return 1
            fi
            echo "Found ${pod} ip: $init_ip"
            init_ip=$(bracketify $init_ip)
            target=$(ovn-${db}ctl --timeout=5 --db=${transport}:${init_ip}:${port} ${ovndb_ctl_ssl_opts} \
              --data=bare --no-headings --columns=target list connection)
            if [[ "${target}" != "p${transport}:${port}${ovn_raft_conn_ip_url_suffix}" ]]; then
              echo "Unable to check correct target ${target}"
              return 1
            fi
            echo "${pod} is part of cluster"
            return 0
          }
          # end of db_part_of_cluster
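          # A live clustered DB answers `list connection` with its passive
          # listener target (here pssl:9641), which is what the check above
          # compares against.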
          # Checks if cluster has already been initialized.
          # If not it returns false and sets init_ip to CLUSTER_INITIATOR_IP
          cluster_exists() {
            local db=${1}
            local port=${2}
            # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
            db_pods=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-guest-\w+' | grep -v "metrics")
            for db_pod in $db_pods; do
              if db_part_of_cluster $db_pod $db $port; then
                echo "${db_pod} is part of current cluster with ip: ${init_ip}!"
                return 0
              fi
            done
            # if we get here there is no cluster, set init_ip and get out
            init_ip=$(bracketify $CLUSTER_INITIATOR_IP)
            return 1
          }
          # end of cluster_exists()
          OVN_ARGS="--db-nb-cluster-local-port=${OVN_NB_RAFT_PORT} \
            --db-nb-cluster-local-addr=$(bracketify ${K8S_POD_IP}) \
            --no-monitor \
            --db-nb-cluster-local-proto=ssl \
            --ovn-nb-db-ssl-key=/ovn-cert/tls.key \
            --ovn-nb-db-ssl-cert=/ovn-cert/tls.crt \
            --ovn-nb-db-ssl-ca-cert=/ovn-ca/ca-bundle.crt"
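          # OVN_ARGS only configures the local raft listener (address, port, TLS
          # material); whether this member bootstraps a new cluster or joins an
          # existing one is decided below via the --db-nb-cluster-remote-* options.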
          cluster_initiator_pod=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-guest-0' | grep -v "metrics")
          cluster_initiator_ip=$(timeout 5 kubectl get pod ${cluster_initiator_pod} -n ${ovn_kubernetes_namespace} -o=jsonpath='{.status.podIP}')
          if [[ $? != 0 ]]; then
            echo "Unable to get ${cluster_initiator_pod} ip"
          fi
          echo "Found ${cluster_initiator_pod} ip: $cluster_initiator_ip"
          CLUSTER_INITIATOR_IP="${cluster_initiator_ip}"
          # CLUSTER_INITIATOR_IP="ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
          echo "$(date -Iseconds) - starting nbdb CLUSTER_INITIATOR_IP=${CLUSTER_INITIATOR_IP}, K8S_NODE_IP=${K8S_NODE_IP} K8S_POD_IP=${K8S_POD_IP} K8S_POD_NAME=${K8S_POD_NAME}"
          initial_raft_create=true
          initialize="false"
          if [[ ! -e ${ovn_db_file} ]]; then
            initialize="true"
          fi
if [[ "${initialize}" == "true" ]]; then
# check to see if a cluster already exists. If it does, just join it.
counter=0
cluster_found=false
while [ $counter -lt 5 ]; do
if cluster_exists ${db} ${db_port}; then
cluster_found=true
break
fi
sleep 1
counter=$((counter+1))
done
if ${cluster_found}; then
echo "Cluster already exists for DB: ${db}"
initial_raft_create=false
# join existing cluster
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--db-nb-cluster-remote-port=${OVN_NB_RAFT_PORT} \
--db-nb-cluster-remote-addr=${init_ip} \
--db-nb-cluster-remote-proto=ssl \
--ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
run_nb_ovsdb &
wait $!
else
# either we need to initialize a new cluster or wait for master to create it
if [[ "${K8S_POD_NAME}.ovnkube-master-guest.hypershift-ovn.svc.cluster.local" == "${CLUSTER_INITIATOR_IP}" ]]; then
# set DB election timer at DB creation time if OVN supports it
election_timer=
if test -n "$(/usr/share/ovn/scripts/ovn-ctl --help 2>&1 | grep "\--db-nb-election-timer")"; then
election_timer="--db-nb-election-timer=$((10*1000))"
fi
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
${election_timer} \
run_nb_ovsdb &
wait $!
else
echo "Joining the nbdb cluster with init_ip=${init_ip}..."
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--db-nb-cluster-remote-port=${OVN_NB_RAFT_PORT} \
--db-nb-cluster-remote-addr=${init_ip} \
--db-nb-cluster-remote-proto=ssl \
--ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
run_nb_ovsdb &
wait $!
fi
fi
else
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
run_nb_ovsdb &
wait $!
fi
        lifecycle:
          postStart:
            exec:
              command:
              - /bin/bash
              - -c
              - |
                set -x
                # postStart runs in a fresh shell, so define the namespace here too
                ovn_kubernetes_namespace="hypershift-ovn"
                # CLUSTER_INITIATOR_IP="192.168.111.20"
                cluster_initiator_pod=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-guest-0' | grep -v "metrics")
                cluster_initiator_ip=$(timeout 5 kubectl get pod ${cluster_initiator_pod} -n ${ovn_kubernetes_namespace} -o=jsonpath='{.status.podIP}')
                if [[ $? != 0 ]]; then
                  echo "Unable to get ${cluster_initiator_pod} ip"
                fi
                echo "Found ${cluster_initiator_pod} ip: $cluster_initiator_ip"
                CLUSTER_INITIATOR_IP="${cluster_initiator_ip}"
                # CLUSTER_INITIATOR_IP="ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
                rm -f /var/run/ovn/ovnnb_db.pid
# if [[ "${K8S_POD_NAME}.ovnkube-master-guest.hypershift-ovn.svc.cluster.local" == "${CLUSTER_INITIATOR_IP}" ]]; then
if [[ "${K8S_POD_IP}" == "${CLUSTER_INITIATOR_IP}" ]]; then
echo "$(date -Iseconds) - nbdb - postStart - waiting for master to be selected"
# set the connection and inactivity probe
retries=0
while ! ovn-nbctl --no-leader-only -t 5 set-connection pssl:9641 -- set connection . inactivity_probe=60000; do
(( retries += 1 ))
if [[ "${retries}" -gt 40 ]]; then
echo "$(date -Iseconds) - ERROR RESTARTING - nbdb - too many failed ovn-nbctl attempts, giving up"
exit 1
fi
sleep 2
done
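                  # The connection row lives in the database itself and is
                  # replicated via raft, so only the initiator needs to set it.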
                  # Upgrade the db if required.
                  DB_SCHEMA="/usr/share/ovn/ovn-nb.ovsschema"
                  DB_SERVER="unix:/var/run/ovn/ovnnb_db.sock"
                  schema_name=$(ovsdb-tool schema-name $DB_SCHEMA)
                  db_version=$(ovsdb-client -t 10 get-schema-version "$DB_SERVER" "$schema_name")
                  target_version=$(ovsdb-tool schema-version "$DB_SCHEMA")
                  if ovsdb-tool compare-versions "$db_version" == "$target_version"; then
                    :
                  elif ovsdb-tool compare-versions "$db_version" ">" "$target_version"; then
                    echo "Database $schema_name has newer schema version ($db_version) than our local schema ($target_version), possibly an upgrade is partially complete?"
                  else
                    echo "Upgrading database $schema_name from schema version $db_version to $target_version"
                    ovsdb-client -t 30 convert "$DB_SERVER" "$DB_SCHEMA"
                  fi
                fi
                # configure northd_probe_interval
                master_0_hostname="ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
                master_1_hostname="ovnkube-master-guest-1.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
                master_2_hostname="ovnkube-master-guest-2.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
                OVN_NB_CTL="ovn-nbctl -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \
                  --db ssl:${master_0_hostname}:9641,ssl:${master_1_hostname}:9641,ssl:${master_2_hostname}:9641"
                northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-5000}
                echo "Setting northd probe interval to ${northd_probe_interval} ms"
                retries=0
                current_probe_interval=0
                while [[ "${retries}" -lt 10 ]]; do
                  current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval)
                  if [[ $? == 0 ]]; then
                    current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"')
                    break
                  else
                    sleep 2
                    (( retries += 1 ))
                  fi
                done
                if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then
                  retries=0
                  while [[ "${retries}" -lt 10 ]]; do
                    ${OVN_NB_CTL} set NB_GLOBAL . options:northd_probe_interval=${northd_probe_interval}
                    if [[ $? != 0 ]]; then
                      echo "Failed to set northd probe interval to ${northd_probe_interval}. Retrying..."
                      sleep 2
                      (( retries += 1 ))
                    else
                      echo "Successfully set northd probe interval to ${northd_probe_interval} ms"
                      break
                    fi
                  done
                fi
          preStop:
            exec:
              command:
              - /usr/bin/ovn-appctl
              - -t
              - /var/run/ovn/ovnnb_db.ctl
              - exit
        readinessProbe:
          initialDelaySeconds: 90
          timeoutSeconds: 5
          exec:
            command:
            - /bin/bash
            - -c
            - |
              set -xeo pipefail
              leader_status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=3 cluster/status OVN_Northbound 2>/dev/null | { grep "Leader: unknown" || true; })
              if [[ ! -z "${leader_status}" ]]; then
                echo "NB DB Raft leader is unknown to the cluster node."
                exit 1
              fi
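        # The probe fails whenever this raft member does not know the current
        # leader; such members stay out of the Service endpoints until the
        # cluster converges.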
        securityContext:
          privileged: true
        env:
        - name: OVN_LOG_LEVEL
          value: dbg
        - name: OVN_LOG_PATTERN_CONSOLE
          value: "%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m"
        - name: OVN_NB_RAFT_PORT
          value: "9643"
        - name: OVN_NORTHD_PROBE_INTERVAL
          value: "5000"
        - name: K8S_NODE_IP
          valueFrom:
            fieldRef:
              fieldPath: status.hostIP
        - name: K8S_POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        - name: K8S_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        volumeMounts:
        - name: datadir
          mountPath: /var/run/ovn
        - mountPath: /env
          name: env-overrides
        - mountPath: /ovn-cert
          name: ovn-cert
        - mountPath: /ovn-ca
          name: ovn-ca
        resources:
          requests:
            cpu: 10m
            memory: 300Mi
        ports:
        - name: nb-db-port
          containerPort: 9641
        - name: nb-db-raft-port
          containerPort: 9643
        terminationMessagePolicy: FallbackToLogsOnError
      # sbdb: The southbound, or flow DB. In raft mode
      - name: sbdb
        image: "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:eaa4904326c4e2ca5ee926a84696e6c5bb6f40205a2c893eab4f326747ec2478"
        command:
        - /bin/bash
        - -c
        - |
          set -xm
          if [[ -f /env/_master ]]; then
            set -o allexport
            source /env/_master
            set +o allexport
          fi
          quit() {
            echo "$(date -Iseconds) - stopping sbdb"
            /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb
            echo "$(date -Iseconds) - sbdb stopped"
            rm -f /var/run/ovn/ovnsb_db.pid
            exit 0
          }
          # end of quit
          trap quit TERM INT
          bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac }
          # initialize variables
          ovn_kubernetes_namespace=hypershift-ovn
          ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt"
          transport="ssl"
          ovn_raft_conn_ip_url_suffix=""
          if [[ "${K8S_POD_IP}" == *":"* ]]; then
            ovn_raft_conn_ip_url_suffix=":[::]"
          fi
          db="sb"
          db_port="9642"
          ovn_db_file="/etc/ovn/ovn${db}_db.db"
          # checks if a db pod is part of a current cluster
          db_part_of_cluster() {
            local pod=${1}
            local db=${2}
            local port=${3}
            echo "Checking if ${pod} is part of cluster"
            # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
            # init_ip=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} ${pod} -o=jsonpath='{.status.podIP}')
            # Unlike the nbdb variant, use the pod's stable DNS name instead of
            # looking up its IP; the assignment below cannot fail, so the $? check
            # used above is not needed here.
            init_ip="${pod}.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
            echo "Found ${pod} ip: $init_ip"
            init_ip=$(bracketify $init_ip)
            target=$(ovn-${db}ctl --timeout=5 --db=${transport}:${init_ip}:${port} ${ovndb_ctl_ssl_opts} \
              --data=bare --no-headings --columns=target list connection)
            if [[ "${target}" != "p${transport}:${port}${ovn_raft_conn_ip_url_suffix}" ]]; then
              echo "Unable to check correct target ${target}"
              return 1
            fi
            echo "${pod} is part of cluster"
            return 0
          }
          # end of db_part_of_cluster
          # Checks if cluster has already been initialized.
          # If not it returns false and sets init_ip to CLUSTER_INITIATOR_IP
          cluster_exists() {
            local db=${1}
            local port=${2}
            # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
            db_pods=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-guest-\w+' | grep -v "metrics")
            for db_pod in $db_pods; do
              if db_part_of_cluster $db_pod $db $port; then
                echo "${db_pod} is part of current cluster with ip: ${init_ip}!"
                return 0
              fi
            done
            # if we get here there is no cluster, set init_ip and get out
            init_ip=$(bracketify $CLUSTER_INITIATOR_IP)
            return 1
          }
          # end of cluster_exists()
          OVN_ARGS="--db-sb-cluster-local-port=9644 \
            --db-sb-cluster-local-addr=$(bracketify ${K8S_POD_NAME}.ovnkube-master-guest.hypershift-ovn.svc.cluster.local) \
            --no-monitor \
            --db-sb-cluster-local-proto=ssl \
            --ovn-sb-db-ssl-key=/ovn-cert/tls.key \
            --ovn-sb-db-ssl-cert=/ovn-cert/tls.crt \
            --ovn-sb-db-ssl-ca-cert=/ovn-ca/ca-bundle.crt"
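          # Unlike nbdb above (which advertises its pod IP), sbdb advertises its
          # stable DNS name from the headless Service as the raft-local address;
          # this is the DNS-based variant this manifest experiments with.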
CLUSTER_INITIATOR_IP="192.168.111.20"
cluster_initiator_pod=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-guest-0' | grep -v "metrics")
cluster_initiator_ip=$(timeout 5 kubectl get pod ${cluster_initiator_pod} -n ${ovn_kubernetes_namespace} -o=jsonpath='{.status.podIP}')
if [[ $? != 0 ]]; then
echo "Unable to get ${cluster_initiator_pod} ip "
fi
echo "Found ${cluster_initiator_pod} ip: $cluster_initiator_ip"
# CLUSTER_INITIATOR_IP="${cluster_initiator_ip}"
CLUSTER_INITIATOR_IP="ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
echo "$(date -Iseconds) - starting sbdb CLUSTER_INITIATOR_IP=${CLUSTER_INITIATOR_IP}"
initial_raft_create=true
initialize="false"
if [[ ! -e ${ovn_db_file} ]]; then
initialize="true"
fi
if [[ "${initialize}" == "true" ]]; then
# check to see if a cluster already exists. If it does, just join it.
counter=0
cluster_found=false
while [ $counter -lt 5 ]; do
if cluster_exists ${db} ${db_port}; then
cluster_found=true
break
fi
sleep 1
counter=$((counter+1))
done
if ${cluster_found}; then
echo "Cluster already exists for DB: ${db}"
initial_raft_create=false
# join existing cluster
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--db-sb-cluster-remote-port=9644 \
--db-sb-cluster-remote-addr=${init_ip} \
--db-sb-cluster-remote-proto=ssl \
--ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
run_sb_ovsdb &
wait $!
else
# either we need to initialize a new cluster or wait for master to create it
if [[ "${K8S_POD_NAME}.ovnkube-master-guest.hypershift-ovn.svc.cluster.local" == "${CLUSTER_INITIATOR_IP}" ]]; then
# set DB election timer at DB creation time if OVN supports it
election_timer=
if test -n "$(/usr/share/ovn/scripts/ovn-ctl --help 2>&1 | grep "\--db-sb-election-timer")"; then
election_timer="--db-sb-election-timer=$((16*1000))"
fi
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
${election_timer} \
run_sb_ovsdb &
wait $!
else
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--db-sb-cluster-remote-port=9644 \
--db-sb-cluster-remote-addr=${init_ip} \
--db-sb-cluster-remote-proto=ssl \
--ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
run_sb_ovsdb &
wait $!
fi
fi
else
exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
--ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:${OVN_LOG_PATTERN_CONSOLE}" \
run_sb_ovsdb &
wait $!
fi
        lifecycle:
          postStart:
            exec:
              command:
              - /bin/bash
              - -c
              - |
                set -x
                CLUSTER_INITIATOR_IP="ovnkube-master-guest-0.ovnkube-master-guest.hypershift-ovn.svc.cluster.local"
                rm -f /var/run/ovn/ovnsb_db.pid
                if [[ "${K8S_POD_NAME}.ovnkube-master-guest.hypershift-ovn.svc.cluster.local" == "${CLUSTER_INITIATOR_IP}" ]]; then
                  echo "$(date -Iseconds) - sbdb - postStart - waiting for master to be selected"
                  # set the connection and inactivity probe
                  retries=0
                  while ! ovn-sbctl --no-leader-only -t 5 set-connection pssl:9642 -- set connection . inactivity_probe=180000; do
                    (( retries += 1 ))
                    if [[ "${retries}" -gt 40 ]]; then
                      echo "$(date -Iseconds) - ERROR RESTARTING - sbdb - too many failed ovn-sbctl attempts, giving up"
                      exit 1
                    fi
                    sleep 2
                  done
                  # Upgrade the db if required.
                  DB_SCHEMA="/usr/share/ovn/ovn-sb.ovsschema"
                  DB_SERVER="unix:/var/run/ovn/ovnsb_db.sock"
                  schema_name=$(ovsdb-tool schema-name $DB_SCHEMA)
                  db_version=$(ovsdb-client -t 10 get-schema-version "$DB_SERVER" "$schema_name")
                  target_version=$(ovsdb-tool schema-version "$DB_SCHEMA")
                  if ovsdb-tool compare-versions "$db_version" == "$target_version"; then
                    :
                  elif ovsdb-tool compare-versions "$db_version" ">" "$target_version"; then
                    echo "Database $schema_name has newer schema version ($db_version) than our local schema ($target_version), possibly an upgrade is partially complete?"
                  else
                    echo "Upgrading database $schema_name from schema version $db_version to $target_version"
                    ovsdb-client -t 30 convert "$DB_SERVER" "$DB_SCHEMA"
                  fi
                fi
          preStop:
            exec:
              command:
              - /usr/bin/ovn-appctl
              - -t
              - /var/run/ovn/ovnsb_db.ctl
              - exit
        readinessProbe:
          initialDelaySeconds: 90
          timeoutSeconds: 5
          exec:
            command:
            - /bin/bash
            - -c
            - |
              set -xeo pipefail
              leader_status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=3 cluster/status OVN_Southbound 2>/dev/null | { grep "Leader: unknown" || true; })
              if [[ ! -z "${leader_status}" ]]; then
                echo "SB DB Raft leader is unknown to the cluster node."
                exit 1
              fi
        securityContext:
          privileged: true
        env:
        - name: OVN_LOG_LEVEL
          value: dbg
        - name: OVN_LOG_PATTERN_CONSOLE
          value: "%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m"
        - name: K8S_NODE_IP
          valueFrom:
            fieldRef:
              fieldPath: status.hostIP
        - name: K8S_POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        - name: K8S_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        volumeMounts:
        - name: datadir
          mountPath: /var/run/ovn
        - mountPath: /env
          name: env-overrides
        - mountPath: /ovn-cert
          name: ovn-cert
        - mountPath: /ovn-ca
          name: ovn-ca
        ports:
        - name: sb-db-port
          containerPort: 9642
        - name: sb-db-raft-port
          containerPort: 9644
        resources:
          requests:
            cpu: 10m
            memory: 300Mi
        terminationMessagePolicy: FallbackToLogsOnError
      nodeSelector:
        node-role.kubernetes.io/worker: ""
        kubernetes.io/os: "linux"
      volumes:
      - name: datadir
        persistentVolumeClaim:
          claimName: local-pvc
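      # NOTE: all three replicas reference the same claim "local-pvc" here; to
      # give each DB replica its own data directory, a volumeClaimTemplates
      # entry on the StatefulSet would normally be used instead.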
      - name: ovnkube-config
        configMap:
          name: ovnkube-config
      - name: env-overrides
        configMap:
          name: env-overrides
          optional: true
      - name: ovn-ca
        configMap:
          name: ovn-ca
      - name: ovn-cert
        secret:
          secretName: ovn-cert
      - name: ovn-master-metrics-cert
        secret:
          secretName: ovn-master-metrics-cert
          optional: true
      tolerations:
      - key: "node-role.kubernetes.io/worker"
        operator: "Exists"
      - key: "node.kubernetes.io/not-ready"
        operator: "Exists"
      - key: "node.kubernetes.io/unreachable"
        operator: "Exists"
      - key: "node.kubernetes.io/network-unavailable"
        operator: "Exists"