@cofyc · Last active May 10, 2019 19:13
Demo TiDB cluster files for tidb-operator in OLM
{
  "apiVersion": "pingcap.com/v1alpha1",
  "kind": "TidbCluster",
  "metadata": {
    "name": "demo",
    "namespace": "operators"
  },
  "spec": {
    "pd": {
      "image": "pingcap/pd:v2.1.0",
      "imagePullPolicy": "IfNotPresent",
      "limits": {},
      "nodeSelectorRequired": true,
      "replicas": 3,
      "requests": {
        "storage": "1Gi"
      },
      "storageClassName": "standard"
    },
    "pvReclaimPolicy": "Retain",
    "services": [
      {
        "name": "pd",
        "type": "ClusterIP"
      }
    ],
    "tidb": {
      "image": "pingcap/tidb:v2.1.0",
      "imagePullPolicy": "IfNotPresent",
      "limits": {},
      "maxFailoverCount": 1,
      "nodeSelectorRequired": true,
      "replicas": 1,
      "requests": {
        "storage": "1Gi"
      }
    },
    "tikv": {
      "image": "pingcap/tikv:v2.1.0",
      "imagePullPolicy": "IfNotPresent",
      "limits": {},
      "nodeSelectorRequired": true,
      "replicas": 1,
      "requests": {
        "storage": "4Gi"
      },
      "storageClassName": "standard"
    },
    "tikvPromGateway": {
      "image": "prom/pushgateway:v0.3.1",
      "imagePullPolicy": "IfNotPresent",
      "limits": {},
      "requests": {}
    },
    "timezone": "UTC"
  }
}
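
Assuming the CR above is saved to a file (e.g. tidb-cluster.json, a hypothetical name), applying it and watching the operator build the cluster would look roughly like this, provided the TidbCluster CRD and tidb-operator are already installed:

kubectl apply -f tidb-cluster.json
kubectl -n operators get tidbcluster demo
# the operator labels the pods it creates with the same instance label used in the manifests below
kubectl -n operators get pods -l app.kubernetes.io/instance=demo -w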
kind: Role
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: demo-discovery
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: discovery
rules:
- apiGroups: ["pingcap.com"]
  resources: ["tidbclusters"]
  resourceNames: [demo]
  verbs: ["get"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: demo-discovery
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: discovery
subjects:
- kind: ServiceAccount
  name: demo-discovery
roleRef:
  kind: Role
  name: demo-discovery
  apiGroup: rbac.authorization.k8s.io
---
kind: ServiceAccount
apiVersion: v1
metadata:
  name: demo-discovery
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: discovery
---
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  name: demo-discovery
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: discovery
spec:
  # don't modify this replicas
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: tidb-cluster
      app.kubernetes.io/instance: demo
      app.kubernetes.io/component: discovery
  template:
    metadata:
      labels:
        app.kubernetes.io/name: tidb-cluster
        app.kubernetes.io/instance: demo
        app.kubernetes.io/component: discovery
    spec:
      serviceAccount: demo-discovery
      containers:
      - name: discovery
        image: pingcap/tidb-operator:latest
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 250m
            memory: 150Mi
          requests:
            cpu: 80m
            memory: 50Mi
        command:
        - /usr/local/bin/tidb-discovery
        env:
        - name: MY_POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
---
apiVersion: v1
kind: Service
metadata:
  name: demo-discovery
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: discovery
spec:
  type: ClusterIP
  ports:
  - name: discovery
    port: 10261
    targetPort: 10261
    protocol: TCP
  selector:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: discovery
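
A quick sanity check that the discovery pieces defined above are running (names taken from the manifests; the log command assumes the discovery binary writes to stdout):

kubectl -n operators get deployment,service demo-discovery
kubectl -n operators logs deployment/demo-discovery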
apiVersion: v1
kind: ConfigMap
metadata:
  name: demo-pd
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: pd
data:
  startup-script: |-
    #!/bin/sh
    # This script is used to start pd containers in kubernetes cluster
    # Use DownwardAPIVolumeFiles to store information about the cluster:
    # https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/#the-downward-api
    #
    #   runmode="normal/debug"
    #
    set -uo pipefail
    ANNOTATIONS="/etc/podinfo/annotations"
    if [[ ! -f "${ANNOTATIONS}" ]]
    then
        echo "${ANNOTATIONS} doesn't exist, exiting."
        exit 1
    fi
    source ${ANNOTATIONS} 2>/dev/null
    runmode=${runmode:-normal}
    if [[ X${runmode} == Xdebug ]]
    then
        echo "entering debug mode."
        tail -f /dev/null
    fi
    # the general form of variable PEER_SERVICE_NAME is: "<clusterName>-pd-peer"
    cluster_name=`echo ${PEER_SERVICE_NAME} | sed 's/-pd-peer//'`
    domain="${HOSTNAME}.${PEER_SERVICE_NAME}.${NAMESPACE}.svc"
    discovery_url="${cluster_name}-discovery.${NAMESPACE}.svc:10261"
    encoded_domain_url=`echo ${domain}:2380 | base64 | tr "\n" " " | sed "s/ //g"`
    elapseTime=0
    period=1
    threshold=30
    while true; do
        sleep ${period}
        elapseTime=$(( elapseTime+period ))
        if [[ ${elapseTime} -ge ${threshold} ]]
        then
            echo "waiting for pd cluster ready timeout" >&2
            exit 1
        fi
        if nslookup ${domain} 2>/dev/null
        then
            echo "nslookup domain ${domain}.svc success"
            break
        else
            echo "nslookup domain ${domain} failed" >&2
        fi
    done
    ARGS="--data-dir=/var/lib/pd \
    --name=${HOSTNAME} \
    --peer-urls=http://0.0.0.0:2380 \
    --advertise-peer-urls=http://${domain}:2380 \
    --client-urls=http://0.0.0.0:2379 \
    --advertise-client-urls=http://${domain}:2379 \
    --config=/etc/pd/pd.toml \
    "
    if [[ -f /var/lib/pd/join ]]
    then
        # The content of the join file is:
        #   demo-pd-0=http://demo-pd-0.demo-pd-peer.demo.svc:2380,demo-pd-1=http://demo-pd-1.demo-pd-peer.demo.svc:2380
        # The --join args must be:
        #   --join=http://demo-pd-0.demo-pd-peer.demo.svc:2380,http://demo-pd-1.demo-pd-peer.demo.svc:2380
        join=`cat /var/lib/pd/join | tr "," "\n" | awk -F'=' '{print $2}' | tr "\n" ","`
        join=${join%,}
        ARGS="${ARGS} --join=${join}"
    elif [[ ! -d /var/lib/pd/member/wal ]]
    then
        until result=$(wget -qO- -T 3 http://${discovery_url}/new/${encoded_domain_url} 2>/dev/null); do
            echo "waiting for discovery service to return start args ..."
            sleep $((RANDOM % 5))
        done
        ARGS="${ARGS}${result}"
    fi
    echo "starting pd-server ..."
    sleep $((RANDOM % 10))
    echo "/pd-server ${ARGS}"
    exec /pd-server ${ARGS}
  config-file: |-
    # PD Configuration.
    name = "pd"
    data-dir = "default.pd"
    client-urls = "http://127.0.0.1:2379"
    # if not set, use ${client-urls}
    advertise-client-urls = ""
    peer-urls = "http://127.0.0.1:2380"
    # if not set, use ${peer-urls}
    advertise-peer-urls = ""
    initial-cluster = ""
    initial-cluster-state = ""
    lease = 3
    tso-save-interval = "3s"
    namespace-classifier = "table"
    enable-prevote = true
    [security]
    # Path of file that contains list of trusted SSL CAs. If set, the following settings shouldn't be empty.
    cacert-path = ""
    # Path of file that contains X509 certificate in PEM format.
    cert-path = ""
    # Path of file that contains X509 key in PEM format.
    key-path = ""
    [log]
    level = "info"
    # log format, one of json, text, console
    #format = "text"
    # disable automatic timestamps in output
    #disable-timestamp = false
    # file logging
    [log.file]
    #filename = ""
    # max log file size in MB
    #max-size = 300
    # max log file keep days
    #max-days = 28
    # maximum number of old log files to retain
    #max-backups = 7
    # rotate log by day
    #log-rotate = true
    [metric]
    # prometheus client push interval, set "0s" to disable prometheus.
    interval = "15s"
    # prometheus pushgateway address, leave it empty to disable prometheus.
    address = ""
    [schedule]
    max-merge-region-size = 0
    max-merge-region-keys = 0
    split-merge-interval = "1h"
    max-snapshot-count = 3
    max-pending-peer-count = 16
    max-store-down-time = "30m"
    leader-schedule-limit = 4
    region-schedule-limit = 4
    replica-schedule-limit = 8
    merge-schedule-limit = 8
    tolerant-size-ratio = 5.0
    # customized schedulers, the format is as below
    # if empty, it will use balance-leader, balance-region, hot-region as default
    # [[schedule.schedulers]]
    #   type = "evict-leader"
    #   args = ["1"]
    [replication]
    # The number of replicas for each region.
    max-replicas = 3
    # The label keys specify the location of a store.
    # The placement priorities are implied by the order of label keys.
    # For example, ["zone", "rack"] means that we should place replicas to
    # different zones first, then to different racks if we don't have enough zones.
    location-labels = ["zone", "rack", "host"]
    [label-property]
    # Do not assign region leaders to stores that have these tags.
    # [[label-property.reject-leader]]
    # key = "zone"
    # value = "cn1"
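
For reference, the request the startup script above ends up sending for the first PD member can be reproduced by hand; the pod and peer-service names (demo-pd-0, demo-pd-peer) are assumptions based on the join-file example in the script:

# rebuild the discovery URL exactly as the startup script does
domain="demo-pd-0.demo-pd-peer.operators.svc"
encoded_domain_url=$(echo ${domain}:2380 | base64 | tr "\n" " " | sed "s/ //g")
# the script then calls: wget -qO- http://demo-discovery.operators.svc:10261/new/${encoded_domain_url}
echo "http://demo-discovery.operators.svc:10261/new/${encoded_domain_url}"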
apiVersion: v1
kind: ConfigMap
metadata:
  name: demo-tidb
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: tidb
data:
  startup-script: |-
    #!/bin/sh
    # This script is used to start tidb containers in kubernetes cluster
    # Use DownwardAPIVolumeFiles to store information about the cluster:
    # https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/#the-downward-api
    #
    #   runmode="normal/debug"
    #
    set -uo pipefail
    ANNOTATIONS="/etc/podinfo/annotations"
    if [[ ! -f "${ANNOTATIONS}" ]]
    then
        echo "${ANNOTATIONS} doesn't exist, exiting."
        exit 1
    fi
    source ${ANNOTATIONS} 2>/dev/null
    runmode=${runmode:-normal}
    if [[ X${runmode} == Xdebug ]]
    then
        echo "entering debug mode."
        tail -f /dev/null
    fi
    ARGS="--store=tikv \
    --host=0.0.0.0 \
    --path=${CLUSTER_NAME}-pd:2379 \
    --config=/etc/tidb/tidb.toml
    "
    if [[ X${BINLOG_ENABLED:-} == Xtrue ]]
    then
        ARGS="${ARGS} --enable-binlog=true"
    fi
    SLOW_LOG_FILE=${SLOW_LOG_FILE:-""}
    if [[ ! -z "${SLOW_LOG_FILE}" ]]
    then
        ARGS="${ARGS} --log-slow-query=${SLOW_LOG_FILE:-}"
    fi
    echo "start tidb-server ..."
    echo "/tidb-server ${ARGS}"
    exec /tidb-server ${ARGS}
  config-file: |-
    # TiDB Configuration.
    # TiDB server host.
    host = "0.0.0.0"
    # tidb server advertise IP.
    advertise-address = ""
    # TiDB server port.
    port = 4000
    # Registered store name, [tikv, mocktikv]
    store = "mocktikv"
    # TiDB storage path.
    path = "/tmp/tidb"
    # The socket file to use for connection.
    socket = ""
    # Run ddl worker on this tidb-server.
    run-ddl = true
    # Schema lease duration. It is very dangerous to change, so only do it if you know what you are doing.
    lease = "45s"
    # When creating a table, split a separate region for it. It is recommended to
    # turn off this option if there will be a large number of tables created.
    split-table = true
    # The limit of concurrent executed sessions.
    token-limit = 1000
    # Only print a log when out of memory quota.
    # Valid options: ["log", "cancel"]
    oom-action = "log"
    # Set the memory quota for a query in bytes. Default: 32GB
    mem-quota-query = 34359738368
    # Enable coprocessor streaming.
    enable-streaming = false
    # Set system variable 'lower_case_table_names'
    lower-case-table-names = 2
    # Make "kill query" behavior compatible with MySQL. It's not recommended to
    # turn on this option when TiDB server is behind a proxy.
    compatible-kill-query = false
    [log]
    # Log level: debug, info, warn, error, fatal.
    level = "info"
    # Log format, one of json, text, console.
    format = "text"
    # Disable automatic timestamp in output
    disable-timestamp = false
    # Stores slow query log into separated files.
    slow-query-file = ""
    # Queries with execution time greater than this value will be logged. (Milliseconds)
    slow-threshold = 300
    # Queries with internal result greater than this value will be logged.
    expensive-threshold = 10000
    # Maximum query length recorded in log.
    query-log-max-len = 2048
    # File logging.
    [log.file]
    # Log file name.
    filename = ""
    # Max log file size in MB (upper limit to 4096MB).
    max-size = 300
    # Max log file keep days. No clean up by default.
    max-days = 0
    # Maximum number of old log files to retain. No clean up by default.
    max-backups = 0
    # Rotate log by day
    log-rotate = true
    [security]
    # Path of file that contains list of trusted SSL CAs for connection with mysql client.
    ssl-ca = ""
    # Path of file that contains X509 certificate in PEM format for connection with mysql client.
    ssl-cert = ""
    # Path of file that contains X509 key in PEM format for connection with mysql client.
    ssl-key = ""
    # Path of file that contains list of trusted SSL CAs for connection with cluster components.
    cluster-ssl-ca = ""
    # Path of file that contains X509 certificate in PEM format for connection with cluster components.
    cluster-ssl-cert = ""
    # Path of file that contains X509 key in PEM format for connection with cluster components.
    cluster-ssl-key = ""
    [status]
    # If enable status report HTTP service.
    report-status = true
    # TiDB status port.
    status-port = 10080
    # Prometheus pushgateway address, leave it empty to disable prometheus push.
    metrics-addr = ""
    # Prometheus client push interval in second, set \"0\" to disable prometheus push.
    metrics-interval = 15
    [performance]
    # Max CPUs to use, 0 use number of CPUs in the machine.
    max-procs = 0
    # StmtCountLimit limits the max count of statement inside a transaction.
    stmt-count-limit = 5000
    # Set keep alive option for tcp connection.
    tcp-keep-alive = true
    # Whether support cartesian product.
    cross-join = true
    # Stats lease duration, which influences the time of analyze and stats load.
    stats-lease = "3s"
    # Run auto analyze worker on this tidb-server.
    run-auto-analyze = true
    # Probability to use the query feedback to update stats, 0 or 1 for always false/true.
    feedback-probability = 0.05
    # The max number of query feedback that cache in memory.
    query-feedback-limit = 1024
    # Pseudo stats will be used if the ratio between the modify count and
    # row count in statistics of a table is greater than it.
    pseudo-estimate-ratio = 0.8
    # Force the priority of all statements in a specified priority.
    # The value could be "NO_PRIORITY", "LOW_PRIORITY", "HIGH_PRIORITY" or "DELAYED".
    force-priority = "NO_PRIORITY"
    [proxy-protocol]
    # PROXY protocol acceptable client networks.
    # Empty string means disable PROXY protocol, * means all networks.
    networks = ""
    # PROXY protocol header read timeout, unit is second
    header-timeout = 5
    [prepared-plan-cache]
    enabled = false
    capacity = 100
    [opentracing]
    # Enable opentracing.
    enable = false
    # Whether to enable the rpc metrics.
    rpc-metrics = false
    [opentracing.sampler]
    # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
    type = "const"
    # Param is a value passed to the sampler.
    # Valid values for Param field are:
    # - for "const" sampler, 0 or 1 for always false/true respectively
    # - for "probabilistic" sampler, a probability between 0 and 1
    # - for "rateLimiting" sampler, the number of spans per second
    # - for "remote" sampler, param is the same as for "probabilistic"
    #   and indicates the initial sampling rate before the actual one
    #   is received from the mothership
    param = 1.0
    # SamplingServerURL is the address of jaeger-agent's HTTP sampling server
    sampling-server-url = ""
    # MaxOperations is the maximum number of operations that the sampler
    # will keep track of. If an operation is not tracked, a default probabilistic
    # sampler will be used rather than the per operation specific sampler.
    max-operations = 0
    # SamplingRefreshInterval controls how often the remotely controlled sampler will poll
    # jaeger-agent for the appropriate sampling strategy.
    sampling-refresh-interval = 0
    [opentracing.reporter]
    # QueueSize controls how many spans the reporter can keep in memory before it starts dropping
    # new spans. The queue is continuously drained by a background go-routine, as fast as spans
    # can be sent out of process.
    queue-size = 0
    # BufferFlushInterval controls how often the buffer is force-flushed, even if it's not full.
    # It is generally not useful, as it only matters for very low traffic services.
    buffer-flush-interval = 0
    # LogSpans, when true, enables LoggingReporter that runs in parallel with the main reporter
    # and logs all submitted spans. Main Configuration.Logger must be initialized in the code
    # for this option to have any effect.
    log-spans = false
    # LocalAgentHostPort instructs reporter to send spans to jaeger-agent at this address
    local-agent-host-port = ""
    [tikv-client]
    # Max gRPC connections that will be established with each tikv-server.
    grpc-connection-count = 16
    # After a duration of this time in seconds if the client doesn't see any activity it pings
    # the server to see if the transport is still alive.
    grpc-keepalive-time = 10
    # After having pinged for keepalive check, the client waits for a duration of Timeout in seconds
    # and if no activity is seen even after that the connection is closed.
    grpc-keepalive-timeout = 3
    # max time for commit command, must be twice bigger than raft election timeout.
    commit-timeout = "41s"
    [txn-local-latches]
    # Enable local latches for transactions. Enable it when
    # there are lots of conflicts between transactions.
    enabled = false
    capacity = 10240000
    [binlog]
    # enable to write binlog.
    enable = false
    # WriteTimeout specifies how long it will wait for writing binlog to pump.
    write-timeout = "15s"
    # If IgnoreError is true, when writing binlog meets an error, TiDB would stop writing binlog,
    # but still provide service.
    ignore-error = false
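
Once the TiDB pod is running it listens on port 4000 (see port = 4000 above). A minimal sketch of connecting through a port-forward, assuming the operator creates a demo-tidb service analogous to demo-pd:

kubectl -n operators port-forward svc/demo-tidb 4000:4000 &
mysql -h 127.0.0.1 -P 4000 -u root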
apiVersion: v1
kind: ConfigMap
metadata:
  name: demo-tikv
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: tikv
data:
  startup-script: |-
    #!/bin/sh
    # This script is used to start tikv containers in kubernetes cluster
    # Use DownwardAPIVolumeFiles to store information about the cluster:
    # https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/#the-downward-api
    #
    #   runmode="normal/debug"
    #
    set -uo pipefail
    ANNOTATIONS="/etc/podinfo/annotations"
    if [[ ! -f "${ANNOTATIONS}" ]]
    then
        echo "${ANNOTATIONS} doesn't exist, exiting."
        exit 1
    fi
    source ${ANNOTATIONS} 2>/dev/null
    runmode=${runmode:-normal}
    if [[ X${runmode} == Xdebug ]]
    then
        echo "entering debug mode."
        tail -f /dev/null
    fi
    ARGS="--pd=${CLUSTER_NAME}-pd:2379
    --advertise-addr=${HOSTNAME}.${HEADLESS_SERVICE_NAME}.${NAMESPACE}.svc:20160 \
    --addr=0.0.0.0:20160 \
    --data-dir=/var/lib/tikv \
    --capacity=${CAPACITY} \
    --config=/etc/tikv/tikv.toml
    "
    echo "starting tikv-server ..."
    echo "/tikv-server ${ARGS}"
    exec /tikv-server ${ARGS}
  config-file: |-
    # TiKV config template
    # Human-readable big numbers:
    #   File size (based on bytes): KB, MB, GB, TB, PB
    #     e.g.: 1_048_576 = "1MB"
    #   Time (based on ms): ms, s, m, h
    #     e.g.: 78_000 = "1.3m"
    # log level: trace, debug, info, warning, error, critical.
    # Note that `debug` and `trace` are only available in development builds.
    log-level = "info"
    [readpool.storage]
    [readpool.coprocessor]
    [server]
    [storage]
    [pd]
    # pd endpoints
    # endpoints = []
    [raftstore]
    # true (default value) for high reliability; this can prevent data loss in case of power failure.
    sync-log = true
    [coprocessor]
    [rocksdb]
    [rocksdb.lockcf]
    [raftdb]
    [raftdb.defaultcf]
    [security]
    [import]
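
To check that the TiKV stores registered with PD, the PD HTTP API can be queried through a port-forward; demo-pd is the service name implied by --path=${CLUSTER_NAME}-pd:2379 in the TiDB startup script:

kubectl -n operators port-forward svc/demo-pd 2379:2379 &
curl http://127.0.0.1:2379/pd/api/v1/stores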
@cofyc (Author) commented Apr 29, 2019

Related PR: operator-framework/community-operators#175

How to test in OLM: https://github.com/operator-framework/community-operators/blob/master/docs/testing-operators.md#manual-testing-on-kubernetes

1. Manifests

apiVersion: operators.coreos.com/v1alpha1
kind: CatalogSource
metadata:
  name: example-manifests
  namespace: default
spec:
  sourceType: grpc
  image: quay.io/cofyc/olm-example-registry:latest

2. OperatorGroup

apiVersion: operators.coreos.com/v1alpha2
kind: OperatorGroup
metadata:
  name: group
  namespace: default

3. Subscription

apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: tidb-subscription
  namespace: default 
spec:
  channel: beta
  name: tidb-operator
  source: example-manifests
  sourceNamespace: default
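
Assuming the three manifests above are saved to files (hypothetical names below), the test sequence on a cluster with OLM installed is roughly:

kubectl apply -f catalog-source.yaml   # 1. CatalogSource
kubectl apply -f operator-group.yaml   # 2. OperatorGroup
kubectl apply -f subscription.yaml     # 3. Subscription
# watch OLM resolve the subscription and install tidb-operator
kubectl -n default get installplan,csv
kubectl -n default get pods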

@tennix commented Apr 30, 2019

It seems the default max open files limit in minikube is 1024, while TiKV uses 40960 by default, so the startup check fails. We have to explicitly set max-open-files to 1024 in the TiKV ConfigMap.
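
A quick way to check the limit a TiKV container actually gets (a sketch; demo-tikv-0 is the pod name the usual StatefulSet convention would produce):

kubectl -n operators exec demo-tikv-0 -- sh -c 'ulimit -n'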

@tennix commented Apr 30, 2019

Here is the modified TiKV ConfigMap, which sets max-open-files explicitly to 1024.

apiVersion: v1
kind: ConfigMap
metadata:
  name: demo-tikv
  namespace: operators
  labels:
    app.kubernetes.io/name: tidb-cluster
    app.kubernetes.io/instance: demo
    app.kubernetes.io/component: tikv
data:
  startup-script: |-
    #!/bin/sh
    
    # This script is used to start tikv containers in kubernetes cluster
    
    # Use DownwardAPIVolumeFiles to store information about the cluster:
    # https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/#the-downward-api
    #
    #   runmode="normal/debug"
    #
    
    set -uo pipefail
    ANNOTATIONS="/etc/podinfo/annotations"
    
    if [[ ! -f "${ANNOTATIONS}" ]]
    then
        echo "${ANNOTATIONS} does't exist, exiting."
        exit 1
    fi
    source ${ANNOTATIONS} 2>/dev/null
    
    runmode=${runmode:-normal}
    if [[ X${runmode} == Xdebug ]]
    then
    	echo "entering debug mode."
    	tail -f /dev/null
    fi
    
    ARGS="--pd=${CLUSTER_NAME}-pd:2379
    --advertise-addr=${HOSTNAME}.${HEADLESS_SERVICE_NAME}.${NAMESPACE}.svc:20160 \
    --addr=0.0.0.0:20160 \
    --data-dir=/var/lib/tikv \
    --capacity=${CAPACITY} \
    --config=/etc/tikv/tikv.toml
    "
    
    echo "starting tikv-server ..."
    echo "/tikv-server ${ARGS}"
    exec /tikv-server ${ARGS}
    
  config-file: |-
    # TiKV config template
    #  Human-readable big numbers:
    #   File size(based on byte): KB, MB, GB, TB, PB
    #    e.g.: 1_048_576 = "1MB"
    #   Time(based on ms): ms, s, m, h
    #    e.g.: 78_000 = "1.3m"
    
    # log level: trace, debug, info, warning, error, critical.
    # Note that `debug` and `trace` are only available in development builds.
    log-level = "info"
    [readpool.storage]
    
    [readpool.coprocessor]
    
    [server]
    
    [storage]
    
    [pd]
    # pd endpoints
    # endpoints = []
    [raftstore]
    # true (default value) for high reliability; this can prevent data loss in case of power failure.
    sync-log = true
    [coprocessor]
    [rocksdb]
    max-open-files = 1024
    [rocksdb.lockcf]
    [raftdb]
    max-open-files = 1024
    [raftdb.defaultcf]
    [security]
    [import]

@SamiSousa

@tennix @cofyc Could you explain these demo resources? If the admin is expected to create all these resources manually, then what does the operator do with the example CR provided?
