Last active
November 25, 2020 14:31
-
-
Save fabiog1901/fc09e6fd98d0419b4528ca1c9553d478 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################# | |
# NETWORK CHAOS # | |
################# | |
---
# Injects a 60ms network delay on traffic sent from the Seattle pods (and the
# jumpbox) to the New York pods.
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
  name: delay-uswest-useast
  labels:
    app: cockroachdb
spec:
  action: delay
  # "to": the delay applies to packets going from the source pods to the
  # target pods.
  direction: to
  delay:
    latency: '60ms'
  # Source side of the delayed traffic; mode "all" selects every listed pod.
  mode: all
  selector:
    pods:
      default:  # namespace of the source pods
        - roach-seattle-1
        - roach-seattle-2
        - roach-seattle-3
        - jumpbox  # NOTE(review): not defined in this file; verify it exists
  # Destination side of the delayed traffic.
  target:
    mode: all
    selector:
      pods:
        default:  # namespace of the target pods
          - roach-newyork-1
          - roach-newyork-2
          - roach-newyork-3
---
# Injects a 180ms network delay on traffic sent from the London pods (and the
# jumpbox) to the Seattle pods.
# NOTE(review): the resource name reads "uswest-euwest", but the source side
# here is London (eu-west2) and the target is Seattle (us-west2).
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
  name: delay-uswest-euwest
  labels:
    app: cockroachdb
spec:
  action: delay
  # "to": the delay applies to packets going from the source pods to the
  # target pods.
  direction: to
  delay:
    latency: '180ms'
  # Source side of the delayed traffic; mode "all" selects every listed pod.
  mode: all
  selector:
    pods:
      default:  # namespace of the source pods
        - roach-london-1
        - roach-london-2
        - roach-london-3
        - jumpbox  # NOTE(review): not defined in this file; verify it exists
  # Destination side of the delayed traffic.
  target:
    mode: all
    selector:
      pods:
        default:  # namespace of the target pods
          - roach-seattle-1
          - roach-seattle-2
          - roach-seattle-3
---
# Injects a 120ms network delay on traffic sent from the New York pods (and
# the jumpbox) to the London pods.
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
  name: delay-useast-euwest
  labels:
    app: cockroachdb
spec:
  action: delay
  # "to": the delay applies to packets going from the source pods to the
  # target pods.
  direction: to
  delay:
    latency: '120ms'
  # Source side of the delayed traffic; mode "all" selects every listed pod.
  mode: all
  selector:
    pods:
      default:  # namespace of the source pods
        - roach-newyork-1
        - roach-newyork-2
        - roach-newyork-3
        - jumpbox  # NOTE(review): not defined in this file; verify it exists
  # Destination side of the delayed traffic.
  target:
    mode: all
    selector:
      pods:
        default:  # namespace of the target pods
          - roach-london-1
          - roach-london-2
          - roach-london-3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############ | |
# SERVICES # | |
############ | |
---
# us-west2: NodePort service in front of the pods labelled
# app=cockroachdb, region=us-west2.
apiVersion: v1
kind: Service
metadata:
  name: us-west2
  labels:
    app: cockroachdb
spec:
  type: NodePort
  selector:
    app: cockroachdb
    region: us-west2
  ports:
    # SQL client port
    - name: grpc
      nodePort: 31257
      port: 26257
      targetPort: 26257
    # Admin UI
    - name: http
      nodePort: 31080
      port: 8080
      targetPort: 8080
---
# us-east4: NodePort service in front of the pods labelled
# app=cockroachdb, region=us-east4.
apiVersion: v1
kind: Service
metadata:
  name: us-east4
  labels:
    app: cockroachdb
spec:
  type: NodePort
  selector:
    app: cockroachdb
    region: us-east4
  ports:
    # SQL client port
    - name: grpc
      nodePort: 31258
      port: 26257
      targetPort: 26257
    # Admin UI
    - name: http
      nodePort: 31180
      port: 8080
      targetPort: 8080
---
# eu-west2: NodePort service in front of the pods labelled
# app=cockroachdb, region=eu-west2.
apiVersion: v1
kind: Service
metadata:
  name: eu-west2
  labels:
    app: cockroachdb
spec:
  type: NodePort
  selector:
    app: cockroachdb
    region: eu-west2
  ports:
    # SQL client port
    - name: grpc
      nodePort: 31259
      port: 26257
      targetPort: 26257
    # Admin UI
    - name: http
      nodePort: 31280
      port: 8080
      targetPort: 8080
---
# Intra-node service: headless (clusterIP: None) and selecting every pod
# labelled app=cockroachdb. The pods set "subdomain: cockroachdb", so this is
# the service behind the <hostname>.cockroachdb names used in their --join
# lists.
apiVersion: v1
kind: Service
metadata:
  name: cockroachdb
  labels:
    app: cockroachdb
  annotations:
    # Scrape hints for Prometheus setups that honour these annotations.
    prometheus.io/scrape: 'true'
    prometheus.io/path: '_status/vars'
    prometheus.io/port: '8080'
    # Older annotation form of publishNotReadyAddresses (set below as well).
    service.alpha.kubernetes.io/tolerate-unready-endpoints: 'true'
spec:
  clusterIP: None
  # Publish pod addresses even before the pods report ready.
  publishNotReadyAddresses: true
  selector:
    app: cockroachdb
  ports:
    - name: grpc
      port: 26257
      targetPort: 26257
    - name: http
      port: 8080
      targetPort: 8080
############## | |
# PODS + PVC # | |
############## | |
---
# roach-seattle-1: CockroachDB node started with locality
# region=us-west2, zone=a.
apiVersion: v1
kind: Pod
metadata:
  name: roach-seattle-1
  labels:
    app: cockroachdb
    region: us-west2
spec:
  # hostname + subdomain produce the roach-seattle-1.cockroachdb name that
  # the --join lists refer to.
  hostname: roach-seattle-1
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-seattle-1-data
  containers:
    - name: roach-seattle-1
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=us-west2,zone=a
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-seattle-1 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-seattle-1-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-seattle-2: CockroachDB node started with locality
# region=us-west2, zone=b.
apiVersion: v1
kind: Pod
metadata:
  name: roach-seattle-2
  labels:
    app: cockroachdb
    region: us-west2
spec:
  # hostname + subdomain produce the roach-seattle-2.cockroachdb name under
  # the "cockroachdb" subdomain shared by all nodes.
  hostname: roach-seattle-2
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-seattle-2-data
  containers:
    - name: roach-seattle-2
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=us-west2,zone=b
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-seattle-2 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-seattle-2-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-seattle-3: CockroachDB node started with locality
# region=us-west2, zone=c.
apiVersion: v1
kind: Pod
metadata:
  name: roach-seattle-3
  labels:
    app: cockroachdb
    region: us-west2
spec:
  # hostname + subdomain produce the roach-seattle-3.cockroachdb name under
  # the "cockroachdb" subdomain shared by all nodes.
  hostname: roach-seattle-3
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-seattle-3-data
  containers:
    - name: roach-seattle-3
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=us-west2,zone=c
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-seattle-3 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-seattle-3-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-newyork-1: CockroachDB node started with locality
# region=us-east4, zone=a.
apiVersion: v1
kind: Pod
metadata:
  name: roach-newyork-1
  labels:
    app: cockroachdb
    region: us-east4
spec:
  # hostname + subdomain produce the roach-newyork-1.cockroachdb name that
  # the --join lists refer to.
  hostname: roach-newyork-1
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-newyork-1-data
  containers:
    - name: roach-newyork-1
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=us-east4,zone=a
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-newyork-1 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-newyork-1-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-newyork-2: CockroachDB node started with locality
# region=us-east4, zone=b.
apiVersion: v1
kind: Pod
metadata:
  name: roach-newyork-2
  labels:
    app: cockroachdb
    region: us-east4
spec:
  # hostname + subdomain produce the roach-newyork-2.cockroachdb name under
  # the "cockroachdb" subdomain shared by all nodes.
  hostname: roach-newyork-2
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-newyork-2-data
  containers:
    - name: roach-newyork-2
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=us-east4,zone=b
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-newyork-2 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-newyork-2-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-newyork-3: CockroachDB node started with locality
# region=us-east4, zone=c.
apiVersion: v1
kind: Pod
metadata:
  name: roach-newyork-3
  labels:
    app: cockroachdb
    region: us-east4
spec:
  # hostname + subdomain produce the roach-newyork-3.cockroachdb name under
  # the "cockroachdb" subdomain shared by all nodes.
  hostname: roach-newyork-3
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-newyork-3-data
  containers:
    - name: roach-newyork-3
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=us-east4,zone=c
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-newyork-3 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-newyork-3-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-london-1: CockroachDB node started with locality
# region=eu-west2, zone=a.
apiVersion: v1
kind: Pod
metadata:
  name: roach-london-1
  labels:
    app: cockroachdb
    region: eu-west2
spec:
  # hostname + subdomain produce the roach-london-1.cockroachdb name that
  # the --join lists refer to.
  hostname: roach-london-1
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-london-1-data
  containers:
    - name: roach-london-1
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=eu-west2,zone=a
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-london-1 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-london-1-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-london-2: CockroachDB node started with locality
# region=eu-west2, zone=b.
apiVersion: v1
kind: Pod
metadata:
  name: roach-london-2
  labels:
    app: cockroachdb
    region: eu-west2
spec:
  # hostname + subdomain produce the roach-london-2.cockroachdb name under
  # the "cockroachdb" subdomain shared by all nodes.
  hostname: roach-london-2
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-london-2-data
  containers:
    - name: roach-london-2
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=eu-west2,zone=b
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-london-2 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-london-2-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
---
# roach-london-3: CockroachDB node started with locality
# region=eu-west2, zone=c.
apiVersion: v1
kind: Pod
metadata:
  name: roach-london-3
  labels:
    app: cockroachdb
    region: eu-west2
spec:
  # hostname + subdomain produce the roach-london-3.cockroachdb name under
  # the "cockroachdb" subdomain shared by all nodes.
  hostname: roach-london-3
  subdomain: cockroachdb
  terminationGracePeriodSeconds: 60
  volumes:
    - name: datadir
      persistentVolumeClaim:
        claimName: roach-london-3-data
  containers:
    - name: roach-london-3
      image: cockroachdb/cockroach:latest
      imagePullPolicy: IfNotPresent
      # NOTE(review): GOMAXPROCS and MEMORY_LIMIT_MIB read limits.cpu and
      # limits.memory, but this container declares no resources.limits. Per
      # the Kubernetes downward API docs the values then fall back to the
      # node's allocatable CPU/memory; confirm that is intended, because
      # --cache and --max-sql-memory are each sized to a quarter of it.
      env:
        - name: COCKROACH_CHANNEL
          value: kubernetes-insecure
        - name: GOMAXPROCS
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
              divisor: '1'
        - name: MEMORY_LIMIT_MIB
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
              divisor: '1Mi'
      # The last element is folded into a single shell line; bash evaluates
      # the $(hostname -f) and $(expr ...) substitutions at start-up.
      command:
        - /bin/bash
        - '-ecx'
        - >-
          exec
          /cockroach/cockroach
          start
          --logtostderr
          --insecure
          --advertise-host $(hostname -f)
          --http-addr 0.0.0.0
          --join roach-seattle-1.cockroachdb,roach-newyork-1.cockroachdb,roach-london-1.cockroachdb
          --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
          --locality=region=eu-west2,zone=c
      ports:
        - name: grpc
          containerPort: 26257
        - name: http
          containerPort: 8080
      volumeMounts:
        - name: datadir
          mountPath: /cockroach/cockroach-data
      livenessProbe:
        initialDelaySeconds: 30
        periodSeconds: 5
        httpGet:
          path: /health
          port: http
      readinessProbe:
        initialDelaySeconds: 10
        periodSeconds: 5
        failureThreshold: 2
        httpGet:
          path: '/health?ready=1'
          port: http
---
# Data volume claim referenced by pod roach-london-3 (volume "datadir").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: roach-london-3-data
  labels:
    app: cockroachdb
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
######################## | |
# INIT AND CONFIG JOBS # | |
######################## | |
---
# Job that runs "cockroach init" against roach-seattle-1 in insecure mode.
# The container is restarted if the command fails (restartPolicy: OnFailure).
apiVersion: batch/v1
kind: Job
metadata:
  name: cluster-init
  labels:
    app: cockroachdb
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: cluster-init
          image: cockroachdb/cockroach:latest
          imagePullPolicy: IfNotPresent
          command:
            - /cockroach/cockroach
            - init
            - '--insecure'
            - '--host=roach-seattle-1.cockroachdb'
---
# Job that connects to roach-seattle-1 with "cockroach sql" and upserts one
# system.locations row per region (us-east4, us-west2, eu-west2).
# The container is restarted if the command fails (restartPolicy: OnFailure).
apiVersion: batch/v1
kind: Job
metadata:
  name: cluster-sql-init
  labels:
    app: cockroachdb
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: cluster-sql-init
          image: cockroachdb/cockroach:latest
          imagePullPolicy: IfNotPresent
          command:
            - /cockroach/cockroach
            - sql
            - '--insecure'
            - '--url'
            - 'postgresql://roach-seattle-1.cockroachdb:26257/defaultdb?sslmode=disable'
            - '-e'
            # Values per row: locality key, locality value, then two
            # coordinates (presumably latitude, longitude - confirm against
            # the system.locations schema).
            - "UPSERT into system.locations VALUES ('region', 'us-east4', 37.478397, -76.453077), ('region', 'us-west2', 43.804133, -120.554201), ('region', 'eu-west2', 51.5073509, -0.1277583);"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############# | |
# MINIO # | |
############# | |
---
# NodePort service exposing port 9000 of the pods labelled app=minio on node
# port 31900.
apiVersion: v1
kind: Service
metadata:
  name: minio
  labels:
    app: minio
spec:
  type: NodePort
  selector:
    app: minio
  ports:
    # UI
    - name: http
      nodePort: 31900
      port: 9000
      targetPort: 9000
---
# Single MinIO pod started as "server /data", with /data backed by the
# minio-data claim.
apiVersion: v1
kind: Pod
metadata:
  name: minio
  labels:
    app: minio
spec:
  hostname: minio
  volumes:
    - name: minio-data
      persistentVolumeClaim:
        claimName: minio-data
  containers:
    - name: minio
      image: minio/minio
      imagePullPolicy: IfNotPresent
      # Passed as arguments to the image's entrypoint.
      args:
        - server
        - /data
      ports:
        - name: http
          containerPort: 9000
      volumeMounts:
        - name: minio-data
          mountPath: /data
---
# Data volume claim referenced by the minio pod (volume "minio-data").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: minio-data
  labels:
    # Matches the label on the minio Service and Pod.
    app: minio
spec:
  # In this file the claim is mounted by exactly one pod, so ReadWriteOnce is
  # sufficient. ReadWriteMany is not offered by many default block-storage
  # provisioners and can leave the claim Pending.
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
################## | |
# PROMETHEUS # | |
################## | |
--- | |
apiVersion: v1 | |
kind: ConfigMap | |
metadata: | |
name: prometheus-config | |
data: | |
prometheus.yml: | | |
--- | |
global: | |
scrape_interval: 10s | |
evaluation_interval: 10s | |
rule_files: | |
# what to alert for | |
- /etc/prometheus/alerts.rules.yml | |
# what metrics to collect | |
- /etc/prometheus/aggregation.rules.yml | |
# whom to alert | |
alerting: | |
alertmanagers: | |
- static_configs: | |
- targets: | |
- cockroachdb:9093 | |
scrape_configs: | |
- job_name: "cockroachdb" | |
metrics_path: "/_status/vars" | |
scheme: "http" | |
tls_config: | |
insecure_skip_verify: true | |
static_configs: | |
# what hosts to monitor | |
- targets: | |
- roach-seattle-1.cockroachdb:8080 | |
- roach-seattle-2.cockroachdb:8080 | |
- roach-seattle-3.cockroachdb:8080 | |
- roach-newyork-1.cockroachdb:8080 | |
- roach-newyork-2.cockroachdb:8080 | |
- roach-newyork-3.cockroachdb:8080 | |
- roach-london-1.cockroachdb:8080 | |
- roach-london-2.cockroachdb:8080 | |
- roach-london-3.cockroachdb:8080 | |
labels: | |
cluster: "crdb" | |
alerts.rules.yml: | | |
groups: | |
- name: rules/alerts.rules | |
rules: | |
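# Alert for any instance that has been unreachable for more than 1 minute (see "for: 1m" below). | |
# NOTE(review): the description text of this alert still says "15 minutes"; align it with the "for" duration. | |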
- alert: InstanceDead | |
expr: up{job="cockroachdb"} == 0 | |
for: 1m | |
annotations: | |
description: '{{ $labels.instance }} for cluster {{ $labels.cluster }} has been | |
down for more than 15 minutes.' | |
summary: Instance {{ $labels.instance }} dead | |
# Alert for any instance that is not ready for a while. | |
- alert: InstanceNotReady | |
# This alert applies only to Kubernetes deployments and requires that you run kube-state-metrics: https://github.com/kubernetes/kube-state-metrics | |
expr: kube_statefulset_status_replicas_ready{statefulset="cockroachdb"} != kube_statefulset_status_replicas{statefulset="cockroachdb"} | |
for: 45m | |
annotations: | |
description: 'there has been an unready replica for cluster {{ $labels.cluster }} | |
for more than 15 minutes.' | |
summary: Instance not ready | |
# Alert on instance restarts. | |
- alert: InstanceRestart | |
expr: resets(sys_uptime{job="cockroachdb"}[24h]) > 1 | |
annotations: | |
description: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted | |
{{ $value }} time(s) in 24h' | |
summary: Instance {{ $labels.instance }} restarted | |
# Alert on flapping instances (frequent restarts). | |
- alert: InstancesFlapping | |
# Aggregated. | |
# This alert assumes that rolling restarts or rolling upgrades leave at least 3 minutes between each node being updated or restarted. | |
expr: sum by (cluster)(resets(sys_uptime{job="cockroachdb"}[5m])) > 2 | |
annotations: | |
description: 'instances in cluster {{ $labels.cluster }} restarted | |
{{ $value }} time(s) in 5m' | |
summary: Instances in {{ $labels.cluster }} flapping | |
# Alert on flapping instances (frequent restarts). | |
- alert: InstanceFlapping | |
# Un-aggregated. | |
expr: resets(sys_uptime{job="cockroachdb"}[10m]) > 1 | |
annotations: | |
description: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted | |
{{ $value }} time(s) in 10m' | |
summary: Instance {{ $labels.instance }} flapping | |
# Alert on version mismatch. | |
# This alert is intentionally loose (4 hours) to allow for rolling upgrades. | |
# This may need to be adjusted for large clusters. | |
- alert: VersionMismatch | |
expr: count by(cluster) (count_values by(tag, cluster) ("version", build_timestamp{job="cockroachdb"})) | |
> 1 | |
for: 4h | |
annotations: | |
description: Cluster {{ $labels.cluster }} running {{ $value }} different versions | |
summary: Binary version mismatch on {{ $labels.cluster }} | |
# Available capacity alerts. | |
- alert: StoreDiskLow | |
expr: capacity_available:ratio{job="cockroachdb"} < 0.15 | |
annotations: | |
summary: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value | |
}} available disk fraction | |
- alert: ClusterDiskLow | |
expr: cluster:capacity_available:ratio{job="cockroachdb"} < 0.2 | |
annotations: | |
summary: Cluster {{ $labels.cluster }} at {{ $value }} available disk fraction | |
# Unavailable ranges. | |
- alert: UnavailableRanges | |
expr: (sum by(instance, cluster) (ranges_unavailable{job="cockroachdb"})) > 0 | |
for: 10m | |
annotations: | |
summary: Instance {{ $labels.instance }} has {{ $value }} unavailable ranges | |
# Cockroach-measured clock offset nearing limit (by default, servers kill themselves at 400ms from the mean, so alert at 300ms) | |
- alert: ClockOffsetNearMax | |
expr: clock_offset_meannanos{job="cockroachdb"} > 300 * 1000 * 1000 | |
for: 5m | |
annotations: | |
summary: Clock on {{ $labels.instance }} as measured by cockroach is offset by {{ $value }} nanoseconds from the cluster mean | |
# Certificate expiration. Alerts are per node. | |
- alert: CACertificateExpiresSoon | |
expr: (security_certificate_expiration_ca{job="cockroachdb"} > 0) and (security_certificate_expiration_ca{job="cockroachdb"} | |
- time()) < 86400 * 366 | |
labels: | |
frequency: daily | |
annotations: | |
summary: CA certificate for {{ $labels.instance }} expires in less than a year | |
- alert: ClientCACertificateExpiresSoon | |
expr: (security_certificate_expiration_client_ca{job="cockroachdb"} > 0) and (security_certificate_expiration_client_ca{job="cockroachdb"} | |
- time()) < 86400 * 366 | |
labels: | |
frequency: daily | |
annotations: | |
summary: Client CA certificate for {{ $labels.instance }} expires in less than a year | |
- alert: UICACertificateExpiresSoon | |
expr: (security_certificate_expiration_ui_ca{job="cockroachdb"} > 0) and (security_certificate_expiration_ui_ca{job="cockroachdb"} | |
- time()) < 86400 * 366 | |
labels: | |
frequency: daily | |
annotations: | |
summary: UI CA certificate for {{ $labels.instance }} expires in less than a year | |
- alert: NodeCertificateExpiresSoon | |
expr: (security_certificate_expiration_node{job="cockroachdb"} > 0) and (security_certificate_expiration_node{job="cockroachdb"} | |
- time()) < 86400 * 183 | |
labels: | |
frequency: daily | |
annotations: | |
summary: Node certificate for {{ $labels.instance }} expires in less than six months | |
- alert: NodeClientCertificateExpiresSoon | |
expr: (security_certificate_expiration_node_client{job="cockroachdb"} > 0) and (security_certificate_expiration_node_client{job="cockroachdb"} | |
- time()) < 86400 * 183 | |
labels: | |
frequency: daily | |
annotations: | |
summary: Client certificate for {{ $labels.instance }} expires in less than six months | |
- alert: UICertificateExpiresSoon | |
expr: (security_certificate_expiration_ui{job="cockroachdb"} > 0) and (security_certificate_expiration_ui{job="cockroachdb"} | |
- time()) < 86400 * 20 | |
labels: | |
frequency: daily | |
annotations: | |
summary: UI certificate for {{ $labels.instance }} expires in less than 20 days | |
# Slow Latch/Lease/Raft requests. | |
- alert: SlowLatchRequest | |
expr: requests_slow_latch{job="cockroachdb"} > 0 | |
for: 5m | |
labels: | |
severity: testing | |
annotations: | |
summary: '{{ $value }} slow latch requests on {{ $labels.instance }}' | |
- alert: SlowLeaseRequest | |
expr: requests_slow_lease{job="cockroachdb"} > 0 | |
for: 5m | |
labels: | |
severity: testing | |
annotations: | |
summary: '{{ $value }} slow lease requests on {{ $labels.instance }}' | |
- alert: SlowRaftRequest | |
expr: requests_slow_raft{job="cockroachdb"} > 0 | |
for: 5m | |
labels: | |
severity: testing | |
annotations: | |
summary: '{{ $value }} slow raft requests on {{ $labels.instance }}' | |
# Getting close to open file descriptor limit. | |
- alert: HighOpenFDCount | |
expr: sys_fd_open{job="cockroachdb"} / sys_fd_softlimit{job="cockroachdb"} > 0.8 | |
for: 10m | |
annotations: | |
summary: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value | |
}} fraction used' | |
# Prometheus disk getting full.
# Free/size ratio of the /data mountpoint; fires after it has been below 0.2
# for ten minutes.
- alert: PrometheusDiskLow
  expr: >-
    node_filesystem_free{cluster="prometheus",job="node_exporter_prometheus",mountpoint="/data"}
    / node_filesystem_size{cluster="prometheus",job="node_exporter_prometheus",mountpoint="/data"}
    < 0.2
  for: 10m
  labels:
    severity: testing
  annotations:
    summary: "Prometheus storage is almost full: {{ $value }} fraction free"
aggregation.rules.yml: |
  # Recording rules that pre-aggregate metrics. Naming scheme:
  #   "node:X"     node-level aggregation of a per-store metric X
  #   "cluster:X"  cluster-level aggregation of a per-store or per-node metric X
  #
  # Aggregations should normally be written with the
  # "without (label1, label2, ...)" keyword so that every label except the
  # listed ones is kept.
  groups:
    - name: rules/aggregation.rules
      rules:
        # Capacity: summed over stores per node, then over instances per cluster.
        - record: node:capacity
          expr: 'sum without(store) (capacity{job="cockroachdb"})'
        - record: cluster:capacity
          expr: 'sum without(instance) (node:capacity{job="cockroachdb"})'
        - record: node:capacity_available
          expr: 'sum without(store) (capacity_available{job="cockroachdb"})'
        - record: cluster:capacity_available
          expr: 'sum without(instance) (node:capacity_available{job="cockroachdb"})'

        # Available/total capacity ratios at store, node and cluster level.
        - record: capacity_available:ratio
          expr: 'capacity_available{job="cockroachdb"} / capacity{job="cockroachdb"}'
        - record: node:capacity_available:ratio
          expr: 'node:capacity_available{job="cockroachdb"} / node:capacity{job="cockroachdb"}'
        - record: cluster:capacity_available:ratio
          expr: 'cluster:capacity_available{job="cockroachdb"} / cluster:capacity{job="cockroachdb"}'

        # Histogram rules: these are fairly expensive to compute live, so we
        # precompute a few percentiles. Each family records the 1m bucket rate
        # once and derives the 50/75/90/95/99 quantiles from it.

        # txn_durations
        - record: txn_durations_bucket:rate1m
          expr: 'rate(txn_durations_bucket{job="cockroachdb"}[1m])'
        - record: txn_durations:rate1m:quantile_50
          expr: 'histogram_quantile(0.5, txn_durations_bucket:rate1m)'
        - record: txn_durations:rate1m:quantile_75
          expr: 'histogram_quantile(0.75, txn_durations_bucket:rate1m)'
        - record: txn_durations:rate1m:quantile_90
          expr: 'histogram_quantile(0.9, txn_durations_bucket:rate1m)'
        - record: txn_durations:rate1m:quantile_95
          expr: 'histogram_quantile(0.95, txn_durations_bucket:rate1m)'
        - record: txn_durations:rate1m:quantile_99
          expr: 'histogram_quantile(0.99, txn_durations_bucket:rate1m)'

        # exec_latency
        - record: exec_latency_bucket:rate1m
          expr: 'rate(exec_latency_bucket{job="cockroachdb"}[1m])'
        - record: exec_latency:rate1m:quantile_50
          expr: 'histogram_quantile(0.5, exec_latency_bucket:rate1m)'
        - record: exec_latency:rate1m:quantile_75
          expr: 'histogram_quantile(0.75, exec_latency_bucket:rate1m)'
        - record: exec_latency:rate1m:quantile_90
          expr: 'histogram_quantile(0.9, exec_latency_bucket:rate1m)'
        - record: exec_latency:rate1m:quantile_95
          expr: 'histogram_quantile(0.95, exec_latency_bucket:rate1m)'
        - record: exec_latency:rate1m:quantile_99
          expr: 'histogram_quantile(0.99, exec_latency_bucket:rate1m)'

        # round_trip_latency
        - record: round_trip_latency_bucket:rate1m
          expr: 'rate(round_trip_latency_bucket{job="cockroachdb"}[1m])'
        - record: round_trip_latency:rate1m:quantile_50
          expr: 'histogram_quantile(0.5, round_trip_latency_bucket:rate1m)'
        - record: round_trip_latency:rate1m:quantile_75
          expr: 'histogram_quantile(0.75, round_trip_latency_bucket:rate1m)'
        - record: round_trip_latency:rate1m:quantile_90
          expr: 'histogram_quantile(0.9, round_trip_latency_bucket:rate1m)'
        - record: round_trip_latency:rate1m:quantile_95
          expr: 'histogram_quantile(0.95, round_trip_latency_bucket:rate1m)'
        - record: round_trip_latency:rate1m:quantile_99
          expr: 'histogram_quantile(0.99, round_trip_latency_bucket:rate1m)'

        # sql_exec_latency
        - record: sql_exec_latency_bucket:rate1m
          expr: 'rate(sql_exec_latency_bucket{job="cockroachdb"}[1m])'
        - record: sql_exec_latency:rate1m:quantile_50
          expr: 'histogram_quantile(0.5, sql_exec_latency_bucket:rate1m)'
        - record: sql_exec_latency:rate1m:quantile_75
          expr: 'histogram_quantile(0.75, sql_exec_latency_bucket:rate1m)'
        - record: sql_exec_latency:rate1m:quantile_90
          expr: 'histogram_quantile(0.9, sql_exec_latency_bucket:rate1m)'
        - record: sql_exec_latency:rate1m:quantile_95
          expr: 'histogram_quantile(0.95, sql_exec_latency_bucket:rate1m)'
        - record: sql_exec_latency:rate1m:quantile_99
          expr: 'histogram_quantile(0.99, sql_exec_latency_bucket:rate1m)'

        # raft_process_logcommit_latency
        - record: raft_process_logcommit_latency_bucket:rate1m
          expr: 'rate(raft_process_logcommit_latency_bucket{job="cockroachdb"}[1m])'
        - record: raft_process_logcommit_latency:rate1m:quantile_50
          expr: 'histogram_quantile(0.5, raft_process_logcommit_latency_bucket:rate1m)'
        - record: raft_process_logcommit_latency:rate1m:quantile_75
          expr: 'histogram_quantile(0.75, raft_process_logcommit_latency_bucket:rate1m)'
        - record: raft_process_logcommit_latency:rate1m:quantile_90
          expr: 'histogram_quantile(0.9, raft_process_logcommit_latency_bucket:rate1m)'
        - record: raft_process_logcommit_latency:rate1m:quantile_95
          expr: 'histogram_quantile(0.95, raft_process_logcommit_latency_bucket:rate1m)'
        - record: raft_process_logcommit_latency:rate1m:quantile_99
          expr: 'histogram_quantile(0.99, raft_process_logcommit_latency_bucket:rate1m)'

        # raft_process_commandcommit_latency
        - record: raft_process_commandcommit_latency_bucket:rate1m
          expr: 'rate(raft_process_commandcommit_latency_bucket{job="cockroachdb"}[1m])'
        - record: raft_process_commandcommit_latency:rate1m:quantile_50
          expr: 'histogram_quantile(0.5, raft_process_commandcommit_latency_bucket:rate1m)'
        - record: raft_process_commandcommit_latency:rate1m:quantile_75
          expr: 'histogram_quantile(0.75, raft_process_commandcommit_latency_bucket:rate1m)'
        - record: raft_process_commandcommit_latency:rate1m:quantile_90
          expr: 'histogram_quantile(0.9, raft_process_commandcommit_latency_bucket:rate1m)'
        - record: raft_process_commandcommit_latency:rate1m:quantile_95
          expr: 'histogram_quantile(0.95, raft_process_commandcommit_latency_bucket:rate1m)'
        - record: raft_process_commandcommit_latency:rate1m:quantile_99
          expr: 'histogram_quantile(0.99, raft_process_commandcommit_latency_bucket:rate1m)'
---
# NodePort Service exposing the Prometheus UI on node port 31990.
apiVersion: v1
kind: Service
metadata:
  name: prom
  labels:
    app: prom
spec:
  type: NodePort
  ports:
    # UI
    - name: http
      port: 9090
      targetPort: 9090
      nodePort: 31990
  selector:
    # `app: cockroachdb` on its own is shared with other Pods in this file
    # (e.g. alertmgr), which do not listen on 9090. The extra label pins the
    # Service to the prom Pod only.
    app: cockroachdb
    app.kubernetes.io/name: prom
---
# Prometheus Pod. Its configuration and both rule files are mounted from the
# `prometheus-config` ConfigMap, one file per subPath mount.
apiVersion: v1
kind: Pod
metadata:
  name: prom
  labels:
    # Kept so anything that already selects on `app: cockroachdb` still
    # matches this Pod.
    app: cockroachdb
    # Unique label used by the prom Service selector.
    app.kubernetes.io/name: prom
spec:
  hostname: prom
  # subdomain: cockroachdb
  containers:
    - name: prom
      image: prom/prometheus
      imagePullPolicy: IfNotPresent
      ports:
        # Must match the Service's targetPort (9090); the previous value
        # (9000) did not correspond to any port used elsewhere in this file.
        - containerPort: 9090
          name: http
      volumeMounts:
        - name: prometheus-config
          mountPath: /etc/prometheus/prometheus.yml
          subPath: prometheus.yml
        - name: prometheus-config
          mountPath: /etc/prometheus/aggregation.rules.yml
          subPath: aggregation.rules.yml
        - name: prometheus-config
          mountPath: /etc/prometheus/alerts.rules.yml
          subPath: alerts.rules.yml
  volumes:
    - name: prometheus-config
      configMap:
        name: prometheus-config
################ | |
# ALERTMANAGER # | |
################ | |
---
# NodePort Service exposing the Alertmanager UI on node port 31993.
apiVersion: v1
kind: Service
metadata:
  name: alertmgr
  labels:
    app: alertmgr
spec:
  type: NodePort
  ports:
    # UI
    - name: http
      port: 9093
      targetPort: 9093
      nodePort: 31993
  selector:
    # `app: cockroachdb` on its own is shared with other Pods in this file
    # (e.g. prom), which do not listen on 9093. The extra label pins the
    # Service to the alertmgr Pod only.
    app: cockroachdb
    app.kubernetes.io/name: alertmgr
---
# Alertmanager Pod, run with the image's built-in configuration (no volumes
# are mounted here).
apiVersion: v1
kind: Pod
metadata:
  name: alertmgr
  labels:
    # Kept so anything that already selects on `app: cockroachdb` still
    # matches this Pod.
    app: cockroachdb
    # Unique label used by the alertmgr Service selector.
    app.kubernetes.io/name: alertmgr
spec:
  hostname: alertmgr
  # subdomain: cockroachdb
  containers:
    - name: alertmgr
      # NOTE(review): `:latest` combined with `IfNotPresent` means a node
      # keeps whatever image it pulled first; consider pinning a version.
      image: quay.io/prometheus/alertmanager:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 9093
          name: http
########### | |
# GRAFANA # | |
########### | |
---
# NodePort Service for the Grafana UI: port 3000 in-cluster, node port 32000
# from outside. Routes to Pods labelled `app: grafana`.
kind: Service
apiVersion: v1
metadata:
  labels:
    app: grafana
  name: grafana
spec:
  selector:
    app: grafana
  type: NodePort
  ports:
    - name: http  # UI
      nodePort: 32000
      targetPort: 3000
      port: 3000
---
# Single-container Grafana Pod, labelled `app: grafana` and exposing
# container port 3000 under the name `http`.
kind: Pod
apiVersion: v1
metadata:
  labels:
    app: grafana
  name: grafana
spec:
  hostname: grafana
  containers:
    - image: grafana/grafana
      imagePullPolicy: IfNotPresent
      name: grafana
      ports:
        - name: http
          containerPort: 3000
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment