Skip to content

Instantly share code, notes, and snippets.

@gullitmiranda
Created February 22, 2022 23:19
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gullitmiranda/fd585f2a73b8ee62e465d314703e696f to your computer and use it in GitHub Desktop.
Save gullitmiranda/fd585f2a73b8ee62e465d314703e696f to your computer and use it in GitHub Desktop.
Elixir + k8s startup/liveness/readiness probes
spec:
template:
spec:
containers:
- name: {{ .Chart.Name }}
# ...
#
## Application health checks
# NOTE(review): the nesting indentation is missing in this copy of the manifest;
# restore it before applying.
#
# All three probes below are HTTP GET checks against the port named `http`.
#
# Meaning of the probe settings used below:
# initialDelaySeconds: Number of seconds after the container has started before startup, liveness or readiness probes are initiated.
# periodSeconds: How often (in seconds) to perform the probe. Defaults to 10 seconds. The minimum value is 1.
# timeoutSeconds: Number of seconds after which the probe times out. Defaults to 1 second. The minimum value is 1.
# successThreshold: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness. The minimum value is 1.
# failureThreshold: When a Pod starts and the probe fails, Kubernetes will try failureThreshold times before giving up. Giving up in case of liveness probe means restarting the Pod. In case of readiness probe, the Pod will be marked Unready. Defaults to 3. The minimum value is 1.
#
# Startup probe: GET /health/startup; starts after 15s, then runs every 3s with a 10s timeout.
startupProbe:
httpGet:
path: /health/startup
port: http
initialDelaySeconds: 15
periodSeconds: 3
timeoutSeconds: 10
successThreshold: 1
failureThreshold: 10
# Liveness probe: GET /health/liveness; starts after 5s, then runs every 5s with a 10s timeout.
livenessProbe:
httpGet:
path: /health/liveness
port: http
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 10
successThreshold: 1
failureThreshold: 10
# Readiness probe: GET /health/readiness; starts after 5s, then runs every 5s with a 10s timeout.
readinessProbe:
httpGet:
path: /health/readiness
port: http
initialDelaySeconds: 5
timeoutSeconds: 10
periodSeconds: 5
successThreshold: 1
failureThreshold: 10
defmodule MyApp.Health do
  @moduledoc """
  Check various health attributes of the application.

  Each public function answers one question and returns a boolean:

    * `has_started?/0` - have all startup requirements been met?
    * `is_alive?/0` - do all configured Ecto repos answer their health check?
    * `is_ready?/0` - is the application outside of maintenance mode?
  """

  # Upper bound (in ms) for a single repo check. Matches the default timeout of
  # `Task.await_many/2`, so the overall time budget of the check is unchanged.
  @repo_check_timeout 5_000

  @doc """
  Checks if required services are loaded and startup tasks have completed.

  Returns `true` only when every entry of the check list is truthy. Currently
  the only active check is `is_alive?/0`; the commented-out entries show where
  additional startup conditions can be added.
  """
  @spec has_started?() :: boolean()
  def has_started? do
    [
      is_alive?()
      # # Startup tasks have been completed
      # MyApp.CacheHelper.finished?(),
      # # An external service has connected
      # MyApp.SomeService.status() == :connected
    ]
    |> Enum.all?()
  end

  @doc """
  Checks if the app is alive and working.

  Calls `is_alive?/0` on every repo listed under the `:ecto_repos` key of the
  `:my_app` environment, concurrently, and returns `true` when all of them
  return a truthy value.

  A missing `:ecto_repos` entry is treated as "no repos to check" (`true`).
  A repo check that does not finish within #{@repo_check_timeout} ms is killed
  and counted as a failure instead of crashing the caller.
  """
  @spec is_alive?() :: boolean()
  def is_alive? do
    # List.wrap/1 turns a missing (nil) configuration into [] instead of
    # letting the enumeration below raise Protocol.UndefinedError.
    repos = List.wrap(Application.get_env(:my_app, :ecto_repos))

    repos
    |> Task.async_stream(fn repo -> repo.is_alive?() end,
      # Run every repo check at the same time, as one task per repo.
      max_concurrency: max(length(repos), 1),
      timeout: @repo_check_timeout,
      # A hung check yields {:exit, :timeout} rather than exiting the caller.
      on_timeout: :kill_task,
      ordered: false
    )
    |> Enum.all?(fn
      {:ok, result} -> result
      {:exit, _reason} -> false
    end)
  end

  @doc """
  Checks if the app should be serving public traffic.

  Returns `false` while the `:maintenance_mode` key of the `:my_app`
  environment is truthy, and `true` otherwise (including when it is unset).
  """
  @spec is_ready?() :: boolean()
  def is_ready? do
    !Application.get_env(:my_app, :maintenance_mode)
  end
end
defmodule MyApp.Repo do
  @moduledoc """
  Ecto repository for `:my_app` (Postgres adapter), with a health-check
  predicate and a key-filtered lookup helper.
  """

  use Ecto.Repo,
    otp_app: :my_app,
    adapter: Ecto.Adapters.Postgres

  # ...

  @doc """
  Checks whether the database answers a trivial query.

  Runs `SELECT 1` against this repo and returns `true` when it succeeds, or
  `false` when the query raises.
  """
  @spec is_alive?() :: boolean()
  def is_alive? do
    Ecto.Adapters.SQL.query!(__MODULE__, "SELECT 1")
    # The query result itself is irrelevant; a predicate should answer with a
    # boolean rather than leak the result struct.
    true
  rescue
    # Deliberately broad: for a health check, any exception raised while
    # querying means "not alive".
    _e -> false
  end

  @doc """
  Fetches a single record with `get_by/3`, using only the entries of `attrs`
  whose key is listed in `keys` as the lookup clauses.

  `attrs` must be an enumerable of `{key, value}` pairs (for example a map or
  a keyword list). `opts` is passed through to `get_by/3` unchanged.
  """
  def get_by_keys(queryable, attrs, keys, opts \\ []) do
    # NOTE(review): when none of `keys` is present in `attrs` the clause list
    # is empty - confirm callers never rely on that case.
    get_by(queryable, filter_clauses_by_keys(attrs, keys), opts)
  end

  # Converts `attrs` to a list of pairs and keeps only those whose key is in `keys`.
  defp filter_clauses_by_keys(attrs, keys) do
    attrs
    |> Enum.into([])
    |> Keyword.take(keys)
  end
end
defmodule MyAppWeb.Endpoint do
  # The OTP app must be the one that holds this endpoint's configuration;
  # the rest of the application uses :my_app.
  use Phoenix.Endpoint, otp_app: :my_app
  use Absinthe.Phoenix.Endpoint
  use Sentry.Phoenix.Endpoint

  # ...

  # Answers the health-check routes (/health/startup, /health/liveness and
  # /health/readiness). See MyAppWeb.Plug.HealthProbe for more information.
  plug(MyAppWeb.Plug.HealthProbe)

  # ...
end
defmodule MyAppWeb.Plug.HealthProbe do
  @moduledoc """
  The MyAppWeb health probe.

  This plug checks the application health. It intercepts requests whose path
  is one of the three health-check paths, asks the configured checker module
  and halts the connection with:

    * `200 "OK"` when the check result is truthy
    * `503 "SERVICE UNAVAILABLE"` when it is `false` or `nil`

  Requests for any other path are passed through untouched.

  ## Options

    * `:checker` - module implementing `has_started?/0`, `is_alive?/0` and
      `is_ready?/0`. Defaults to `MyApp.Health`.

  ## References

  - https://shyr.io/blog/kubernetes-health-probes-elixir
  - https://blog.lelonek.me/liveness-and-readiness-probes-for-kubernetes-in-phoenix-application-890e24d0737e
  """

  @behaviour Plug

  import Plug.Conn

  @default_checker MyApp.Health

  @path_startup "/health/startup"
  @path_liveness "/health/liveness"
  @path_readiness "/health/readiness"

  @doc """
  Merges the given options over the defaults (`checker: MyApp.Health`).
  """
  @impl true
  def init(opts) do
    Keyword.merge([checker: @default_checker], opts)
  end

  @doc """
  Answers the health-check paths and lets every other request through.
  """
  @impl true
  def call(%Plug.Conn{request_path: @path_startup} = conn, opts),
    do: health_response(conn, opts[:checker].has_started?())

  def call(%Plug.Conn{request_path: @path_liveness} = conn, opts),
    do: health_response(conn, opts[:checker].is_alive?())

  def call(%Plug.Conn{request_path: @path_readiness} = conn, opts),
    do: health_response(conn, opts[:checker].is_ready?())

  def call(conn, _opts), do: conn

  # Respond according to the health check result. Truthiness is used instead of
  # matching on literal true/false so that a checker returning nil or another
  # non-boolean value still produces a probe response rather than a
  # FunctionClauseError.
  defp health_response(conn, healthy?) do
    {status, body} =
      if healthy?, do: {200, "OK"}, else: {503, "SERVICE UNAVAILABLE"}

    conn
    |> send_resp(status, body)
    |> halt()
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment