Last active
March 26, 2024 20:43
-
-
Save langburd/47eb88420f6b382c576c0dfa4e3bd8cf to your computer and use it in GitHub Desktop.
Kubernetes service to expose deployed image tag as Prometheus metrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Kubernetes service to expose deployed image tag as Prometheus metrics.""" | |
import os | |
from flask import Flask, Response | |
from kubernetes import client, config | |
from prometheus_client import CollectorRegistry, Gauge, generate_latest | |
app = Flask(__name__)

# Load Kubernetes configuration based on environment: a kubeconfig file when
# KUBECONFIG is set, otherwise the in-cluster configuration.
if os.getenv("KUBECONFIG"):
    config.load_kube_config()
else:
    config.load_incluster_config()

# Shared apps/v1 client, used below to list Deployments and ReplicaSets.
v1 = client.AppsV1Api()

# NAMESPACES is a comma-separated list of namespaces to scan. Whitespace
# around each entry is stripped and blank entries are dropped, so values such
# as "default, staging" or "default," do not yield invalid namespace names.
namespaces_env = os.getenv("NAMESPACES", "default")
namespaces = [
    entry.strip() for entry in namespaces_env.split(",") if entry.strip()
]

# DEPLOYMENTS_TO_SKIP is a comma-separated list of deployment names that are
# left out of the exported metrics; it is normalized the same way.
deployments_to_skip_env = os.getenv("DEPLOYMENTS_TO_SKIP", "komodor-agent")
deployments_to_skip = [
    entry.strip()
    for entry in deployments_to_skip_env.split(",")
    if entry.strip()
]
@app.route("/metrics")
def metrics():
    """Render the image-tag gauge for every watched deployment.

    A new registry is built on each request. Every deployment found in the
    configured namespaces contributes one ``company_image_tag`` sample with
    value 1, labelled with its namespace, name, status and image tag.
    Deployments named in ``deployments_to_skip`` and deployments reported as
    ``scaled_down`` are left out.

    Returns:
        A ``text/plain`` response holding the serialized registry.
    """
    scrape_registry = CollectorRegistry()
    image_tag_gauge = Gauge(
        "company_image_tag",
        "Latest image tag of Company's deployment",
        ["namespace", "deployment", "status", "image_tag"],
        registry=scrape_registry,
    )

    for ns in namespaces:
        deployment_list = v1.list_namespaced_deployment(ns)
        for item in deployment_list.items:
            name = item.metadata.name

            # Explicitly excluded deployments are never inspected.
            if name in deployments_to_skip:
                continue

            state, tag = get_deployment_status_and_image_tag(item)

            # Scaled-down deployments do not get a sample at all.
            if state == "scaled_down":
                continue

            image_tag_gauge.labels(ns, name, state, tag).set(1)

    payload = generate_latest(scrape_registry)
    return Response(payload, mimetype="text/plain")
def _replicaset_revision(replicaset):
    """Return the deployment revision number of a ReplicaSet (0 if unset)."""
    # annotations may be None when the object carries no annotations at all.
    annotations = replicaset.metadata.annotations or {}
    return int(annotations.get("deployment.kubernetes.io/revision", 0))


def get_deployment_status_and_image_tag(deployment):
    """Return the status and latest image tag of a deployment.

    All ReplicaSets in the deployment's namespace are listed and narrowed to
    those that list the deployment's UID among their owner references and
    that currently report a non-zero replica count.

    Args:
        deployment: Deployment object; ``metadata.uid`` and
            ``metadata.namespace`` are read.

    Returns:
        A ``(status, image_tag)`` tuple:

        * ``("scaled_down", None)`` when no owned ReplicaSet has replicas;
        * ``("red", tag)`` when more than one owned ReplicaSet has replicas;
        * ``("green", tag)`` when exactly one owned ReplicaSet has replicas.

        ``tag`` is taken from the running ReplicaSet with the highest
        ``deployment.kubernetes.io/revision`` annotation.
    """
    deployment_uid = deployment.metadata.uid

    # Get all the ReplicaSets in the deployment's namespace
    all_replicasets_in_ns = v1.list_namespaced_replica_set(
        deployment.metadata.namespace
    )

    # Keep the ReplicaSets that belong to the deployment and are running.
    # owner_references may be None for ReplicaSets that have no owner, so it
    # is replaced by an empty list instead of failing the whole scrape.
    running_replicasets = [
        replicaset
        for replicaset in all_replicasets_in_ns.items
        if any(
            owner.uid == deployment_uid
            for owner in (replicaset.metadata.owner_references or [])
        )
        and replicaset.status.replicas
        and replicaset.status.replicas > 0
    ]

    # If there are no running ReplicaSets, the deployment is scaled down
    if not running_replicasets:
        return "scaled_down", None

    # The ReplicaSet with the highest revision is the latest one
    latest_replicaset = max(running_replicasets, key=_replicaset_revision)
    latest_image_tag = get_latest_image_tag(latest_replicaset)

    # More than one running ReplicaSet is reported as not healthy ("red");
    # exactly one is reported as healthy ("green").
    if len(running_replicasets) > 1:
        return "red", latest_image_tag
    return "green", latest_image_tag
def get_latest_image_tag(replicaset):
    """Return the image tag of the first container of a ReplicaSet.

    The image reference is parsed as ``[registry[:port]/]name[:tag][@digest]``
    so that a registry port is never mistaken for the tag.

    Args:
        replicaset: ReplicaSet object; ``spec.template.spec.containers`` is
            read.

    Returns:
        * ``None`` when there are no containers or the first container has
          no image set;
        * the tag when the reference carries one;
        * the digest value (the part after the algorithm prefix) when the
          reference is pinned by digest only;
        * ``"latest"`` when the reference has neither tag nor digest.
    """
    containers = replicaset.spec.template.spec.containers
    if not containers:
        return None

    # Only the first container is considered.
    image = containers[0].image
    if not image:
        return None

    # Split off an optional "@algorithm:hex" digest suffix first.
    reference, _, digest = image.partition("@")

    # Look for the tag only after the last "/", otherwise the port in
    # "registry:5000/app" would be returned as the tag.
    last_segment = reference.rpartition("/")[2]
    _, separator, image_tag = last_segment.rpartition(":")
    if separator:
        return image_tag

    if digest:
        return digest.rpartition(":")[2]

    # An untagged reference resolves to the "latest" tag.
    return "latest"
@app.route("/healthcheck")
def healthcheck():
    """Health check endpoint: always answers ``OK`` with HTTP status 200."""
    ok_response = Response("OK", status=200)
    return ok_response
if __name__ == "__main__":
    # When the module is executed directly, start Flask's built-in server
    # bound to 0.0.0.0 on port 8080.
    listen_options = {"host": "0.0.0.0", "port": 8080}
    app.run(**listen_options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment