---
# Service "gpu-metrics" (namespace kube-system): gives the pods labelled
# k8s-app=nvidia-gpu-device-plugin a stable in-cluster name on port 2112.
apiVersion: v1
kind: Service
metadata:
  annotations:
    # The value is a JSON document; it stays single-quoted so YAML treats it
    # as a string instead of parsing the braces as a flow mapping.
    cloud.google.com/neg: '{"ingress":true}'
  labels:
    k8s-app: nvidia-gpu-device-plugin
  name: gpu-metrics
  namespace: kube-system
spec:
  # NOTE(review): clusterIP/clusterIPs look like values exported from a live
  # object. They are kept as-is here; confirm the address is valid for the
  # target cluster, or drop both fields if the cluster should allocate one.
  clusterIP: 10.91.153.164
  clusterIPs:
    - 10.91.153.164
  internalTrafficPolicy: Cluster
  ipFamilies:
    - IPv4
  ipFamilyPolicy: SingleStack
  ports:
    # Only `port` is set; protocol and targetPort are left to the API defaults.
    - port: 2112
  # Must match the pod labels of the workload that serves port 2112.
  selector:
    k8s-app: nvidia-gpu-device-plugin
# Verification: query the Service from a throwaway pod in kube-system
# (`k` is presumably a shell alias for `kubectl`).
#
#   k run -it -n kube-system --image=infoblox/dnstools --rm --restart=Never --command foo -- sh -c "curl -s gpu-metrics:2112/metrics | grep nvidia"
#
# Example output:
#
#   duty_cycle_gpu_node{accelerator_id="GPU-a49cb537-29c2-b84c-090b-f69a2b70b972",make="nvidia",model="Tesla T4"} 0
#   memory_total_gpu_node{accelerator_id="GPU-a49cb537-29c2-b84c-090b-f69a2b70b972",make="nvidia",model="Tesla T4"} 1.5843721216e+10
#   memory_used_gpu_node{accelerator_id="GPU-a49cb537-29c2-b84c-090b-f69a2b70b972",make="nvidia",model="Tesla T4"} 131072
#   pod "foo" deleted