GKE events log monitoring (Cloud Function)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package k8seventalert | |
import ( | |
"context" | |
"encoding/base64" | |
"encoding/json" | |
"errors" | |
"fmt" | |
"os" | |
"time" | |
"github.com/slack-go/slack" | |
"google.golang.org/api/container/v1" | |
v1 "k8s.io/api/core/v1" | |
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | |
"k8s.io/client-go/kubernetes" | |
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp" | |
"k8s.io/client-go/rest" | |
"k8s.io/client-go/tools/clientcmd/api" | |
) | |
// PubSubMessage is the message value handed to ReceivePubSub.
type PubSubMessage struct {
	// Data is the raw message body. ReceivePubSub parses it as JSON into a
	// Data value. When the message itself is decoded from JSON, encoding/json
	// base64-decodes the "data" string into this byte slice.
	Data []byte `json:"data"`
}
type Data struct { | |
JsonPayload v1.Event `json:"jsonPayload"` | |
Resource Resource `json:"resource"` | |
Severity string `json:"severity"` | |
Timestamp *time.Time `json:"timestamp"` | |
} | |
type Resource struct { | |
Labels Labels `json:"labels"` | |
} | |
// Labels is the subset of resource labels this package reads.
type Labels struct {
	// ClusterName is shown in the "Cluster" field of the Slack message.
	ClusterName string `json:"cluster_name"`
}
// Identity used for every Slack message posted by this package.
// Both values are placeholders; replace them before deploying.
const (
	// botName is sent as the webhook message's Username.
	botName = "BOT_NAME"
	// botEmoji is sent as the webhook message's IconEmoji.
	botEmoji = ":BOT_EMOJI:"
)
func ReceivePubSub(ctx context.Context, m PubSubMessage) error { | |
var data Data | |
err := json.Unmarshal(m.Data, &data) | |
if err != nil { | |
return err | |
} | |
var oomKilledPods []v1.Pod | |
if data.JsonPayload.Reason == "OOMKilling" { | |
oomKilledPods, err = retrieveOOMKilledPods(ctx) | |
if err != nil { | |
return err | |
} | |
} | |
for i := 1; ; i++ { | |
if i >= 10 { | |
return errors.New("too many, something wrong") | |
} | |
channelEnv := fmt.Sprintf("SLACK_CHANNEL%d", i) | |
hookEnv := fmt.Sprintf("SLACK_HOOK%d", i) | |
channel := os.Getenv(channelEnv) | |
hook := os.Getenv(hookEnv) | |
if channel == "" || hook == "" { | |
break | |
} | |
message := buildMessage(channel, data) | |
err := slack.PostWebhook(hook, message) | |
if err != nil { | |
return err | |
} | |
if data.JsonPayload.Reason == "OOMKilling" { | |
message = buildOOMMessage(channel, oomKilledPods) | |
err := slack.PostWebhook(hook, message) | |
if err != nil { | |
return err | |
} | |
} | |
} | |
return nil | |
} | |
func retrieveOOMKilledPods(ctx context.Context) ([]v1.Pod, error) { | |
clientset, err := newClientset(ctx) | |
if err != nil { | |
return nil, err | |
} | |
podList, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) | |
if err != nil { | |
return nil, err | |
} | |
oomKilledPods := make([]v1.Pod, 0) | |
for _, pod := range podList.Items { | |
for _, status := range pod.Status.ContainerStatuses { | |
if status.State.Terminated != nil && status.State.Terminated.Reason == "OOMKilled" { | |
oomKilledPods = append(oomKilledPods, pod) | |
break | |
} | |
} | |
} | |
return oomKilledPods, nil | |
} | |
func getColor(data Data) string { | |
switch data.Severity { | |
case "DEBUG": | |
return "good" | |
case "INFO": | |
return "good" | |
case "WARNING": | |
return "warning" | |
case "ERROR": | |
return "danger" | |
default: | |
return "" | |
} | |
} | |
func buildMessage(channel string, data Data) *slack.WebhookMessage { | |
color := getColor(data) | |
return &slack.WebhookMessage{ | |
Channel: channel, | |
Username: botName, | |
IconEmoji: botEmoji, | |
Attachments: []slack.Attachment{ | |
{ | |
Title: data.JsonPayload.Reason, | |
Text: data.JsonPayload.Message, | |
Color: color, | |
Fields: []slack.AttachmentField{ | |
{ | |
Title: "Kind", | |
Value: data.JsonPayload.InvolvedObject.Kind, | |
Short: true, | |
}, | |
{ | |
Title: "Namespace", | |
Value: data.JsonPayload.InvolvedObject.Namespace, | |
Short: true, | |
}, | |
{ | |
Title: "Name", | |
Value: data.JsonPayload.InvolvedObject.Name, | |
Short: true, | |
}, | |
{ | |
Title: "Cluster", | |
Value: data.Resource.Labels.ClusterName, | |
Short: true, | |
}, | |
}, | |
Ts: json.Number(fmt.Sprint(data.Timestamp.Unix())), | |
}, | |
}, | |
} | |
} | |
func buildOOMMessage(channel string, oomKilledPods []v1.Pod) *slack.WebhookMessage { | |
if len(oomKilledPods) == 0 { | |
return &slack.WebhookMessage{ | |
Username: botName, | |
IconEmoji: botEmoji, | |
Channel: channel, | |
Text: "no OOMKilled pods found", | |
} | |
} | |
attachments := make([]slack.Attachment, len(oomKilledPods)) | |
for i, pod := range oomKilledPods { | |
attachments[i] = slack.Attachment{ | |
Text: fmt.Sprintf("%s/%s\n(startTime: %s)", pod.Namespace, pod.Name, pod.Status.StartTime.Format(time.RFC3339)), | |
} | |
} | |
return &slack.WebhookMessage{ | |
Username: botName, | |
IconEmoji: botEmoji, | |
Channel: channel, | |
Text: "OOMKilled pods:", | |
Attachments: attachments, | |
} | |
} | |
// reference: https://github.com/kubernetes/client-go/issues/424#issuecomment-718231274 | |
// He says that the `WrapTransport` must be supplied, but it actually works without it. | |
func newClientset(ctx context.Context) (*kubernetes.Clientset, error) { | |
containerService, err := container.NewService(ctx) | |
if err != nil { | |
return nil, err | |
} | |
name := fmt.Sprintf("projects/%s/locations/%s/clusters/%s", | |
os.Getenv("GCP_PROJECT"), | |
os.Getenv("FUNCTION_REGION"), | |
os.Getenv("CLUSTER_NAME"), | |
) | |
cluster, err := containerService.Projects.Locations.Clusters.Get(name).Do() | |
if err != nil { | |
return nil, err | |
} | |
caData, err := base64.StdEncoding.DecodeString(cluster.MasterAuth.ClusterCaCertificate) | |
if err != nil { | |
return nil, err | |
} | |
return kubernetes.NewForConfig(&rest.Config{ | |
Host: cluster.Endpoint, | |
AuthProvider: &api.AuthProviderConfig{ | |
Name: "gcp", | |
}, | |
TLSClientConfig: rest.TLSClientConfig{ | |
Insecure: false, | |
CAData: caData, | |
}, | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Required env vars: SLACK_CHANNEL1/SLACK_HOOK1 [, SLACK_CHANNEL2/SLACK_HOOK2, ...], GCP_PROJECT, FUNCTION_REGION and CLUSTER_NAME.
Also change botName and botEmoji as you like.