Skip to content

Instantly share code, notes, and snippets.

@tggreene
Created June 14, 2022 16:28
Show Gist options
  • Save tggreene/77d0a06252e54cd75b38d04ed9ccc14e to your computer and use it in GitHub Desktop.
Save tggreene/77d0a06252e54cd75b38d04ed9ccc14e to your computer and use it in GitHub Desktop.
Retain N Checkpoints
(ns excel.api.s3
(:require [cognitect.aws.client.api :as aws]
[integrant.core :as ig]
[clojure.tools.logging :as log]))
(defmethod ig/init-key ::client
[_ _]
(create-s3-client))
;; Code below may end up being a periodic background process for limiting s3
;; objects
;; Using a retention policy is possibly undesirable as the service essentially
;; needs to be able to retrieve _some_ checkpoint even if's quite stale, a
;; better blanket policy for the long term is n most recent checkpoints
(defn build-object-list
([s3-client bucket token]
(build-object-list s3-client bucket nil token))
([s3-client bucket prefix token]
(lazy-seq
(let [response (aws/invoke s3-client
{:op :ListObjectsV2
:request (cond-> {:Bucket bucket}
token (assoc :ContinuationToken token)
prefix (assoc :Prefix prefix))})]
(if (:IsTruncated response)
(concat (:Contents response) (build-object-list s3-client bucket prefix (:NextContinuationToken response)))
(:Contents response))))))
(defn list-all-objects
([s3-client bucket]
(build-object-list s3-client bucket nil))
([s3-client bucket prefix]
(build-object-list s3-client bucket prefix nil)))
(defn delete-objects
[s3-client bucket objects]
(aws/invoke s3-client
{:op :DeleteObjects
:request
{:Bucket bucket}}))
(defn checkpoint-key
[{:keys [Key]}]
(re-find #"checkpoint-\d+-\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}-\d{2}:\d{2}" Key))
(defn delete-all-objects
[s3-client bucket objects]
(when (seq objects)
(let [[target rest] (split-at 1000 objects)
response (aws/invoke
s3-client
{:op :DeleteObjects
:request
{:Bucket bucket
:Delete
{:Objects (map #(select-keys % [:Key]) target)}}})]
(if (:Error response)
(throw (ex-info "delete-all-objects failed" response))
(log/infof "Deleted %d objects from %s" (count target) bucket))
(when (seq rest)
(delete-all-objects s3-client bucket rest)))))
(def checkpoint-retention 10)
(defn remove-stale-checkpoints
[s3-client bucket]
(let [all-objects (list-all-objects s3-client bucket)
grouped-objects (->> all-objects
(group-by checkpoint-key)
(map #(hash-map :key (key %) :objects (val %)))
(sort-by :key))
stale-objects (->> grouped-objects
(drop-last checkpoint-retention)
(mapcat :objects))]
(when (seq stale-objects)
(delete-all-objects s3-client bucket stale-objects)
(log/infof "Removed %d stale checkpoints"
(- (count grouped-objects)
checkpoint-retention)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment