Skip to content

Instantly share code, notes, and snippets.

@realgenekim
Last active March 4, 2024 20:05
Show Gist options
  • Save realgenekim/17f9a7ae48aaf2e03df3cc80326a5094 to your computer and use it in GitHub Desktop.
Save realgenekim/17f9a7ae48aaf2e03df3cc80326a5094 to your computer and use it in GitHub Desktop.
A monstrously bad function, shown here as it was before I rewrote it. The rewrite was inspired by @christoph-neumann and @justone in their Functional Design in Clojure podcast!!
;; Loads a photo record, downloads and base64-encodes its image, asks a vision
;; model to interpret it, and returns a map {:url ... :summary ...}.
;;
;; Besides the return value, this function logs at warn level, prints the model
;; call's wall time (via `time`), redefines several top-level vars for REPL
;; inspection, and writes /tmp/decoded.jpg and /tmp/summary.
;; NOTE(review): `>defn` is presumably Guardrails' spec-checked defn, with the
;; second vector being the argument/return spec -- confirm against the ns form.
(>defn interpret-photo-from-client
" input: db, photo/id (uuid), prompt string (can be 'default'), and options map "
[db uuid prompt & {:keys [async? model]
:or {async? false} :as opts}]
; spec vector: db must be an XtdbNode instance, then a uuid, a string, and any
; number of keyword/boolean option values; the function must return a map
[(? #(instance? xtdb.node.XtdbNode %)) uuid? string? (s/* (s/or :keyword keyword? :bool boolean?)) => map?]
(log/warn :prompt-photo :model model :uuid uuid :async? async?)
(let [
; look up the photo record for this uuid
record (xtp/photo-xtdb-fresh-url-uuid db uuid)
; take the record's :photo/url and pass it through vu/xform-url-size-big
url (-> record
:photo/url
vu/xform-url-size-big)
; open the url as a stream and base64-encode it
b64 (-> url
(ol/url->stream)
(ol/stream-to-base64))
; the literal string "default" selects the prompt stored on disk;
; this shadows the `prompt` argument for the rest of the let
prompt (if (= prompt "default")
(slurp "resources-openai/images/podcast-screenshot.txt")
prompt)
_ (log/warn :prompt-photo :prompt prompt)
_ (log/warn :prompt-photo :async? async? :uuid uuid :url url)
;summary (slurp "/tmp/summary")
; start of the elapsed-time measurement (see elapsed-ms below)
begin-ms (System/currentTimeMillis)
; must return a map
; `time` additionally prints the elapsed time of the model call
summary (time
(if (= model :gpt-4-vision-preview)
; gpt4v path: two calls -- prompt-photo first, then
; interpret-prompt-photo on the :summary of the first result
(let [retval (gpt4v/prompt-photo b64 prompt)
_ (log/warn :prompt-photo :first-retval retval)
interpreted (gpt4v/interpret-prompt-photo {:summary (-> retval :summary)})]
interpreted)
; llava
; any other model value (including nil) goes through the ol/ functions;
; only the async variant is passed :json? true
(if async?
(ol/interpret-photo-async! b64 {:prompt prompt
:json? true})
(ol/interpret-photo-sync! b64 {:prompt prompt}))))
; debugging side effects: each call redefines the top-level vars SUMMMARY,
; UUID, RECORD and URL, and overwrites /tmp/decoded.jpg and /tmp/summary
_ (do
(def SUMMMARY summary)
(def UUID uuid)
(def RECORD record)
(def URL url)
(ol/write-decoded-base64-to-file b64 "/tmp/decoded.jpg")
(spit "/tmp/summary" summary)
0)
; measured after the debugging block above, so it includes the file writes
elapsed-ms (- (System/currentTimeMillis) begin-ms)
;new-photo-record (create-photo-summary-record-and-attach-to-parent! db uuid summary elapsed-ms)]
retval {:url url
:summary summary}]
;(def NEWRECORD new-photo-record)
(log/warn :prompt-photo :elapsed elapsed-ms)
(log/warn :prompt-photo :retval retval)
retval))
@slipset
Copy link

slipset commented Mar 4, 2024

Splitting that one fn in three (almost)

(defn load-photo-url-from-db
  "Looks up the photo record for `photo-id` in `db` by delegating to
  `xtp/photo-xtdb-fresh-url-uuid`, and returns that call's result."
  [db photo-id]
  (xtp/photo-xtdb-fresh-url-uuid db photo-id))

(defn photo-url-to-b64-string
  "Downloads the photo referenced by `photo-url` and base64-encodes it.

  `photo-url` is the photo record: its `:photo/url` value is passed through
  `vu/xform-url-size-big`, opened with `ol/url->stream` and encoded with
  `ol/stream-to-base64`. The decoded image is also written to
  /tmp/decoded.jpg.

  Returns a map with `:photo-b64-string` (the encoded image) and
  `:local-filename` (the path the decoded image was written to)."
  [photo-url]
  (let [filename "/tmp/decoded.jpg"
        b64 (-> photo-url
                :photo/url
                vu/xform-url-size-big
                (ol/url->stream)
                (ol/stream-to-base64))]
    (ol/write-decoded-base64-to-file b64 filename)
    ;; The key was previously misspelled :local-filname, so analyze-screenshot,
    ;; which destructures `local-filename`, always received nil.
    {:photo-b64-string b64
     :local-filename filename}))

(defn prompt-vision-llm-pass1
  "First model pass: sends the bag's `:photo-b64-string` together with `prompt`
  to `gpt4v/prompt-photo` and returns the bag with that result stored under
  `[:pass1 :summary]`."
  [{:keys [photo-b64-string] :as bag} prompt]
  (let [pass1-result (gpt4v/prompt-photo photo-b64-string prompt)]
    (assoc bag :pass1 {:summary pass1-result})))

(defn store-summary
  "Writes the `:summary` found under `pass-kw` in `bag` to the file
  /tmp/<name of pass-kw> and returns `bag` unchanged, so the call can sit in
  the middle of a threading pipeline."
  [bag pass-kw]
  (let [path (str "/tmp/" (name pass-kw))
        summary (:summary (pass-kw bag))]
    (spit path summary)
    bag))

(defn analyze-screenshot
  "Adds a `:screenshot` entry to `bag` describing the image at the bag's
  `:local-filename`.

  The entry always contains `:is-screenshot?` (the result of
  `screenshots/is-image-iphone-screenshot?`). `:youtube-percentage` is added
  only when the image is a screenshot and
  `detect-red/detect-percentage-complete` returns a non-nil value."
  [{:keys [local-filename] :as bag}]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        ;; the percentage is only computed for screenshots
        pct (when screenshot?
              (detect-red/detect-percentage-complete local-filename))]
    (assoc bag :screenshot
           (cond-> {:is-screenshot? screenshot?}
             (some? pct) (assoc :youtube-percentage pct)))))

(defn pass2-generate-prompt
  "Builds the second-pass prompt and stores it in `bag` under `:pass2-prompt`.

  The input handed to `gpt4v/generate-prompt` is the bag's `:screenshot` map
  merged over `{:summary <pass-1 summary>}`, where the pass-1 summary is read
  from `[:pass1 :summary :summary]`."
  [{:keys [pass1 screenshot] :as bag}]
  (let [pass1-text (get-in pass1 [:summary :summary])
        prompt-input (merge {:summary pass1-text} screenshot)]
    (assoc bag :pass2-prompt (gpt4v/generate-prompt prompt-input))))

(defn pass2-summary-to-edn
  "Second model pass: calls `gpt4v/interpret-prompt-photo` with the pass-1
  summary, the generated `:pass2-prompt` and the `:screenshot` details, then
  stores the outcome in `bag` under `:pass2`.

  `:pass2` holds `:summary` (the raw result) and `:success?`, which is false
  when the result carries a truthy value at `[:summary :error]`."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [request (merge {:summary (get-in pass1 [:summary :summary])
                        :prompt pass2-prompt}
                       screenshot)
        interpreted (gpt4v/interpret-prompt-photo request)
        error (get-in interpreted [:summary :error])]
    (assoc bag :pass2 {:summary interpreted
                       :success? (not error)})))

(defn write-to-database
  "Persists the final summary, read from `[:pass2 :summary :summary]` in the
  bag, by calling `create-photo-summary-record-and-attach-to-parent!` for
  `photo-id` in `db`. Returns whatever that call returns."
  [{:keys [pass2] :as bag} db photo-id]
  (let [final-summary (get-in pass2 [:summary :summary])
        ;; timing is not tracked in this pipeline, so a placeholder 0 is stored
        elapsed-ms 0]
    (create-photo-summary-record-and-attach-to-parent! db photo-id final-summary elapsed-ms)))

(def default-prompt
  "Prompt text, read from resources-openai/images/podcast-screenshot.txt once,
  when this form is evaluated."
  (slurp "resources-openai/images/podcast-screenshot.txt"))

(defn doit
  "Runs the whole photo pipeline for `photo-id`: load the record, encode the
  image, run model pass 1 and store its summary, analyze the screenshot,
  generate and run model pass 2, and write the result to `db`.

  Returns the result of `write-to-database`."
  [db photo-id]
  (let [photo-record (load-photo-url-from-db db photo-id)
        ;; every step below takes the bag and returns an enriched bag
        bag (-> photo-record
                photo-url-to-b64-string
                (prompt-vision-llm-pass1 default-prompt)
                (store-summary :pass1)
                analyze-screenshot
                pass2-generate-prompt
                pass2-summary-to-edn)]
    (write-to-database bag db photo-id)))

@slipset
Copy link

slipset commented Mar 4, 2024

I could noodle with this forever:
Splitting photo-url-to-b64-string into its two responsibilities

(defn url->b64
  "Returns the base64 encoding of the photo that `url` points to.

  Despite its name, `url` is looked up with `:photo/url`; that value is passed
  through `vu/xform-url-size-big`, opened with `ol/url->stream` and encoded
  with `ol/stream-to-base64`."
  [url]
  (let [big-url (vu/xform-url-size-big (:photo/url url))
        stream (ol/url->stream big-url)]
    (ol/stream-to-base64 stream)))

(defn write-to-file
  "Writes the decoded form of the base64 string `b64` to `filename` via
  `ol/write-decoded-base64-to-file` and starts the pipeline bag.

  Returns a map with `:photo-b64-string` (the untouched `b64`) and
  `:local-filename` (the path that was written)."
  [b64 filename]
  (ol/write-decoded-base64-to-file b64 filename)
  ;; The key was previously misspelled :local-filname, so analyze-screenshot,
  ;; which destructures `local-filename`, always received nil.
  {:photo-b64-string b64
   :local-filename filename})

(defn prompt-vision-llm-pass1
  "Runs the first model pass. Calls `gpt4v/prompt-photo` with the bag's
  `:photo-b64-string` and `prompt`, and returns the bag with the answer placed
  at `[:pass1 :summary]`."
  [{:keys [photo-b64-string] :as bag} prompt]
  (let [answer (gpt4v/prompt-photo photo-b64-string prompt)]
    (assoc bag :pass1 {:summary answer})))

(defn store-summary
  "Spits the `:summary` stored under `pass-kw` in `bag` to
  /tmp/<name of pass-kw>. Returns `bag` as-is so the step is transparent to
  the surrounding pipeline."
  [bag pass-kw]
  (let [target (str "/tmp/" (name pass-kw))
        content (:summary (pass-kw bag))]
    (spit target content)
    bag))

(defn analyze-screenshot
  "Inspects the image at the bag's `:local-filename` and records the findings
  under `:screenshot`.

  `:is-screenshot?` is always present. `:youtube-percentage` appears only when
  the image is a screenshot and `detect-red/detect-percentage-complete` yields
  a non-nil value."
  [{:keys [local-filename] :as bag}]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        progress (when screenshot?
                   (detect-red/detect-percentage-complete local-filename))
        details (cond-> {:is-screenshot? screenshot?}
                  (some? progress) (assoc :youtube-percentage progress))]
    (assoc bag :screenshot details)))

(defn pass2-generate-prompt
  "Generates the prompt for the second pass and adds it to `bag` as
  `:pass2-prompt`. `gpt4v/generate-prompt` receives the pass-1 summary (from
  `[:pass1 :summary :summary]`) as `:summary`, with the `:screenshot` map
  merged on top."
  [{:keys [pass1 screenshot] :as bag}]
  (let [summary-text (get-in pass1 [:summary :summary])
        generator-input (merge {:summary summary-text} screenshot)
        generated (gpt4v/generate-prompt generator-input)]
    (assoc bag :pass2-prompt generated)))

(defn pass2-summary-to-edn
  "Runs the second pass through `gpt4v/interpret-prompt-photo` and stores the
  outcome in `bag` under `:pass2` as `{:summary <result> :success? <bool>}`.

  `:success?` is the negation of whatever sits at `[:summary :error]` in the
  result."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [base {:summary (get-in pass1 [:summary :summary])
              :prompt pass2-prompt}
        interpreted (gpt4v/interpret-prompt-photo (merge base screenshot))
        ok? (not (get-in interpreted [:summary :error]))]
    (assoc bag :pass2 {:summary interpreted
                       :success? ok?})))

(defn write-to-database
  "Final pipeline step: hands the summary at `[:pass2 :summary :summary]` to
  `create-photo-summary-record-and-attach-to-parent!` for `photo-id` in `db`
  and returns that call's result."
  [{:keys [pass2] :as bag} db photo-id]
  (let [summary (get-in pass2 [:summary :summary])
        ;; timing is not measured by this pipeline; 0 is a placeholder
        elapsed-ms 0]
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary elapsed-ms)))

(def default-prompt
  "Contents of resources-openai/images/podcast-screenshot.txt, slurped once
  when this form is evaluated."
  (slurp "resources-openai/images/podcast-screenshot.txt"))

(defn doit
  "Runs the whole photo pipeline for `photo-id`: load the record, encode the
  image, write the decoded image to disk, run model pass 1 and store its
  summary, analyze the screenshot, generate and run model pass 2, and write
  the result to `db`.

  Returns the result of `write-to-database`."
  [db photo-id]
  (-> (xtp/photo-xtdb-fresh-url-uuid db photo-id)
      (url->b64)
      ;; Absolute path, as in the pre-refactoring code. The relative
      ;; "tmp/decoded.jpg" depended on a tmp/ directory existing under the
      ;; process's working directory.
      (write-to-file "/tmp/decoded.jpg")
      (prompt-vision-llm-pass1 default-prompt)
      (store-summary :pass1)
      (analyze-screenshot)
      (pass2-generate-prompt)
      (pass2-summary-to-edn)
      (write-to-database db photo-id)))

@slipset
Copy link

slipset commented Mar 4, 2024

You're not using the local-filename in your threading thing, so no need to add that to the bag:

(defn url->b64
  "Fetches and base64-encodes the photo described by `url`.

  The argument is read with `:photo/url`; the resulting value goes through
  `vu/xform-url-size-big`, then `ol/url->stream`, then `ol/stream-to-base64`,
  whose result is returned."
  [url]
  (let [resized-url (vu/xform-url-size-big (:photo/url url))
        in (ol/url->stream resized-url)]
    (ol/stream-to-base64 in)))

(defn write-to-file
  "Writes the decoded form of the base64 string `b64` to `filename` via
  `ol/write-decoded-base64-to-file`, then returns `b64` itself so the call can
  be threaded."
  [b64 filename]
  ;; doto performs the write for its side effect and evaluates to b64
  (doto b64
    (ol/write-decoded-base64-to-file filename)))

(defn prompt-vision-llm-pass1
  "First model pass and start of the pipeline bag.

  Sends the base64 image `b64` and `prompt` to `gpt4v/prompt-photo` and
  returns a new map with the result at `[:pass1 :summary]`."
  [b64 prompt]
  ;; This function now receives the raw base64 string, so there is no bag to
  ;; merge into; the previous body referenced an unbound `bag` symbol and did
  ;; not compile. The bag is created here instead.
  {:pass1 {:summary (gpt4v/prompt-photo b64 prompt)}})

(defn store-summary
  "Side-effecting pass-through step: writes the `:summary` under `pass-kw` in
  `bag` to /tmp/<name of pass-kw>, then returns `bag` untouched."
  [bag pass-kw]
  (let [out-file (str "/tmp/" (name pass-kw))
        pass-summary (:summary (pass-kw bag))]
    (spit out-file pass-summary)
    bag))

(defn analyze-screenshot
  "Inspects the image file at `local-filename` and returns `bag` with a
  `:screenshot` entry.

  `:is-screenshot?` is always included. `:youtube-percentage` is included only
  when the image is a screenshot and `detect-red/detect-percentage-complete`
  gives a non-nil value."
  [bag local-filename]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        ;; only screenshots are scanned for a progress value
        pct (when screenshot?
              (detect-red/detect-percentage-complete local-filename))]
    (assoc bag :screenshot
           (cond-> {:is-screenshot? screenshot?}
             (some? pct) (assoc :youtube-percentage pct)))))

(defn pass2-generate-prompt
  "Asks `gpt4v/generate-prompt` for the second-pass prompt and returns `bag`
  with it under `:pass2-prompt`. The generator input is the `:screenshot` map
  merged over `{:summary <value at [:pass1 :summary :summary]>}`."
  [{:keys [pass1 screenshot] :as bag}]
  (let [first-pass-text (get-in pass1 [:summary :summary])
        input (merge {:summary first-pass-text} screenshot)]
    (assoc bag :pass2-prompt (gpt4v/generate-prompt input))))

(defn pass2-summary-to-edn
  "Second model pass. Calls `gpt4v/interpret-prompt-photo` with the pass-1
  summary, `:pass2-prompt` and the `:screenshot` details, and returns `bag`
  with `:pass2` set to `{:summary <result> :success? <bool>}`.

  `:success?` is true unless the result has a truthy `[:summary :error]`."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [payload (merge {:summary (get-in pass1 [:summary :summary])
                        :prompt pass2-prompt}
                       screenshot)
        interpreted (gpt4v/interpret-prompt-photo payload)
        error (get-in interpreted [:summary :error])]
    (assoc bag :pass2 {:summary interpreted
                       :success? (not error)})))

(defn write-to-database
  "Stores the pipeline's final summary (the value at
  `[:pass2 :summary :summary]`) for `photo-id` in `db` through
  `create-photo-summary-record-and-attach-to-parent!`, returning its result."
  [{:keys [pass2] :as bag} db photo-id]
  (let [stored-summary (get-in pass2 [:summary :summary])
        ;; no timing is collected here, so 0 is passed as a placeholder
        elapsed-ms 0]
    (create-photo-summary-record-and-attach-to-parent! db photo-id stored-summary elapsed-ms)))

(def default-prompt
  "The default prompt, loaded from
  resources-openai/images/podcast-screenshot.txt when this form is evaluated."
  (slurp "resources-openai/images/podcast-screenshot.txt"))

(defn doit
  "Runs the whole photo pipeline for `photo-id`: load the record, encode the
  image, write the decoded image to disk, run model pass 1 and store its
  summary, analyze the written image, generate and run model pass 2, and write
  the result to `db`.

  Returns the result of `write-to-database`."
  [db photo-id]
  ;; One binding for the file that is written and later analyzed, so the two
  ;; steps cannot drift apart. The path is absolute, as in the pre-refactoring
  ;; code; the relative "tmp/decoded.jpg" depended on the working directory.
  (let [local-filename "/tmp/decoded.jpg"]
    (-> (xtp/photo-xtdb-fresh-url-uuid db photo-id)
        (url->b64)
        (write-to-file local-filename)
        (prompt-vision-llm-pass1 default-prompt)
        (store-summary :pass1)
        (analyze-screenshot local-filename)
        (pass2-generate-prompt)
        (pass2-summary-to-edn)
        (write-to-database db photo-id))))

@slipset
Copy link

slipset commented Mar 4, 2024

It is now trivial to add back the timings under the various passes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment