-
-
Save realgenekim/17f9a7ae48aaf2e03df3cc80326a5094 to your computer and use it in GitHub Desktop.
A monstrously bad function, captured before I rewrote it. The rewrite was inspired by @christoph-neumann and @justone and their Functional Design in Clojure podcast!!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(>defn interpret-photo-from-client
  "Fetch the photo identified by `uuid`, base64-encode it, and ask a vision
  model to interpret it.

  Arguments:
    db     - XTDB node (validated by the gspec below).
    uuid   - photo id.
    prompt - prompt text; the literal string 'default' loads the prompt from
             resources-openai/images/podcast-screenshot.txt.
    opts   - trailing keyword options:
               :model  - :gpt-4-vision-preview selects the two-step gpt4v
                         path; any other value (including none) uses the
                         `ol` (llava) path.
               :async? - only consulted on the llava path; defaults to false.

  Returns {:url <resized photo url>, :summary <model result>}.

  Side effects: logs at warn level, writes /tmp/decoded.jpg and /tmp/summary,
  and (re)defines the debug vars SUMMMARY, UUID, RECORD and URL."
  [db uuid prompt & {:keys [async? model]
                     :or   {async? false} :as opts}]
  [(? #(instance? xtdb.node.XtdbNode %)) uuid? string? (s/* (s/or :keyword keyword? :bool boolean?)) => map?]
  (log/warn :prompt-photo :model model :uuid uuid :async? async?)
  (let [record     (xtp/photo-xtdb-fresh-url-uuid db uuid)
        url        (-> record
                       :photo/url
                       vu/xform-url-size-big)
        b64        (-> url
                       (ol/url->stream)
                       (ol/stream-to-base64))
        prompt     (if (= prompt "default")
                     (slurp "resources-openai/images/podcast-screenshot.txt")
                     prompt)
        _          (log/warn :prompt-photo :prompt prompt)
        _          (log/warn :prompt-photo :async? async? :uuid uuid :url url)
        begin-ms   (System/currentTimeMillis)
        ; must return a map
        summary    (time
                     (if (= model :gpt-4-vision-preview)
                       ; gpt4v: first describe the photo, then interpret that description
                       (let [first-pass (gpt4v/prompt-photo b64 prompt)]
                         (log/warn :prompt-photo :first-retval first-pass)
                         (gpt4v/interpret-prompt-photo {:summary (:summary first-pass)}))
                       ; llava
                       (if async?
                         (ol/interpret-photo-async! b64 {:prompt prompt
                                                         :json?  true})
                         (ol/interpret-photo-sync! b64 {:prompt prompt}))))
        ; Measured immediately after the model call so the figure does not
        ; include the debug file writes below.
        elapsed-ms (- (System/currentTimeMillis) begin-ms)
        retval     {:url     url
                    :summary summary}]
    ; REPL debugging aids: these redefine namespace-level vars on every call.
    (def SUMMMARY summary)
    (def UUID uuid)
    (def RECORD record)
    (def URL url)
    (ol/write-decoded-base64-to-file b64 "/tmp/decoded.jpg")
    (spit "/tmp/summary" summary)
    ; Persisting the result is currently disabled:
    ;(create-photo-summary-record-and-attach-to-parent! db uuid summary elapsed-ms)
    (log/warn :prompt-photo :elapsed elapsed-ms)
    (log/warn :prompt-photo :retval retval)
    retval))
I could noodle with this forever:
Splitting photo-url-to-b64-string
into its two responsibilities
(defn url->b64
  "Turn a photo record into encoded image data.

  NOTE: despite the parameter name, `url` is looked up with `:photo/url`, so
  it is expected to be a map carrying that key rather than a URL string.
  The URL is passed through `vu/xform-url-size-big`, opened with
  `ol/url->stream`, and the result of `ol/stream-to-base64` is returned."
  [url]
  (let [big-url (vu/xform-url-size-big (:photo/url url))
        stream  (ol/url->stream big-url)]
    (ol/stream-to-base64 stream)))
(defn write-to-file
  "Write the decoded form of `b64` to `filename` (via
  `ol/write-decoded-base64-to-file`) and start the pipeline bag.

  Returns {:photo-b64-string b64, :local-filename filename}.
  The key is spelled `:local-filename` because that is the name
  `analyze-screenshot` destructures from the bag."
  [b64 filename]
  (ol/write-decoded-base64-to-file b64 filename)
  {:photo-b64-string b64
   :local-filename   filename})
(defn prompt-vision-llm-pass1
  "First LLM pass: send the bag's `:photo-b64-string` and `prompt` to
  `gpt4v/prompt-photo` and return the bag with the result stored at
  [:pass1 :summary]."
  [{:keys [photo-b64-string] :as bag} prompt]
  (let [first-pass (gpt4v/prompt-photo photo-b64-string prompt)]
    (merge bag {:pass1 {:summary first-pass}})))
(defn store-summary
  "Dump the `:summary` stored under `pass-kw` in `bag` to the file
  /tmp/<name of pass-kw>, then return `bag` unchanged so the call can sit
  in the middle of a threading pipeline."
  [bag pass-kw]
  (let [target  (str "/tmp/" (name pass-kw))
        summary (:summary (pass-kw bag))]
    (spit target summary)
    bag))
(defn analyze-screenshot
  "Inspect the image file named by the bag's `:local-filename`.

  Adds `:screenshot` to the bag: always `{:is-screenshot? ...}`, plus
  `:youtube-percentage` when the image is a screenshot and
  `detect-red/detect-percentage-complete` returned a non-nil value."
  [{:keys [local-filename] :as bag}]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        ; percentage detection only runs for screenshots
        pct         (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))
        details     (cond-> {:is-screenshot? screenshot?}
                      (some? pct) (assoc :youtube-percentage pct))]
    (merge bag {:screenshot details})))
(defn pass2-generate-prompt
  "Build the second-pass prompt. Combines the pass-1 summary (found at
  [:pass1 :summary :summary]) with the `:screenshot` details, hands that to
  `gpt4v/generate-prompt`, and stores the result under `:pass2-prompt`."
  [{:keys [pass1 screenshot] :as bag}]
  (let [pass1-summary (get-in pass1 [:summary :summary])
        prompt-input  (merge {:summary pass1-summary} screenshot)
        generated     (gpt4v/generate-prompt prompt-input)]
    (merge bag {:pass2-prompt generated})))
(defn pass2-summary-to-edn
  "Second LLM pass: call `gpt4v/interpret-prompt-photo` with the pass-1
  summary, the generated pass-2 prompt, and the screenshot details.

  Stores `{:summary <result> :success? <bool>}` under `:pass2`; `:success?`
  is false when the result carries a truthy [:summary :error]."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [llm-input   (merge {:summary (get-in pass1 [:summary :summary])
                            :prompt  pass2-prompt}
                           screenshot)
        interpreted (gpt4v/interpret-prompt-photo llm-input)
        error       (get-in interpreted [:summary :error])]
    (merge bag {:pass2 {:summary  interpreted
                        :success? (not error)}})))
(defn write-to-database
  "Persist the pass-2 result: passes the value at [:pass2 :summary :summary]
  to `create-photo-summary-record-and-attach-to-parent!` for `photo-id`, and
  returns whatever that call returns."
  [{:keys [pass2] :as bag} db photo-id]
  (let [final-summary (get-in pass2 [:summary :summary])
        ; timing is not tracked in this pipeline, so a placeholder 0 is stored
        elapsed-ms    0]
    (create-photo-summary-record-and-attach-to-parent! db photo-id final-summary elapsed-ms)))
;; Default vision prompt. The file is read once, when this form is evaluated
;; (i.e. at namespace load), not on each use.
(def default-prompt (slurp "resources-openai/images/podcast-screenshot.txt"))
(defn doit
  "Run the whole photo-interpretation pipeline for `photo-id`:
  fetch record -> base64 -> write local copy -> LLM pass 1 -> dump pass 1
  -> screenshot analysis -> build pass-2 prompt -> LLM pass 2 -> persist.

  Returns whatever `write-to-database` returns."
  [db photo-id]
  (-> (xtp/photo-xtdb-fresh-url-uuid db photo-id)
      (url->b64)
      ; absolute path, matching the /tmp/ location store-summary writes to;
      ; a relative \"tmp/...\" would depend on the process working directory
      (write-to-file "/tmp/decoded.jpg")
      (prompt-vision-llm-pass1 default-prompt)
      (store-summary :pass1)
      (analyze-screenshot)
      (pass2-generate-prompt)
      (pass2-summary-to-edn)
      (write-to-database db photo-id)))
You're not using the local-filename in your threading thing, so no need to add that to the bag:
(defn url->b64
  "Resolve a photo record to encoded image data.

  NOTE: `url` is read with `:photo/url`, so it is expected to be a map with
  that key, not a bare URL string. The value is resized with
  `vu/xform-url-size-big`, opened with `ol/url->stream`, and fed to
  `ol/stream-to-base64`, whose result is returned."
  [url]
  (ol/stream-to-base64
    (ol/url->stream
      (vu/xform-url-size-big
        (:photo/url url)))))
(defn write-to-file
  "Side-effecting pass-through: hands `b64` and `filename` to
  `ol/write-decoded-base64-to-file`, then returns `b64` itself so the next
  pipeline step still receives the encoded data."
  [b64 filename]
  (doto b64
    (ol/write-decoded-base64-to-file filename)))
(defn prompt-vision-llm-pass1
  "First LLM pass: send `b64` and `prompt` to `gpt4v/prompt-photo`.

  This step receives the raw encoded image rather than a bag, so it is the
  one that creates the bag: returns {:pass1 {:summary <prompt-photo result>}}."
  [b64 prompt]
  {:pass1 {:summary (gpt4v/prompt-photo b64 prompt)}})
(defn store-summary
  "Write the `:summary` found under `pass-kw` in `bag` to
  /tmp/<name of pass-kw> and hand `bag` back untouched."
  [bag pass-kw]
  (let [out-path (str "/tmp/" (name pass-kw))
        payload  (:summary (pass-kw bag))]
    (spit out-path payload)
    bag))
(defn analyze-screenshot
  "Inspect the image file at `local-filename` and record the findings in
  `bag` under `:screenshot`.

  The entry always has `:is-screenshot?`; `:youtube-percentage` is added
  only when the image is a screenshot and
  `detect-red/detect-percentage-complete` returned a non-nil value."
  [bag local-filename]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        base        {:is-screenshot? screenshot?}
        ; percentage detection is skipped for non-screenshots
        pct         (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))
        details     (if (some? pct)
                      (assoc base :youtube-percentage pct)
                      base)]
    (merge bag {:screenshot details})))
(defn pass2-generate-prompt
  "Produce the prompt for the second pass. The pass-1 summary (at
  [:pass1 :summary :summary]) is merged with the `:screenshot` details and
  given to `gpt4v/generate-prompt`; the result lands under `:pass2-prompt`."
  [{:keys [pass1 screenshot] :as bag}]
  (let [summary-text (get-in pass1 [:summary :summary])
        generated    (gpt4v/generate-prompt
                       (merge {:summary summary-text} screenshot))]
    (merge bag {:pass2-prompt generated})))
(defn pass2-summary-to-edn
  "Second LLM pass. Calls `gpt4v/interpret-prompt-photo` with the pass-1
  summary, the pass-2 prompt and the screenshot details, then stores
  `{:summary <result> :success? <bool>}` under `:pass2`. `:success?` is
  false when the result has a truthy [:summary :error]."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [request     (merge {:summary (get-in pass1 [:summary :summary])
                            :prompt  pass2-prompt}
                           screenshot)
        interpreted (gpt4v/interpret-prompt-photo request)
        ok?         (not (get-in interpreted [:summary :error]))]
    (merge bag {:pass2 {:summary  interpreted
                        :success? ok?}})))
(defn write-to-database
  "Final pipeline step: store the value at [:pass2 :summary :summary] for
  `photo-id` via `create-photo-summary-record-and-attach-to-parent!` and
  return that call's result."
  [{:keys [pass2] :as bag} db photo-id]
  (let [summary    (get-in pass2 [:summary :summary])
        ; no timing is collected here; 0 is stored as a placeholder
        elapsed-ms 0]
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary elapsed-ms)))
;; Default vision prompt. The file is read once, when this form is evaluated
;; (i.e. at namespace load), not on each use.
(def default-prompt (slurp "resources-openai/images/podcast-screenshot.txt"))
(defn doit
  "Run the whole photo-interpretation pipeline for `photo-id`:
  fetch record -> base64 -> write local copy -> LLM pass 1 -> dump pass 1
  -> screenshot analysis -> build pass-2 prompt -> LLM pass 2 -> persist.

  Returns whatever `write-to-database` returns."
  [db photo-id]
  ; One binding for the local image path so the writer and the analyzer can
  ; never disagree. Absolute, matching the /tmp/ location store-summary uses;
  ; a relative \"tmp/...\" would depend on the process working directory.
  (let [local-filename "/tmp/decoded.jpg"]
    (-> (xtp/photo-xtdb-fresh-url-uuid db photo-id)
        (url->b64)
        (write-to-file local-filename)
        (prompt-vision-llm-pass1 default-prompt)
        (store-summary :pass1)
        (analyze-screenshot local-filename)
        (pass2-generate-prompt)
        (pass2-summary-to-edn)
        (write-to-database db photo-id))))
It is now trivial to add back the timings under the various passes.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Splitting that one fn in three (almost)