-
-
Save realgenekim/17f9a7ae48aaf2e03df3cc80326a5094 to your computer and use it in GitHub Desktop.
A monstrously bad function, shown here as it was before I rewrote it. The rewrite was inspired by @christoph-neumann and @justone in their Functional Design in Clojure podcast!!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; "Before" version: one function that loads the photo record, fetches and
;; base64-encodes the image, resolves the prompt, calls a vision model, writes
;; debug artifacts, and returns {:url ... :summary ...}.
(>defn interpret-photo-from-client | |
" input: db, photo/id (uuid), prompt string (can be 'default'), and options map " | |
[db uuid prompt & {:keys [async? model] | |
:or {async? false} :as opts}] | |
[(? #(instance? xtdb.node.XtdbNode %)) uuid? string? (s/* (s/or :keyword keyword? :bool boolean?)) => map?] | |
(log/warn :prompt-photo :model model :uuid uuid :async? async?) | |
(let [ | |
;; record lookup -> :photo/url -> big-size URL -> stream -> base64 string
record (xtp/photo-xtdb-fresh-url-uuid db uuid) | |
url (-> record | |
:photo/url | |
vu/xform-url-size-big) | |
b64 (-> url | |
(ol/url->stream) | |
(ol/stream-to-base64)) | |
;; the literal string "default" selects the prompt text stored on disk
prompt (if (= prompt "default") | |
(slurp "resources-openai/images/podcast-screenshot.txt") | |
prompt) | |
_ (log/warn :prompt-photo :prompt prompt) | |
_ (log/warn :prompt-photo :async? async? :uuid uuid :url url) | |
;summary (slurp "/tmp/summary") | |
begin-ms (System/currentTimeMillis) | |
; must return map | |
;; `time` prints the elapsed time and returns the value of the wrapped form
summary (time | |
(if (= model :gpt-4-vision-preview) | |
;; GPT-4V path: two calls, the second interprets the first call's :summary
(let [retval (gpt4v/prompt-photo b64 prompt) | |
_ (log/warn :prompt-photo :first-retval retval) | |
interpreted (gpt4v/interpret-prompt-photo {:summary (-> retval :summary)})] | |
interpreted) | |
; llava | |
;; any other model value falls through here; only the async call passes :json? true
(if async? | |
(ol/interpret-photo-async! b64 {:prompt prompt | |
:json? true}) | |
(ol/interpret-photo-sync! b64 {:prompt prompt})))) | |
;; debugging side effects: redefines namespace-level vars on every call and
;; writes the decoded image and the summary under /tmp
_ (do | |
(def SUMMMARY summary) | |
(def UUID uuid) | |
(def RECORD record) | |
(def URL url) | |
(ol/write-decoded-base64-to-file b64 "/tmp/decoded.jpg") | |
(spit "/tmp/summary" summary) | |
0) | |
;; note: measured after the debug writes above, so it includes their time too
elapsed-ms (- (System/currentTimeMillis) begin-ms) | |
;new-photo-record (create-photo-summary-record-and-attach-to-parent! db uuid summary elapsed-ms)] | |
retval {:url url | |
:summary summary}] | |
;(def NEWRECORD new-photo-record) | |
(log/warn :prompt-photo :elapsed elapsed-ms) | |
(log/warn :prompt-photo :retval retval) | |
retval)) |
(ns gene)
; input: db and photo-id (uuid)
; output: whatever xtp/photo-xtdb-fresh-url-uuid returns; callers read
;         :photo/url from it, so presumably a photo record rather than a bare url
; notes: this shouldn't live in the photo-ops namespace! because it
; shouldn't need to know about the database!
(defn load-photo-url-from-db [db photo-id]
(xtp/photo-xtdb-fresh-url-uuid db photo-id))
(defn photo-url-to-b64-string
  "Turns a photo record into a base64 string and a decoded copy on disk.

  `photo-url` is a map with a :photo/url entry. That URL is passed through
  `vu/xform-url-size-big`, opened as a stream and base64-encoded with the
  `ol` helpers. The decoded image is also written to /tmp/decoded.jpg.

  Returns {:photo-b64-string <b64> :local-filename \"/tmp/decoded.jpg\"}."
  [photo-url]
  (let [b64      (-> photo-url
                     :photo/url
                     vu/xform-url-size-big
                     (ol/url->stream)
                     (ol/stream-to-base64))
        filename "/tmp/decoded.jpg"]
    (ol/write-decoded-base64-to-file b64 filename)
    {:photo-b64-string b64
     ;; Was misspelled :local-filname; `doit` destructures :local-filename,
     ;; so the screenshot-analysis step always received nil.
     :local-filename   filename}))
(defn prompt-vision-llm-pass1
  "First pass: sends the base64 photo and a prompt to `gpt4v/prompt-photo`.

  When `pass1-prompt` is the literal string \"default\", the prompt text is
  read from resources-openai/images/podcast-screenshot.txt instead. The
  result is written to /tmp/summary and then returned."
  [pass1-prompt photo-b64-string]
  (let [use-default? (= pass1-prompt "default")
        prompt-text  (if use-default?
                       (slurp "resources-openai/images/podcast-screenshot.txt")
                       pass1-prompt)
        result       (gpt4v/prompt-photo photo-b64-string prompt-text)]
    (spit "/tmp/summary" result)
    result))
(defn analyze-screenshot
  "Inspects the image file at `local-filename`.

  Asks `screenshots/is-image-iphone-screenshot?` about the file and, only
  when that is truthy, calls `detect-red/detect-percentage-complete`.

  Returns {:is-screenshot? ...}, plus :youtube-percentage when a non-nil
  percentage was produced."
  [local-filename]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        percentage  (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))]
    (cond-> {:is-screenshot? screenshot?}
      (some? percentage) (assoc :youtube-percentage percentage))))
(defn pass2-generate-prompt
  "Builds the second-pass prompt by handing the :summary of `pass1-summary`
  and the screenshot facts to `gpt4v/generate-prompt`."
  [pass1-summary is-screenshot? youtube-percentage]
  (let [prompt-inputs {:summary            (:summary pass1-summary)
                       :is-screenshot?     is-screenshot?
                       :youtube-percentage youtube-percentage}]
    (gpt4v/generate-prompt prompt-inputs)))
(defn pass2-summary-to-edn
  "Second pass: calls `gpt4v/interpret-prompt-photo` with the pass-1 summary,
  the generated prompt and the screenshot facts.

  Returns {:pass2-summary <result> :pass2-success? <bool>}; success means the
  result has no truthy value at [:summary :error]."
  [pass1-summary pass2-prompt is-screenshot? youtube-percentage]
  (let [request {:summary            (:summary pass1-summary)
                 :prompt             pass2-prompt
                 :is-screenshot?     is-screenshot?
                 :youtube-percentage youtube-percentage}
        result  (gpt4v/interpret-prompt-photo request)
        error   (get-in result [:summary :error])]
    {:pass2-summary  result
     :pass2-success? (not error)}))
(defn write-to-database
  "Stores the :summary of `pass2-summary` for `photo-id` via
  `create-photo-summary-record-and-attach-to-parent!` and returns that
  call's result."
  [db photo-id pass2-summary]
  (let [summary (:summary pass2-summary)
        timing  0] ; don't store the timing, perhaps
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary timing)))
(defn doit
  "Runs ONE step of the photo-interpretation state machine.

  input:  a map (the \"bag\") holding :state plus everything earlier steps
          produced; :state defaults to :load-photo-from-db when absent
  output: the bag merged with this step's results and the next :state, so it
          can be fed straight back into `doit`

  Step order:
    :load-photo-from-db -> :photo-url-to-b64-string -> :prompt-vision-llm-pass1
    -> :analyze-screenshot -> :pass2-generate-prompt -> :pass2-summary-to-edn
    -> :write-to-database -> :done

  `case` has no default clause, so an unrecognized :state throws
  IllegalArgumentException."
  [{:keys [state db photo-id photo-url
           photo-b64-string local-filename
           pass1-prompt model pass1-summary
           is-screenshot? youtube-percentage
           pass2-prompt pass2-summary pass2-success?]
    :or   {state :load-photo-from-db}
    :as   bag}]
  (case state
    :load-photo-from-db
    (merge bag {:photo-url (load-photo-url-from-db db photo-id)
                :state     :photo-url-to-b64-string})

    :photo-url-to-b64-string
    (merge bag {:state        :prompt-vision-llm-pass1
                ; args for next pass
                :pass1-prompt "default"
                :model        :gpt-4-vision-preview}
           (photo-url-to-b64-string photo-url))

    :prompt-vision-llm-pass1
    ;; The original forgot to advance :state here, so the machine stayed on
    ;; this step and re-ran pass 1 on every call.
    (merge bag {:state         :analyze-screenshot
                :pass1-summary (prompt-vision-llm-pass1 pass1-prompt photo-b64-string)})

    ; input: local-filename
    ; output: is-screenshot? and, when detected, youtube-percentage
    :analyze-screenshot
    (merge bag {:state :pass2-generate-prompt} (analyze-screenshot local-filename))

    :pass2-generate-prompt
    (merge bag {:state        :pass2-summary-to-edn
                :pass2-prompt (pass2-generate-prompt pass1-summary is-screenshot? youtube-percentage)})

    :pass2-summary-to-edn
    (merge bag {:state :write-to-database}
           (pass2-summary-to-edn pass1-summary pass2-prompt is-screenshot? youtube-percentage))

    ; input: pass2-summary db photo-id
    ; output: :dbwrite-retval
    :write-to-database
    (merge bag {:state          :done
                :dbwrite-retval (write-to-database db photo-id pass2-summary)})

    :done
    (do
      (log/warn :DONE :nothing-to-do!)
      bag)))
first draft, more coming :)
Since it seems to me that you've written your doit as a state machine, but one that only allows a single path through it, I'd argue that this:
;; Thin wrapper over xtp/photo-xtdb-fresh-url-uuid. The result is passed to
;; photo-url-to-b64-string, which reads :photo/url from it.
(defn load-photo-url-from-db [db photo-id]
(xtp/photo-xtdb-fresh-url-uuid db photo-id))
(defn photo-url-to-b64-string
  "Starts the bag: base64-encodes the photo and saves a decoded copy to disk.

  `photo-url` is a map with a :photo/url entry. That URL goes through
  `vu/xform-url-size-big`, is opened as a stream and base64-encoded with the
  `ol` helpers. The decoded image is written to /tmp/decoded.jpg.

  Returns {:photo-b64-string <b64> :local-filename \"/tmp/decoded.jpg\"}."
  [photo-url]
  (let [b64      (-> photo-url
                     :photo/url
                     vu/xform-url-size-big
                     (ol/url->stream)
                     (ol/stream-to-base64))
        filename "/tmp/decoded.jpg"]
    (ol/write-decoded-base64-to-file b64 filename)
    {:photo-b64-string b64
     ;; Was misspelled :local-filname; analyze-screenshot destructures
     ;; :local-filename and therefore always saw nil.
     :local-filename   filename}))
(defn prompt-vision-llm-pass1
  "First pass: prompts `gpt4v/prompt-photo` with the bag's :photo-b64-string.

  The literal prompt \"default\" is replaced by the contents of
  resources-openai/images/podcast-screenshot.txt. The result is written to
  /tmp/summary and merged into the bag as {:pass1 {:summary <result>}}."
  [{:keys [photo-b64-string] :as bag} pass1-prompt]
  ;; I'd probs split this into three different fns
  (let [default?    (= pass1-prompt "default")
        prompt-text (if default?
                      (slurp "resources-openai/images/podcast-screenshot.txt")
                      pass1-prompt)
        result      (gpt4v/prompt-photo photo-b64-string prompt-text)
        addition    {:pass1 {:summary result}}]
    (spit "/tmp/summary" result)
    (merge bag addition)))
(defn analyze-screenshot
  "Adds :screenshot facts about the file at the bag's :local-filename.

  :screenshot always holds :is-screenshot?. It also holds :youtube-percentage
  when the file is a screenshot and `detect-red/detect-percentage-complete`
  returned a non-nil value."
  [{:keys [local-filename] :as bag}]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        percentage  (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))
        facts       (cond-> {:is-screenshot? screenshot?}
                      (some? percentage) (assoc :youtube-percentage percentage))]
    (merge bag {:screenshot facts})))
(defn pass2-generate-prompt
  "Adds :pass2-prompt to the bag, generated by `gpt4v/generate-prompt` from
  the pass-1 summary (at [:pass1 :summary :summary]) merged with the
  :screenshot facts."
  [{:keys [pass1 screenshot] :as bag}]
  (let [pass1-text (get-in pass1 [:summary :summary])
        inputs     (merge {:summary pass1-text} screenshot)
        prompt     (gpt4v/generate-prompt inputs)]
    (merge bag {:pass2-prompt prompt})))
(defn pass2-summary-to-edn
  "Second pass: calls `gpt4v/interpret-prompt-photo` with the pass-1 summary,
  :pass2-prompt and the :screenshot facts.

  Adds {:pass2 {:summary <result> :success? <bool>}} to the bag; success
  means the result has no truthy value at [:summary :error]."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [base    {:summary (get-in pass1 [:summary :summary])
                 :prompt  pass2-prompt}
        request (merge base screenshot)
        result  (gpt4v/interpret-prompt-photo request)
        error   (get-in result [:summary :error])]
    (merge bag {:pass2 {:summary  result
                        :success? (not error)}})))
(defn write-to-database
  "Final step: stores the value at [:pass2 :summary :summary] for `photo-id`
  via `create-photo-summary-record-and-attach-to-parent!` and returns that
  call's result (not the bag)."
  [{:keys [pass2] :as bag} db photo-id]
  (let [summary (get-in pass2 [:summary :summary])
        timing  0] ; don't store the timing, perhaps
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary timing)))
(defn doit
  "Runs the whole pipeline for `photo-id`: load record, encode photo, pass 1,
  screenshot analysis, pass-2 prompt, pass 2, then the database write.
  Returns the result of `write-to-database`."
  [db photo-id]
  (let [record       (load-photo-url-from-db db photo-id)
        encoded      (photo-url-to-b64-string record)
        after-pass1  (prompt-vision-llm-pass1 encoded "default")
        analyzed     (analyze-screenshot after-pass1)
        with-prompt  (pass2-generate-prompt analyzed)
        after-pass2  (pass2-summary-to-edn with-prompt)]
    (write-to-database after-pass2 db photo-id)))
expresses that more clearly.
Splitting that one fn into three (almost):
;; Thin wrapper over xtp/photo-xtdb-fresh-url-uuid. The result is passed to
;; photo-url-to-b64-string, which reads :photo/url from it.
(defn load-photo-url-from-db [db photo-id]
(xtp/photo-xtdb-fresh-url-uuid db photo-id))
(defn photo-url-to-b64-string
  "Starts the bag: base64-encodes the photo and saves a decoded copy to disk.

  `photo-url` is a map with a :photo/url entry. That URL goes through
  `vu/xform-url-size-big`, is opened as a stream and base64-encoded with the
  `ol` helpers. The decoded image is written to /tmp/decoded.jpg.

  Returns {:photo-b64-string <b64> :local-filename \"/tmp/decoded.jpg\"}."
  [photo-url]
  (let [b64      (-> photo-url
                     :photo/url
                     vu/xform-url-size-big
                     (ol/url->stream)
                     (ol/stream-to-base64))
        filename "/tmp/decoded.jpg"]
    (ol/write-decoded-base64-to-file b64 filename)
    {:photo-b64-string b64
     ;; Was misspelled :local-filname; analyze-screenshot destructures
     ;; :local-filename and therefore always saw nil.
     :local-filename   filename}))
(defn prompt-vision-llm-pass1
  "First pass: calls `gpt4v/prompt-photo` with the bag's :photo-b64-string
  and `prompt`, then merges the result in as {:pass1 {:summary <result>}}."
  [{:keys [photo-b64-string] :as bag} prompt]
  (let [result   (gpt4v/prompt-photo photo-b64-string prompt)
        addition {:pass1 {:summary result}}]
    (merge bag addition)))
(defn store-summary
  "Writes the :summary found under `pass-kw` in the bag to /tmp/<pass name>
  and returns the bag unchanged, so it can sit inside a threading pipeline."
  [bag pass-kw]
  (let [path    (str "/tmp/" (name pass-kw))
        content (:summary (pass-kw bag))]
    (spit path content)
    bag))
(defn analyze-screenshot
  "Adds :screenshot facts about the file at the bag's :local-filename.

  :screenshot always holds :is-screenshot?. It also holds :youtube-percentage
  when the file is a screenshot and `detect-red/detect-percentage-complete`
  returned a non-nil value."
  [{:keys [local-filename] :as bag}]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        percentage  (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))
        facts       (cond-> {:is-screenshot? screenshot?}
                      (some? percentage) (assoc :youtube-percentage percentage))]
    (merge bag {:screenshot facts})))
(defn pass2-generate-prompt
  "Adds :pass2-prompt to the bag, generated by `gpt4v/generate-prompt` from
  the pass-1 summary (at [:pass1 :summary :summary]) merged with the
  :screenshot facts."
  [{:keys [pass1 screenshot] :as bag}]
  (let [pass1-text (get-in pass1 [:summary :summary])
        inputs     (merge {:summary pass1-text} screenshot)
        prompt     (gpt4v/generate-prompt inputs)]
    (merge bag {:pass2-prompt prompt})))
(defn pass2-summary-to-edn
  "Second pass: calls `gpt4v/interpret-prompt-photo` with the pass-1 summary,
  :pass2-prompt and the :screenshot facts.

  Adds {:pass2 {:summary <result> :success? <bool>}} to the bag; success
  means the result has no truthy value at [:summary :error]."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [base    {:summary (get-in pass1 [:summary :summary])
                 :prompt  pass2-prompt}
        request (merge base screenshot)
        result  (gpt4v/interpret-prompt-photo request)
        error   (get-in result [:summary :error])]
    (merge bag {:pass2 {:summary  result
                        :success? (not error)}})))
(defn write-to-database
  "Final step: stores the value at [:pass2 :summary :summary] for `photo-id`
  via `create-photo-summary-record-and-attach-to-parent!` and returns that
  call's result (not the bag)."
  [{:keys [pass2] :as bag} db photo-id]
  (let [summary (get-in pass2 [:summary :summary])
        timing  0] ; don't store the timing, perhaps
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary timing)))
;; Default pass-1 prompt text. The file is slurped once, when this def is
;; evaluated (i.e. at namespace load), not on every call.
(def default-prompt (slurp "resources-openai/images/podcast-screenshot.txt"))
(defn doit
  "Runs the whole pipeline for `photo-id`: load record, encode photo, pass 1
  with `default-prompt`, store the pass-1 summary, screenshot analysis,
  pass-2 prompt, pass 2, then the database write.
  Returns the result of `write-to-database`."
  [db photo-id]
  (let [record      (load-photo-url-from-db db photo-id)
        encoded     (photo-url-to-b64-string record)
        after-pass1 (prompt-vision-llm-pass1 encoded default-prompt)
        stored      (store-summary after-pass1 :pass1)
        analyzed    (analyze-screenshot stored)
        with-prompt (pass2-generate-prompt analyzed)
        after-pass2 (pass2-summary-to-edn with-prompt)]
    (write-to-database after-pass2 db photo-id)))
I could noodle with this forever:
Splitting photo-url-to-b64-string
into its two responsibilities:
(defn url->b64
  "Returns the base64 encoding of a photo.

  Despite the name, `url` is a map with a :photo/url entry. That URL goes
  through `vu/xform-url-size-big`, is opened with `ol/url->stream` and
  encoded with `ol/stream-to-base64`."
  [url]
  (let [big-url (vu/xform-url-size-big (:photo/url url))
        stream  (ol/url->stream big-url)]
    (ol/stream-to-base64 stream)))
(defn write-to-file
  "Writes the decoded form of `b64` to `filename` via
  `ol/write-decoded-base64-to-file` and starts the bag.

  Returns {:photo-b64-string b64 :local-filename filename}."
  [b64 filename]
  (ol/write-decoded-base64-to-file b64 filename)
  {:photo-b64-string b64
   ;; Was misspelled :local-filname; analyze-screenshot destructures
   ;; :local-filename and therefore always saw nil.
   :local-filename   filename})
(defn prompt-vision-llm-pass1
  "First pass: calls `gpt4v/prompt-photo` with the bag's :photo-b64-string
  and `prompt`, then merges the result in as {:pass1 {:summary <result>}}."
  [{:keys [photo-b64-string] :as bag} prompt]
  (let [result   (gpt4v/prompt-photo photo-b64-string prompt)
        addition {:pass1 {:summary result}}]
    (merge bag addition)))
(defn store-summary
  "Writes the :summary found under `pass-kw` in the bag to /tmp/<pass name>
  and returns the bag unchanged, so it can sit inside a threading pipeline."
  [bag pass-kw]
  (let [path    (str "/tmp/" (name pass-kw))
        content (:summary (pass-kw bag))]
    (spit path content)
    bag))
(defn analyze-screenshot
  "Adds :screenshot facts about the file at the bag's :local-filename.

  :screenshot always holds :is-screenshot?. It also holds :youtube-percentage
  when the file is a screenshot and `detect-red/detect-percentage-complete`
  returned a non-nil value."
  [{:keys [local-filename] :as bag}]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        percentage  (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))
        facts       (cond-> {:is-screenshot? screenshot?}
                      (some? percentage) (assoc :youtube-percentage percentage))]
    (merge bag {:screenshot facts})))
(defn pass2-generate-prompt
  "Adds :pass2-prompt to the bag, generated by `gpt4v/generate-prompt` from
  the pass-1 summary (at [:pass1 :summary :summary]) merged with the
  :screenshot facts."
  [{:keys [pass1 screenshot] :as bag}]
  (let [pass1-text (get-in pass1 [:summary :summary])
        inputs     (merge {:summary pass1-text} screenshot)
        prompt     (gpt4v/generate-prompt inputs)]
    (merge bag {:pass2-prompt prompt})))
(defn pass2-summary-to-edn
  "Second pass: calls `gpt4v/interpret-prompt-photo` with the pass-1 summary,
  :pass2-prompt and the :screenshot facts.

  Adds {:pass2 {:summary <result> :success? <bool>}} to the bag; success
  means the result has no truthy value at [:summary :error]."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [base    {:summary (get-in pass1 [:summary :summary])
                 :prompt  pass2-prompt}
        request (merge base screenshot)
        result  (gpt4v/interpret-prompt-photo request)
        error   (get-in result [:summary :error])]
    (merge bag {:pass2 {:summary  result
                        :success? (not error)}})))
(defn write-to-database
  "Final step: stores the value at [:pass2 :summary :summary] for `photo-id`
  via `create-photo-summary-record-and-attach-to-parent!` and returns that
  call's result (not the bag)."
  [{:keys [pass2] :as bag} db photo-id]
  (let [summary (get-in pass2 [:summary :summary])
        timing  0] ; don't store the timing, perhaps
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary timing)))
;; Default pass-1 prompt text. The file is slurped once, when this def is
;; evaluated (i.e. at namespace load), not on every call.
(def default-prompt (slurp "resources-openai/images/podcast-screenshot.txt"))
(defn doit
  "Runs the whole pipeline for `photo-id`: load record, encode photo, write
  the decoded image to disk, pass 1 with `default-prompt`, store the pass-1
  summary, screenshot analysis, pass-2 prompt, pass 2, then the database
  write. Returns the result of `write-to-database`."
  [db photo-id]
  (-> (xtp/photo-xtdb-fresh-url-uuid db photo-id)
      (url->b64)
      ;; Was the relative path "tmp/decoded.jpg", which depends on the working
      ;; directory containing a tmp/ folder; every other artifact in this
      ;; pipeline lives under the absolute /tmp/.
      (write-to-file "/tmp/decoded.jpg")
      (prompt-vision-llm-pass1 default-prompt)
      (store-summary :pass1)
      (analyze-screenshot)
      (pass2-generate-prompt)
      (pass2-summary-to-edn)
      (write-to-database db photo-id)))
You're not using the local-filename in your threading thing, so no need to add that to the bag:
(defn url->b64
  "Returns the base64 encoding of a photo.

  Despite the name, `url` is a map with a :photo/url entry. That URL goes
  through `vu/xform-url-size-big`, is opened with `ol/url->stream` and
  encoded with `ol/stream-to-base64`."
  [url]
  (let [big-url (vu/xform-url-size-big (:photo/url url))
        stream  (ol/url->stream big-url)]
    (ol/stream-to-base64 stream)))
(defn write-to-file
  "Writes the decoded form of `b64` to `filename` via
  `ol/write-decoded-base64-to-file`, then returns `b64` itself so the call
  can sit inside a threading pipeline."
  [b64 filename]
  (doto b64
    (ol/write-decoded-base64-to-file filename)))
(defn prompt-vision-llm-pass1
  "First pass: calls `gpt4v/prompt-photo` with the base64 photo and `prompt`.

  This is the step where the bag is created. Returns
  {:pass1 {:summary <result>}}, which the later steps merge into."
  [b64 prompt]
  ;; The original merged into `bag`, which is not bound here (the parameter
  ;; became `b64`), so the function did not compile. There is nothing to merge
  ;; into yet, so the new map is returned directly.
  {:pass1 {:summary (gpt4v/prompt-photo b64 prompt)}})
(defn store-summary
  "Writes the :summary found under `pass-kw` in the bag to /tmp/<pass name>
  and returns the bag unchanged, so it can sit inside a threading pipeline."
  [bag pass-kw]
  (let [path    (str "/tmp/" (name pass-kw))
        content (:summary (pass-kw bag))]
    (spit path content)
    bag))
(defn analyze-screenshot
  "Adds :screenshot facts about the image file at `local-filename` to the bag.

  :screenshot always holds :is-screenshot?. It also holds :youtube-percentage
  when the file is a screenshot and `detect-red/detect-percentage-complete`
  returned a non-nil value."
  [bag local-filename]
  (let [screenshot? (screenshots/is-image-iphone-screenshot? local-filename)
        percentage  (when screenshot?
                      (detect-red/detect-percentage-complete local-filename))
        facts       (cond-> {:is-screenshot? screenshot?}
                      (some? percentage) (assoc :youtube-percentage percentage))]
    (merge bag {:screenshot facts})))
(defn pass2-generate-prompt
  "Adds :pass2-prompt to the bag, generated by `gpt4v/generate-prompt` from
  the pass-1 summary (at [:pass1 :summary :summary]) merged with the
  :screenshot facts."
  [{:keys [pass1 screenshot] :as bag}]
  (let [pass1-text (get-in pass1 [:summary :summary])
        inputs     (merge {:summary pass1-text} screenshot)
        prompt     (gpt4v/generate-prompt inputs)]
    (merge bag {:pass2-prompt prompt})))
(defn pass2-summary-to-edn
  "Second pass: calls `gpt4v/interpret-prompt-photo` with the pass-1 summary,
  :pass2-prompt and the :screenshot facts.

  Adds {:pass2 {:summary <result> :success? <bool>}} to the bag; success
  means the result has no truthy value at [:summary :error]."
  [{:keys [pass1 pass2-prompt screenshot] :as bag}]
  (let [base    {:summary (get-in pass1 [:summary :summary])
                 :prompt  pass2-prompt}
        request (merge base screenshot)
        result  (gpt4v/interpret-prompt-photo request)
        error   (get-in result [:summary :error])]
    (merge bag {:pass2 {:summary  result
                        :success? (not error)}})))
(defn write-to-database
  "Final step: stores the value at [:pass2 :summary :summary] for `photo-id`
  via `create-photo-summary-record-and-attach-to-parent!` and returns that
  call's result (not the bag)."
  [{:keys [pass2] :as bag} db photo-id]
  (let [summary (get-in pass2 [:summary :summary])
        timing  0] ; don't store the timing, perhaps
    (create-photo-summary-record-and-attach-to-parent! db photo-id summary timing)))
;; Default pass-1 prompt text. The file is slurped once, when this def is
;; evaluated (i.e. at namespace load), not on every call.
(def default-prompt (slurp "resources-openai/images/podcast-screenshot.txt"))
(defn doit
  "Runs the whole pipeline for `photo-id`: load record, encode photo, write
  the decoded image to disk, pass 1 with `default-prompt`, store the pass-1
  summary, screenshot analysis of the file on disk, pass-2 prompt, pass 2,
  then the database write. Returns the result of `write-to-database`."
  [db photo-id]
  ;; One binding for the path so the write and the analysis cannot drift apart.
  ;; It was the relative "tmp/decoded.jpg" in both places, which depends on the
  ;; working directory containing a tmp/ folder; every other artifact in this
  ;; pipeline lives under the absolute /tmp/.
  (let [local-filename "/tmp/decoded.jpg"]
    (-> (xtp/photo-xtdb-fresh-url-uuid db photo-id)
        (url->b64)
        (write-to-file local-filename)
        (prompt-vision-llm-pass1 default-prompt)
        (store-summary :pass1)
        (analyze-screenshot local-filename)
        (pass2-generate-prompt)
        (pass2-summary-to-edn)
        (write-to-database db photo-id))))
It is now trivial to add back the timings under the various passes.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's the rewritten version:
I'm so ridiculously happy with it! Thank you @christoph-neumann and @justone!