Skip to content

Instantly share code, notes, and snippets.

@jaor
Last active August 23, 2022 15:31
Show Gist options
  • Save jaor/4fd9cf9f8d619717af0997ea021cc566 to your computer and use it in GitHub Desktop.
Save jaor/4fd9cf9f8d619717af0997ea021cc566 to your computer and use it in GitHub Desktop.
dataset to image composite
{
"name": "dataset to image composite",
"kind": "script",
"description": "Transform back a dataset with images to a source composite",
"source_code": "script.whizzml",
"inputs": [
{
"name": "dataset",
"type": "dataset-id",
"description": "The input dataset"
},
{
"name": "fields",
"type": "list",
"default": [],
"description": "A list of fields to add as label fields"
},
{
"name": "component-batch-size",
"type": "number",
"default": 100,
"description": "Components are added in batches of this size"
},
{
"name": "row-value-batch-size",
"type": "number",
"default": 5,
"description": "Values for labels are updated in batches of this size"
}
],
"outputs": [
{
"name": "source",
"type": "source-id",
"description": "The resulting composite"
}
]
}
;; Returns a string of the form "<name> (<idx>)" that is not a member
;; of `names`, trying idx, idx + 1, ... until a free one is found.
;;
;;   names - list of names already taken
;;   name  - base name to decorate
;;   idx   - first numeric suffix to try
;;
;; The candidate gets its own local binding so that retries are built
;; from the original base name: a clash on "x (3)" yields "x (4)"
;; rather than the accumulated "x (3) (4)".
(define (unique-name names name idx)
  (let (candidate (str name " (" idx ")"))
    (if (member? candidate names)
      (unique-name names name (+ 1 idx))
      candidate)))
;; Returns the field maps in `fds`, each extended with a "label_name"
;; entry, chosen so that no two fields share the same label name.
;;
;; When all "name" values are already distinct, the label name is just
;; the field name.  Otherwise labels are assigned left to right:
;;   1. the plain field name, if not yet used;
;;   2. else "<name> (<provenance>)", where a missing "provenance"
;;      entry defaults to "2", if not yet used;
;;   3. else the first free "<name> (<k>)" for k = 3, 4, ...
(define (with-unique-label-names fds)
  (let (names (map (lambda (f) (f "name")) fds))
    (if (= (count (set* names)) (count names))
      (map (lambda (f) (assoc f "label_name" (f "name"))) fds)
      (let (labels
            (reduce (lambda (ns f)
                      (let (n (f "name")
                            p (str n " (" (f "provenance" "2") ")"))
                        (cond (not (member? n ns)) (append ns n)
                              (not (member? p ns)) (append ns p)
                              ;; The fallback must also extend the
                              ;; accumulator: returning the bare string
                              ;; would turn the reduce state into a
                              ;; non-list and break later iterations.
                              (append ns (unique-name ns n 3)))))
                    []
                    fds))
        ;; `labels` is parallel to `fds`: one label per field, in order.
        (map (lambda (f n) (assoc f "label_name" n)) fds labels)))))
;; Value of the "auto_generated" entry of field map `f`, or false when
;; the entry is absent.
(define (auto-generated? f)
  (get f "auto_generated" false))
;; Returns the fields of `fds` whose "name" does not match the pattern
;; "<im-name>.<something>", i.e. the quoted image field name followed by
;; a literal dot and at least one more character.
(define (remove-image-features fds im-name)
  (let (derived-rx (str "^" (re-quote im-name) "\\..+")
        derived? (lambda (f) (matches? derived-rx (f "name"))))
    (filter (lambda (f) (not (derived? f))) fds)))
;; Builds the list of fields to sample from `dataset`: the image field
;; first, followed by the label fields, all carrying a "label_name".
;;
;;   dataset - dataset identifier
;;   fields  - fields to use as labels (resolved with find-field); when
;;             empty, every field of the dataset is a candidate
;;
;; Raises when the number of fields satisfying image-field? or
;; text-field? is not exactly one.
(define (sample-fields dataset fields)
  (let (by-id (resource-fields dataset)
        all-fds (values by-id)
        candidates (filter (lambda (f) (or (image-field? f) (text-field? f)))
                           all-fds)
        img (if (= 1 (count candidates))
              (head candidates)
              (raise "The input dataset must contain a single image field"))
        requested (if (empty? fields)
                    all-fds
                    (map (lambda (f) (find-field by-id f)) fields))
        ;; A label is anything that is not auto-generated, not an image
        ;; field and not a path field.
        label? (lambda (f)
                 (not (or (auto-generated? f)
                          (image-field? f)
                          (path-field? f))))
        labels (remove-image-features (filter label? requested)
                                      (img "name")))
    (with-unique-label-names (cons img labels))))
;; Flattens sample rows into a list of row-value maps.
;;
;; The first cell of each row is taken as the component identifier and
;; the remaining cells are paired positionally with `names`.  Each pair
;; produces {"field" <name> "value" <cell or ""> "components" [<id>]}.
;; Rows whose first cell is empty contribute nothing.
(define (rows-to-values rows names)
  (reduce (lambda (acc row)
            (let (component (head row)
                  updates (map (lambda (cell label)
                                 {"field" label
                                  "value" (or cell "")
                                  "components" [component]})
                               (tail row)
                               names))
              (if (empty? component)
                acc
                (concat acc updates))))
          []
          rows))
;; Sends the row-value maps in `vs` to the source `src`, in consecutive
;; "row_values" updates of at most `row-value-batch-size` elements each,
;; waiting for the source before and after every update.
;;
;;   src - source to update
;;   vs  - list of row-value maps (as built by rows-to-values)
;;
;; The batch size is clamped to at least 1: with a size of 0 every
;; update would be empty and (drop 0 vs) would never shrink the list,
;; so the recursion would not terminate.
(define (update-row-values src vs)
  (let (size (max 1 row-value-batch-size))
    (when (not (empty? vs))
      (update-and-wait (wait src) {"row_values" (take size vs)})
      (update-row-values src (drop size vs)))))
;; Copies label values from `sample` into `composite`, one window of
;; sample rows at a time, starting at row `offset`, and returns
;; `composite`.
;;
;;   composite - source being labelled
;;   sample    - sample the rows are fetched from
;;   names     - label field names, parallel to the non-id row cells
;;   offset    - index of the first row still to process
;;   max-rows  - total number of rows expected
;;
;; Progress is reported in the range [0.1, 0.95] as offset/max-rows.
;; The function always returns `composite`, including when the sample
;; runs out of rows before `max-rows` is reached.
(define (add-rows composite sample names offset max-rows)
  ;; Guard the ratio so a zero `max-rows` does not divide by zero.
  (log-progress (+ 0.1 (* 0.85 (if (> max-rows 0)
                                 (- 1 (/ (- max-rows offset) max-rows))
                                 1))))
  (if (< offset max-rows)
    (let (size (max row-value-batch-size component-batch-size)
          s (fetch sample {"mode" "linear" "row_offset" offset "rows" size})
          rows (s ["sample" "rows"] []))
      (if (empty? rows)
        ;; Nothing more to read: stop here but still hand back the
        ;; composite, so callers never receive an empty result.
        composite
        (let (last-row (min (+ offset size) max-rows))
          (log-info "Adding labels [" (+ 1 offset) ", " last-row "]")
          (update-row-values composite (rows-to-values rows names))
          ;; Advance by the number of rows actually received.
          (add-rows composite sample names (+ offset (count rows)) max-rows))))
    composite))
;; Fetches up to `limit` rows of `sample`, starting at `offset`, asking
;; only for the field `image-id`, and returns the first cell of every
;; row received (an empty list when the response has no rows).
(define (components image-id sample offset limit)
  (let (query {"mode" "linear"
               "row_offset" offset
               "rows" limit
               "fields" [image-id]}
        result (fetch sample query)
        rows (result ["sample" "rows"] []))
    (map (lambda (row) (head row)) rows)))
;; Adds the values of the image field of `sample` to `composite` as
;; component sources, in batches of `component-batch-size`, starting at
;; row `offset`.
;;
;;   composite - source receiving the components
;;   iid       - identifier of the image field in the sample
;;   sample    - sample the component identifiers are read from
;;   offset    - index of the first row still to process
;;   max-rows  - total number of rows expected
;;
;; Progress is reported in the range [0, 0.1] as offset/max-rows.
;; Called for its side effects; recursion stops when `offset` reaches
;; `max-rows` or a batch comes back empty.
(define (add-components composite iid sample offset max-rows)
  ;; Guard the ratio so a zero `max-rows` does not divide by zero.
  (log-progress (* 0.1 (if (> max-rows 0)
                         (- 1 (/ (- max-rows offset) max-rows))
                         1)))
  (when (< offset max-rows)
    (log-info "Adding components [" (+ 1 offset)
              ", " (min max-rows (+ offset component-batch-size)) "]")
    (let (srcs (components iid sample offset component-batch-size))
      (when (not (empty? srcs))
        (update-and-wait composite {"add_sources" srcs})
        ;; Advance by the number of components actually received.
        (add-components composite iid sample (+ offset (count srcs)) max-rows)))))
;; Creates a new source named `name` with an empty "sources" list, fills
;; it with the components read from `sample`, and declares one new field
;; per label field.  Returns the source after waiting for it.
;;
;;   name     - name of the new source
;;   fields   - image field first, then the label fields (each with
;;              "label_name" and "optype" entries)
;;   sample   - sample the components are read from
;;   max-rows - number of rows in the sample
(define (create-image-composite name fields sample max-rows)
  (let (composite (wait (create-source {"sources" [] "name" name}))
        image-fd (head fields)
        label-fds (tail fields)
        new-fields (map (lambda (f)
                          {"name" (f "label_name") "optype" (f "optype")})
                        label-fds))
    (add-components composite (image-fd "id") sample 0 max-rows)
    ;; Only issue the update when there is at least one label to add.
    (if (empty? new-fields)
      (wait composite)
      (wait (update composite {"new_fields" new-fields})))))
;; Entry point: turns `dataset` into a source named
;; "editable <dataset name>" whose components come from the dataset's
;; image field and whose label fields come from `fields`.
;;
;; Steps: pick the fields, create a sample of the dataset restricted to
;; them, create the composite with its components and label fields, and
;; finally copy the label values row by row.  Returns the composite.
(define (create-editable-composite dataset fields)
  (log-progress 0.0)
  (let (fds (sample-fields dataset fields)
        field-ids (map (lambda (f) (f "id")) fds)
        sample (create-sample {"dataset" dataset
                               "input_fields" field-ids
                               "temp" true})
        name (str "editable " (resource-name dataset))
        rows (resource-property (wait sample) "max_rows")
        ;; The first field is the image; the rest are labels.
        names (map (lambda (f) (f "label_name")) (tail fds))
        labels-msg (if (empty? names)
                     ""
                     (str " and " (count names) " label fields " names)))
    (log-info "Creating composite with " rows " components" labels-msg)
    (let (composite (create-image-composite name fds sample rows))
      (add-rows composite sample names 0 rows))))
;; Script output `source`: the result of running the conversion on the
;; `dataset` and `fields` script inputs.
(define source (create-editable-composite dataset fields))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment