Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Filtering a random row in a group
{
"name": "Pick random row in group",
"description": "Selects one of the rows (at random) from the ones grouped by a list of fields",
"inputs": [
{
"name": "dataset-id",
"type": "dataset-id",
"description": "The original dataset id to be filtered"
},
{
"name": "fields-list",
"type": "list",
"description": "List of the fields to be used to group the rows."
}
],
"outputs": [
{
"name": "filtered-dataset",
"type": "dataset-id",
"description": "The dataset ID for the filtered rows."
}
]
}
;; Pick one random row per group.
;;
;; Inputs (declared in the script metadata):
;;   dataset-id  - id of the dataset to filter
;;   fields-list - list of field names/ids whose values define a group
;; Output:
;;   filtered-dataset - id of the dataset that keeps one row per group
;;
;; Strategy: build an intermediate dataset sorted by the grouping fields with
;; a random tie-break, then keep only the rows whose grouping values differ
;; from those of the previous row (i.e. the first row of every run).

;; Field descriptors of the input dataset.
(define fields (resource-fields dataset-id))

;; Looks up one user-supplied field reference in the dataset's fields.
(define find-fields-fn (lambda (x) (find-field fields x)))

;; Extracts the "id" entry when the lookup produced a map; anything else
;; yields a non-string value that is discarded by the filter below.
(define filter-id-fn (lambda (x) (when (map? x) (x "id"))))

;; Ids of the grouping fields that were actually found in the dataset.
;; NOTE(review): entries of fields-list that do not match any field are
;; silently dropped here; only the "nothing matched at all" case raises.
(define order-by-fields (filter string? (map filter-id-fn (map find-fields-fn fields-list))))

(if (= (count order-by-fields) 0)
  (raise "Could not find a grouping field list")
  (log-info order-by-fields))

;; ORDER BY terms: every grouping field of table A, then RAND() so that the
;; rows sharing the same grouping values come out in random order.
(define order-strs (append (map (lambda (x) (str "A.`" x "`")) order-by-fields) "RAND()"))
(define order-str (join ", " order-strs))

;; Flatline test per grouping field: is the value in this row equal to the
;; value of the same field one row earlier?
(define filter-str-fn (lambda (x) (flatline "(= (f {{x}}) (f {{x}} -1))")))
(define filter-strs (map filter-str-fn order-by-fields))
(define filter-str (join " " filter-strs))

;; Intermediate dataset: the input rows sorted by group, random within group.
(define ordered-dataset
  (create-and-wait-dataset {"origin_datasets" [dataset-id]
                            "sql_query" (flatline "select A.* from A order by {order-str}")
                            "origin_dataset_names" (assoc {} dataset-id "A")}))

;; Final dataset: drop every row whose grouping values all equal those of the
;; previous row. On failure, log the error, remove the intermediate dataset
;; and re-raise, so that the declared dataset-id output is never bound to a
;; value that is not a dataset id.
(define filtered-dataset
  (try
    (create-and-wait-dataset {"origin_dataset" ordered-dataset
                              "lisp_filter" (flatline "(not (and {filter-str}))")})
    (catch e
      (prog (log-info "Error: " e)
            (delete ordered-dataset)
            (raise e)))))

;; The intermediate dataset is no longer needed once the filter succeeded.
(delete ordered-dataset)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment