Skip to content

Instantly share code, notes, and snippets.

@jaor
Last active January 5, 2019 05:25
Show Gist options
  • Save jaor/e54322cbd9750a5f408d58486d03d392 to your computer and use it in GitHub Desktop.
Save jaor/e54322cbd9750a5f408d58486d03d392 to your computer and use it in GitHub Desktop.
Incremental anomaly detection
{
"name": "Incremental anomalies",
"kind": "script",
"description": "Adds a new dataset to a collection and computes a set of anomalies for the new, extended data",
"source_code": "script.whizzml",
"imports":[
],
"inputs":[
{
"name": "url",
"type": "string",
"description": "URL of the new data to fetch (e.g., an s3:// or http:// URL)"
},
{
"name": "max-datasets",
"type": "number",
"description": "Maximum number of datasets to combine, including the one newly created from the URL",
"default": 10
},
{
"name": "tag",
"type": "string",
"description": "Tag used to identify previous datasets to merge; it is also applied to the newly created source, dataset and anomaly detector",
"default": "ongoing-anomalies"
},
{
"name": "anomalies",
"type": "number",
"description": "Number of top anomalies to compute",
"default": 10
}],
"outputs":[
{
"name": "anomaly",
"type": "anomaly-id",
"description": "ID of the final anomaly detector"
},
{
"name": "top-anomalies",
"type": "list",
"description": "A list of the top anomalies in the created detector"
}]
}
;; get-datasets: asks the platform for at most `n` datasets carrying
;; `tag` and returns their resource ids.
;; The caller expects these to be the most recent ones, newest first
;; (this relies on the listing's default order -- TODO confirm).
;; As in the original design, n is assumed to be below 20; asking for
;; more than that would require paginating over the listing.
(define (get-datasets tag n)
  (let (query {"limit" n "tags__in" tag}
        listing (list-datasets query))
    (resource-ids listing)))
;; create-new-dataset: builds a source from the remote data found at
;; `url`, then a dataset from that source, and returns whatever
;; create-dataset yields (the caller compares it against dataset ids).
;; Both the source and the dataset are tagged with `tag`.
(define (create-new-dataset url tag)
  (create-dataset (create-source {"remote" url "tags" [tag]})
                  {"tags" [tag]}))
;; compute-anomalies: creates a dataset from the remote data at `url`,
;; gathers up to `max-datasets` datasets identified by `tag` (the new
;; one included), and builds an anomaly detector over that collection
;; configured with `n` top anomalies. Returns the result of waiting on
;; the created anomaly detector.
;; Raises a string error when the first listed dataset is not the one
;; that was just created.
(define (compute-anomalies url tag n max-datasets)
  (let (fresh (create-new-dataset url tag)
        ;; must be listed after creating `fresh` so that it is included
        tagged (get-datasets tag max-datasets)
        newest (head tagged))
    ;; guard: the listing is expected to start with the new dataset
    (when (not (= newest fresh))
      (raise (str "Unexpected head of dataset list:"
                  newest " != " fresh)))
    ;; the whole tagged collection feeds a single anomaly detector
    (wait (create-anomaly {"datasets" tagged "tags" [tag] "top_n" n}))))
;; Script outputs.
;; `anomaly`: the detector built from the script inputs url, tag,
;; anomalies and max-datasets.
(define anomaly (compute-anomalies url tag anomalies max-datasets))
;; `top-anomalies`: the value stored under model -> top_anomalies in
;; the fetched detector, or an empty list when that path is missing.
(define top-anomalies
  (let (detector (fetch anomaly))
    (detector ["model" "top_anomalies"] [])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment