Skip to content

Instantly share code, notes, and snippets.

@petersen-poul
Last active April 21, 2017 18:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save petersen-poul/9247024bc6cd2900d6ccd022406adef3 to your computer and use it in GitHub Desktop.
Save petersen-poul/9247024bc6cd2900d6ccd022406adef3 to your computer and use it in GitHub Desktop.
{
"name": "Assign Field Types by Field Name",
"description": "Sometimes, the automatic field detection does not assign field types correctly. This is especially a problem with fields that have a lot of missing values since the detection process only takes a peek at the data to determine if a field should be numeric, categorical, etc. This script allows you to alter the field types for a source based on the name of each field. Just put a partial match for the name in the list for the type you want to assign, and it will change all the fields whose names contain that string.",
"inputs": [
{
"name": "source",
"description": "Source to update.",
"type": "source-id"
},
{
"name": "number-field-match",
"description": "Field name patterns that should be numeric",
"type": "list",
"default": []
},
{
"name": "category-field-match",
"description": "Field name patterns that should be categorical",
"type": "list",
"default": []
},
{
"name": "text-field-match",
"description": "Field name patterns that should be text",
"type": "list",
"default": []
},
{
"name": "items-field-match",
"description": "Field name patterns that should be items",
"type": "list",
"default": []
},
{
"name": "date-field-match",
"description": "Field name patterns that should be datetime",
"type": "list",
"default": []
}
],
"outputs": [
{
"name": "updated-src",
"description": "A link to the updated source with the modified field types",
"type": "source-id"
}
]
}
;
; Compare the field name to a list of patterns. Return true if
; the name matches at least one pattern, false otherwise.
;
(define (field-matches? fieldname patterns)
  ;; Walk `patterns` from front to back and stop at the first hit.
  ;;   fieldname - the field name being tested
  ;;   patterns  - list of patterns to test against
  ;; Returns true as soon as one pattern matches, false once the list
  ;; has been exhausted (an empty list therefore yields false).
  (loop (remaining patterns)
    (cond
      ;; Nothing left to try: no pattern matched.
      (= remaining []) false
      ;; contains-string? receives the pattern first and the field name second.
      (contains-string? (head remaining) fieldname) true
      ;; Otherwise keep scanning the rest of the list.
      (recur (tail remaining)))))
;
; Given a source id or record, update the field optypes by searching
; for matching field names in each of the input arrays.
;
(define (field-typer source nums cats txts itms dats)
  ;; Build a per-field optype update for `source` and apply it.
  ;;   source - the source to fetch and update
  ;;   nums   - name patterns for fields that become "numeric"
  ;;   cats   - name patterns for fields that become "categorical"
  ;;   txts   - name patterns for fields that become "text"
  ;;   itms   - name patterns for fields that become "items"
  ;;   dats   - name patterns for fields that become "datetime"
  ;; The lists are consulted in the order above, so a field whose name
  ;; matches several lists gets the type of the first one that matches.
  ;; Fields matching no list are left out of the update.
  ;; Returns whatever update-and-wait returns.
  (let (src (fetch source)
        field-ids (keys (get src "fields"))
        changes
          (loop (pending field-ids acc {})
            (if (= pending [])
              acc
              (let (fid (head pending)
                    fname (get-in src ["fields" fid "name"]))
                ;; One recur per field; cond chooses the next accumulator.
                (recur
                  (tail pending)
                  (cond
                    (field-matches? fname nums)
                      (assoc acc fid {"optype" "numeric"})
                    (field-matches? fname cats)
                      (assoc acc fid {"optype" "categorical"})
                    (field-matches? fname txts)
                      (assoc acc fid {"optype" "text"})
                    (field-matches? fname itms)
                      (assoc acc fid {"optype" "items"})
                    (field-matches? fname dats)
                      (assoc acc fid {"optype" "datetime"})
                    ;; No list matched: carry the map forward unchanged.
                    acc))))))
    (update-and-wait source {"fields" changes})))
;
; Script entry point: run field-typer on the input source with the five
; pattern-list inputs and bind the result to the "updated-src" output.
;
(define updated-src (field-typer source number-field-match category-field-match text-field-match items-field-match date-field-match))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment