Last active
April 21, 2017 18:44
-
-
Save petersen-poul/9247024bc6cd2900d6ccd022406adef3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "Assign Field Types by Field Name", | |
"description": "Sometimes, the automatic field detection does not assign field types correctly. This is especially a problem with fields that have a lot of missing values since the detection process only takes a peek at the data to determine if a field should be numeric, categorical, etc. This script allows you to alter the field types for a source based on the name of each field. Just put a partial match for the name in the list for the type you want to assign, and it will change all the fields whose names contain that string.", | |
"inputs": [ | |
{ | |
"name": "source", | |
"description": "Source to update.", | |
"type": "source-id" | |
}, | |
{ | |
"name": "number-field-match", | |
"description": "Field name patterns that should be numeric", | |
"type": "list", | |
"default": [] | |
}, | |
{ | |
"name": "category-field-match", | |
"description": "Field name patterns that should be categorical", | |
"type": "list", | |
"default": [] | |
}, | |
{ | |
"name": "text-field-match", | |
"description": "Field name patterns that should be text", | |
"type": "list", | |
"default": [] | |
}, | |
{ | |
"name": "items-field-match", | |
"description": "Field name patterns that should be items", | |
"type": "list", | |
"default": [] | |
}, | |
{ | |
"name": "date-field-match", | |
"description": "Field name patterns that should be datetime", | |
"type": "list", | |
"default": [] | |
} | |
], | |
"outputs": [ | |
{ | |
"name": "updated-src", | |
"description": "A link to the updated source with the modified field types", | |
"type": "source-id" | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; field-matches?
;;
;; Walks `patterns` from the front and tests each entry against
;; `fieldname` with `contains-string?`.
;;
;;   fieldname - the field name to test
;;   patterns  - list of pattern strings
;;
;; Returns true as soon as one entry matches; returns false when the
;; list is exhausted (including when `patterns` is empty to begin with).
;;
;; NOTE(review): the call is (contains-string? pattern fieldname); confirm
;; this argument order against the WhizzML reference so that it means
;; "fieldname contains pattern".
(define (field-matches? fieldname patterns)
  (loop (remaining patterns)
    (cond (= remaining []) false
          (contains-string? (head remaining) fieldname) true
          (recur (tail remaining)))))
;; field-typer
;;
;; Fetches `source`, inspects the name of every entry under its "fields"
;; map, and builds a map of field id -> {"optype" <type>} for each field
;; whose name matches one of the pattern lists. The source is then updated
;; with that map under the "fields" key and the result of
;; `update-and-wait` is returned.
;;
;;   source - the source to fetch and update
;;   nums   - name patterns for fields that become "numeric"
;;   cats   - name patterns for fields that become "categorical"
;;   txts   - name patterns for fields that become "text"
;;   itms   - name patterns for fields that become "items"
;;   dats   - name patterns for fields that become "datetime"
;;
;; The lists are consulted in the order above and the first list that
;; matches wins; fields matching none of them are left out of the update.
(define (field-typer source nums cats txts itms dats)
  (let (source-rec (fetch source)
        field-ids (keys (get source-rec "fields"))
        optype-map
          (loop (pending field-ids
                 changes {})
            (if (= pending [])
              changes
              (let (id (head pending)
                    field-name (get-in source-rec ["fields" id "name"])
                    ;; Pick the change for this field; {} means "no match".
                    change (cond
                             (field-matches? field-name nums) {"optype" "numeric"}
                             (field-matches? field-name cats) {"optype" "categorical"}
                             (field-matches? field-name txts) {"optype" "text"}
                             (field-matches? field-name itms) {"optype" "items"}
                             (field-matches? field-name dats) {"optype" "datetime"}
                             {}))
                (recur (tail pending)
                       (if (= change {})
                         changes
                         (assoc changes id change)))))))
    (update-and-wait source {"fields" optype-map})))
;; Script output: the result of running field-typer on the input source
;; with the five pattern lists supplied as script inputs.
(define updated-src
  (field-typer source
               number-field-match
               category-field-match
               text-field-match
               items-field-match
               date-field-match))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment