Last active
November 18, 2015 01:57
-
-
Save awholegunch/24cd5b37930494428027 to your computer and use it in GitHub Desktop.
PythonEvaluator Demo JSON for CDAP ETL Batch Pipeline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "artifact": {
    "name": "cdap-etl-batch",
    "scope": "SYSTEM",
    "version": "3.2.1"
  },
  "config": {
    "source": {
      "name": "File",
      "label": "File",
      "properties": {
        "path": "files/data.json"
      }
    },
    "sinks": [
      {
        "name": "Table",
        "label": "Table",
        "properties": {
          "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
          "name": "test-id",
          "schema.row.field": "id"
        }
      }
    ],
    "transforms": [
      {
        "name": "PythonEvaluator",
        "label": "PythonEvaluator",
        "properties": {
          "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
          "script": "def transform(record, emitter, context):\n import uuid\n record['id'] = uuid.uuid1().hex\n del record['ts']\n context.getMetrics().count('ids.created', 1)\n context.getLogger().info(\"Created ID: %s\" % record['id'])\n emitter.emit(record)\n"
        }
      }
    ],
    "schedule": "* * * * *"
  },
  "description": "Extract-Transform-Load (ETL) Batch Application",
  "name": "PythonEvaluatorDemo",
  "ui": {
    "nodes": {
      "File-source-1": {
        "id": "File-source-1",
        "name": "File",
        "label": "File",
        "icon": "fa-plug",
        "style": {
          "top": "130px",
          "left": "458px"
        },
        "description": "Batch source for File Systems",
        "outputSchema": null,
        "pluginTemplate": null,
        "errorDatasetName": "",
        "validationFields": null,
        "lock": null,
        "properties": {
          "path": "files/data.json",
          "maxSplitSize": "",
          "fileSystemProperties": "",
          "fileRegex": "",
          "inputFormatClass": "",
          "timeTable": ""
        },
        "type": "batchsource",
        "valid": true,
        "requiredFieldCount": 0,
        "error": false,
        "warning": false,
        "selected": false
      },
      "PythonEvaluator-transform-2": {
        "id": "PythonEvaluator-transform-2",
        "name": "PythonEvaluator",
        "label": "PythonEvaluator",
        "icon": "fa-plug",
        "style": {
          "top": "130px",
          "left": "658px"
        },
        "description": "Executes user-provided Python code that transforms one record into another.",
        "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
        "pluginTemplate": null,
        "errorDatasetName": "",
        "validationFields": null,
        "lock": null,
        "properties": {
          "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
          "script": "def transform(record, emitter, context):\n import uuid\n record['id'] = uuid.uuid1().hex\n del record['ts']\n context.getMetrics().count('ids.created', 1)\n context.getLogger().info(\"Created ID: %s\" % record['id'])\n emitter.emit(record)\n"
        },
        "type": "transform",
        "requiredFieldCount": 0,
        "error": false,
        "warning": false,
        "valid": true,
        "selected": true
      },
      "Table-sink-3": {
        "id": "Table-sink-3",
        "name": "Table",
        "label": "Table",
        "icon": "fa-table",
        "style": {
          "top": "130px",
          "left": "858px"
        },
        "description": "Writes records to a Table with one record field mapping to the Table rowkey, and all other record fields mapping to Table columns.",
        "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
        "pluginTemplate": null,
        "errorDatasetName": "",
        "validationFields": null,
        "lock": null,
        "properties": {
          "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
          "name": "test-id",
          "schema.row.field": "id"
        },
        "type": "batchsink",
        "requiredFieldCount": 0,
        "error": false,
        "warning": false,
        "valid": true,
        "selected": false
      }
    },
    "connections": [
      {
        "source": "File-source-1",
        "target": "PythonEvaluator-transform-2"
      },
      {
        "source": "PythonEvaluator-transform-2",
        "target": "Table-sink-3"
      }
    ]
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment