Skip to content

Instantly share code, notes, and snippets.

@awholegunch
Last active November 18, 2015 01:57
Show Gist options
  • Save awholegunch/24cd5b37930494428027 to your computer and use it in GitHub Desktop.
PythonEvaluator Demo JSON for CDAP ETL Batch Pipeline
{
"artifact": {
"name": "cdap-etl-batch",
"scope": "SYSTEM",
"version": "3.2.1"
},
"config": {
"source": {
"name": "File",
"label": "File",
"properties": {
"path": "files/data.json"
}
},
"sinks": [
{
"name": "Table",
"label": "Table",
"properties": {
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"name": "test-id",
"schema.row.field": "id"
}
}
],
"transforms": [
{
"name": "PythonEvaluator",
"label": "PythonEvaluator",
"properties": {
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"script": "def transform(record, emitter, context):\n import uuid\n record['id'] = uuid.uuid1().hex\n del record['ts']\n context.getMetrics().count('ids.created', 1)\n context.getLogger().info(\"Created ID: %s\" % record['id'])\n emitter.emit(record)\n"
}
}
],
"schedule": "* * * * *"
},
"description": "Extract-Transform-Load (ETL) Batch Application",
"name": "PythonEvaluatorDemo",
"ui": {
"nodes": {
"File-source-1": {
"id": "File-source-1",
"name": "File",
"label": "File",
"icon": "fa-plug",
"style": {
"top": "130px",
"left": "458px"
},
"description": "Batch source for File Systems",
"outputSchema": null,
"pluginTemplate": null,
"errorDatasetName": "",
"validationFields": null,
"lock": null,
"properties": {
"path": "files/data.json",
"maxSplitSize": "",
"fileSystemProperties": "",
"fileRegex": "",
"inputFormatClass": "",
"timeTable": ""
},
"type": "batchsource",
"valid": true,
"requiredFieldCount": 0,
"error": false,
"warning": false,
"selected": false
},
"PythonEvaluator-transform-2": {
"id": "PythonEvaluator-transform-2",
"name": "PythonEvaluator",
"label": "PythonEvaluator",
"icon": "fa-plug",
"style": {
"top": "130px",
"left": "658px"
},
"description": "Executes user-provided Python code that transforms one record into another.",
"outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"pluginTemplate": null,
"errorDatasetName": "",
"validationFields": null,
"lock": null,
"properties": {
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"script": "def transform(record, emitter, context):\n import uuid\n record['id'] = uuid.uuid1().hex\n del record['ts']\n context.getMetrics().count('ids.created', 1)\n context.getLogger().info(\"Created ID: %s\" % record['id'])\n emitter.emit(record)\n"
},
"type": "transform",
"requiredFieldCount": 0,
"error": false,
"warning": false,
"valid": true,
"selected": true
},
"Table-sink-3": {
"id": "Table-sink-3",
"name": "Table",
"label": "Table",
"icon": "fa-table",
"style": {
"top": "130px",
"left": "858px"
},
"description": "Writes records to a Table with one record field mapping to the Table rowkey, and all other record fields mapping to Table columns.",
"outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"pluginTemplate": null,
"errorDatasetName": "",
"validationFields": null,
"lock": null,
"properties": {
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"name": "test-id",
"schema.row.field": "id"
},
"type": "batchsink",
"requiredFieldCount": 0,
"error": false,
"warning": false,
"valid": true,
"selected": false
}
},
"connections": [
{
"source": "File-source-1",
"target": "PythonEvaluator-transform-2"
},
{
"source": "PythonEvaluator-transform-2",
"target": "Table-sink-3"
}
]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment