Created
May 9, 2019 04:25
-
-
Save metadaddy/6f7df4398b4455f20797b07ae312badb to your computer and use it in GitHub Desktop.
StreamSets Data Collector 3.8.0 pipeline to Validate JSON against a schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"pipelineConfig" : { | |
"schemaVersion" : 6, | |
"version" : 12, | |
"pipelineId" : "ValidateJSONDatab0dfa94e-faf3-42ec-9a02-5122d048fe4c", | |
"title" : "Validate JSON Data", | |
"description" : "", | |
"uuid" : "d504e9d3-d4ea-4d52-8fd8-565592400d2e", | |
"configuration" : [ { | |
"name" : "executionMode", | |
"value" : "STANDALONE" | |
}, { | |
"name" : "edgeHttpUrl", | |
"value" : "http://localhost:18633" | |
}, { | |
"name" : "deliveryGuarantee", | |
"value" : "AT_LEAST_ONCE" | |
}, { | |
"name" : "startEventStage", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1" | |
}, { | |
"name" : "stopEventStage", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1" | |
}, { | |
"name" : "shouldRetry", | |
"value" : true | |
}, { | |
"name" : "retryAttempts", | |
"value" : -1 | |
}, { | |
"name" : "notifyOnStates", | |
"value" : [ "RUN_ERROR", "STOPPED", "FINISHED" ] | |
}, { | |
"name" : "emailIDs", | |
"value" : [ ] | |
}, { | |
"name" : "constants", | |
"value" : [ { | |
"key" : "audit", | |
"value" : "https://streamsets-request-bin.herokuapp.com/zz456ozz" | |
}, { | |
"key" : "test", | |
"value" : "a#b" | |
} ] | |
}, { | |
"name" : "badRecordsHandling", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_recordstolocalfilesystem_ToErrorLocalFSDTarget::1" | |
}, { | |
"name" : "errorRecordPolicy", | |
"value" : "ORIGINAL_RECORD" | |
}, { | |
"name" : "workerCount", | |
"value" : 0 | |
}, { | |
"name" : "clusterSlaveMemory", | |
"value" : 2048 | |
}, { | |
"name" : "clusterSlaveJavaOpts", | |
"value" : "-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -Dlog4j.debug" | |
}, { | |
"name" : "clusterLauncherEnv", | |
"value" : [ ] | |
}, { | |
"name" : "mesosDispatcherURL", | |
"value" : null | |
}, { | |
"name" : "hdfsS3ConfDir", | |
"value" : null | |
}, { | |
"name" : "rateLimit", | |
"value" : 10 | |
}, { | |
"name" : "maxRunners", | |
"value" : 0 | |
}, { | |
"name" : "shouldCreateFailureSnapshot", | |
"value" : true | |
}, { | |
"name" : "runnerIdleTIme", | |
"value" : 60 | |
}, { | |
"name" : "webhookConfigs", | |
"value" : [ ] | |
}, { | |
"name" : "sparkConfigs", | |
"value" : [ ] | |
}, { | |
"name" : "statsAggregatorStage", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_StatsNullDTarget::1" | |
}, { | |
"name" : "testOriginStage", | |
"value" : "streamsets-datacollector-dev-lib::com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource::3" | |
}, { | |
"name" : "logLevel", | |
"value" : "INFO" | |
}, { | |
"name" : "amazonEMRConfig.userRegion", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.userRegionCustom", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.accessKey", | |
"value" : "" | |
}, { | |
"name" : "amazonEMRConfig.secretKey", | |
"value" : "" | |
}, { | |
"name" : "amazonEMRConfig.s3StagingUri", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.provisionNewCluster", | |
"value" : false | |
}, { | |
"name" : "amazonEMRConfig.clusterId", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.clusterPrefix", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.terminateCluster", | |
"value" : false | |
}, { | |
"name" : "amazonEMRConfig.enableEMRDebugging", | |
"value" : true | |
}, { | |
"name" : "amazonEMRConfig.s3LogUri", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.serviceRole", | |
"value" : "EMR_DefaultRole" | |
}, { | |
"name" : "amazonEMRConfig.jobFlowRole", | |
"value" : "EMR_EC2_DefaultRole" | |
}, { | |
"name" : "amazonEMRConfig.visibleToAllUsers", | |
"value" : true | |
}, { | |
"name" : "amazonEMRConfig.ec2SubnetId", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.masterSecurityGroup", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.slaveSecurityGroup", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.instanceCount", | |
"value" : 2 | |
}, { | |
"name" : "amazonEMRConfig.masterInstanceType", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.masterInstanceTypeCustom", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.slaveInstanceType", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.slaveInstanceTypeCustom", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.loggingEnabled", | |
"value" : true | |
}, { | |
"name" : "memoryLimit", | |
"value" : "${jvm:maxMemoryMB() * 0.85}" | |
}, { | |
"name" : "memoryLimitExceeded", | |
"value" : "LOG" | |
}, { | |
"name" : "clusterConfig.clusterType", | |
"value" : "LOCAL" | |
}, { | |
"name" : "clusterConfig.sparkMasterUrl", | |
"value" : "local[*]" | |
}, { | |
"name" : "clusterConfig.deployMode", | |
"value" : "CLIENT" | |
}, { | |
"name" : "clusterConfig.hadoopUserName", | |
"value" : "hdfs" | |
}, { | |
"name" : "databricksConfig.baseUrl", | |
"value" : null | |
}, { | |
"name" : "databricksConfig.credentialType", | |
"value" : null | |
}, { | |
"name" : "databricksConfig.username", | |
"value" : "" | |
}, { | |
"name" : "databricksConfig.password", | |
"value" : "" | |
}, { | |
"name" : "databricksConfig.token", | |
"value" : "" | |
}, { | |
"name" : "clusterConfig.sparkAppName", | |
"value" : "${pipeline:title()}" | |
}, { | |
"name" : "clusterConfig.yarnKerberosEnabled", | |
"value" : false | |
}, { | |
"name" : "clusterConfig.yarnKerberosPrincipal", | |
"value" : "name@DOMAIN" | |
}, { | |
"name" : "clusterConfig.yarnKerberosKeytab", | |
"value" : null | |
}, { | |
"name" : "clusterConfig.stagingDir", | |
"value" : "/streamsets" | |
} ], | |
"uiInfo" : { | |
"previewConfig" : { | |
"previewSource" : "CONFIGURED_SOURCE", | |
"batchSize" : 10, | |
"timeout" : "300000", | |
"writeToDestinations" : false, | |
"executeLifecycleEvents" : false, | |
"showHeader" : true, | |
"showFieldType" : true, | |
"rememberMe" : false | |
} | |
}, | |
"fragments" : [ ], | |
"stages" : [ { | |
"instanceName" : "DevRawDataSource_01", | |
"library" : "streamsets-datacollector-dev-lib", | |
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource", | |
"stageVersion" : "3", | |
"configuration" : [ { | |
"name" : "rawData", | |
"value" : "{\"employee\":\"Rick\", \"email\":\"rick@gmail.com\", \"department\":\"audit\"}\n{\"employee\":\"Rick\", \"address\":\"Times square\", \"salary\":10000, \"department\":\"sales\"}\n{\"employee\":\"Bob\", \"address\":\"Times square\", \"salary\":10000, \"department\":\"audit\"}\n{\"employee\":\"Bob\", \"email\":\"bob@gmail.com\", \"department\":\"sales\"}" | |
}, { | |
"name" : "stopAfterFirstBatch", | |
"value" : true | |
}, { | |
"name" : "eventData", | |
"value" : null | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Read data as text", | |
"xPos" : 60, | |
"yPos" : 50, | |
"stageType" : "SOURCE" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ "DevRawDataSource_01OutputLane15572651869340" ], | |
"eventLanes" : [ ], | |
"services" : [ { | |
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService", | |
"serviceVersion" : 1, | |
"configuration" : [ { | |
"name" : "displayFormats", | |
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML" | |
}, { | |
"name" : "dataFormat", | |
"value" : "TEXT" | |
}, { | |
"name" : "dataFormatConfig.compression", | |
"value" : "NONE" | |
}, { | |
"name" : "dataFormatConfig.filePatternInArchive", | |
"value" : "*" | |
}, { | |
"name" : "dataFormatConfig.charset", | |
"value" : "UTF-8" | |
}, { | |
"name" : "dataFormatConfig.removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.textMaxLineLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.useCustomDelimiter", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customDelimiter", | |
"value" : "\\r\\n" | |
}, { | |
"name" : "dataFormatConfig.includeCustomDelimiterInTheText", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.jsonContent", | |
"value" : "MULTIPLE_OBJECTS" | |
}, { | |
"name" : "dataFormatConfig.jsonMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.csvFileFormat", | |
"value" : "CSV" | |
}, { | |
"name" : "dataFormatConfig.csvHeader", | |
"value" : "NO_HEADER" | |
}, { | |
"name" : "dataFormatConfig.csvAllowExtraColumns", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvExtraColumnPrefix", | |
"value" : "_extra_" | |
}, { | |
"name" : "dataFormatConfig.csvMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.csvCustomDelimiter", | |
"value" : "|" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterFieldDelimiter", | |
"value" : "||" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterLineDelimiter", | |
"value" : "${str:unescapeJava('\\\\n')}" | |
}, { | |
"name" : "dataFormatConfig.csvCustomEscape", | |
"value" : "\\" | |
}, { | |
"name" : "dataFormatConfig.csvCustomQuote", | |
"value" : "\"" | |
}, { | |
"name" : "dataFormatConfig.csvEnableComments", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvCommentMarker", | |
"value" : "#" | |
}, { | |
"name" : "dataFormatConfig.csvIgnoreEmptyLines", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.csvRecordType", | |
"value" : "LIST_MAP" | |
}, { | |
"name" : "dataFormatConfig.csvSkipStartLines", | |
"value" : 0 | |
}, { | |
"name" : "dataFormatConfig.parseNull", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.nullConstant", | |
"value" : "\\\\N" | |
}, { | |
"name" : "dataFormatConfig.xmlRecordElement", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.includeFieldXpathAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xPathNamespaceContext", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.outputFieldAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xmlMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.logMode", | |
"value" : "GROK" | |
}, { | |
"name" : "dataFormatConfig.logMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.retainOriginalLine", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customLogFormat", | |
"value" : "%h %l %u %t \"%r\" %>s %b" | |
}, { | |
"name" : "dataFormatConfig.regex", | |
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)" | |
}, { | |
"name" : "dataFormatConfig.fieldPathsToGroupName", | |
"value" : [ { | |
"fieldPath" : "/", | |
"group" : 1 | |
} ] | |
}, { | |
"name" : "dataFormatConfig.grokPatternDefinition", | |
"value" : "" | |
}, { | |
"name" : "dataFormatConfig.grokPattern", | |
"value" : "%{WORD:verb} %{PATH:file}" | |
}, { | |
"name" : "dataFormatConfig.onParseError", | |
"value" : "ERROR" | |
}, { | |
"name" : "dataFormatConfig.maxStackTraceLines", | |
"value" : 50 | |
}, { | |
"name" : "dataFormatConfig.enableLog4jCustomLogFormat", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.log4jCustomLogFormat", | |
"value" : "%r [%t] %-5p %c %x - %m%n" | |
}, { | |
"name" : "dataFormatConfig.avroSchemaSource", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSchema", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaRegistryUrls", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.schemaLookupMode", | |
"value" : "SUBJECT" | |
}, { | |
"name" : "dataFormatConfig.subject", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaId", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.protoDescriptorFile", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.messageType", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.isDelimited", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.binaryMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.datagramMode", | |
"value" : "SYSLOG" | |
}, { | |
"name" : "dataFormatConfig.typesDbPath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.convertTime", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excludeInterval", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.authFilePath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesMode", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSize", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMs", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.wholeFileMaxObjectLen", | |
"value" : 8192 | |
}, { | |
"name" : "dataFormatConfig.rateLimit", | |
"value" : "-1" | |
}, { | |
"name" : "dataFormatConfig.verifyChecksum", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelHeader", | |
"value" : null | |
} ] | |
} ] | |
}, { | |
"instanceName" : "JSONParser_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_jsonparser_JsonParserDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldPathToParse", | |
"value" : "/text" | |
}, { | |
"name" : "removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "parsedFieldPath", | |
"value" : "/json" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Parse JSON data", | |
"xPos" : 280, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "DevRawDataSource_01OutputLane15572651869340" ], | |
"outputLanes" : [ "JSONParser_01OutputLane15573745798530" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "StaticLookup_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_kv_local_LocalLookupDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "conf.values", | |
"value" : [ { | |
"key" : "audit", | |
"value" : "{ \"$schema\": \"http://json-schema.org/draft-04/schema#\", \"type\": \"object\", \"properties\": { \"employee\": { \"type\": \"string\" }, \"email\": { \"type\": \"string\" }, \"department\": { \"type\": \"string\" } }, \"required\": [ \"employee\", \"email\", \"department\" ] }" | |
}, { | |
"key" : "sales", | |
"value" : "{ \"$schema\": \"http://json-schema.org/draft-04/schema#\", \"type\": \"object\", \"properties\": { \"employee\": { \"type\": \"string\" }, \"address\": { \"type\": \"string\" }, \"salary\": { \"type\": \"integer\" }, \"department\": { \"type\": \"string\" } }, \"required\": [ \"employee\", \"address\", \"salary\", \"department\" ] }" | |
} ] | |
}, { | |
"name" : "conf.mode", | |
"value" : "BATCH" | |
}, { | |
"name" : "conf.lookups", | |
"value" : [ { | |
"keyExpr" : "${record:value('/json/department')}", | |
"outputFieldPath" : "/schema" | |
} ] | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Lookup schema", | |
"xPos" : 500, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "JSONParser_01OutputLane15573745798530" ], | |
"outputLanes" : [ "StaticLookup_01OutputLane15573729405360" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "JSONParser_02", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_jsonparser_JsonParserDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldPathToParse", | |
"value" : "/schema" | |
}, { | |
"name" : "removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "parsedFieldPath", | |
"value" : "/schema" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Parse schema", | |
"xPos" : 720, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "StaticLookup_01OutputLane15573729405360" ], | |
"outputLanes" : [ "JSONParser_02OutputLane15573747961910" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "JythonEvaluator_01", | |
"library" : "streamsets-datacollector-jython_2_7-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_jython_JythonDProcessor", | |
"stageVersion" : "2", | |
"configuration" : [ { | |
"name" : "processingMode", | |
"value" : "BATCH" | |
}, { | |
"name" : "initScript", | |
"value" : "#\n# Available Objects:\n#\n# state: a dict that is preserved between invocations of this script. \n# Useful for caching bits of data e.g. counters.\n#\n# log.<loglevel>(msg, obj...): use instead of print to send log messages to the log4j log instead of stdout.\n# loglevel is any log4j level: e.g. info, error, warn, trace.\n# sdcFunctions.getFieldNull(Record, 'field path'): Receive a constant defined above \n# to check if the field is typed field with value null\n# sdcFunctions.createMap(boolean listMap): Create a map for use as a field in a record.\n# Pass True to this function to create a list map (ordered map)\n#\n\n# state['connection'] = Connection().open()" | |
}, { | |
"name" : "script", | |
"value" : "# Append jsonschema location to system path\nimport sys\n# Customize for your deployment\nsys.path.append('/Users/pat/Library/Python/2.7/lib/python/site-packages')\nsys.path.append('/Users/pat/Library/Python/2.7/lib/python/site-packages/pip/_vendor')\n\nfrom jsonschema import validate\n\nfor record in records:\n try:\n # If no exception is raised by validate(), the instance is valid.\n validate(instance=record.value['json'], schema=record.value['schema'])\n\n # Write record to processor output\n output.write(record)\n\n except Exception as e:\n # Send record to error\n error.write(record, str(e))" | |
}, { | |
"name" : "destroyScript", | |
"value" : "#\n# Available Objects:\n#\n# state: a dict that is preserved between invocations of this script. \n# Useful for caching bits of data e.g. counters.\n#\n# log.<loglevel>(msg, obj...): use instead of print to send log messages to the log4j log instead of stdout.\n# loglevel is any log4j level: e.g. info, error, warn, trace.\n# sdcFunctions.getFieldNull(Record, 'field path'): Receive a constant defined above \n# to check if the field is typed field with value null\n# sdcFunctions.createMap(boolean listMap): Create a map for use as a field in a record.\n# Pass True to this function to create a list map (ordered map)\n# sdcFunctions.createEvent(String type, int version): Creates a new event.\n# Create new empty event with standard headers.\n# sdcFunctions.toEvent(Record): Send event to event stream\n# Only events created with sdcFunctions.createEvent are supported.\n#\n\n# state['connection'].close()" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Validate data against schema", | |
"xPos" : 940, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "JSONParser_02OutputLane15573747961910" ], | |
"outputLanes" : [ "JythonEvaluator_01OutputLane15573729788680" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "LocalFS_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_localfilesystem_LocalFileSystemDTarget", | |
"stageVersion" : "3", | |
"configuration" : [ { | |
"name" : "configs.uniquePrefix", | |
"value" : "sdc-${sdc:id()}" | |
}, { | |
"name" : "configs.fileNameSuffix", | |
"value" : null | |
}, { | |
"name" : "configs.dirPathTemplateInHeader", | |
"value" : false | |
}, { | |
"name" : "configs.dirPathTemplate", | |
"value" : "/tmp/out/${YYYY()}-${MM()}-${DD()}-${hh()}" | |
}, { | |
"name" : "configs.timeZoneID", | |
"value" : "UTC" | |
}, { | |
"name" : "configs.timeDriver", | |
"value" : "${time:now()}" | |
}, { | |
"name" : "configs.maxRecordsPerFile", | |
"value" : 0 | |
}, { | |
"name" : "configs.maxFileSize", | |
"value" : 0 | |
}, { | |
"name" : "configs.idleTimeout", | |
"value" : "${1 * HOURS}" | |
}, { | |
"name" : "configs.compression", | |
"value" : "NONE" | |
}, { | |
"name" : "configs.otherCompression", | |
"value" : null | |
}, { | |
"name" : "configs.fileType", | |
"value" : "TEXT" | |
}, { | |
"name" : "configs.keyEl", | |
"value" : "${uuid()}" | |
}, { | |
"name" : "configs.lateRecordsLimit", | |
"value" : "${1 * HOURS}" | |
}, { | |
"name" : "configs.rollIfHeader", | |
"value" : false | |
}, { | |
"name" : "configs.rollHeaderName", | |
"value" : "roll" | |
}, { | |
"name" : "configs.lateRecordsAction", | |
"value" : "SEND_TO_ERROR" | |
}, { | |
"name" : "configs.lateRecordsDirPathTemplate", | |
"value" : "/tmp/late/${YYYY()}-${MM()}-${DD()}" | |
}, { | |
"name" : "configs.dataFormat", | |
"value" : "JSON" | |
}, { | |
"name" : "configs.hdfsPermissionCheck", | |
"value" : true | |
}, { | |
"name" : "configs.permissionEL", | |
"value" : null | |
}, { | |
"name" : "configs.skipOldTempFileRecovery", | |
"value" : false | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.charset", | |
"value" : "UTF-8" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvFileFormat", | |
"value" : "CSV" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvHeader", | |
"value" : "NO_HEADER" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvReplaceNewLines", | |
"value" : true | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvReplaceNewLinesString", | |
"value" : " " | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvCustomDelimiter", | |
"value" : "|" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvCustomEscape", | |
"value" : "\\" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.csvCustomQuote", | |
"value" : "\"" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.jsonMode", | |
"value" : "MULTIPLE_OBJECTS" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.textFieldPath", | |
"value" : "/text" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.textRecordSeparator", | |
"value" : "\\n" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.textFieldMissingAction", | |
"value" : "ERROR" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.textEmptyLineIfNull", | |
"value" : false | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.avroSchemaSource", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.avroSchema", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.registerSchema", | |
"value" : false | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.schemaRegistryUrlsForRegistration", | |
"value" : [ ] | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.schemaRegistryUrls", | |
"value" : [ ] | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.schemaLookupMode", | |
"value" : "SUBJECT" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.subject", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.subjectToRegister", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.schemaId", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.avroCompression", | |
"value" : "NULL" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.binaryFieldPath", | |
"value" : "/" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.protoDescriptorFile", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.messageType", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.fileNameEL", | |
"value" : null | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.wholeFileExistsAction", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.includeChecksumInTheEvents", | |
"value" : false | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.checksumAlgorithm", | |
"value" : "MD5" | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.xmlPrettyPrint", | |
"value" : true | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.xmlValidateSchema", | |
"value" : false | |
}, { | |
"name" : "configs.dataGeneratorFormatConfig.xmlSchema", | |
"value" : null | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Write valid records to disk", | |
"xPos" : 1160, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ "JythonEvaluator_01OutputLane15573729788680" ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
} ], | |
"errorStage" : { | |
"instanceName" : "WritetoFile_ErrorStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_recordstolocalfilesystem_ToErrorLocalFSDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "directory", | |
"value" : "/tmp/" | |
}, { | |
"name" : "uniquePrefix", | |
"value" : "sdc-error-${sdc:id()}" | |
}, { | |
"name" : "rotationIntervalSecs", | |
"value" : "${1 * HOURS}" | |
}, { | |
"name" : "maxFileSizeMbs", | |
"value" : 512 | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Error Records - Write to File", | |
"xPos" : 500, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, | |
"info" : { | |
"pipelineId" : "ValidateJSONDatab0dfa94e-faf3-42ec-9a02-5122d048fe4c", | |
"title" : "Validate JSON Data", | |
"description" : "", | |
"created" : 1557375426335, | |
"lastModified" : 1557375426585, | |
"creator" : "admin", | |
"lastModifier" : "admin", | |
"lastRev" : "0", | |
"uuid" : "d504e9d3-d4ea-4d52-8fd8-565592400d2e", | |
"valid" : false, | |
"metadata" : { | |
"labels" : [ ] | |
}, | |
"name" : "ValidateJSONDatab0dfa94e-faf3-42ec-9a02-5122d048fe4c", | |
"sdcVersion" : "3.8.0", | |
"sdcId" : "2e5c18f6-2e5b-11e9-a456-3ff9fba8972e" | |
}, | |
"metadata" : { | |
"labels" : [ ] | |
}, | |
"statsAggregatorStage" : { | |
"instanceName" : "Discard_StatsAggregatorStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_StatsNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Stats Aggregator - Discard", | |
"xPos" : 280, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, | |
"startEventStages" : [ { | |
"instanceName" : "Discard_StartEventStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Start Event - Discard", | |
"xPos" : 280, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
} ], | |
"stopEventStages" : [ { | |
"instanceName" : "Discard_StopEventStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Stop Event - Discard", | |
"xPos" : 719, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
} ], | |
"testOriginStage" : { | |
"instanceName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStage", | |
"library" : "streamsets-datacollector-dev-lib", | |
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource", | |
"stageVersion" : "3", | |
"configuration" : [ { | |
"name" : "rawData", | |
"value" : "{\n \"f1\": \"abc\",\n \"f2\": \"xyz\",\n \"f3\": \"lmn\"\n}" | |
}, { | |
"name" : "stopAfterFirstBatch", | |
"value" : false | |
}, { | |
"name" : "eventData", | |
"value" : null | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
} ], | |
"uiInfo" : { | |
"stageType" : "SOURCE", | |
"label" : "Test Origin - Dev Raw Data Source" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStageOutputLane1" ], | |
"eventLanes" : [ ], | |
"services" : [ { | |
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService", | |
"serviceVersion" : 1, | |
"configuration" : [ { | |
"name" : "displayFormats", | |
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML" | |
}, { | |
"name" : "dataFormat", | |
"value" : "JSON" | |
}, { | |
"name" : "dataFormatConfig.compression", | |
"value" : "NONE" | |
}, { | |
"name" : "dataFormatConfig.filePatternInArchive", | |
"value" : "*" | |
}, { | |
"name" : "dataFormatConfig.charset", | |
"value" : "UTF-8" | |
}, { | |
"name" : "dataFormatConfig.removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.textMaxLineLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.useCustomDelimiter", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customDelimiter", | |
"value" : "\\r\\n" | |
}, { | |
"name" : "dataFormatConfig.includeCustomDelimiterInTheText", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.jsonContent", | |
"value" : "MULTIPLE_OBJECTS" | |
}, { | |
"name" : "dataFormatConfig.jsonMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.csvFileFormat", | |
"value" : "CSV" | |
}, { | |
"name" : "dataFormatConfig.csvHeader", | |
"value" : "NO_HEADER" | |
}, { | |
"name" : "dataFormatConfig.csvAllowExtraColumns", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvExtraColumnPrefix", | |
"value" : "_extra_" | |
}, { | |
"name" : "dataFormatConfig.csvMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.csvCustomDelimiter", | |
"value" : "|" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterFieldDelimiter", | |
"value" : "||" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterLineDelimiter", | |
"value" : "${str:unescapeJava('\\\\n')}" | |
}, { | |
"name" : "dataFormatConfig.csvCustomEscape", | |
"value" : "\\" | |
}, { | |
"name" : "dataFormatConfig.csvCustomQuote", | |
"value" : "\"" | |
}, { | |
"name" : "dataFormatConfig.csvEnableComments", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvCommentMarker", | |
"value" : "#" | |
}, { | |
"name" : "dataFormatConfig.csvIgnoreEmptyLines", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvRecordType", | |
"value" : "LIST_MAP" | |
}, { | |
"name" : "dataFormatConfig.csvSkipStartLines", | |
"value" : 0 | |
}, { | |
"name" : "dataFormatConfig.parseNull", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.nullConstant", | |
"value" : "\\\\N" | |
}, { | |
"name" : "dataFormatConfig.xmlRecordElement", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.includeFieldXpathAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xPathNamespaceContext", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.outputFieldAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xmlMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.logMode", | |
"value" : "COMMON_LOG_FORMAT" | |
}, { | |
"name" : "dataFormatConfig.logMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.retainOriginalLine", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customLogFormat", | |
"value" : "%h %l %u %t \"%r\" %>s %b" | |
}, { | |
"name" : "dataFormatConfig.regex", | |
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)" | |
}, { | |
"name" : "dataFormatConfig.fieldPathsToGroupName", | |
"value" : [ { | |
"fieldPath" : "/", | |
"group" : 1 | |
} ] | |
}, { | |
"name" : "dataFormatConfig.grokPatternDefinition", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.grokPattern", | |
"value" : "%{COMMONAPACHELOG}" | |
}, { | |
"name" : "dataFormatConfig.onParseError", | |
"value" : "ERROR" | |
}, { | |
"name" : "dataFormatConfig.maxStackTraceLines", | |
"value" : 50 | |
}, { | |
"name" : "dataFormatConfig.enableLog4jCustomLogFormat", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.log4jCustomLogFormat", | |
"value" : "%r [%t] %-5p %c %x - %m%n" | |
}, { | |
"name" : "dataFormatConfig.avroSchemaSource", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSchema", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaRegistryUrls", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.schemaLookupMode", | |
"value" : "SUBJECT" | |
}, { | |
"name" : "dataFormatConfig.subject", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaId", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.protoDescriptorFile", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.messageType", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.isDelimited", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.binaryMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.datagramMode", | |
"value" : "SYSLOG" | |
}, { | |
"name" : "dataFormatConfig.typesDbPath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.convertTime", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excludeInterval", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.authFilePath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesMode", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSize", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMs", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.wholeFileMaxObjectLen", | |
"value" : 8192 | |
}, { | |
"name" : "dataFormatConfig.rateLimit", | |
"value" : "-1" | |
}, { | |
"name" : "dataFormatConfig.verifyChecksum", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelHeader", | |
"value" : null | |
} ] | |
} ] | |
}, | |
"valid" : true, | |
"issues" : { | |
"stageIssues" : { }, | |
"pipelineIssues" : [ ], | |
"issueCount" : 0 | |
}, | |
"previewable" : true | |
}, | |
"pipelineRules" : { | |
"schemaVersion" : 3, | |
"version" : 2, | |
"metricsRuleDefinitions" : [ { | |
"id" : "badRecordsAlertID", | |
"alertText" : "High incidence of Error Records", | |
"metricId" : "pipeline.batchErrorRecords.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > 100}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1528936184502, | |
"valid" : true | |
}, { | |
"id" : "stageErrorAlertID", | |
"alertText" : "High incidence of Stage Errors", | |
"metricId" : "pipeline.batchErrorMessages.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > 100}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1528936184502, | |
"valid" : true | |
}, { | |
"id" : "idleGaugeID", | |
"alertText" : "Pipeline is Idle", | |
"metricId" : "RuntimeStatsGauge.gauge", | |
"metricType" : "GAUGE", | |
"metricElement" : "TIME_OF_LAST_RECEIVED_RECORD", | |
"condition" : "${time:now() - value() > 120000}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1528936184502, | |
"valid" : true | |
}, { | |
"id" : "batchTimeAlertID", | |
"alertText" : "Batch taking more time to process", | |
"metricId" : "RuntimeStatsGauge.gauge", | |
"metricType" : "GAUGE", | |
"metricElement" : "CURRENT_BATCH_AGE", | |
"condition" : "${value() > 200}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1528936184502, | |
"valid" : true | |
}, { | |
"id" : "memoryLimitAlertID", | |
"alertText" : "Memory limit for pipeline exceeded", | |
"metricId" : "pipeline.memoryConsumed.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > (jvm:maxMemoryMB() * 0.65)}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1528936184502, | |
"valid" : true | |
} ], | |
"dataRuleDefinitions" : [ ], | |
"driftRuleDefinitions" : [ ], | |
"uuid" : "892b345d-5cd7-4b75-87db-2b1efcd39f3c", | |
"configuration" : [ { | |
"name" : "emailIDs", | |
"value" : [ ] | |
}, { | |
"name" : "webhookConfigs", | |
"value" : [ ] | |
} ], | |
"ruleIssues" : [ ], | |
"configIssues" : [ ] | |
}, | |
"libraryDefinitions" : null | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment