Skip to content

Instantly share code, notes, and snippets.

@metadaddy
Created May 9, 2019 04:25
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save metadaddy/6f7df4398b4455f20797b07ae312badb to your computer and use it in GitHub Desktop.
Save metadaddy/6f7df4398b4455f20797b07ae312badb to your computer and use it in GitHub Desktop.
StreamSets Data Collector 3.8.0 pipeline to validate JSON against a schema
{
"pipelineConfig" : {
"schemaVersion" : 6,
"version" : 12,
"pipelineId" : "ValidateJSONDatab0dfa94e-faf3-42ec-9a02-5122d048fe4c",
"title" : "Validate JSON Data",
"description" : "",
"uuid" : "d504e9d3-d4ea-4d52-8fd8-565592400d2e",
"configuration" : [ {
"name" : "executionMode",
"value" : "STANDALONE"
}, {
"name" : "edgeHttpUrl",
"value" : "http://localhost:18633"
}, {
"name" : "deliveryGuarantee",
"value" : "AT_LEAST_ONCE"
}, {
"name" : "startEventStage",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1"
}, {
"name" : "stopEventStage",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1"
}, {
"name" : "shouldRetry",
"value" : true
}, {
"name" : "retryAttempts",
"value" : -1
}, {
"name" : "notifyOnStates",
"value" : [ "RUN_ERROR", "STOPPED", "FINISHED" ]
}, {
"name" : "emailIDs",
"value" : [ ]
}, {
"name" : "constants",
"value" : [ {
"key" : "audit",
"value" : "https://streamsets-request-bin.herokuapp.com/zz456ozz"
}, {
"key" : "test",
"value" : "a#b"
} ]
}, {
"name" : "badRecordsHandling",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_recordstolocalfilesystem_ToErrorLocalFSDTarget::1"
}, {
"name" : "errorRecordPolicy",
"value" : "ORIGINAL_RECORD"
}, {
"name" : "workerCount",
"value" : 0
}, {
"name" : "clusterSlaveMemory",
"value" : 2048
}, {
"name" : "clusterSlaveJavaOpts",
"value" : "-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -Dlog4j.debug"
}, {
"name" : "clusterLauncherEnv",
"value" : [ ]
}, {
"name" : "mesosDispatcherURL",
"value" : null
}, {
"name" : "hdfsS3ConfDir",
"value" : null
}, {
"name" : "rateLimit",
"value" : 10
}, {
"name" : "maxRunners",
"value" : 0
}, {
"name" : "shouldCreateFailureSnapshot",
"value" : true
}, {
"name" : "runnerIdleTIme",
"value" : 60
}, {
"name" : "webhookConfigs",
"value" : [ ]
}, {
"name" : "sparkConfigs",
"value" : [ ]
}, {
"name" : "statsAggregatorStage",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_StatsNullDTarget::1"
}, {
"name" : "testOriginStage",
"value" : "streamsets-datacollector-dev-lib::com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource::3"
}, {
"name" : "logLevel",
"value" : "INFO"
}, {
"name" : "amazonEMRConfig.userRegion",
"value" : null
}, {
"name" : "amazonEMRConfig.userRegionCustom",
"value" : null
}, {
"name" : "amazonEMRConfig.accessKey",
"value" : ""
}, {
"name" : "amazonEMRConfig.secretKey",
"value" : ""
}, {
"name" : "amazonEMRConfig.s3StagingUri",
"value" : null
}, {
"name" : "amazonEMRConfig.provisionNewCluster",
"value" : false
}, {
"name" : "amazonEMRConfig.clusterId",
"value" : null
}, {
"name" : "amazonEMRConfig.clusterPrefix",
"value" : null
}, {
"name" : "amazonEMRConfig.terminateCluster",
"value" : false
}, {
"name" : "amazonEMRConfig.enableEMRDebugging",
"value" : true
}, {
"name" : "amazonEMRConfig.s3LogUri",
"value" : null
}, {
"name" : "amazonEMRConfig.serviceRole",
"value" : "EMR_DefaultRole"
}, {
"name" : "amazonEMRConfig.jobFlowRole",
"value" : "EMR_EC2_DefaultRole"
}, {
"name" : "amazonEMRConfig.visibleToAllUsers",
"value" : true
}, {
"name" : "amazonEMRConfig.ec2SubnetId",
"value" : null
}, {
"name" : "amazonEMRConfig.masterSecurityGroup",
"value" : null
}, {
"name" : "amazonEMRConfig.slaveSecurityGroup",
"value" : null
}, {
"name" : "amazonEMRConfig.instanceCount",
"value" : 2
}, {
"name" : "amazonEMRConfig.masterInstanceType",
"value" : null
}, {
"name" : "amazonEMRConfig.masterInstanceTypeCustom",
"value" : null
}, {
"name" : "amazonEMRConfig.slaveInstanceType",
"value" : null
}, {
"name" : "amazonEMRConfig.slaveInstanceTypeCustom",
"value" : null
}, {
"name" : "amazonEMRConfig.loggingEnabled",
"value" : true
}, {
"name" : "memoryLimit",
"value" : "${jvm:maxMemoryMB() * 0.85}"
}, {
"name" : "memoryLimitExceeded",
"value" : "LOG"
}, {
"name" : "clusterConfig.clusterType",
"value" : "LOCAL"
}, {
"name" : "clusterConfig.sparkMasterUrl",
"value" : "local[*]"
}, {
"name" : "clusterConfig.deployMode",
"value" : "CLIENT"
}, {
"name" : "clusterConfig.hadoopUserName",
"value" : "hdfs"
}, {
"name" : "databricksConfig.baseUrl",
"value" : null
}, {
"name" : "databricksConfig.credentialType",
"value" : null
}, {
"name" : "databricksConfig.username",
"value" : ""
}, {
"name" : "databricksConfig.password",
"value" : ""
}, {
"name" : "databricksConfig.token",
"value" : ""
}, {
"name" : "clusterConfig.sparkAppName",
"value" : "${pipeline:title()}"
}, {
"name" : "clusterConfig.yarnKerberosEnabled",
"value" : false
}, {
"name" : "clusterConfig.yarnKerberosPrincipal",
"value" : "name@DOMAIN"
}, {
"name" : "clusterConfig.yarnKerberosKeytab",
"value" : null
}, {
"name" : "clusterConfig.stagingDir",
"value" : "/streamsets"
} ],
"uiInfo" : {
"previewConfig" : {
"previewSource" : "CONFIGURED_SOURCE",
"batchSize" : 10,
"timeout" : "300000",
"writeToDestinations" : false,
"executeLifecycleEvents" : false,
"showHeader" : true,
"showFieldType" : true,
"rememberMe" : false
}
},
"fragments" : [ ],
"stages" : [ {
"instanceName" : "DevRawDataSource_01",
"library" : "streamsets-datacollector-dev-lib",
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource",
"stageVersion" : "3",
"configuration" : [ {
"name" : "rawData",
"value" : "{\"employee\":\"Rick\", \"email\":\"rick@gmail.com\", \"department\":\"audit\"}\n{\"employee\":\"Rick\", \"address\":\"Times square\", \"salary\":10000, \"department\":\"sales\"}\n{\"employee\":\"Bob\", \"address\":\"Times square\", \"salary\":10000, \"department\":\"audit\"}\n{\"employee\":\"Bob\", \"email\":\"bob@gmail.com\", \"department\":\"sales\"}"
}, {
"name" : "stopAfterFirstBatch",
"value" : true
}, {
"name" : "eventData",
"value" : null
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
} ],
"uiInfo" : {
"description" : "",
"label" : "Read data as text",
"xPos" : 60,
"yPos" : 50,
"stageType" : "SOURCE"
},
"inputLanes" : [ ],
"outputLanes" : [ "DevRawDataSource_01OutputLane15572651869340" ],
"eventLanes" : [ ],
"services" : [ {
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService",
"serviceVersion" : 1,
"configuration" : [ {
"name" : "displayFormats",
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML"
}, {
"name" : "dataFormat",
"value" : "TEXT"
}, {
"name" : "dataFormatConfig.compression",
"value" : "NONE"
}, {
"name" : "dataFormatConfig.filePatternInArchive",
"value" : "*"
}, {
"name" : "dataFormatConfig.charset",
"value" : "UTF-8"
}, {
"name" : "dataFormatConfig.removeCtrlChars",
"value" : false
}, {
"name" : "dataFormatConfig.textMaxLineLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.useCustomDelimiter",
"value" : false
}, {
"name" : "dataFormatConfig.customDelimiter",
"value" : "\\r\\n"
}, {
"name" : "dataFormatConfig.includeCustomDelimiterInTheText",
"value" : false
}, {
"name" : "dataFormatConfig.jsonContent",
"value" : "MULTIPLE_OBJECTS"
}, {
"name" : "dataFormatConfig.jsonMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.csvFileFormat",
"value" : "CSV"
}, {
"name" : "dataFormatConfig.csvHeader",
"value" : "NO_HEADER"
}, {
"name" : "dataFormatConfig.csvAllowExtraColumns",
"value" : false
}, {
"name" : "dataFormatConfig.csvExtraColumnPrefix",
"value" : "_extra_"
}, {
"name" : "dataFormatConfig.csvMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.csvCustomDelimiter",
"value" : "|"
}, {
"name" : "dataFormatConfig.multiCharacterFieldDelimiter",
"value" : "||"
}, {
"name" : "dataFormatConfig.multiCharacterLineDelimiter",
"value" : "${str:unescapeJava('\\\\n')}"
}, {
"name" : "dataFormatConfig.csvCustomEscape",
"value" : "\\"
}, {
"name" : "dataFormatConfig.csvCustomQuote",
"value" : "\""
}, {
"name" : "dataFormatConfig.csvEnableComments",
"value" : false
}, {
"name" : "dataFormatConfig.csvCommentMarker",
"value" : "#"
}, {
"name" : "dataFormatConfig.csvIgnoreEmptyLines",
"value" : true
}, {
"name" : "dataFormatConfig.csvRecordType",
"value" : "LIST_MAP"
}, {
"name" : "dataFormatConfig.csvSkipStartLines",
"value" : 0
}, {
"name" : "dataFormatConfig.parseNull",
"value" : false
}, {
"name" : "dataFormatConfig.nullConstant",
"value" : "\\\\N"
}, {
"name" : "dataFormatConfig.xmlRecordElement",
"value" : null
}, {
"name" : "dataFormatConfig.includeFieldXpathAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xPathNamespaceContext",
"value" : [ ]
}, {
"name" : "dataFormatConfig.outputFieldAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xmlMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.logMode",
"value" : "GROK"
}, {
"name" : "dataFormatConfig.logMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.retainOriginalLine",
"value" : false
}, {
"name" : "dataFormatConfig.customLogFormat",
"value" : "%h %l %u %t \"%r\" %>s %b"
}, {
"name" : "dataFormatConfig.regex",
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)"
}, {
"name" : "dataFormatConfig.fieldPathsToGroupName",
"value" : [ {
"fieldPath" : "/",
"group" : 1
} ]
}, {
"name" : "dataFormatConfig.grokPatternDefinition",
"value" : ""
}, {
"name" : "dataFormatConfig.grokPattern",
"value" : "%{WORD:verb} %{PATH:file}"
}, {
"name" : "dataFormatConfig.onParseError",
"value" : "ERROR"
}, {
"name" : "dataFormatConfig.maxStackTraceLines",
"value" : 50
}, {
"name" : "dataFormatConfig.enableLog4jCustomLogFormat",
"value" : false
}, {
"name" : "dataFormatConfig.log4jCustomLogFormat",
"value" : "%r [%t] %-5p %c %x - %m%n"
}, {
"name" : "dataFormatConfig.avroSchemaSource",
"value" : null
}, {
"name" : "dataFormatConfig.avroSchema",
"value" : null
}, {
"name" : "dataFormatConfig.schemaRegistryUrls",
"value" : [ ]
}, {
"name" : "dataFormatConfig.schemaLookupMode",
"value" : "SUBJECT"
}, {
"name" : "dataFormatConfig.subject",
"value" : null
}, {
"name" : "dataFormatConfig.schemaId",
"value" : null
}, {
"name" : "dataFormatConfig.protoDescriptorFile",
"value" : null
}, {
"name" : "dataFormatConfig.messageType",
"value" : null
}, {
"name" : "dataFormatConfig.isDelimited",
"value" : true
}, {
"name" : "dataFormatConfig.binaryMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.datagramMode",
"value" : "SYSLOG"
}, {
"name" : "dataFormatConfig.typesDbPath",
"value" : null
}, {
"name" : "dataFormatConfig.convertTime",
"value" : false
}, {
"name" : "dataFormatConfig.excludeInterval",
"value" : true
}, {
"name" : "dataFormatConfig.authFilePath",
"value" : null
}, {
"name" : "dataFormatConfig.netflowOutputValuesMode",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSize",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMs",
"value" : -1
}, {
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.wholeFileMaxObjectLen",
"value" : 8192
}, {
"name" : "dataFormatConfig.rateLimit",
"value" : "-1"
}, {
"name" : "dataFormatConfig.verifyChecksum",
"value" : false
}, {
"name" : "dataFormatConfig.excelHeader",
"value" : null
} ]
} ]
}, {
"instanceName" : "JSONParser_01",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_jsonparser_JsonParserDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldPathToParse",
"value" : "/text"
}, {
"name" : "removeCtrlChars",
"value" : false
}, {
"name" : "parsedFieldPath",
"value" : "/json"
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"description" : "",
"label" : "Parse JSON data",
"xPos" : 280,
"yPos" : 50,
"stageType" : "PROCESSOR"
},
"inputLanes" : [ "DevRawDataSource_01OutputLane15572651869340" ],
"outputLanes" : [ "JSONParser_01OutputLane15573745798530" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "StaticLookup_01",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_kv_local_LocalLookupDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "conf.values",
"value" : [ {
"key" : "audit",
"value" : "{ \"$schema\": \"http://json-schema.org/draft-04/schema#\", \"type\": \"object\", \"properties\": { \"employee\": { \"type\": \"string\" }, \"email\": { \"type\": \"string\" }, \"department\": { \"type\": \"string\" } }, \"required\": [ \"employee\", \"email\", \"department\" ] }"
}, {
"key" : "sales",
"value" : "{ \"$schema\": \"http://json-schema.org/draft-04/schema#\", \"type\": \"object\", \"properties\": { \"employee\": { \"type\": \"string\" }, \"address\": { \"type\": \"string\" }, \"salary\": { \"type\": \"integer\" }, \"department\": { \"type\": \"string\" } }, \"required\": [ \"employee\", \"address\", \"salary\", \"department\" ] }"
} ]
}, {
"name" : "conf.mode",
"value" : "BATCH"
}, {
"name" : "conf.lookups",
"value" : [ {
"keyExpr" : "${record:value('/json/department')}",
"outputFieldPath" : "/schema"
} ]
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"description" : "",
"label" : "Lookup schema",
"xPos" : 500,
"yPos" : 50,
"stageType" : "PROCESSOR"
},
"inputLanes" : [ "JSONParser_01OutputLane15573745798530" ],
"outputLanes" : [ "StaticLookup_01OutputLane15573729405360" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "JSONParser_02",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_jsonparser_JsonParserDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldPathToParse",
"value" : "/schema"
}, {
"name" : "removeCtrlChars",
"value" : false
}, {
"name" : "parsedFieldPath",
"value" : "/schema"
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"description" : "",
"label" : "Parse schema",
"xPos" : 720,
"yPos" : 50,
"stageType" : "PROCESSOR"
},
"inputLanes" : [ "StaticLookup_01OutputLane15573729405360" ],
"outputLanes" : [ "JSONParser_02OutputLane15573747961910" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "JythonEvaluator_01",
"library" : "streamsets-datacollector-jython_2_7-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_jython_JythonDProcessor",
"stageVersion" : "2",
"configuration" : [ {
"name" : "processingMode",
"value" : "BATCH"
}, {
"name" : "initScript",
"value" : "#\n# Available Objects:\n#\n# state: a dict that is preserved between invocations of this script. \n# Useful for caching bits of data e.g. counters.\n#\n# log.<loglevel>(msg, obj...): use instead of print to send log messages to the log4j log instead of stdout.\n# loglevel is any log4j level: e.g. info, error, warn, trace.\n# sdcFunctions.getFieldNull(Record, 'field path'): Receive a constant defined above \n# to check if the field is typed field with value null\n# sdcFunctions.createMap(boolean listMap): Create a map for use as a field in a record.\n# Pass True to this function to create a list map (ordered map)\n#\n\n# state['connection'] = Connection().open()"
}, {
"name" : "script",
"value" : "# Append jsonschema location to system path\nimport sys\n# Customize for your deployment\nsys.path.append('/Users/pat/Library/Python/2.7/lib/python/site-packages')\nsys.path.append('/Users/pat/Library/Python/2.7/lib/python/site-packages/pip/_vendor')\n\nfrom jsonschema import validate\n\nfor record in records:\n  try:\n    # If no exception is raised by validate(), the instance is valid.\n    validate(instance=record.value['json'], schema=record.value['schema'])\n\n    # Write record to processor output\n    output.write(record)\n\n  except Exception as e:\n    # Send record to error\n    error.write(record, str(e))"
}, {
"name" : "destroyScript",
"value" : "#\n# Available Objects:\n#\n# state: a dict that is preserved between invocations of this script. \n# Useful for caching bits of data e.g. counters.\n#\n# log.<loglevel>(msg, obj...): use instead of print to send log messages to the log4j log instead of stdout.\n# loglevel is any log4j level: e.g. info, error, warn, trace.\n# sdcFunctions.getFieldNull(Record, 'field path'): Receive a constant defined above \n# to check if the field is typed field with value null\n# sdcFunctions.createMap(boolean listMap): Create a map for use as a field in a record.\n# Pass True to this function to create a list map (ordered map)\n# sdcFunctions.createEvent(String type, int version): Creates a new event.\n# Create new empty event with standard headers.\n# sdcFunctions.toEvent(Record): Send event to event stream\n# Only events created with sdcFunctions.createEvent are supported.\n#\n\n# state['connection'].close()"
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"description" : "",
"label" : "Validate data against schema",
"xPos" : 940,
"yPos" : 50,
"stageType" : "PROCESSOR"
},
"inputLanes" : [ "JSONParser_02OutputLane15573747961910" ],
"outputLanes" : [ "JythonEvaluator_01OutputLane15573729788680" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "LocalFS_01",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_localfilesystem_LocalFileSystemDTarget",
"stageVersion" : "3",
"configuration" : [ {
"name" : "configs.uniquePrefix",
"value" : "sdc-${sdc:id()}"
}, {
"name" : "configs.fileNameSuffix",
"value" : null
}, {
"name" : "configs.dirPathTemplateInHeader",
"value" : false
}, {
"name" : "configs.dirPathTemplate",
"value" : "/tmp/out/${YYYY()}-${MM()}-${DD()}-${hh()}"
}, {
"name" : "configs.timeZoneID",
"value" : "UTC"
}, {
"name" : "configs.timeDriver",
"value" : "${time:now()}"
}, {
"name" : "configs.maxRecordsPerFile",
"value" : 0
}, {
"name" : "configs.maxFileSize",
"value" : 0
}, {
"name" : "configs.idleTimeout",
"value" : "${1 * HOURS}"
}, {
"name" : "configs.compression",
"value" : "NONE"
}, {
"name" : "configs.otherCompression",
"value" : null
}, {
"name" : "configs.fileType",
"value" : "TEXT"
}, {
"name" : "configs.keyEl",
"value" : "${uuid()}"
}, {
"name" : "configs.lateRecordsLimit",
"value" : "${1 * HOURS}"
}, {
"name" : "configs.rollIfHeader",
"value" : false
}, {
"name" : "configs.rollHeaderName",
"value" : "roll"
}, {
"name" : "configs.lateRecordsAction",
"value" : "SEND_TO_ERROR"
}, {
"name" : "configs.lateRecordsDirPathTemplate",
"value" : "/tmp/late/${YYYY()}-${MM()}-${DD()}"
}, {
"name" : "configs.dataFormat",
"value" : "JSON"
}, {
"name" : "configs.hdfsPermissionCheck",
"value" : true
}, {
"name" : "configs.permissionEL",
"value" : null
}, {
"name" : "configs.skipOldTempFileRecovery",
"value" : false
}, {
"name" : "configs.dataGeneratorFormatConfig.charset",
"value" : "UTF-8"
}, {
"name" : "configs.dataGeneratorFormatConfig.csvFileFormat",
"value" : "CSV"
}, {
"name" : "configs.dataGeneratorFormatConfig.csvHeader",
"value" : "NO_HEADER"
}, {
"name" : "configs.dataGeneratorFormatConfig.csvReplaceNewLines",
"value" : true
}, {
"name" : "configs.dataGeneratorFormatConfig.csvReplaceNewLinesString",
"value" : " "
}, {
"name" : "configs.dataGeneratorFormatConfig.csvCustomDelimiter",
"value" : "|"
}, {
"name" : "configs.dataGeneratorFormatConfig.csvCustomEscape",
"value" : "\\"
}, {
"name" : "configs.dataGeneratorFormatConfig.csvCustomQuote",
"value" : "\""
}, {
"name" : "configs.dataGeneratorFormatConfig.jsonMode",
"value" : "MULTIPLE_OBJECTS"
}, {
"name" : "configs.dataGeneratorFormatConfig.textFieldPath",
"value" : "/text"
}, {
"name" : "configs.dataGeneratorFormatConfig.textRecordSeparator",
"value" : "\\n"
}, {
"name" : "configs.dataGeneratorFormatConfig.textFieldMissingAction",
"value" : "ERROR"
}, {
"name" : "configs.dataGeneratorFormatConfig.textEmptyLineIfNull",
"value" : false
}, {
"name" : "configs.dataGeneratorFormatConfig.avroSchemaSource",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.avroSchema",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.registerSchema",
"value" : false
}, {
"name" : "configs.dataGeneratorFormatConfig.schemaRegistryUrlsForRegistration",
"value" : [ ]
}, {
"name" : "configs.dataGeneratorFormatConfig.schemaRegistryUrls",
"value" : [ ]
}, {
"name" : "configs.dataGeneratorFormatConfig.schemaLookupMode",
"value" : "SUBJECT"
}, {
"name" : "configs.dataGeneratorFormatConfig.subject",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.subjectToRegister",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.schemaId",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.avroCompression",
"value" : "NULL"
}, {
"name" : "configs.dataGeneratorFormatConfig.binaryFieldPath",
"value" : "/"
}, {
"name" : "configs.dataGeneratorFormatConfig.protoDescriptorFile",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.messageType",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.fileNameEL",
"value" : null
}, {
"name" : "configs.dataGeneratorFormatConfig.wholeFileExistsAction",
"value" : "TO_ERROR"
}, {
"name" : "configs.dataGeneratorFormatConfig.includeChecksumInTheEvents",
"value" : false
}, {
"name" : "configs.dataGeneratorFormatConfig.checksumAlgorithm",
"value" : "MD5"
}, {
"name" : "configs.dataGeneratorFormatConfig.xmlPrettyPrint",
"value" : true
}, {
"name" : "configs.dataGeneratorFormatConfig.xmlValidateSchema",
"value" : false
}, {
"name" : "configs.dataGeneratorFormatConfig.xmlSchema",
"value" : null
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"description" : "",
"label" : "Write valid records to disk",
"xPos" : 1160,
"yPos" : 50,
"stageType" : "TARGET"
},
"inputLanes" : [ "JythonEvaluator_01OutputLane15573729788680" ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
} ],
"errorStage" : {
"instanceName" : "WritetoFile_ErrorStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_recordstolocalfilesystem_ToErrorLocalFSDTarget",
"stageVersion" : "1",
"configuration" : [ {
"name" : "directory",
"value" : "/tmp/"
}, {
"name" : "uniquePrefix",
"value" : "sdc-error-${sdc:id()}"
}, {
"name" : "rotationIntervalSecs",
"value" : "${1 * HOURS}"
}, {
"name" : "maxFileSizeMbs",
"value" : 512
} ],
"uiInfo" : {
"description" : "",
"label" : "Error Records - Write to File",
"xPos" : 500,
"yPos" : 50,
"stageType" : "TARGET"
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
},
"info" : {
"pipelineId" : "ValidateJSONDatab0dfa94e-faf3-42ec-9a02-5122d048fe4c",
"title" : "Validate JSON Data",
"description" : "",
"created" : 1557375426335,
"lastModified" : 1557375426585,
"creator" : "admin",
"lastModifier" : "admin",
"lastRev" : "0",
"uuid" : "d504e9d3-d4ea-4d52-8fd8-565592400d2e",
"valid" : false,
"metadata" : {
"labels" : [ ]
},
"name" : "ValidateJSONDatab0dfa94e-faf3-42ec-9a02-5122d048fe4c",
"sdcVersion" : "3.8.0",
"sdcId" : "2e5c18f6-2e5b-11e9-a456-3ff9fba8972e"
},
"metadata" : {
"labels" : [ ]
},
"statsAggregatorStage" : {
"instanceName" : "Discard_StatsAggregatorStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_StatsNullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"description" : "",
"label" : "Stats Aggregator - Discard",
"xPos" : 280,
"yPos" : 50,
"stageType" : "TARGET"
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
},
"startEventStages" : [ {
"instanceName" : "Discard_StartEventStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"description" : "",
"label" : "Start Event - Discard",
"xPos" : 280,
"yPos" : 50,
"stageType" : "TARGET"
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
} ],
"stopEventStages" : [ {
"instanceName" : "Discard_StopEventStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"description" : "",
"label" : "Stop Event - Discard",
"xPos" : 719,
"yPos" : 50,
"stageType" : "TARGET"
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
} ],
"testOriginStage" : {
"instanceName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStage",
"library" : "streamsets-datacollector-dev-lib",
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource",
"stageVersion" : "3",
"configuration" : [ {
"name" : "rawData",
"value" : "{\n \"f1\": \"abc\",\n \"f2\": \"xyz\",\n \"f3\": \"lmn\"\n}"
}, {
"name" : "stopAfterFirstBatch",
"value" : false
}, {
"name" : "eventData",
"value" : null
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
} ],
"uiInfo" : {
"stageType" : "SOURCE",
"label" : "Test Origin - Dev Raw Data Source"
},
"inputLanes" : [ ],
"outputLanes" : [ "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStageOutputLane1" ],
"eventLanes" : [ ],
"services" : [ {
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService",
"serviceVersion" : 1,
"configuration" : [ {
"name" : "displayFormats",
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML"
}, {
"name" : "dataFormat",
"value" : "JSON"
}, {
"name" : "dataFormatConfig.compression",
"value" : "NONE"
}, {
"name" : "dataFormatConfig.filePatternInArchive",
"value" : "*"
}, {
"name" : "dataFormatConfig.charset",
"value" : "UTF-8"
}, {
"name" : "dataFormatConfig.removeCtrlChars",
"value" : false
}, {
"name" : "dataFormatConfig.textMaxLineLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.useCustomDelimiter",
"value" : false
}, {
"name" : "dataFormatConfig.customDelimiter",
"value" : "\\r\\n"
}, {
"name" : "dataFormatConfig.includeCustomDelimiterInTheText",
"value" : false
}, {
"name" : "dataFormatConfig.jsonContent",
"value" : "MULTIPLE_OBJECTS"
}, {
"name" : "dataFormatConfig.jsonMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.csvFileFormat",
"value" : "CSV"
}, {
"name" : "dataFormatConfig.csvHeader",
"value" : "NO_HEADER"
}, {
"name" : "dataFormatConfig.csvAllowExtraColumns",
"value" : false
}, {
"name" : "dataFormatConfig.csvExtraColumnPrefix",
"value" : "_extra_"
}, {
"name" : "dataFormatConfig.csvMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.csvCustomDelimiter",
"value" : "|"
}, {
"name" : "dataFormatConfig.multiCharacterFieldDelimiter",
"value" : "||"
}, {
"name" : "dataFormatConfig.multiCharacterLineDelimiter",
"value" : "${str:unescapeJava('\\\\n')}"
}, {
"name" : "dataFormatConfig.csvCustomEscape",
"value" : "\\"
}, {
"name" : "dataFormatConfig.csvCustomQuote",
"value" : "\""
}, {
"name" : "dataFormatConfig.csvEnableComments",
"value" : false
}, {
"name" : "dataFormatConfig.csvCommentMarker",
"value" : "#"
}, {
"name" : "dataFormatConfig.csvIgnoreEmptyLines",
"value" : false
}, {
"name" : "dataFormatConfig.csvRecordType",
"value" : "LIST_MAP"
}, {
"name" : "dataFormatConfig.csvSkipStartLines",
"value" : 0
}, {
"name" : "dataFormatConfig.parseNull",
"value" : false
}, {
"name" : "dataFormatConfig.nullConstant",
"value" : "\\\\N"
}, {
"name" : "dataFormatConfig.xmlRecordElement",
"value" : null
}, {
"name" : "dataFormatConfig.includeFieldXpathAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xPathNamespaceContext",
"value" : [ ]
}, {
"name" : "dataFormatConfig.outputFieldAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xmlMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.logMode",
"value" : "COMMON_LOG_FORMAT"
}, {
"name" : "dataFormatConfig.logMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.retainOriginalLine",
"value" : false
}, {
"name" : "dataFormatConfig.customLogFormat",
"value" : "%h %l %u %t \"%r\" %>s %b"
}, {
"name" : "dataFormatConfig.regex",
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)"
}, {
"name" : "dataFormatConfig.fieldPathsToGroupName",
"value" : [ {
"fieldPath" : "/",
"group" : 1
} ]
}, {
"name" : "dataFormatConfig.grokPatternDefinition",
"value" : null
}, {
"name" : "dataFormatConfig.grokPattern",
"value" : "%{COMMONAPACHELOG}"
}, {
"name" : "dataFormatConfig.onParseError",
"value" : "ERROR"
}, {
"name" : "dataFormatConfig.maxStackTraceLines",
"value" : 50
}, {
"name" : "dataFormatConfig.enableLog4jCustomLogFormat",
"value" : false
}, {
"name" : "dataFormatConfig.log4jCustomLogFormat",
"value" : "%r [%t] %-5p %c %x - %m%n"
}, {
"name" : "dataFormatConfig.avroSchemaSource",
"value" : null
}, {
"name" : "dataFormatConfig.avroSchema",
"value" : null
}, {
"name" : "dataFormatConfig.schemaRegistryUrls",
"value" : [ ]
}, {
"name" : "dataFormatConfig.schemaLookupMode",
"value" : "SUBJECT"
}, {
"name" : "dataFormatConfig.subject",
"value" : null
}, {
"name" : "dataFormatConfig.schemaId",
"value" : null
}, {
"name" : "dataFormatConfig.protoDescriptorFile",
"value" : null
}, {
"name" : "dataFormatConfig.messageType",
"value" : null
}, {
"name" : "dataFormatConfig.isDelimited",
"value" : false
}, {
"name" : "dataFormatConfig.binaryMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.datagramMode",
"value" : "SYSLOG"
}, {
"name" : "dataFormatConfig.typesDbPath",
"value" : null
}, {
"name" : "dataFormatConfig.convertTime",
"value" : false
}, {
"name" : "dataFormatConfig.excludeInterval",
"value" : false
}, {
"name" : "dataFormatConfig.authFilePath",
"value" : null
}, {
"name" : "dataFormatConfig.netflowOutputValuesMode",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSize",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMs",
"value" : -1
}, {
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.wholeFileMaxObjectLen",
"value" : 8192
}, {
"name" : "dataFormatConfig.rateLimit",
"value" : "-1"
}, {
"name" : "dataFormatConfig.verifyChecksum",
"value" : false
}, {
"name" : "dataFormatConfig.excelHeader",
"value" : null
} ]
} ]
},
"valid" : true,
"issues" : {
"stageIssues" : { },
"pipelineIssues" : [ ],
"issueCount" : 0
},
"previewable" : true
},
"pipelineRules" : {
"schemaVersion" : 3,
"version" : 2,
"metricsRuleDefinitions" : [ {
"id" : "badRecordsAlertID",
"alertText" : "High incidence of Error Records",
"metricId" : "pipeline.batchErrorRecords.counter",
"metricType" : "COUNTER",
"metricElement" : "COUNTER_COUNT",
"condition" : "${value() > 100}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1528936184502,
"valid" : true
}, {
"id" : "stageErrorAlertID",
"alertText" : "High incidence of Stage Errors",
"metricId" : "pipeline.batchErrorMessages.counter",
"metricType" : "COUNTER",
"metricElement" : "COUNTER_COUNT",
"condition" : "${value() > 100}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1528936184502,
"valid" : true
}, {
"id" : "idleGaugeID",
"alertText" : "Pipeline is Idle",
"metricId" : "RuntimeStatsGauge.gauge",
"metricType" : "GAUGE",
"metricElement" : "TIME_OF_LAST_RECEIVED_RECORD",
"condition" : "${time:now() - value() > 120000}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1528936184502,
"valid" : true
}, {
"id" : "batchTimeAlertID",
"alertText" : "Batch taking more time to process",
"metricId" : "RuntimeStatsGauge.gauge",
"metricType" : "GAUGE",
"metricElement" : "CURRENT_BATCH_AGE",
"condition" : "${value() > 200}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1528936184502,
"valid" : true
}, {
"id" : "memoryLimitAlertID",
"alertText" : "Memory limit for pipeline exceeded",
"metricId" : "pipeline.memoryConsumed.counter",
"metricType" : "COUNTER",
"metricElement" : "COUNTER_COUNT",
"condition" : "${value() > (jvm:maxMemoryMB() * 0.65)}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1528936184502,
"valid" : true
} ],
"dataRuleDefinitions" : [ ],
"driftRuleDefinitions" : [ ],
"uuid" : "892b345d-5cd7-4b75-87db-2b1efcd39f3c",
"configuration" : [ {
"name" : "emailIDs",
"value" : [ ]
}, {
"name" : "webhookConfigs",
"value" : [ ]
} ],
"ruleIssues" : [ ],
"configIssues" : [ ]
},
"libraryDefinitions" : null
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment