Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jeff303
Created May 8, 2020 18:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeff303/ed1401dfbbda87ee1878394c01b65032 to your computer and use it in GitHub Desktop.
Save jeff303/ed1401dfbbda87ee1878394c01b65032 to your computer and use it in GitHub Desktop.
StreamSets Data Collector field restructuring example
{
"pipelineConfig" : {
"schemaVersion" : 6,
"version" : 16,
"pipelineId" : "FieldRestructuring5b840130-fc26-4363-8e1d-b075a6db10a2",
"title" : "Field Restructuring",
"description" : "",
"uuid" : "23890b10-6da9-4de1-b7e0-a8c32bd3e679",
"configuration" : [ {
"name" : "executionMode",
"value" : "STANDALONE"
}, {
"name" : "edgeHttpUrl",
"value" : "http://localhost:18633"
}, {
"name" : "deliveryGuarantee",
"value" : "AT_LEAST_ONCE"
}, {
"name" : "testOriginStage",
"value" : "streamsets-datacollector-dev-lib::com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource::3"
}, {
"name" : "startEventStage",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1"
}, {
"name" : "stopEventStage",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1"
}, {
"name" : "shouldRetry",
"value" : true
}, {
"name" : "triggerInterval",
"value" : 2000
}, {
"name" : "retryAttempts",
"value" : -1
}, {
"name" : "ludicrousMode",
"value" : false
}, {
"name" : "ludicrousModeInputCount",
"value" : false
}, {
"name" : "advancedErrorHandling",
"value" : false
}, {
"name" : "notifyOnStates",
"value" : [ "RUN_ERROR", "STOPPED", "FINISHED" ]
}, {
"name" : "emailIDs",
"value" : [ ]
}, {
"name" : "constants",
"value" : [ ]
}, {
"name" : "badRecordsHandling",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1"
}, {
"name" : "errorRecordPolicy",
"value" : "ORIGINAL_RECORD"
}, {
"name" : "statsAggregatorStage",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_StatsDpmDirectlyDTarget::1"
}, {
"name" : "workerCount",
"value" : 0
}, {
"name" : "clusterSlaveMemory",
"value" : 2048
}, {
"name" : "clusterSlaveJavaOpts",
"value" : "-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -Dlog4j.debug"
}, {
"name" : "clusterLauncherEnv",
"value" : [ ]
}, {
"name" : "mesosDispatcherURL",
"value" : null
}, {
"name" : "logLevel",
"value" : "INFO"
}, {
"name" : "hdfsS3ConfDir",
"value" : null
}, {
"name" : "rateLimit",
"value" : 0
}, {
"name" : "maxRunners",
"value" : 0
}, {
"name" : "shouldCreateFailureSnapshot",
"value" : true
}, {
"name" : "runnerIdleTIme",
"value" : 60
}, {
"name" : "webhookConfigs",
"value" : [ ]
}, {
"name" : "sparkConfigs",
"value" : [ ]
}, {
"name" : "preprocessScript",
"value" : "/*\nThe following script define a method\nthat increments an integer by 1 \nand registers it as a UDF with \nthe SparkSession, which can be accessed\nusing the variable named \"spark\":\ndef inc(i: Integer): Integer = {\n i + 1\n}\nspark.udf.register (\"inc\", inc _)\n\n*/"
}, {
"name" : "clusterConfig.clusterType",
"value" : "LOCAL"
}, {
"name" : "clusterConfig.sparkMasterUrl",
"value" : "local[*]"
}, {
"name" : "clusterConfig.deployMode",
"value" : "CLIENT"
}, {
"name" : "clusterConfig.hadoopUserName",
"value" : null
}, {
"name" : "clusterConfig.sparkAppName",
"value" : "${pipeline:title()}"
}, {
"name" : "clusterConfig.stagingDir",
"value" : "/streamsets"
}, {
"name" : "clusterConfig.useYarnKerberosKeytab",
"value" : false
}, {
"name" : "clusterConfig.yarnKerberosKeytabSource",
"value" : "PROPERTIES_FILE"
}, {
"name" : "clusterConfig.yarnKerberosKeytab",
"value" : null
}, {
"name" : "clusterConfig.yarnKerberosPrincipal",
"value" : "name@DOMAIN"
}, {
"name" : "databricksConfig.baseUrl",
"value" : null
}, {
"name" : "databricksConfig.credentialType",
"value" : null
}, {
"name" : "databricksConfig.username",
"value" : ""
}, {
"name" : "databricksConfig.password",
"value" : ""
}, {
"name" : "databricksConfig.token",
"value" : ""
}, {
"name" : "databricksConfig.provisionNewCluster",
"value" : true
}, {
"name" : "databricksConfig.clusterId",
"value" : null
}, {
"name" : "databricksConfig.clusterConfig",
"value" : "{\n \"num_workers\": 8,\n \"spark_version\": \"5.3.x-scala2.11\",\n \"node_type_id\": \"i3.xlarge\"\n}"
}, {
"name" : "databricksConfig.terminateCluster",
"value" : false
}, {
"name" : "livyConfig.baseUrl",
"value" : "https://localhost:30443/gateway/default/livy/v1/"
}, {
"name" : "livyConfig.username",
"value" : ""
}, {
"name" : "livyConfig.password",
"value" : ""
}, {
"name" : "amazonEMRConfig.userRegion",
"value" : null
}, {
"name" : "amazonEMRConfig.userRegionCustom",
"value" : null
}, {
"name" : "amazonEMRConfig.accessKey",
"value" : ""
}, {
"name" : "amazonEMRConfig.secretKey",
"value" : ""
}, {
"name" : "amazonEMRConfig.s3StagingUri",
"value" : null
}, {
"name" : "amazonEMRConfig.provisionNewCluster",
"value" : false
}, {
"name" : "amazonEMRConfig.clusterId",
"value" : null
}, {
"name" : "amazonEMRConfig.clusterPrefix",
"value" : null
}, {
"name" : "amazonEMRConfig.terminateCluster",
"value" : false
}, {
"name" : "amazonEMRConfig.loggingEnabled",
"value" : true
}, {
"name" : "amazonEMRConfig.s3LogUri",
"value" : null
}, {
"name" : "amazonEMRConfig.enableEMRDebugging",
"value" : true
}, {
"name" : "amazonEMRConfig.serviceRole",
"value" : "EMR_DefaultRole"
}, {
"name" : "amazonEMRConfig.jobFlowRole",
"value" : "EMR_EC2_DefaultRole"
}, {
"name" : "amazonEMRConfig.visibleToAllUsers",
"value" : true
}, {
"name" : "amazonEMRConfig.ec2SubnetId",
"value" : null
}, {
"name" : "amazonEMRConfig.masterSecurityGroup",
"value" : null
}, {
"name" : "amazonEMRConfig.slaveSecurityGroup",
"value" : null
}, {
"name" : "amazonEMRConfig.instanceCount",
"value" : 2
}, {
"name" : "amazonEMRConfig.masterInstanceType",
"value" : null
}, {
"name" : "amazonEMRConfig.masterInstanceTypeCustom",
"value" : null
}, {
"name" : "amazonEMRConfig.slaveInstanceType",
"value" : null
}, {
"name" : "amazonEMRConfig.slaveInstanceTypeCustom",
"value" : null
} ],
"uiInfo" : {
"previewConfig" : {
"previewSource" : "CONFIGURED_SOURCE",
"batchSize" : 10,
"timeout" : 30000,
"writeToDestinations" : false,
"executeLifecycleEvents" : false,
"showHeader" : false,
"showFieldType" : true,
"rememberMe" : false
}
},
"fragments" : [ ],
"stages" : [ {
"instanceName" : "DevRawDataSource_01",
"library" : "streamsets-datacollector-dev-lib",
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource",
"stageVersion" : "3",
"configuration" : [ {
"name" : "rawData",
"value" : "{\n \"data1\": [\n {\n \"vlan\": \"195\",\n \"vlanname\": \"Subnet-54.14.195\"\n },\n {\n \"vlan\": \"195\",\n \"vlanname\": \"Subnet-54.14.193\"\n }\n ]\n}"
}, {
"name" : "stopAfterFirstBatch",
"value" : false
}, {
"name" : "eventData",
"value" : null
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
} ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "SOURCE",
"icon" : "dev.png",
"description" : "",
"label" : "Input Data",
"xPos" : 60
},
"inputLanes" : [ ],
"outputLanes" : [ "DevRawDataSource_01OutputLane15889586173500" ],
"eventLanes" : [ ],
"services" : [ {
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService",
"serviceVersion" : 1,
"configuration" : [ {
"name" : "displayFormats",
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML"
}, {
"name" : "dataFormat",
"value" : "JSON"
}, {
"name" : "dataFormatConfig.compression",
"value" : "NONE"
}, {
"name" : "dataFormatConfig.filePatternInArchive",
"value" : "*"
}, {
"name" : "dataFormatConfig.charset",
"value" : "UTF-8"
}, {
"name" : "dataFormatConfig.removeCtrlChars",
"value" : false
}, {
"name" : "dataFormatConfig.textMaxLineLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.useCustomDelimiter",
"value" : false
}, {
"name" : "dataFormatConfig.customDelimiter",
"value" : "\\r\\n"
}, {
"name" : "dataFormatConfig.includeCustomDelimiterInTheText",
"value" : false
}, {
"name" : "dataFormatConfig.jsonContent",
"value" : "MULTIPLE_OBJECTS"
}, {
"name" : "dataFormatConfig.jsonMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.csvFileFormat",
"value" : "CSV"
}, {
"name" : "dataFormatConfig.csvHeader",
"value" : "NO_HEADER"
}, {
"name" : "dataFormatConfig.csvAllowExtraColumns",
"value" : false
}, {
"name" : "dataFormatConfig.csvExtraColumnPrefix",
"value" : "_extra_"
}, {
"name" : "dataFormatConfig.csvMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.csvCustomDelimiter",
"value" : "|"
}, {
"name" : "dataFormatConfig.multiCharacterFieldDelimiter",
"value" : "||"
}, {
"name" : "dataFormatConfig.multiCharacterLineDelimiter",
"value" : "${str:unescapeJava('\\\\n')}"
}, {
"name" : "dataFormatConfig.csvCustomEscape",
"value" : "\\"
}, {
"name" : "dataFormatConfig.csvCustomQuote",
"value" : "\""
}, {
"name" : "dataFormatConfig.csvEnableComments",
"value" : false
}, {
"name" : "dataFormatConfig.csvCommentMarker",
"value" : "#"
}, {
"name" : "dataFormatConfig.csvIgnoreEmptyLines",
"value" : true
}, {
"name" : "dataFormatConfig.csvRecordType",
"value" : "LIST_MAP"
}, {
"name" : "dataFormatConfig.csvSkipStartLines",
"value" : 0
}, {
"name" : "dataFormatConfig.parseNull",
"value" : false
}, {
"name" : "dataFormatConfig.nullConstant",
"value" : "\\\\N"
}, {
"name" : "dataFormatConfig.xmlRecordElement",
"value" : null
}, {
"name" : "dataFormatConfig.includeFieldXpathAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xPathNamespaceContext",
"value" : [ ]
}, {
"name" : "dataFormatConfig.outputFieldAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xmlMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.logMode",
"value" : "COMMON_LOG_FORMAT"
}, {
"name" : "dataFormatConfig.logMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.retainOriginalLine",
"value" : false
}, {
"name" : "dataFormatConfig.customLogFormat",
"value" : "%h %l %u %t \"%r\" %>s %b"
}, {
"name" : "dataFormatConfig.regex",
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)"
}, {
"name" : "dataFormatConfig.fieldPathsToGroupName",
"value" : [ {
"fieldPath" : "/",
"group" : 1
} ]
}, {
"name" : "dataFormatConfig.grokPatternDefinition",
"value" : null
}, {
"name" : "dataFormatConfig.grokPattern",
"value" : "%{COMMONAPACHELOG}"
}, {
"name" : "dataFormatConfig.onParseError",
"value" : "ERROR"
}, {
"name" : "dataFormatConfig.maxStackTraceLines",
"value" : 50
}, {
"name" : "dataFormatConfig.enableLog4jCustomLogFormat",
"value" : false
}, {
"name" : "dataFormatConfig.log4jCustomLogFormat",
"value" : "%r [%t] %-5p %c %x - %m%n"
}, {
"name" : "dataFormatConfig.avroSchemaSource",
"value" : null
}, {
"name" : "dataFormatConfig.avroSchema",
"value" : null
}, {
"name" : "dataFormatConfig.schemaRegistryUrls",
"value" : [ ]
}, {
"name" : "dataFormatConfig.basicAuth",
"value" : ""
}, {
"name" : "dataFormatConfig.schemaLookupMode",
"value" : "SUBJECT"
}, {
"name" : "dataFormatConfig.subject",
"value" : null
}, {
"name" : "dataFormatConfig.schemaId",
"value" : null
}, {
"name" : "dataFormatConfig.avroSkipUnionIndex",
"value" : false
}, {
"name" : "dataFormatConfig.protoDescriptorFile",
"value" : null
}, {
"name" : "dataFormatConfig.messageType",
"value" : null
}, {
"name" : "dataFormatConfig.isDelimited",
"value" : true
}, {
"name" : "dataFormatConfig.binaryMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.datagramMode",
"value" : "SYSLOG"
}, {
"name" : "dataFormatConfig.typesDbPath",
"value" : null
}, {
"name" : "dataFormatConfig.convertTime",
"value" : false
}, {
"name" : "dataFormatConfig.excludeInterval",
"value" : true
}, {
"name" : "dataFormatConfig.authFilePath",
"value" : null
}, {
"name" : "dataFormatConfig.netflowOutputValuesMode",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSize",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMs",
"value" : -1
}, {
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.wholeFileMaxObjectLen",
"value" : 8192
}, {
"name" : "dataFormatConfig.rateLimit",
"value" : "-1"
}, {
"name" : "dataFormatConfig.verifyChecksum",
"value" : false
}, {
"name" : "dataFormatConfig.excelHeader",
"value" : null
}, {
"name" : "dataFormatConfig.excelSkipCellsWithNoHeader",
"value" : false
}, {
"name" : "dataFormatConfig.excelReadAllSheets",
"value" : true
}, {
"name" : "dataFormatConfig.excelSheetNames",
"value" : [ ]
} ]
} ]
}, {
"instanceName" : "FieldMapper_01",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldMapperConfig.operateOn",
"value" : "FIELD_NAMES"
}, {
"name" : "fieldMapperConfig.conditionalExpression",
"value" : "${f:name() == 'vlan'}"
}, {
"name" : "fieldMapperConfig.mappingExpression",
"value" : "${str:concat('new_', str:concat(f:name(), '_map'))}"
}, {
"name" : "fieldMapperConfig.aggregationExpression",
"value" : ""
}, {
"name" : "fieldMapperConfig.structureChangeAllowed",
"value" : true
}, {
"name" : "fieldMapperConfig.appendListValues",
"value" : false
}, {
"name" : "fieldMapperConfig.maintainOriginalPaths",
"value" : true
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "PROCESSOR",
"icon" : "iconfinder_thefreeforty_map_1243687.png",
"description" : "",
"label" : "Add vlan_map",
"xPos" : 280
},
"inputLanes" : [ "DevRawDataSource_01OutputLane15889586173500" ],
"outputLanes" : [ "FieldMapper_01OutputLane15889586435950" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "FieldMapper_02",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldMapperConfig.operateOn",
"value" : "FIELD_VALUES"
}, {
"name" : "fieldMapperConfig.conditionalExpression",
"value" : "${f:name() == 'new_vlan_map'}"
}, {
"name" : "fieldMapperConfig.mappingExpression",
"value" : "${emptyMap()}"
}, {
"name" : "fieldMapperConfig.aggregationExpression",
"value" : "${fields}"
}, {
"name" : "fieldMapperConfig.structureChangeAllowed",
"value" : true
}, {
"name" : "fieldMapperConfig.appendListValues",
"value" : false
}, {
"name" : "fieldMapperConfig.maintainOriginalPaths",
"value" : false
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "PROCESSOR",
"icon" : "iconfinder_thefreeforty_map_1243687.png",
"description" : "",
"label" : "Change vlan_map to Map",
"xPos" : 500
},
"inputLanes" : [ "FieldMapper_01OutputLane15889586435950" ],
"outputLanes" : [ "FieldMapper_02OutputLane15889587787500" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "FieldMapper_04",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldMapperConfig.operateOn",
"value" : "FIELD_NAMES"
}, {
"name" : "fieldMapperConfig.conditionalExpression",
"value" : "${f:name() == 'vlan'}"
}, {
"name" : "fieldMapperConfig.mappingExpression",
"value" : "${str:concat(f:name(), 'id')}"
}, {
"name" : "fieldMapperConfig.aggregationExpression",
"value" : "${fields}"
}, {
"name" : "fieldMapperConfig.structureChangeAllowed",
"value" : true
}, {
"name" : "fieldMapperConfig.appendListValues",
"value" : false
}, {
"name" : "fieldMapperConfig.maintainOriginalPaths",
"value" : false
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "PROCESSOR",
"icon" : "iconfinder_thefreeforty_map_1243687.png",
"description" : "",
"label" : "Suffix id",
"xPos" : 720
},
"inputLanes" : [ "FieldMapper_02OutputLane15889587787500" ],
"outputLanes" : [ "FieldMapper_04OutputLane15889601175990" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "FieldMapper_03",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldMapperConfig.operateOn",
"value" : "FIELD_PATHS"
}, {
"name" : "fieldMapperConfig.conditionalExpression",
"value" : "${str:startsWith(f:name(), 'vlan')}"
}, {
"name" : "fieldMapperConfig.mappingExpression",
"value" : "${str:replace(f:path(), '/vlan', '/new_vlan_map/')}"
}, {
"name" : "fieldMapperConfig.aggregationExpression",
"value" : ""
}, {
"name" : "fieldMapperConfig.structureChangeAllowed",
"value" : true
}, {
"name" : "fieldMapperConfig.appendListValues",
"value" : false
}, {
"name" : "fieldMapperConfig.maintainOriginalPaths",
"value" : false
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "PROCESSOR",
"icon" : "iconfinder_thefreeforty_map_1243687.png",
"description" : "",
"label" : "Move vlan fields into vlan_map",
"xPos" : 940
},
"inputLanes" : [ "FieldMapper_04OutputLane15889601175990" ],
"outputLanes" : [ "FieldMapper_03OutputLane15889588568720" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "FieldMapper_05",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor",
"stageVersion" : "1",
"configuration" : [ {
"name" : "fieldMapperConfig.operateOn",
"value" : "FIELD_NAMES"
}, {
"name" : "fieldMapperConfig.conditionalExpression",
"value" : "${f:name() == 'new_vlan_map'}"
}, {
"name" : "fieldMapperConfig.mappingExpression",
"value" : "vlan"
}, {
"name" : "fieldMapperConfig.aggregationExpression",
"value" : "${fields}"
}, {
"name" : "fieldMapperConfig.structureChangeAllowed",
"value" : true
}, {
"name" : "fieldMapperConfig.appendListValues",
"value" : false
}, {
"name" : "fieldMapperConfig.maintainOriginalPaths",
"value" : false
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
} ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "PROCESSOR",
"icon" : "iconfinder_thefreeforty_map_1243687.png",
"description" : "",
"label" : "Rename back to vlan",
"xPos" : 1160
},
"inputLanes" : [ "FieldMapper_03OutputLane15889588568720" ],
"outputLanes" : [ "FieldMapper_05OutputLane15889601773700" ],
"eventLanes" : [ ],
"services" : [ ]
}, {
"instanceName" : "Trash_01",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_NullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "TARGET",
"icon" : "trash.png",
"description" : "",
"label" : "Trash",
"xPos" : 1380
},
"inputLanes" : [ "FieldMapper_05OutputLane15889601773700" ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
} ],
"errorStage" : {
"instanceName" : "Discard_ErrorStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "TARGET",
"icon" : "",
"description" : "",
"label" : "Error Records - Discard",
"xPos" : 919
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
},
"info" : {
"pipelineId" : "FieldRestructuring5b840130-fc26-4363-8e1d-b075a6db10a2",
"title" : "Field Restructuring",
"description" : "",
"created" : 1588958611231,
"lastModified" : 1588960341758,
"creator" : "admin",
"lastModifier" : "admin",
"lastRev" : "0",
"uuid" : "23890b10-6da9-4de1-b7e0-a8c32bd3e679",
"valid" : true,
"metadata" : {
"labels" : [ ]
},
"name" : "FieldRestructuring5b840130-fc26-4363-8e1d-b075a6db10a2",
"sdcVersion" : "3.13.0",
"sdcId" : "92c115fc-9150-11ea-86cc-1d4da6ab472f"
},
"metadata" : {
"labels" : [ ]
},
"statsAggregatorStage" : {
"instanceName" : "statsAggregatorStageInstance",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_StatsDpmDirectlyDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"stageType" : "TARGET",
"label" : "Stats Aggregator -Write Directly to Control Hub - statistics are not aggregated across Data Collectors"
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
},
"startEventStages" : [ {
"instanceName" : "Discard_StartEventStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "TARGET",
"icon" : "",
"description" : "",
"label" : "Start Event - Discard",
"xPos" : 280
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
} ],
"stopEventStages" : [ {
"instanceName" : "Discard_StopEventStage",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget",
"stageVersion" : "1",
"configuration" : [ ],
"uiInfo" : {
"yPos" : 50,
"stageType" : "TARGET",
"icon" : "",
"description" : "",
"label" : "Stop Event - Discard",
"xPos" : 280
},
"inputLanes" : [ ],
"outputLanes" : [ ],
"eventLanes" : [ ],
"services" : [ ]
} ],
"testOriginStage" : {
"instanceName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStage",
"library" : "streamsets-datacollector-dev-lib",
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource",
"stageVersion" : "3",
"configuration" : [ {
"name" : "rawData",
"value" : "{\n \"f1\": \"abc\",\n \"f2\": \"xyz\",\n \"f3\": \"lmn\"\n}"
}, {
"name" : "stopAfterFirstBatch",
"value" : false
}, {
"name" : "eventData",
"value" : null
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
} ],
"uiInfo" : {
"stageType" : "SOURCE",
"label" : "Test Origin - Dev Raw Data Source"
},
"inputLanes" : [ ],
"outputLanes" : [ "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStageOutputLane1" ],
"eventLanes" : [ ],
"services" : [ {
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService",
"serviceVersion" : 1,
"configuration" : [ {
"name" : "displayFormats",
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML"
}, {
"name" : "dataFormat",
"value" : "JSON"
}, {
"name" : "dataFormatConfig.compression",
"value" : "NONE"
}, {
"name" : "dataFormatConfig.filePatternInArchive",
"value" : "*"
}, {
"name" : "dataFormatConfig.charset",
"value" : "UTF-8"
}, {
"name" : "dataFormatConfig.removeCtrlChars",
"value" : false
}, {
"name" : "dataFormatConfig.textMaxLineLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.useCustomDelimiter",
"value" : false
}, {
"name" : "dataFormatConfig.customDelimiter",
"value" : "\\r\\n"
}, {
"name" : "dataFormatConfig.includeCustomDelimiterInTheText",
"value" : false
}, {
"name" : "dataFormatConfig.jsonContent",
"value" : "MULTIPLE_OBJECTS"
}, {
"name" : "dataFormatConfig.jsonMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.csvFileFormat",
"value" : "CSV"
}, {
"name" : "dataFormatConfig.csvHeader",
"value" : "NO_HEADER"
}, {
"name" : "dataFormatConfig.csvAllowExtraColumns",
"value" : false
}, {
"name" : "dataFormatConfig.csvExtraColumnPrefix",
"value" : "_extra_"
}, {
"name" : "dataFormatConfig.csvMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.csvCustomDelimiter",
"value" : "|"
}, {
"name" : "dataFormatConfig.multiCharacterFieldDelimiter",
"value" : "||"
}, {
"name" : "dataFormatConfig.multiCharacterLineDelimiter",
"value" : "${str:unescapeJava('\\\\n')}"
}, {
"name" : "dataFormatConfig.csvCustomEscape",
"value" : "\\"
}, {
"name" : "dataFormatConfig.csvCustomQuote",
"value" : "\""
}, {
"name" : "dataFormatConfig.csvEnableComments",
"value" : false
}, {
"name" : "dataFormatConfig.csvCommentMarker",
"value" : "#"
}, {
"name" : "dataFormatConfig.csvIgnoreEmptyLines",
"value" : false
}, {
"name" : "dataFormatConfig.csvRecordType",
"value" : "LIST_MAP"
}, {
"name" : "dataFormatConfig.csvSkipStartLines",
"value" : 0
}, {
"name" : "dataFormatConfig.parseNull",
"value" : false
}, {
"name" : "dataFormatConfig.nullConstant",
"value" : "\\\\N"
}, {
"name" : "dataFormatConfig.xmlRecordElement",
"value" : null
}, {
"name" : "dataFormatConfig.includeFieldXpathAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xPathNamespaceContext",
"value" : [ ]
}, {
"name" : "dataFormatConfig.outputFieldAttributes",
"value" : false
}, {
"name" : "dataFormatConfig.xmlMaxObjectLen",
"value" : 4096
}, {
"name" : "dataFormatConfig.logMode",
"value" : "COMMON_LOG_FORMAT"
}, {
"name" : "dataFormatConfig.logMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.retainOriginalLine",
"value" : false
}, {
"name" : "dataFormatConfig.customLogFormat",
"value" : "%h %l %u %t \"%r\" %>s %b"
}, {
"name" : "dataFormatConfig.regex",
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)"
}, {
"name" : "dataFormatConfig.fieldPathsToGroupName",
"value" : [ {
"fieldPath" : "/",
"group" : 1
} ]
}, {
"name" : "dataFormatConfig.grokPatternDefinition",
"value" : null
}, {
"name" : "dataFormatConfig.grokPattern",
"value" : "%{COMMONAPACHELOG}"
}, {
"name" : "dataFormatConfig.onParseError",
"value" : "ERROR"
}, {
"name" : "dataFormatConfig.maxStackTraceLines",
"value" : 50
}, {
"name" : "dataFormatConfig.enableLog4jCustomLogFormat",
"value" : false
}, {
"name" : "dataFormatConfig.log4jCustomLogFormat",
"value" : "%r [%t] %-5p %c %x - %m%n"
}, {
"name" : "dataFormatConfig.avroSchemaSource",
"value" : null
}, {
"name" : "dataFormatConfig.avroSchema",
"value" : null
}, {
"name" : "dataFormatConfig.schemaRegistryUrls",
"value" : [ ]
}, {
"name" : "dataFormatConfig.basicAuth",
"value" : ""
}, {
"name" : "dataFormatConfig.schemaLookupMode",
"value" : "SUBJECT"
}, {
"name" : "dataFormatConfig.subject",
"value" : null
}, {
"name" : "dataFormatConfig.schemaId",
"value" : null
}, {
"name" : "dataFormatConfig.avroSkipUnionIndex",
"value" : false
}, {
"name" : "dataFormatConfig.protoDescriptorFile",
"value" : null
}, {
"name" : "dataFormatConfig.messageType",
"value" : null
}, {
"name" : "dataFormatConfig.isDelimited",
"value" : false
}, {
"name" : "dataFormatConfig.binaryMaxObjectLen",
"value" : 1024
}, {
"name" : "dataFormatConfig.datagramMode",
"value" : "SYSLOG"
}, {
"name" : "dataFormatConfig.typesDbPath",
"value" : null
}, {
"name" : "dataFormatConfig.convertTime",
"value" : false
}, {
"name" : "dataFormatConfig.excludeInterval",
"value" : false
}, {
"name" : "dataFormatConfig.authFilePath",
"value" : null
}, {
"name" : "dataFormatConfig.netflowOutputValuesMode",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSize",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMs",
"value" : -1
}, {
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram",
"value" : "RAW_AND_INTERPRETED"
}, {
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram",
"value" : -1
}, {
"name" : "dataFormatConfig.wholeFileMaxObjectLen",
"value" : 8192
}, {
"name" : "dataFormatConfig.rateLimit",
"value" : "-1"
}, {
"name" : "dataFormatConfig.verifyChecksum",
"value" : false
}, {
"name" : "dataFormatConfig.excelHeader",
"value" : null
}, {
"name" : "dataFormatConfig.excelSkipCellsWithNoHeader",
"value" : false
}, {
"name" : "dataFormatConfig.excelReadAllSheets",
"value" : false
}, {
"name" : "dataFormatConfig.excelSheetNames",
"value" : [ ]
} ]
} ]
},
"valid" : true,
"issues" : {
"pipelineIssues" : [ ],
"stageIssues" : { },
"issueCount" : 0
},
"previewable" : true
},
"pipelineRules" : {
"schemaVersion" : 3,
"version" : 2,
"metricsRuleDefinitions" : [ {
"id" : "badRecordsAlertID",
"alertText" : "High incidence of Error Records",
"metricId" : "pipeline.batchErrorRecords.counter",
"metricType" : "COUNTER",
"metricElement" : "COUNTER_COUNT",
"condition" : "${value() > 100}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1588958611278,
"valid" : true
}, {
"id" : "stageErrorAlertID",
"alertText" : "High incidence of Stage Errors",
"metricId" : "pipeline.batchErrorMessages.counter",
"metricType" : "COUNTER",
"metricElement" : "COUNTER_COUNT",
"condition" : "${value() > 100}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1588958611278,
"valid" : true
}, {
"id" : "idleGaugeID",
"alertText" : "Pipeline is Idle",
"metricId" : "RuntimeStatsGauge.gauge",
"metricType" : "GAUGE",
"metricElement" : "TIME_OF_LAST_RECEIVED_RECORD",
"condition" : "${time:now() - value() > 120000}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1588958611278,
"valid" : true
}, {
"id" : "batchTimeAlertID",
"alertText" : "Batch taking more time to process",
"metricId" : "RuntimeStatsGauge.gauge",
"metricType" : "GAUGE",
"metricElement" : "CURRENT_BATCH_AGE",
"condition" : "${value() > 200}",
"sendEmail" : false,
"enabled" : false,
"timestamp" : 1588958611278,
"valid" : true
} ],
"dataRuleDefinitions" : [ ],
"driftRuleDefinitions" : [ ],
"uuid" : "71aafe13-1763-42ee-9753-1edf48008dc9",
"configuration" : [ {
"name" : "emailIDs",
"value" : [ ]
}, {
"name" : "webhookConfigs",
"value" : [ ]
} ],
"ruleIssues" : [ ],
"configIssues" : [ ]
},
"libraryDefinitions" : null
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment