Last active
April 11, 2017 23:07
-
-
Save metadaddy/9dff892eafb7b498696d85fb0f061ccd to your computer and use it in GitHub Desktop.
Manipulate fields in StreamSets Data Collector - download and import into SDC 2.4.0.0 or above!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"pipelineConfig" : { | |
"schemaVersion" : 2, | |
"version" : 5, | |
"uuid" : "e027d7d1-12db-4d9a-8f9a-827937c7c4df", | |
"title" : "Field Manipulations", | |
"description" : "", | |
"configuration" : [ { | |
"name" : "executionMode", | |
"value" : "STANDALONE" | |
}, { | |
"name" : "deliveryGuarantee", | |
"value" : "AT_LEAST_ONCE" | |
}, { | |
"name" : "shouldRetry", | |
"value" : true | |
}, { | |
"name" : "retryAttempts", | |
"value" : -1 | |
}, { | |
"name" : "memoryLimit", | |
"value" : "${jvm:maxMemoryMB() * 0.65}" | |
}, { | |
"name" : "memoryLimitExceeded", | |
"value" : "STOP_PIPELINE" | |
}, { | |
"name" : "notifyOnStates", | |
"value" : [ "RUN_ERROR", "STOPPED", "FINISHED" ] | |
}, { | |
"name" : "emailIDs", | |
"value" : [ ] | |
}, { | |
"name" : "constants", | |
"value" : [ ] | |
}, { | |
"name" : "badRecordsHandling", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1" | |
}, { | |
"name" : "clusterSlaveMemory", | |
"value" : 1024 | |
}, { | |
"name" : "clusterSlaveJavaOpts", | |
"value" : "-XX:PermSize=128M -XX:MaxPermSize=256M -Dhttps.protocols=TLSv1.2,TLSv1.1 -Dlog4j.debug" | |
}, { | |
"name" : "clusterLauncherEnv", | |
"value" : [ ] | |
}, { | |
"name" : "mesosDispatcherURL", | |
"value" : null | |
}, { | |
"name" : "hdfsS3ConfDir", | |
"value" : null | |
}, { | |
"name" : "rateLimit", | |
"value" : 0 | |
}, { | |
"name" : "statsAggregatorStage", | |
"value" : "" | |
} ], | |
"uiInfo" : { | |
"previewConfig" : { | |
"previewSource" : "CONFIGURED_SOURCE", | |
"batchSize" : 10, | |
"timeout" : 10000, | |
"writeToDestinations" : false, | |
"showHeader" : false, | |
"showFieldType" : true, | |
"rememberMe" : false | |
} | |
}, | |
"stages" : [ { | |
"instanceName" : "DevRawDataSource_01", | |
"library" : "streamsets-datacollector-dev-lib", | |
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource", | |
"stageVersion" : "2", | |
"configuration" : [ { | |
"name" : "dataFormat", | |
"value" : "JSON" | |
}, { | |
"name" : "dataFormatConfig.compression", | |
"value" : "NONE" | |
}, { | |
"name" : "dataFormatConfig.filePatternInArchive", | |
"value" : "*" | |
}, { | |
"name" : "dataFormatConfig.charset", | |
"value" : "UTF-8" | |
}, { | |
"name" : "dataFormatConfig.removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.textMaxLineLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.useCustomDelimiter", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customDelimiter", | |
"value" : "\\r\\n" | |
}, { | |
"name" : "dataFormatConfig.includeCustomDelimiterInTheText", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.jsonContent", | |
"value" : "MULTIPLE_OBJECTS" | |
}, { | |
"name" : "dataFormatConfig.jsonMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.csvFileFormat", | |
"value" : "CSV" | |
}, { | |
"name" : "dataFormatConfig.csvHeader", | |
"value" : "NO_HEADER" | |
}, { | |
"name" : "dataFormatConfig.csvMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.csvCustomDelimiter", | |
"value" : "|" | |
}, { | |
"name" : "dataFormatConfig.csvCustomEscape", | |
"value" : "\\" | |
}, { | |
"name" : "dataFormatConfig.csvCustomQuote", | |
"value" : "\"" | |
}, { | |
"name" : "dataFormatConfig.csvEnableComments", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvCommentMarker", | |
"value" : "#" | |
}, { | |
"name" : "dataFormatConfig.csvIgnoreEmptyLines", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.csvRecordType", | |
"value" : "LIST_MAP" | |
}, { | |
"name" : "dataFormatConfig.csvSkipStartLines", | |
"value" : 0 | |
}, { | |
"name" : "dataFormatConfig.parseNull", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.nullConstant", | |
"value" : "\\\\N" | |
}, { | |
"name" : "dataFormatConfig.xmlRecordElement", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.xPathNamespaceContext", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.xmlMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.logMode", | |
"value" : "COMMON_LOG_FORMAT" | |
}, { | |
"name" : "dataFormatConfig.logMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.retainOriginalLine", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customLogFormat", | |
"value" : "%h %l %u %t \"%r\" %>s %b" | |
}, { | |
"name" : "dataFormatConfig.regex", | |
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)" | |
}, { | |
"name" : "dataFormatConfig.fieldPathsToGroupName", | |
"value" : [ { | |
"fieldPath" : "/", | |
"group" : 1 | |
} ] | |
}, { | |
"name" : "dataFormatConfig.grokPatternDefinition", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.grokPattern", | |
"value" : "%{COMMONAPACHELOG}" | |
}, { | |
"name" : "dataFormatConfig.onParseError", | |
"value" : "ERROR" | |
}, { | |
"name" : "dataFormatConfig.maxStackTraceLines", | |
"value" : 50 | |
}, { | |
"name" : "dataFormatConfig.enableLog4jCustomLogFormat", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.log4jCustomLogFormat", | |
"value" : "%r [%t] %-5p %c %x - %m%n" | |
}, { | |
"name" : "dataFormatConfig.avroSchemaSource", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSchema", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaRegistryUrls", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.schemaLookupMode", | |
"value" : "SUBJECT" | |
}, { | |
"name" : "dataFormatConfig.subject", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaId", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.protoDescriptorFile", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.messageType", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.isDelimited", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.binaryMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.datagramMode", | |
"value" : "SYSLOG" | |
}, { | |
"name" : "dataFormatConfig.typesDbPath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.convertTime", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excludeInterval", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.authFilePath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.wholeFileMaxObjectLen", | |
"value" : 8192 | |
}, { | |
"name" : "dataFormatConfig.rateLimit", | |
"value" : "-1" | |
}, { | |
"name" : "dataFormatConfig.verifyChecksum", | |
"value" : false | |
}, { | |
"name" : "rawData", | |
"value" : "{\n \"status\": 0,\n \"results\": [\n {\n \"name\": \"StreamSets\",\n \"address\" : {\n \"street\": \"2 Bryant St\",\n \"city\": \"San Francisco\",\n \"state\": \"CA\",\n \"zip\": \"94105\"\n },\n \"phone\": \"(415) 851-1018\"\n },\n {\n \"name\": \"Salesforce\",\n \"address\" : {\n \"street\": \"1 Market St\",\n \"city\": \"San Francisco\",\n \"state\": \"CA\",\n \"zip\": \"94105\"\n },\n \"phone\": \"(415) 901-7000\"\n }\n ]\n}" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Dev Raw Data Source 1", | |
"xPos" : 60, | |
"yPos" : 50, | |
"stageType" : "SOURCE" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ "DevRawDataSource_01OutputLane14919318303350" ], | |
"eventLanes" : [ ] | |
}, { | |
"instanceName" : "FieldPivoter_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_listpivot_ListPivotDProcessor", | |
"stageVersion" : "2", | |
"configuration" : [ { | |
"name" : "listPath", | |
"value" : "/results" | |
}, { | |
"name" : "copyFields", | |
"value" : true | |
}, { | |
"name" : "newPath", | |
"value" : "/" | |
}, { | |
"name" : "saveOriginalFieldName", | |
"value" : false | |
}, { | |
"name" : "originalFieldNamePath", | |
"value" : null | |
}, { | |
"name" : "onStagePreConditionFailure", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Field Pivoter 1", | |
"xPos" : 280, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "DevRawDataSource_01OutputLane14919318303350" ], | |
"outputLanes" : [ "FieldPivoter_01OutputLane14919462671300" ], | |
"eventLanes" : [ ] | |
}, { | |
"instanceName" : "FieldFlattener_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_fieldflattener_FieldFlattenerDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "config.flattenType", | |
"value" : "ENTIRE_RECORD" | |
}, { | |
"name" : "config.fields", | |
"value" : [ ] | |
}, { | |
"name" : "config.nameSeparator", | |
"value" : "." | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Field Flattener 1", | |
"xPos" : 500, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "FieldPivoter_01OutputLane14919462671300" ], | |
"outputLanes" : [ "FieldFlattener_01OutputLane14919318571250" ], | |
"eventLanes" : [ ] | |
}, { | |
"instanceName" : "FieldRenamer_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_fieldrenamer_FieldRenamerDProcessor", | |
"stageVersion" : "2", | |
"configuration" : [ { | |
"name" : "renameMapping", | |
"value" : [ { | |
"fromFieldExpression" : "/'address\\.(.*)'", | |
"toFieldExpression" : "/$1" | |
} ] | |
}, { | |
"name" : "errorHandler.nonExistingFromFieldHandling", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "errorHandler.existingToFieldHandling", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "errorHandler.multipleFromFieldsMatching", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Field Renamer 1", | |
"xPos" : 720, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "FieldFlattener_01OutputLane14919318571250" ], | |
"outputLanes" : [ "FieldRenamer_01OutputLane14919446468120" ], | |
"eventLanes" : [ ] | |
}, { | |
"instanceName" : "FieldSplitter_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_splitter_SplitterDProcessor", | |
"stageVersion" : "2", | |
"configuration" : [ { | |
"name" : "fieldPath", | |
"value" : "/street" | |
}, { | |
"name" : "separator", | |
"value" : "\\s+" | |
}, { | |
"name" : "fieldPathsForSplits", | |
"value" : [ "/street_number", "/street_name" ] | |
}, { | |
"name" : "onStagePreConditionFailure", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "tooManySplitsAction", | |
"value" : "TO_LAST_FIELD" | |
}, { | |
"name" : "remainingSplitsPath", | |
"value" : null | |
}, { | |
"name" : "originalFieldAction", | |
"value" : "REMOVE" | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Field Splitter 1", | |
"xPos" : 940, | |
"yPos" : 50, | |
"stageType" : "PROCESSOR" | |
}, | |
"inputLanes" : [ "FieldRenamer_01OutputLane14919446468120" ], | |
"outputLanes" : [ "FieldSplitter_01OutputLane14919477043810" ], | |
"eventLanes" : [ ] | |
}, { | |
"instanceName" : "Trash_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_NullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Trash 1", | |
"xPos" : 1160, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ "FieldSplitter_01OutputLane14919477043810" ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ] | |
} ], | |
"errorStage" : { | |
"instanceName" : "Discard_ErrorStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"description" : "", | |
"label" : "Error Records - Discard", | |
"xPos" : 60, | |
"yPos" : 50, | |
"stageType" : "TARGET" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ] | |
}, | |
"info" : { | |
"name" : "85a1b191-b0f0-426e-9d6c-79d24d64a68f", | |
"title" : "Field Manipulations", | |
"description" : "", | |
"created" : 1491944630740, | |
"lastModified" : 1491949120326, | |
"creator" : "admin", | |
"lastModifier" : "admin", | |
"lastRev" : "0", | |
"uuid" : "e027d7d1-12db-4d9a-8f9a-827937c7c4df", | |
"valid" : true, | |
"metadata" : { | |
"labels" : [ ] | |
} | |
}, | |
"metadata" : { | |
"labels" : [ ] | |
}, | |
"statsAggregatorStage" : null, | |
"previewable" : true, | |
"issues" : { | |
"stageIssues" : { }, | |
"pipelineIssues" : [ ], | |
"issueCount" : 0 | |
}, | |
"valid" : true | |
}, | |
"pipelineRules" : { | |
"metricsRuleDefinitions" : [ { | |
"id" : "badRecordsAlertID", | |
"alertText" : "High incidence of Error Records", | |
"metricId" : "pipeline.batchErrorRecords.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > 100}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1491931813412, | |
"valid" : true | |
}, { | |
"id" : "stageErrorAlertID", | |
"alertText" : "High incidence of Stage Errors", | |
"metricId" : "pipeline.batchErrorMessages.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > 100}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1491931813412, | |
"valid" : true | |
}, { | |
"id" : "idleGaugeID", | |
"alertText" : "Pipeline is Idle", | |
"metricId" : "RuntimeStatsGauge.gauge", | |
"metricType" : "GAUGE", | |
"metricElement" : "TIME_OF_LAST_RECEIVED_RECORD", | |
"condition" : "${time:now() - value() > 120000}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1491931813412, | |
"valid" : true | |
}, { | |
"id" : "batchTimeAlertID", | |
"alertText" : "Batch taking more time to process", | |
"metricId" : "RuntimeStatsGauge.gauge", | |
"metricType" : "GAUGE", | |
"metricElement" : "CURRENT_BATCH_AGE", | |
"condition" : "${value() > 200}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1491931813412, | |
"valid" : true | |
}, { | |
"id" : "memoryLimitAlertID", | |
"alertText" : "Memory limit for pipeline exceeded", | |
"metricId" : "pipeline.memoryConsumed.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > (jvm:maxMemoryMB() * 0.65)}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1491931813412, | |
"valid" : true | |
} ], | |
"dataRuleDefinitions" : [ ], | |
"driftRuleDefinitions" : [ ], | |
"emailIds" : [ ], | |
"uuid" : "2f39dbb9-e875-4b47-b3f2-5f42eed23b62", | |
"ruleIssues" : [ ] | |
}, | |
"libraryDefinitions" : null | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment