Skip to content

Instantly share code, notes, and snippets.

@royaltm
Created May 29, 2017 17:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save royaltm/175f6bd49e83a28db85c4c544567a357 to your computer and use it in GitHub Desktop.
Save royaltm/175f6bd49e83a28db85c4c544567a357 to your computer and use it in GitHub Desktop.
Druid Hadoop re-ingestion job spec
{
  "type": "index_hadoop",
  "hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.7.3"],
  "spec": {
    "dataSchema": {
      "dataSource": "FOO_SOURCE",
      "parser": {
        "type": "hadoopyString",
        "parseSpec": {
          "format": "json",
          "timestampSpec": {
            "column": "timestamp",
            "format": "auto"
          },
          "dimensionsSpec": {
            "dimensions": [],
            "dimensionExclusions": [],
            "spatialDimensions": []
          }
        }
      },
      "metricsSpec": [
        {
          "type": "count",
          "name": "rows"
        }
      ],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "DAY",
        "queryGranularity": "NONE",
        "intervals": ["2017-01-01T00:00:00.000Z/2017-01-14T00:00:00.000Z"]
      }
    },
    "ioConfig": {
      "type": "hadoop",
      "inputSpec": {
        "type": "dataSource",
        "ingestionSpec": {
          "dataSource": "FOO_SOURCE",
          "intervals": ["2017-01-01T00:00:00.000Z/2017-01-14T00:00:00.000Z"],
          "segments": null
        }
      }
    },
    "tuningConfig": {
      "type": "hadoop",
      "maxRowsInMemory": 75000,
      "partitionsSpec": {
        "type": "hashed",
        "targetPartitionSize": 5000000
      },
      "forceExtendableShardSpecs": true,
      "jobProperties": {
        "mapreduce.job.classloader": "true",
        "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop."
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment