Skip to content

Instantly share code, notes, and snippets.

@b-slim
Created January 26, 2017 00:17
Show Gist options
  • Save b-slim/5ad5f7a516eb91e25126a021807b3159 to your computer and use it in GitHub Desktop.
Save b-slim/5ad5f7a516eb91e25126a021807b3159 to your computer and use it in GitHub Desktop.
Druid Hadoop indexing job spec file
{
  "type": "index_hadoop",
  "spec": {
    "dataSchema": {
      "dataSource": "wikipedia",
      "parser": {
        "type": "hadoopyString",
        "parseSpec": {
          "format": "json",
          "timestampSpec": {
            "column": "time",
            "format": "auto"
          },
          "dimensionsSpec": {
            "dimensions": [
              "page",
              "language",
              "user",
              "unpatrolled",
              "newPage",
              "robot",
              "anonymous",
              "namespace",
              "continent",
              "country",
              "region",
              "city"
            ],
            "dimensionExclusions": [],
            "spatialDimensions": []
          }
        }
      },
      "metricsSpec": [
        {
          "type": "count",
          "name": "count"
        },
        {
          "type": "doubleSum",
          "name": "added",
          "fieldName": "added"
        },
        {
          "type": "doubleSum",
          "name": "deleted",
          "fieldName": "deleted"
        },
        {
          "type": "doubleSum",
          "name": "delta",
          "fieldName": "delta"
        }
      ],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "DAY",
        "queryGranularity": "HOUR",
        "intervals": [
          "2015-09-12T00:00:00.000Z/2015-09-13T00:00:00.000Z"
        ]
      }
    },
    "ioConfig": {
      "type": "hadoop",
      "metadataUpdateSpec": {
        "type": "postgresql",
        "connectURI": "jdbc:postgresql://XXXXXX.us-west-2.rds.amazonaws.com:5432/XXXX",
        "password": "XXX",
        "segmentTable": "druid_segments",
        "user": "XXX"
      },
      "segmentOutputPath": "s3://druid-cloud/",
      "inputSpec": {
        "type": "static",
        "paths": "s3://druid-cloud/wikiticker-2016-06-27-sampled.json"
      }
    },
    "tuningConfig": {
      "type": "hadoop",
      "workingPath": "/tmp/druid-indexing",
      "leaveIntermediate": true,
      "partitionsSpec": {
        "type": "hashed",
        "targetPartitionSize": 5000000,
        "maxPartitionSize": 75000000,
        "assumeGrouped": false,
        "numShards": -1,
        "partitionDimensions": []
      },
      "jobProperties": {
        "mapreduce.job.user.classpath.first": "true",
        "mapreduce.map.memory.mb": 2048,
        "mapreduce.map.java.opts": "-server -Xmx1536m -Duser.timezone=UTC -Dfile.encoding=UTF-8",
        "mapreduce.reduce.memory.mb": 3072,
        "mapreduce.reduce.java.opts": "-server -Xmx2560m -Duser.timezone=UTC -Dfile.encoding=UTF-8",
        "fs.s3.awsAccessKeyId": "XXXXXX",
        "fs.s3.awsSecretAccessKey": "XXXXXX",
        "fs.s3.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
        "fs.s3n.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
        "io.compression.codecs": "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec"
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment