Skip to content

Instantly share code, notes, and snippets.

@shaharck
Created August 20, 2018 23:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shaharck/7885e8f8066e38f1d0963618fe2e2207 to your computer and use it in GitHub Desktop.
druid_task.json
{
  "type": "index_hadoop",
  "spec": {
    "dataSchema": {
      "dataSource": "sample",
      "parser": {
        "type": "parquet",
        "parseSpec": {
          "format": "timeAndDims",
          "timestampSpec": {
            "column": "hitstimestamp",
            "format": "yyyy-MM-dd HH:mm:ss.SSS"
          },
          "dimensionsSpec": {
            "dimensions": [
              "type"
            ],
            "dimensionExclusions": [],
            "spatialDimensions": []
          }
        }
      },
      "metricsSpec": [
        {
          "name": "count",
          "type": "count"
        }
      ],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "HOUR",
        "queryGranularity": "NONE",
        "intervals": ["2018-07-22/2018-07-23"]
      }
    },
    "ioConfig": {
      "type": "hadoop",
      "inputSpec": {
        "type": "static",
        "inputFormat": "io.druid.data.input.parquet.DruidParquetInputFormat",
        "paths": "s3n://mypath/part-00493-0622f31f-0a5d-4b2e-b740-03e7c90f30c7-c000.snappy.parquet"
      }
    },
    "tuningConfig": {
      "type": "hadoop",
      "reportParseExceptions": true,
      "jobProperties": {
        "fs.s3n.awsAccessKeyId": "...",
        "fs.s3n.awsSecretAccessKey": "...",
        "fs.s3n.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
        "fs.s3.awsAccessKeyId": "...",
        "fs.s3.awsSecretAccessKey": "...",
        "fs.s3.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
        "io.compression.codecs": "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec",
        "mapreduce.map.java.opts": "-server -Xmx1536m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps",
        "mapreduce.reduce.java.opts": "-server -Xmx2560m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps"
      },
      "partitionsSpec": {
        "type": "hashed",
        "targetPartitionSize": 5000000
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment