Skip to content

Instantly share code, notes, and snippets.

@l15k4
Created April 26, 2016 15:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save l15k4/abcb148dda1d0f8174d4b216388504a2 to your computer and use it in GitHub Desktop.
Save l15k4/abcb148dda1d0f8174d4b216388504a2 to your computer and use it in GitHub Desktop.
/**
 * Field names handed to `DimensionsSpec` as its second argument when the parse spec is built
 * (presumably the dimension-exclusion list -- confirm against the DimensionsSpec signature).
 *
 * Per the original note, `cid` and `c-geo:c3` were removed from this list.
 */
val exclusions: List[String] = List(
  "timestamp", "d_memberID", "gwid", "diid",
  // c-geo:* keys
  "c-geo:st", "c-geo:ac", "c-geo:c2", "c-geo:cn", "c-geo:ct", "c-geo:dc",
  "c-geo:la", "c-geo:lo", "c-geo:pc", "c-geo:rc", "c-geo:sp",
  // ua* keys
  "ua", "ua-cp", "ua-os",
  // accept* keys
  "accept", "accept-charset", "accept-encoding", "accept-language",
  "connection", "from", "x-wap-profile", "x-att-deviceid", "via",
  "x-forwarded-for", "x-uidh", "forwarded", "referrer",
  // remaining keys
  "c-ip", "ce", "count", "ctr", "logic", "max", "miid", "msg", "npid", "cs-uri-stem"
)
// Granularity whose string form is passed as the second argument of GranularitySpec.uniform in hadoopTask.
val segmentGrn = Granularity.DAY
/**
 * Builds the Hadoop-type index task for the "gwiq" data schema over a single interval.
 *
 * The task reads from the `s3n://...@gwiq-views-p/gwiq/base-json-gz` location through a
 * DAY-granularity input spec with the `yyyy/MM/dd` path format, parses rows as JSON keyed by
 * the "timestamp" field, and declares a count aggregation plus hll aggregations over
 * "gwid" and "d_memberID".
 *
 * @param interval the only interval handed to the uniform granularity spec
 *                 (expected format is not visible here -- TODO confirm)
 */
private def hadoopTask(interval: String) = {
  // JSON parse spec; `exclusions` is supplied as the second DimensionsSpec argument.
  val jsonSpec = ParseSpec.json(
    TimestampSpec("timestamp"),
    DimensionsSpec(List.empty, exclusions, List.empty)
  )

  val schema = DataSchema(
    "gwiq",
    Parser.hadoopyString(jsonSpec),
    List(
      Aggregation.count("count"),
      Aggregation.hll("gwid", "gwid"),
      Aggregation.hll("d_memberID", "d_memberID")
    ),
    GranularitySpec.uniform(
      List(interval),
      Some(segmentGrn.toString),
      Some(Granularity.DAY.toString)
    )
  )

  // NOTE(review): sys.env(...) throws NoSuchElementException when either variable is unset,
  // and the credentials become part of the input path string. A secret containing '/' may not
  // survive URI parsing, and the path may surface in task specs or logs -- confirm whether the
  // credentials should be supplied through Hadoop configuration instead.
  val source = InputSpec.granularity(
    Granularity.DAY.toString,
    s"s3n://${sys.env("AWS_ACCESS_KEY_ID")}:${sys.env("AWS_SECRET_ACCESS_KEY")}@gwiq-views-p/gwiq/base-json-gz",
    ".*",
    Some("yyyy/MM/dd")
  )

  IndexTask(
    IndexTask.hadoopType,
    IngestionSpec(
      schema,
      IoConfig.hadoop(source),
      Some(TuningConfig(TuningConfig.hadoopType))
    )
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment