This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example | |
import akka.kafka.scaladsl.{Consumer, Producer} | |
import akka.kafka.{ConsumerSettings, ProducerSettings} | |
import akka.stream.scaladsl.{Sink, Source} | |
import kafka.utils.ZkUtils | |
import org.apache.kafka.clients.consumer.ConsumerConfig | |
import org.apache.kafka.clients.producer.ProducerRecord | |
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} | |
import org.scalatest.BeforeAndAfterEach |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example | |
import akka.kafka.scaladsl.{Consumer, Producer} | |
import akka.kafka.{ConsumerSettings, ProducerSettings} | |
import akka.stream.scaladsl.{Sink, Source} | |
import org.apache.kafka.clients.consumer.ConsumerConfig | |
import org.apache.kafka.clients.producer.ProducerRecord | |
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} | |
import org.scalatest.time.{Second, Seconds, Span} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def runHybrid(s3Keys: Seq[String], parallelism: Int): Stats = { | |
sc.parallelize(s3Keys, parallelism) | |
.mapPartitions { keys => | |
val s3 = IO.getS3(s3Id, s3Key, s3Region) | |
def read = keys.map(key => key -> MiLogFileParser.getBytes(s3.getObject(inputBucket, key).getObjectContent, true)) | |
val(contentByKey, took) = IO.profile( () => read ) | |
readingAcc.add(took) | |
contentByKey |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example | |
import akka.kafka.scaladsl.{Consumer, Producer} | |
import akka.kafka.{ConsumerSettings, ProducerSettings} | |
import akka.stream.scaladsl.{Sink, Source} | |
import org.apache.kafka.clients.consumer.ConsumerConfig | |
import org.apache.kafka.clients.producer.ProducerRecord | |
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer} | |
import org.scalatest.time.{Second, Seconds, Span} | |
import scala.concurrent.duration._ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Common supertype of the storage descriptors declared below ([[S3Storage]] and [[DruidStorage]]). */
trait Storage | |
/**
 * Identifies a view.
 *
 * @param id     identifier string of the view
 * @param access access string; defaults to "rw" (presumably read/write -- TODO confirm the accepted values)
 * @param base   defaults to false; NOTE(review): the meaning of a "base" view is not visible in this excerpt
 */
case class ViewId(id: String, access: String = "rw", base: Boolean = false) | |
/**
 * [[Storage]] described by a bucket and a path.
 *
 * @param bucket bucket name (presumably an S3 bucket, going by the class name)
 * @param path   path string; presumably a key or prefix inside the bucket -- TODO confirm
 */
case class S3Storage(bucket: String, path: String) extends Storage | |
/**
 * [[Storage]] described by two Druid hosts and a data source name.
 *
 * @param coordinatorHost host string for the coordinator
 * @param overlordHost    host string for the overlord
 * @param dataSource      data source name
 */
case class DruidStorage(coordinatorHost: String, overlordHost: String, dataSource: String) extends Storage | |
/** Sealed base trait for partitioning variants; its implementations are not visible in this excerpt. */
sealed trait Partitioning |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Extensions | |
# | |
druid.extensions.directory=dist/druid/extensions | |
druid.extensions.hadoopDependenciesDir=dist/druid/hadoop-dependencies | |
# | |
# Logging | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example | |
import java.util.Properties | |
import akka.event.Logging | |
import akka.kafka.scaladsl.{Consumer, Producer} | |
import akka.kafka.{ConsumerSettings, ProducerSettings} | |
import akka.stream.scaladsl.{Sink, Source} | |
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig} | |
import org.apache.kafka.clients.consumer.ConsumerConfig |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"task": "index_hadoop_gwiq_2016-05-02T07:27:30.883Z", | |
"payload": { | |
"id": "index_hadoop_gwiq_2016-05-02T07:27:30.883Z", | |
"spec": { | |
"dataSchema": { | |
"dataSource": "gwiq", | |
"parser": { | |
"type": "hadoopyString", | |
"parseSpec": { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Provision an EC2-backed Docker host with docker-machine.
#
# Reads from the environment: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, REGION,
# INSTANCE_TYPE, VOLUME_SIZE and SECURITY_GROUP.
#
# NOTE: the flag --amazonec2-private-address-only is intentionally disabled.
# It is recorded here, above the command, because a '#' comment placed between
# backslash-continued lines ends the command at that point; every flag after it
# would then be executed as a separate (failing) command.
#
# Expansions are quoted so that empty values or values containing whitespace
# are passed as a single argument instead of being word-split.
docker-machine create \
--driver amazonec2 \
--amazonec2-access-key "${AWS_ACCESS_KEY_ID}" \
--amazonec2-secret-key "${AWS_SECRET_ACCESS_KEY}" \
--amazonec2-region "${REGION}" \
--amazonec2-vpc-id vpc-fcf8e29e \
--amazonec2-instance-type "${INSTANCE_TYPE}" \
--amazonec2-root-size "${VOLUME_SIZE}" \
--amazonec2-security-group "${SECURITY_GROUP}" \
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// List of field-name strings. The inline note below records that "cid" and
// "c-geo:c3" were removed from it.
// NOTE(review): what these names are excluded from is not visible in this
// excerpt -- confirm against the code that consumes `exclusions`.
val exclusions = List( // removed cid c-geo:c3 | |
"timestamp","d_memberID","gwid","diid","c-geo:st","c-geo:ac","c-geo:c2","c-geo:cn","c-geo:ct","c-geo:dc","c-geo:la","c-geo:lo","c-geo:pc","c-geo:rc","c-geo:sp", | |
"ua","ua-cp","ua-os","accept","accept-charset","accept-encoding","accept-language","connection","from","x-wap-profile","x-att-deviceid","via","x-forwarded-for","x-uidh","forwarded","referrer","c-ip","ce","count","ctr","logic","max","miid","msg","npid","cs-uri-stem" | |
) | |
// Fixed to Granularity.DAY; presumably the segment granularity used by the
// ingestion spec built further down -- TODO confirm where it is consumed.
val segmentGrn = Granularity.DAY | |
private def hadoopTask(interval: String) = | |
IndexTask( | |
IndexTask.hadoopType, | |
IngestionSpec( |