Skip to content

Instantly share code, notes, and snippets.

Avatar
🎯
Focusing

Jakub Liska l15k4

🎯
Focusing
  • Globalwebindex
  • Prague, Czech Republic
View GitHub Profile
View AkkaStreamReadingFile.scala
import java.io.File
import akka.stream.scaladsl.Source
import akka.stream.stage.{Context, PushStage, SyncDirective}
import com.typesafe.scalalogging.LazyLogging
import scala.util.Try
import scala.util.control.NonFatal
case class SimpleStage[A](file: File)(post: (File) => Unit) extends PushStage[A, A] with LazyLogging {
View task.json
{
"type" : "index",
"spec" : {
"dataSchema" : {
"dataSource" : "gwiq",
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "json",
"timestampSpec" : {
View spark-disk-persistence-problem.scala
// Script that loads TSV files from S3 into a DataFrame, persists it on disk only and counts its rows twice.
// Assumes `sc` (SparkContext), `sqlContext` (SQLContext) and `someHeaderFields` are already in scope,
// e.g. provided by spark-shell or defined earlier — TODO confirm.

// Map the s3n:// scheme to Hadoop's native S3 filesystem implementation.
sc.hadoopConfiguration.set("fs.s3n.impl","org.apache.hadoop.fs.s3native.NativeS3FileSystem")
// Read every matching TSV file, split each line on tabs and wrap the fields in a Row.
// NOTE(review): String.split("\t") drops trailing empty fields, so a line ending in empty columns yields
// fewer values than the schema declares — confirm the input never ends with empty columns.
val coreRdd = sc.textFile("s3n://somewhere/*.tsv").map(_.split("\t")).map( fields => Row(fields:_*) )
// Attach the schema built from someHeaderFields to the rows.
val dataFrame = sqlContext.createDataFrame(coreRdd, StructType(someHeaderFields))
// Expose the DataFrame to SQL under the table name "core".
dataFrame.registerTempTable("core")
// Mark the DataFrame for disk-only persistence; nothing is materialized until an action runs.
dataFrame.persist(StorageLevel.DISK_ONLY)
// Queries must go through the SQLContext: SparkContext has no `sql` method, so `sc.sql(...)` does not compile.
val result = sqlContext.sql("SELECT COUNT(*) FROM core") // result is correct
val result2 = sqlContext.sql("SELECT COUNT(*) FROM core") // result2 from cache is correct
View ES Compatibility
java.lang.IllegalStateException: unable to upgrade the mappings for the index [mi_2015_07_w4], reason: [Mapper for [_timestamp] conflicts with existing mapping in other types:
[mapper [_timestamp] has different [store] values, mapper [_timestamp] has different [doc_values] values, cannot change from disabled to enabled]]
at org.elasticsearch.cluster.metadata.MetaDataIndexUpgradeService.checkMappingsCompatibility(MetaDataIndexUpgradeService.java:339)
at org.elasticsearch.cluster.metadata.MetaDataIndexUpgradeService.upgradeIndexMetaData(MetaDataIndexUpgradeService.java:116)
at org.elasticsearch.gateway.GatewayMetaState.pre20Upgrade(GatewayMetaState.java:228)
at org.elasticsearch.gateway.GatewayMetaState.<init>(GatewayMetaState.java:87)
View task.json
{
"type" : "index",
"spec" : {
"dataSchema" : {
"dataSource" : "gwiq",
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "json",
"timestampSpec" : {
View DruidDockerfile
# Builds on the imply/imply image and rewrites its quickstart Druid configuration in place.
# Each sed only has an effect if the exact original value is present in the target file.
FROM imply/imply
# Overlord: replace the JVM flag -Xmx256m with -Xmx1024m.
RUN sed -i 's/-Xmx256m/-Xmx1024m/g' conf-quickstart/druid/overlord/jvm.config
# MiddleManager: replace -Xmx64m with -Xmx192m.
RUN sed -i 's/-Xmx64m/-Xmx192m/g' conf-quickstart/druid/middleManager/jvm.config
# Historical: replace -Xmx1g with -Xmx2g.
RUN sed -i 's/-Xmx1g/-Xmx2g/g' conf-quickstart/druid/historical/jvm.config
# Historical: replace -XX:MaxDirectMemorySize=1280m with -XX:MaxDirectMemorySize=2g.
RUN sed -i 's/-XX:MaxDirectMemorySize=1280m/-XX:MaxDirectMemorySize=2g/g' conf-quickstart/druid/historical/jvm.config
# Coordinator: replace -Xmx256m with -Xmx512m.
RUN sed -i 's/-Xmx256m/-Xmx512m/g' conf-quickstart/druid/coordinator/jvm.config
# Overlord: switch druid.indexer.storage.type from "metadata" to "local".
RUN sed -i 's/druid.indexer.storage.type=metadata/druid.indexer.storage.type=local/g' conf-quickstart/druid/overlord/runtime.properties
View hadoop-indexing-task.json
{
"type" : "index_hadoop",
"spec" : {
"dataSchema" : {
"dataSource" : "wikipedia",
"parser" : {
"type" : "hadoopyString",
"parseSpec" : {
"format" : "json",
"timestampSpec" : {
View hadoop_indexing_task.json
{
"type" : "index_hadoop",
"spec" : {
"dataSchema" : {
"dataSource" : "gwiq",
"parser" : {
"type" : "hadoopyString",
"parseSpec" : {
"format" : "json",
"timestampSpec" : {
View hll_query.json
{
"queryType": "timeseries",
"dataSource": "gwiq",
"intervals": [
"2015-01-01T00Z/2015-01-01T04Z"
],
"granularity": "day",
"aggregations": [
{
"type": "hyperUnique",
View TypeClass.scala
/**
 * Reads a raw response string into a container of typed responses.
 *
 * @tparam Q query type, bounded by `Query`; it does not appear in `read`, so it presumably only ties an
 *           instance to the query it answers — confirm against the call sites
 * @tparam R response type, bounded by `Response`, produced by `read`
 * @tparam C container holding the parsed responses; must be a `TraversableOnce`
 */
trait ResponseReader[Q <: Query, R <: Response, C[X] <: TraversableOnce[X]] {
/**
 * Parses `response` into a `C[R]`.
 *
 * @param response the raw response text to parse
 * @return the parsed responses wrapped in the container `C`
 */
def read(response: String): C[R]
}
object ResponseReader {
implicit object SelectResponseReader extends ResponseReader[SelectQuery, SelectResponse, Option] {
def read(json: String): Option[SelectResponse] = ObjMapper.readValue[List[SelectResponse]](json).headOption
}
implicit object TimeSeriesResponseReader extends ResponseReader[TimeSeriesQuery, TimeSeriesResponse, Option] {
def read(json: String): Option[TimeSeriesResponse] = ObjMapper.readValue[List[TimeSeriesResponse]](json).headOption