val hdfs: org.apache.hadoop.fs.FileSystem =
  org.apache.hadoop.fs.FileSystem.get(
    new org.apache.hadoop.conf.Configuration())
val hadoopPath = new org.apache.hadoop.fs.Path("hdfs://localhost:9000/tmp")
val recursive = false
val ri = hdfs.listFiles(hadoopPath, recursive)
// Wrap Hadoop's RemoteIterator[LocatedFileStatus] in a Scala Iterator so the
// standard collection operations apply.
val it = new Iterator[org.apache.hadoop.fs.LocatedFileStatus]() {
  override def hasNext = ri.hasNext
  override def next() = ri.next()
}
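A quick usage sketch (my addition, assuming the HDFS URI above is reachable): once the RemoteIterator is wrapped, the listing can be consumed with the ordinary Scala collection API.

// Hypothetical usage of the wrapped iterator: print each file's path and size.
it.foreach { status =>
  println(s"${status.getPath} (${status.getLen} bytes)")
}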
@jaceklaskowski
jaceklaskowski / spark-jobserver-docker-macos.md
Last active August 1, 2018 11:28
How to run spark-jobserver on Docker and Mac OS (using docker-machine)
@mp911de
mp911de / Client.java
Created September 24, 2015 08:18
JMX Monitoring Demo of lettuce 3.4-SNAPSHOT
package com.lambdaworks.redis.experimental.mbean;
import java.lang.management.ManagementFactory;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import javax.management.JMException;
import javax.management.MBeanServer;
@MLnick
MLnick / HyperLogLogStoreUDAF.scala
Last active March 16, 2022 05:31
Experimenting with Spark SQL UDAF: a HyperLogLog UDAF for distinct counts that stores the actual HLL for each row to allow further aggregation
class HyperLogLogStoreUDAF extends UserDefinedAggregateFunction {

  override def inputSchema = new StructType()
    .add("stringInput", BinaryType)

  override def update(buffer: MutableAggregationBuffer, input: Row) = {
    // This input Row only has a single column storing the input value in String (or other Binary data).
    // We only update the buffer when the input value is not null.
    if (!input.isNullAt(0)) {
      if (buffer.isNullAt(0)) {
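The preview cuts off inside update. As a rough sketch of how such a UDAF is wired up (assuming the full class from the gist compiles, a SQLContext, and a DataFrame df with a grouping column key and a binary column value; the registration name hllStore is my assumption, not the gist's):

// Sketch only: register the UDAF and use it in an aggregation.
import org.apache.spark.sql.functions.callUDF

sqlContext.udf.register("hllStore", new HyperLogLogStoreUDAF)

// One serialized HLL per group; the stored HLLs can be merged again in a later aggregation.
val hllPerKey = df.groupBy("key")
  .agg(callUDF("hllStore", df("value")).as("hll"))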
package testGeneric

import scala.language.higherKinds
import scalaz.Functor

object TestLabelledGeneric {
  case class Ahoy(name: String, y: Int, l: Int)
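Only the header of this gist survives in the preview. As a minimal sketch of what LabelledGeneric typically provides for a case class like Ahoy (shapeless assumed on the classpath; this usage is mine, not necessarily the gist's):

import shapeless.LabelledGeneric

// Sketch only: derive the labelled generic (record) representation of Ahoy.
val gen  = LabelledGeneric[Ahoy]
val rec  = gen.to(Ahoy("ship", 1, 2))   // record HList keyed by the field names
val ahoy = gen.from(rec)                // back to Ahoy("ship", 1, 2)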
@mp911de
mp911de / JedisCluster.java
Last active November 27, 2015 16:50
Connecting a Redis Cluster
import redis.clients.jedis.HostAndPort;
import redis.clients.jedis.JedisCluster;
import java.util.HashSet;
import java.util.Set;
public class JedisCluster {

    public static void main(String[] args) {
        Set<HostAndPort> connectionPoints = new HashSet<HostAndPort>();
@ezhulenev
ezhulenev / spark-thred-safe.scala
Created August 11, 2015 22:16
Thread-safe Spark SQL context
object ServerSparkContext {
  private[this] lazy val _sqlContext = {
    val conf = new SparkConf()
      .setAppName("....")
    val sc = new SparkContext(conf)
    // TODO: Bug in Spark: http://stackoverflow.com/questions/30323212
    val ctx = new HiveContext(sc)
    ctx.setConf("spark.sql.hive.convertMetastoreParquet", "false")
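The preview stops before the context is exposed. A minimal self-contained sketch of the pattern, where the accessor name and app name are my assumptions:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object ServerSparkContextSketch {
  // `lazy val` initialisation is synchronised by the Scala runtime, so concurrent
  // first accesses from different server threads still create exactly one context.
  private[this] lazy val _sqlContext: HiveContext = {
    val conf = new SparkConf().setAppName("server")   // app name assumed
    val sc = new SparkContext(conf)
    // Workaround noted in the gist: http://stackoverflow.com/questions/30323212
    val ctx = new HiveContext(sc)
    ctx.setConf("spark.sql.hive.convertMetastoreParquet", "false")
    ctx
  }

  // Accessor name is an assumption; callers on any thread share the same HiveContext.
  def sqlContext: HiveContext = _sqlContext
}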
@mp911de
mp911de / MyExtendedRedisClient.java
Created August 11, 2015 06:11
Implementing "cancel commands while disconnected" for https://github.com/mp911de/lettuce/issues/115
import javax.enterprise.inject.Alternative;
import com.lambdaworks.redis.RedisClient;
import com.lambdaworks.redis.RedisURI;
import com.lambdaworks.redis.StatefulRedisConnectionImpl;
import com.lambdaworks.redis.codec.RedisCodec;
import com.lambdaworks.redis.protocol.CommandHandler;
import com.lambdaworks.redis.pubsub.PubSubCommandHandler;
import com.lambdaworks.redis.pubsub.StatefulRedisPubSubConnectionImpl;
@mbostock
mbostock / .block
Last active September 13, 2018 09:12
Smallest Enclosing Circle
license: gpl-3.0
@harlow
harlow / golang_job_queue.md
Last active April 24, 2024 10:21
Job queues in Golang