sadikovi / partitioning.scala
Created December 7, 2016 01:32
Sample partitioning using 'cross' round-robin
// let's say we have a sequence of numbers sorted in descending order
val data = Seq(9, 8, 7, 6, 5, 4, 3, 2, 1)
val max = 5
val buckets = 3
// alternative example inputs from the gist, commented out so the snippet compiles:
// val data = Seq(4, 4, 4, 3, 3, 3, 1, 1, 1)
// val data = Seq(10, 8, 2, 2, 2, 1, 1, 1, 1)
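A sketch of what 'cross' round-robin could mean here (an assumption, not the gist's actual code): deal the sorted elements into buckets left-to-right, then right-to-left, so bucket sums stay roughly balanced.

def crossRoundRobin[T](data: Seq[T], buckets: Int): Seq[Seq[T]] = {
  val out = Array.fill(buckets)(Seq.newBuilder[T])
  for ((elem, idx) <- data.zipWithIndex) {
    val round = idx / buckets
    val pos = idx % buckets
    // even rounds deal left-to-right, odd rounds right-to-left
    val bucket = if (round % 2 == 0) pos else buckets - 1 - pos
    out(bucket) += elem
  }
  out.map(_.result()).toSeq
}

// crossRoundRobin(Seq(9, 8, 7, 6, 5, 4, 3, 2, 1), 3)
// => Seq(Seq(9, 4, 3), Seq(8, 5, 2), Seq(7, 6, 1)) -- bucket sums 16, 15, 14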
sadikovi / docker-mesos.sh
Created November 9, 2016 02:28
Launching mesos on docker updated scripts (1 master, 4 slaves)
docker run -d --net=host \
  -e MESOS_PORT=5050 \
  -e MESOS_ZK=zk://127.0.0.1:2181/mesos \
  -e MESOS_QUORUM=1 \
  -e MESOS_REGISTRY=in_memory \
  -e MESOS_LOG_DIR=/var/log/mesos \
  -e MESOS_WORK_DIR=/var/tmp/mesos \
  -v "$(pwd)/log/mesos:/var/log/mesos" \
  -v "$(pwd)/tmp/mesos:/var/tmp/mesos" \
  mesosphere/mesos-master:0.28.0-2.0.16.ubuntu1404
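The preview shows only the master half of the "1 master, 4 slaves" setup. A hypothetical companion command for a slave (image tag modeled on the master's; flags are assumptions; repeat with MESOS_PORT=5051..5054 for four slaves):

docker run -d --net=host --privileged \
  -e MESOS_PORT=5051 \
  -e MESOS_MASTER=zk://127.0.0.1:2181/mesos \
  -e MESOS_SWITCH_USER=0 \
  -e MESOS_CONTAINERIZERS=docker,mesos \
  -e MESOS_LOG_DIR=/var/log/mesos \
  -e MESOS_WORK_DIR=/var/tmp/mesos \
  -v "$(pwd)/log/mesos-slave:/var/log/mesos" \
  -v "$(pwd)/tmp/mesos-slave:/var/tmp/mesos" \
  -v /var/run/docker.sock:/var/run/docker.sock \
  mesosphere/mesos-slave:0.28.0-2.0.16.ubuntu1404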
sadikovi / Quicksort.java
Last active October 31, 2016 07:11
Quicksort with Lomuto partitioning (last element as pivot)
public class Quicksort {
  // Lomuto partitioning: the last element is the pivot
  private static int partition(int[] arr, int start, int end) {
    int p = arr[end];
    int i = start;
    for (int j = start; j < end; j++) {
      if (arr[j] <= p) {
        int tmp = arr[j];
        arr[j] = arr[i];
        arr[i] = tmp;
        i++;
      }
    }
    // move the pivot into its final position and return its index
    int tmp = arr[end];
    arr[end] = arr[i];
    arr[i] = tmp;
    return i;
  }
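  // NOTE: the gist preview cuts off above; this recursive driver is an
  // assumed sketch of the usual quicksort recursion, not the gist's own code.
  public static void sort(int[] arr, int start, int end) {
    if (start < end) {
      int pivot = partition(arr, start, end);
      sort(arr, start, pivot - 1);
      sort(arr, pivot + 1, end);
    }
  }
}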
sadikovi / README.md
Created October 10, 2016 09:32
Big-O Algorithm complexity

Generating Flame Graphs for Apache Spark

Flame graphs are a nifty debugging tool for determining where CPU time is being spent. Using the Java Flight Recorder, you can do this for Java processes without adding significant runtime overhead.

When are flame graphs useful?

Shivaram Venkataraman and I have found these flame recordings useful for diagnosing coarse-grained performance problems. We started using them at the suggestion of Josh Rosen, who quickly made one for the Spark scheduler when we were talking to him about why the scheduler caps out at a throughput of a few thousand tasks per second. Josh generated a graph similar to the one below, which illustrates that a significant amount of time is spent in serialization (if you click in the top right-hand corner and search for "serialize", you can see that 78.6% of the sampled CPU time was spent in serialization). We used this insight to speed up…
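As a sketch of how a recording could be captured for Spark executors (the JVM flags below are for an Oracle JDK 8 HotSpot JVM, where JFR was a commercial feature; spark.executor.extraJavaOptions is Spark's standard way to pass JVM flags to executors):

spark-submit \
  --conf "spark.executor.extraJavaOptions=-XX:+UnlockCommercialFeatures -XX:+FlightRecorder -XX:StartFlightRecording=duration=120s,filename=/tmp/executor.jfr" \
  <your usual spark-submit arguments>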

sadikovi / Dockerfile
Created October 8, 2016 23:33
Dockerfile for zeppelin with enabled matplotlib
FROM dylanmei/zeppelin
ENV DEBIAN_FRONTEND noninteractive
# install matplotlib
RUN apt-get update && apt-get install -y software-properties-common python3-matplotlib
# update matplotlib config, otherwise display fails
RUN sed -i -e 's/backend : TkAgg/backend : Agg/g' /etc/matplotlibrc
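Hypothetical usage (the image name is an assumption; 8080 is Zeppelin's default web UI port):

docker build -t zeppelin-matplotlib .
docker run -p 8080:8080 zeppelin-matplotlib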
sadikovi / LRUCache.scala
Created September 27, 2016 08:59
LRU cache using linked list (super-hacky implementation)
import java.util.HashMap

class Node[K, V](var key: K, var value: V) {
  var next: Node[K, V] = null
  override def toString(): String = s"Node($key, $value) -> $next"
}

class LRUCache[K, V](val size: Int = 5) {
  require(size > 0, s"Expected size > 0, found $size")
  val map = new HashMap[K, Node[K, V]]()
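  // NOTE: the gist preview ends above; what follows is an assumed sketch of
  // how such a cache could work, not the gist's actual code. Most-recently-used
  // nodes sit at the head; eviction walks to the tail (O(n), hence "super-hacky").
  private var head: Node[K, V] = null

  private def unlink(node: Node[K, V]): Unit = {
    if (head eq node) {
      head = node.next
    } else {
      var cur = head
      while (cur != null && (cur.next ne node)) cur = cur.next
      if (cur != null) cur.next = node.next
    }
    node.next = null
  }

  def get(key: K): Option[V] = {
    val node = map.get(key)
    if (node == null) None
    else {
      // move the node to the front to mark it most recently used
      unlink(node)
      node.next = head
      head = node
      Some(node.value)
    }
  }

  def put(key: K, value: V): Unit = {
    val existing = map.get(key)
    if (existing != null) {
      existing.value = value
      unlink(existing)
      existing.next = head
      head = existing
    } else {
      if (map.size >= size) evictLast()
      val node = new Node(key, value)
      node.next = head
      head = node
      map.put(key, node)
    }
  }

  // walk to the last node and drop it (the O(n) part of a singly linked list)
  private def evictLast(): Unit = {
    var prev: Node[K, V] = null
    var cur = head
    while (cur != null && cur.next != null) { prev = cur; cur = cur.next }
    if (cur != null) {
      map.remove(cur.key)
      if (prev == null) head = null else prev.next = null
    }
  }
}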
sadikovi / solution.scala
Created September 26, 2016 23:29
Solution for max product of words problem
// naive approach, runs in O(n^2*k) time and O(k) space
def func1(words: Array[String]): Int = {
  var product = 0
  val set: java.util.HashSet[Char] = new java.util.HashSet[Char]()
  for (i <- 0 until words.length) {
    set.clear()
    for (c <- words(i)) {
      set.add(c)
    }
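The preview cuts off above. For reference, a common alternative (a sketch, assuming the problem is the usual one of maximizing length(a) * length(b) over pairs of words sharing no letters, lowercase a-z only) encodes each word as a 26-bit mask:

def maxProduct(words: Array[String]): Int = {
  // each word becomes a bitmask: bit (c - 'a') is set if the word contains c
  val masks = words.map(_.foldLeft(0)((m, c) => m | (1 << (c - 'a'))))
  var best = 0
  for (i <- words.indices; j <- (i + 1) until words.length) {
    // the two words share no letters iff their masks do not overlap
    if ((masks(i) & masks(j)) == 0) {
      best = math.max(best, words(i).length * words(j).length)
    }
  }
  best
}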
sadikovi / HashRangePartitioner.scala
Last active September 14, 2016 10:14
Hash and range partitioner
import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext

class HashRangePartitioner(
    @transient val sqlContext: SQLContext,
    val min: Int,
    val max: Int,
    numBuckets: Option[Int] = None)
  extends Partitioner {
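  // NOTE: the gist preview ends above; below is an assumed continuation, not
  // the gist's actual code, sketching equal-width range buckets over [min, max].
  private val buckets: Int =
    numBuckets.getOrElse(sqlContext.getConf("spark.sql.shuffle.partitions", "200").toInt)

  override def numPartitions: Int = buckets

  override def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[Int]
    val clamped = math.min(math.max(k, min), max)
    // map the clamped key into one of `buckets` equal-width ranges
    math.min(((clamped - min).toLong * buckets / (max - min + 1L)).toInt, buckets - 1)
  }
}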
sadikovi / PointType.scala
Last active April 2, 2019 00:48
Spark UDT and UDAF with custom buffer type
package org.apache.spark

import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.types._

@SQLUserDefinedType(udt = classOf[PointType])
case class Point(mac: String, start: Long, end: Long) {
  override def hashCode(): Int = {
    31 * (31 * mac.hashCode) + start.hashCode
  }
}