Instantly share code, notes, and snippets.

View SparkRainer.scala
import com.stripe.rainier.core.{Normal, Poisson}
import com.stripe.rainier.sampler.{RNG, ScalaRNG}
import org.apache.spark.{SparkConf, SparkContext}
object Driver {
implicit val rng: RNG = ScalaRNG(1527608515939L)
val DROP_BURN_IN = 100
/*
Refer to StackOverflow Q, about serializing methods/objects:
View Discrete.scala
package com.stripe.rainier.core
import com.stripe.rainier.compute.{Evaluator, If, Real}
trait Discrete extends Distribution[Int] {
self: Discrete =>
val emptyEvaluator = new Evaluator(Map.empty)
def logDensity(v: Real): Real
View README.md

nb approximation to normal threshold

There are many ways to test whether a negative binomial distribution is approximately normal, e.g.:

  • visualize the qq plot
  • normalize the NB sample and perform a Shapiro–Wilk test

The image below shows the envelope of negative binomial parameters for which the resulting distribution is approximately normal.

View Discrete.scala
/**
* Bernoulli distribution with expectation `p`
*
* @param p The probability of success
*/
final case class Bernoulli(p: Real) extends Discrete {
val generator: Generator[Int] =
Generator.require(Set(p)) { (r, n) =>
val u = r.standardUniform
val l = n.toDouble(p)
View mixture_of_experts.py
from keras.models import Model
from keras.layers import Input, Dense, concatenate, dot
from numpy.random import randint
import numpy as np
def my_model(n=20):
inputs = Input(shape=(n,))
m1 = Dense(1)(inputs)
View expert_mixtures.py
from keras.models import Model
from keras.layers import Input, Lambda, Dense
from keras.utils import to_categorical
from numpy.random import randint
import numpy as np
def belief_per_model(x, weights=(.2, .3, .4, .1)):
    """Combine four inputs into a single weighted sum.

    Args:
        x: A sequence of exactly four values (anything supporting ``*`` with
            a number and ``+`` with each other). Presumably the outputs of
            four models when used inside a Keras ``Lambda`` layer -- confirm
            against the caller.
        weights: Four multipliers applied to the entries of ``x`` in order.
            Defaults to ``(.2, .3, .4, .1)``, the constants this function
            originally hard-coded, so existing single-argument calls are
            unaffected.

    Returns:
        ``x[0] * weights[0] + x[1] * weights[1] + x[2] * weights[2]
        + x[3] * weights[3]``, evaluated left to right.

    Raises:
        ValueError: If ``x`` or ``weights`` does not unpack into exactly
            four items.
    """
    x1, x2, x3, x4 = x
    w1, w2, w3, w4 = weights
    # Keep the explicit left-to-right sum so the default result is identical
    # to the original hard-coded expression.
    return x1 * w1 + x2 * w2 + x3 * w3 + x4 * w4
View MapRawData.scala
package com.dvidr.counts
import com.twitter.algebird.{HLL, HyperLogLogMonoid}
import org.apache.spark.rdd.RDD
case class EmailSchema(sender: String,
to: String,
cc: String,
bcc: String,
sentDate: String,
View Vagrantfile
Vagrant.configure("2") do |config|
config.vm.box = "ubuntu/xenial64"
config.vm.hostname = "spark.xenial.box"
config.vm.network :private_network, ip: "192.168.0.42"
config.vm.synced_folder "./data", "/vagrant_data"
config.vm.provider "virtualbox" do |vb|
vb.gui = false
vb.memory = 4096
View MapperLMDB.java
/**
* gradle clean
* gradle build
* <p>
* hadoop jar build/libs/mapper-lmdb-1.0-SNAPSHOT.jar com.dvidr.MapperLMDB src/main/resources/keys.txt src/main/resources/output
*/
package com.dvidr;
import org.apache.hadoop.conf.Configuration;
View PivotTable.java
/**
* gradle clean
* gradle build
*
* hadoop jar build/libs/pivot-table-1.0-SNAPSHOT.jar com.dvidr.PivotTable src/main/resources/pivotdata.txt src/main/resources/output
*
*/
package com.dvidr;