David Rodriguez DavidRdgz

## Vagrantfile
Vagrant.configure("2") do |config|
  config.vm.box = "ubuntu/xenial64"
  config.vm.hostname = "spark.xenial.box"
  config.vm.network :private_network, ip: "192.168.0.42"

  config.vm.synced_folder "./data", "/vagrant_data"

  config.vm.provider "virtualbox" do |vb|
    vb.gui = false
    vb.memory = 4096

## MapRawData.scala
package com.dvidr.counts

import com.twitter.algebird.{HLL, HyperLogLogMonoid}
import org.apache.spark.rdd.RDD

case class EmailSchema(sender: String,
                       to: String,
                       cc: String,
                       bcc: String,
                       sentDate: String,

## expert_mixtures.py
from keras.models import Model
from keras.layers import Input, Lambda, Dense
from keras.utils import to_categorical
from numpy.random import randint
import numpy as np


def belief_per_model(x):
    """Blend four model outputs into a single weighted combination.

    ``x`` must unpack into exactly four items. They are scaled by the fixed
    weights 0.2, 0.3, 0.4 and 0.1 (in that order) and the four scaled terms
    are added together left to right.
    """
    first, second, third, fourth = x

    # Scale each item by its fixed weight before combining.
    weighted_first = first * .2
    weighted_second = second * .3
    weighted_third = third * .4
    weighted_fourth = fourth * .1

    return weighted_first + weighted_second + weighted_third + weighted_fourth

## mixture_of_experts.py
from keras.models import Model
from keras.layers import Input, Dense, concatenate, dot
from numpy.random import randint
import numpy as np


def my_model(n=20):
    inputs = Input(shape=(n,))

    m1 = Dense(1)(inputs)

## Discrete.scala
/**
  * Bernoulli distribution with expectation `p`
  *
  * @param p The probability of success
  */
final case class Bernoulli(p: Real) extends Discrete {
  val generator: Generator[Int] =
    Generator.require(Set(p)) { (r, n) =>
      val u = r.standardUniform
      val l = n.toDouble(p)

## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                DavidRdgz
                / README.md
            
            
              Last active
              August 5, 2018 18:42
            
              
                Negative Binomial Approximation to Normal Threshold
              
          
    nb approximation to normal threshold

There are many ways to test whether a negative binomial distribution is approximately normal, e.g.:

visualize the Q-Q plot
normalize the negative binomial sample and perform a Shapiro-Wilk test

Below is an image of the envelope where the negative binomial parameters create a distribution that is approximately normal.

  
## Discrete.scala
package com.stripe.rainier.core

import com.stripe.rainier.compute.{Evaluator, If, Real}

trait Discrete extends Distribution[Int] {
  self: Discrete =>
  val emptyEvaluator = new Evaluator(Map.empty)

  def logDensity(v: Real): Real

## SparkRainer.scala
import com.stripe.rainier.core.{Normal, Poisson}
import com.stripe.rainier.sampler.{RNG, ScalaRNG}
import org.apache.spark.{SparkConf, SparkContext}

object Driver {
  implicit val rng: RNG = ScalaRNG(1527608515939L)
  val DROP_BURN_IN = 100

  /*
   Refer to StackOverflow Q, about serializing methods/objects:

## Numpy.scala
package numpy

/**
  * Interface declaring list-versus-single-value operations for an element type `A`.
  *
  * Every member is abstract and curried: it takes the `list` first, then a
  * single `value` of type `A`, and returns a `List[A]`. No implementation is
  * provided in this trait.
  *
  * NOTE(review): the member names suggest numpy-like operations of a list
  * against a scalar; the exact semantics live in the implementations, which
  * are not visible here -- confirm before relying on them.
  *
  * @tparam A the element type of the input and result lists
  */
trait NumpyWriter[A] {
  // NOTE(review): returns List[A], not List[Boolean] -- presumably the elements
  // that compare below `value`; confirm against an implementation.
  def lessThan(list: List[A])(value: A): List[A]
  // NOTE(review): presumably the counterpart of `lessThan` for elements above
  // `value`; confirm against an implementation.
  def greaterThan(list: List[A])(value: A): List[A]
  // NOTE(review): presumably combines each element with `value` by
  // multiplication; confirm.
  def multiply(list: List[A])(value: A): List[A]
  // NOTE(review): presumably combines each element with `value` by addition;
  // confirm.
  def add(list: List[A])(value: A): List[A]
  // NOTE(review): presumably subtracts `value` from each element (operand
  // order is not shown here); confirm.
  def subtract(list: List[A])(value: A): List[A]
}

## annotations.py
"""
A simple example of decluttering the settings for pandas so
that when developing the model and testing it, the dataframe
is a little cleaner and more readable.

"""

def pandas_defaults(defaults, pd):
    def decorator(f):
        def wrapper(*args, **kwargs):
	Vagrant.configure("2") do |config|
	config.vm.box = "ubuntu/xenial64"
	config.vm.hostname = "spark.xenial.box"
	config.vm.network :private_network, ip: "192.168.0.42"

	config.vm.synced_folder "./data", "/vagrant_data"

	config.vm.provider "virtualbox" do |vb|
	vb.gui = false
	vb.memory = 4096
	package com.dvidr.counts

	import com.twitter.algebird.{HLL, HyperLogLogMonoid}
	import org.apache.spark.rdd.RDD

	case class EmailSchema(sender: String,
	to: String,
	cc: String,
	bcc: String,
	sentDate: String,
	from keras.models import Model
	from keras.layers import Input, Lambda, Dense
	from keras.utils import to_categorical
	from numpy.random import randint
	import numpy as np


	def belief_per_model(x):
	x1, x2, x3, x4 = x
	return x1 * .2 + x2 * .3 + x3 * .4 + x4 * .1
	from keras.models import Model
	from keras.layers import Input, Dense, concatenate, dot
	from numpy.random import randint
	import numpy as np


	def my_model(n=20):
	inputs = Input(shape=(n,))

	m1 = Dense(1)(inputs)
	/**
	* Bernoulli distribution with expectation `p`
	*
	* @param p The probability of success
	*/
	final case class Bernoulli(p: Real) extends Discrete {
	val generator: Generator[Int] =
	Generator.require(Set(p)) { (r, n) =>
	val u = r.standardUniform
	val l = n.toDouble(p)
	package com.stripe.rainier.core

	import com.stripe.rainier.compute.{Evaluator, If, Real}

	trait Discrete extends Distribution[Int] {
	self: Discrete =>
	val emptyEvaluator = new Evaluator(Map.empty)

	def logDensity(v: Real): Real
	import com.stripe.rainier.core.{Normal, Poisson}
	import com.stripe.rainier.sampler.{RNG, ScalaRNG}
	import org.apache.spark.{SparkConf, SparkContext}

	object Driver {
	implicit val rng: RNG = ScalaRNG(1527608515939L)
	val DROP_BURN_IN = 100

	/*
	Refer to StackOverflow Q, about serializing methods/objects:
	package numpy

	trait NumpyWriter[A] {
	def lessThan(list: List[A])(value: A): List[A]
	def greaterThan(list: List[A])(value: A): List[A]
	def multiply(list: List[A])(value: A): List[A]
	def add(list: List[A])(value: A): List[A]
	def subtract(list: List[A])(value: A): List[A]
	}
	"""
	A simple example of decluttering the settings for pandas so
	that when developing the model and testing it, the dataframe
	is a little cleaner and more readable.

	"""

	def pandas_defaults(defaults, pd):
	def decorator(f):
	def wrapper(*args, **kwargs):