Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Last active April 13, 2023 19:08
Show Gist options
  • Save andrewheiss/3306dced3af45bb32987c44c4077ad50 to your computer and use it in GitHub Desktop.
Save andrewheiss/3306dced3af45bb32987c44c4077ad50 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(brms)

example_data <- tibble(outcome = c(rep(1, 200),
                                   sample(2:31, 200, replace = TRUE),
                                   rep(32, 150))) |> 
  mutate(outcome_0 = outcome - 1) |> 
  mutate(outcome_collapsed = case_when(
    outcome == 1 ~ "1",
    outcome >= 2 & outcome <= 31 ~ "2-31",
    outcome == 32 ~ "32"
  )) |> 
  mutate(outcome_collapsed = factor(outcome_collapsed, ordered = TRUE))

ggplot(example_data, aes(x = outcome)) +
  geom_histogram(binwidth = 1, boundary = 0, color = "white")

model_zero_poisson <- brm(
  bf(outcome_0 ~ 1),
  data = example_data,
  family = zero_inflated_poisson(),
  cores = 4, refresh = 0
)
pp_check(model_zero_poisson)

model_mixture_poisson <- brm(
  bf(outcome ~ 1),
  data = example_data,
  family = mixture(poisson, poisson),
  cores = 4, refresh = 0
)
pp_check(model_mixture_poisson)

ggplot(example_data, aes(x = outcome_collapsed)) +
  geom_bar()

model_collapsed <- brm(
  bf(outcome_collapsed ~ 1),
  data = example_data,
  family = cumulative,
  cores = 4, refresh = 0
)
pp_check(model_collapsed, type = "bars")

Created on 2023-03-30 with reprex v2.0.2

@dmi3kno
Copy link

dmi3kno commented Mar 30, 2023

Here's Muhammad likelihood model. I think you can add posterior checks using the quantile function i implemented here as well

I also added Govindarajulu with quantile-based likelihood. It requires a bracketing rootfinder, which is missing in Stan ATM, so I brewed my own. It converges ok, it is just quite slow and has trouble starting sometimes.

# Muhammad likelihood

library(tidyverse)
library(cmdstanr)
set.seed(42)
bayesplot::bayesplot_theme_set(hrbrthemes::theme_ipsum_rc())

y <- c(rep(1, 200),
        sample(2:31, 200, replace = TRUE),
        rep(32, 150)) 

y |>hist()

mod_txt <- 
'
functions {
 real muhammad_qf(real u, real theta, real alpha, real beta){
   if(theta <= 0 || alpha <= 0 || beta <=0 ) reject("All parameters must be positve!");
   return theta*exp(inv(beta)*(1-u^(-inv(alpha))));
 }
 real muhammad_cdf(real x, real theta, real alpha, real beta){
   return (1-beta*log(x*inv(theta)))^(-alpha);
 }
 real muhammad_pdf(real x, real theta, real alpha, real beta){
   return alpha*beta*inv(x)*(1-beta*log(x*inv(theta)))^(-1-alpha);
 }
 real muhammad_lpdf(real x, real theta, real alpha, real beta){
   return log(alpha)+log(beta)-log(x)+log(1-beta*log(x*inv(theta)))*(-1-alpha);
 }
}
data {
  int<lower=0> N;
  vector[N] y;
}
transformed data{
  real theta=max(y);
}

parameters {
  real<lower=0> alpha;
  real<lower=0> beta;
}

model {
  alpha~exponential(1);
  beta~exponential(1);
  for(i in 1:N)
    target+= muhammad_lpdf(y[i] | theta, alpha, beta);
}
'


mod_txt_govi <-
'
functions{ 
real govindarajulu_s_qf(real p, real gamma, real sigma) {
  if(is_nan(p)) reject("p can not be nan!");
  if(p==0 || p==1) reject("p should be between 0 and 1!");
  real gammap1 = gamma+1;
  real pgamma = pow(p, gamma);
  return sigma*(gammap1*pgamma-gamma*pgamma*p);
}
vector govindarajulu_v_qf(vector p, real gamma, real sigma) {
  int N = rows(p);
  vector[N] res=rep_vector(not_a_number(), N);
  for (i in 1:N){
      res[i]=govindarajulu_s_qf(p[i], gamma, sigma);
    }
  return res;
}
real govindarajulu_s_qdf(real p, real gamma, real sigma) {
  real gammap1 = gamma+1;
  real pgammam1 = pow(p, gamma-1);
  return sigma*gamma*gammap1*pgammam1*(1-p);
}
real govindarajulu_s_ldqf_lpdf(real p, real gamma, real sigma){
  return -log(gamma)-log1p(gamma)-gamma*log(p)+log(p)-log(sigma)-log1m(p);
}

real rootfun(real u0, vector params, data real y_r){
  return y_r - govindarajulu_s_qf(u0, params[1], params[2]);
}

real iqf_brent01(vector params, data real y_r, data real rel_tol, data real max_steps, int verbose){
  // reimplemented from https://nickcdryan.com/2017/09/13/root-finding-algorithms-in-python-line-search-bisection-secant-newton-raphson-boydens-inverse-quadratic-interpolation-brents/
  // relevant https://math.stackexchange.com/questions/1464795/what-are-the-difference-between-some-basic-numerical-root-finding-methods
  // fortran implementation https://www.cantorsparadise.com/some-root-finding-algorithms-5c6fa8a4a165
    real u0=rel_tol;
    real u1=1-rel_tol;
    int steps_taken=0;
    real f0=rootfun(u0, params, y_r);
    real f1=rootfun(u1, params, y_r);
    real tmp=0;

    if(f0*f1>0) reject("Root is not bracketed! Params", params);

    // swap for non-decreasing function
    if (abs(f0)<abs(f1)){
      tmp=u0;
      u0=u1;
      u1=tmp;
      tmp=f0;
      f0=f1;
      f1=tmp;
    }
    real u2=u0;
    real f2=f0;
    int mflag=1;
    real n=0;
    real d=0;

    while(steps_taken<max_steps && abs(u1-u0)>rel_tol){
      f0=rootfun(u0, params, y_r);
      f1=rootfun(u1, params, y_r);
      f2=rootfun(u2, params, y_r);

      if(f0!=f2 && f1 !=f2){
        real l0=(u0*f1*f2)/((f0-f1)*(f0-f2));
        real l1=(u1*f0*f2)/((f1-f0)*(f1-f2));
        real l2=(u2*f1*f0)/((f2-f0)*(f2-f1));
        n = l0+l1+l2;
      } else {
        n = u1-(f1*(u1-u0)/(f1-f0));
      }

      if((n<(3*u0+u1)/4 || n>u1) ||
      (mflag==1 && abs(n-u1)>=abs(u1-u2)/2.0) ||
      (mflag==0 && abs(n-u1)>=abs(u2-d)/2.0) ||
      (mflag==1 && abs(u1-u2)<rel_tol ) ||
      (mflag==0 && abs(u2-d)<rel_tol)
      ){
        n=(u0+u1)/2.0;
        mflag=1;
      } else {
        mflag=0;
      }
      real fn=rootfun(n, params, y_r);
      d=u2;
      u2=u1;
      if(f0*fn<0){
        u1=n;
      } else {
        u0=n;
      }
      if(abs(f0)<abs(f1)){
        tmp=u0;
        u0=u1;
        u1=tmp;
      }
      steps_taken+=1;
    }
   if(verbose) print("Steps taken ", steps_taken);
    return u1;
  }
} // end of block
data {
  int<lower=0> N; // size of the data
  vector[N] y; // data on variable level
}
transformed data {
  real<lower=0> sigma=max(y)+1e-6;
  real rel_tol = 1e-12; // for algebra solver
  int max_steps= 1e3;// for algebra solver
  int verbose=0;
}
parameters {
  real<lower=0> gamma; // gamma for govindarajulu
}
model {
  vector[N] u;
  target += exponential_lpdf(gamma | 1);
  for (i in 1:N){
   u[i] = iqf_brent01(to_vector({gamma, sigma}), y[i], rel_tol, max_steps, verbose);
   target += govindarajulu_s_ldqf_lpdf(u[i] | gamma, sigma);
  }
}
'

# mod <- cmdstanr::cmdstan_model(write_stan_file(mod_txt_govi))
mod <- cmdstanr::cmdstan_model(write_stan_file(mod_txt))
fit <- mod$sample(data=list(y=y, N=length(y)))
fit |> posterior::as_draws_array() %>% 
  bayesplot::mcmc_combo()

Rplot01
Rplot02

@andrewheiss
Copy link
Author

Whoa, this is awesome @dmi3kno—thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment