Skip to content

Instantly share code, notes, and snippets.

@vankesteren
vankesteren / importance_sampling_vs_lintsampler.py
Last active June 26, 2024 18:25
Comparing lintsampler to basic uniform importance sampling
# Comparing lintsampler to basic uniform importance sampling
from scipy.stats import norm, uniform
import numpy as np
import matplotlib.pyplot as plt
from lintsampler import LintSampler
NSAMPLES = 1000000
# GMM example
def gmm_pdf(x):
@vankesteren
vankesteren / probsocsim.R
Created June 19, 2024 10:41
Example of a probabilistic social simulation
# Simple probabilistic simulation script
# Causal graph:
# NetIncome -> + CulturalActivities
# NetIncome -> + SportsActivities
# NetIncome -> - Debts
# NetIncome -> + SocialComparison
# SportsActivities -> + Health
# SportsActivities -> + Partnership
# CulturalActivities -> + SocialComparison
@vankesteren
vankesteren / permutefun.jl
Last active May 30, 2024 15:12
Permuting data to induce complex bivariate relations.
using StatsBase: sample, mean, cor
using LinearAlgebra: norm
using Plots, Random
"""
permutefun!(x::Vector, y::Vector, rule::Function, score::Real; tol::Number = 1e-3, max_iter::Int = 10_000, max_search::Number = 100, verbose::Bool = true)
Permute y values to approximate a correlation between x and y of ρ.
# Arguments
@vankesteren
vankesteren / rol_model.jl
Last active May 10, 2024 10:13
Bayesian inference for rank-ordered logit model in Julia's Turing.jl
using Turing
using LogExpFunctions: logsumexp
using DataFrames
# The rank ordered logit model in Turing
# The rank-ordered logit likelihood
function rank_ordered_logit(ordered_skills::Vector{<:Real})
ll = 0.0
for m in 1:(length(ordered_skills) - 1)
ll += ordered_skills[m] - logsumexp(ordered_skills[m:end])
@vankesteren
vankesteren / idealpoint.R
Last active April 25, 2024 19:55
Ideal point model in stan
# Ideal point model in stan / R
# example taken & simplified from https://medewitt.github.io/resources/stan_ideal_point.html
library(tidyverse)
library(cmdstanr)
# simulate data: 100 legislators, 150 votes
set.seed(1834)
N_legislators <- 50
N_bills <- 150
@vankesteren
vankesteren / elbo.jl
Last active March 21, 2024 15:50
Figuring out some ELBO stuff...
# Let's figure out this ELBO thing
using Distributions, StatsPlots, Optim, Random
Random.seed!(45)
# The target distribution. Assume we don't know it but
# we can compute the (unnormalized) logpdf and sample
# from it. For illustration, let's make it a weird mixture
comps = [Normal(2, 3), Normal(-3, 1.5), LogNormal(3, 0.4)]
probs = [.1, .1, .8]
p = MixtureModel(comps, probs)
@vankesteren
vankesteren / drplot_marginal.R
Last active December 9, 2023 19:50
Marginal density ratio plot
#' Marginal density ratio plot
#'
#' A plot to compare two (continuous) distributions in the
#' relative number of occurrences on a particular variable.
#' The transparency of the background histogram indicates
#' how much data is available at that location.
#'
#' @param dr_fit fitted model from the densityratio package
#' @param var <[`data-masked`][dplyr::dplyr_data_masking]> variable from the data
#'
@vankesteren
vankesteren / penalized_synthetic_control.R
Created November 24, 2023 09:58
penalized synthetic control estimation (Abadie & L'Hour, 2021)
#' Penalized synthetic control estimator
#'
#' Estimate synthetic control with penalization
#' according to Abadie & L'Hour.
#'
#' @param X1 treated unit covariates
#' @param X0 donor units covariates
#' @param v variable weights
#' @param lambda penalization parameter
#' @param ... osqp settings using osqp::osqpSettings()
@vankesteren
vankesteren / proportion_intervals.R
Created October 12, 2023 12:53
Uncertainty intervals for a proportion
# Different 95% uncertainty intervals for a proportion
dat <- c(rep(0, 38), rep(1, 2))
# normal approximation on probability scale
ci_normal <- function(dat) {
mu <- mean(dat)
se <- sqrt(mu * (1 - mu) / length(dat))
return(c(
"2.5 %" = mu + qnorm(0.025)*s_normal,
"97.5 %" = mu + qnorm(0.975)*se
@vankesteren
vankesteren / network_autocorrelation.R
Last active September 20, 2023 15:29
Network autocorrelation model
# simulate and estimate a network autocorrelation model
set.seed(45)
N <- 200
A <- matrix(rbinom(N*N, 1, 0.2), N)
diag(A) <- 0
A2 <- matrix(rbinom(N*N, 1, 0.2), N)
diag(A2) <- 0
An <- A / rowSums(A) # row-normalized ????
# params