Erik-Jan van Kesteren vankesteren

## importance_sampling_vs_lintsampler.py
# Comparing lintsampler to basic uniform importance sampling
from scipy.stats import norm, uniform
import numpy as np
import matplotlib.pyplot as plt
from lintsampler import LintSampler

NSAMPLES = 1000000

# GMM example
def gmm_pdf(x):

## probsocsim.R
# Simple probabilistic simulation script

# Causal graph:
# NetIncome -> + CulturalActivities
# NetIncome -> + SportsActivities
# NetIncome -> - Debts
# NetIncome -> + SocialComparison
# SportsActivities -> + Health
# SportsActivities -> + Partnership
# CulturalActivities -> + SocialComparison

## permutefun.jl
using StatsBase: sample, mean, cor
using LinearAlgebra: norm
using Plots, Random

"""
    permutefun!(x::Vector, y::Vector, rule::Function, score::Real; tol::Number = 1e-3, max_iter::Int = 10_000, max_search::Number = 100, verbose::Bool = true)

Permute y values to approximate a correlation between x and y of ρ.

# Arguments

## rol_model.jl
using Turing
using LogExpFunctions: logsumexp
using DataFrames

# The rank ordered logit model in Turing
# The rank-ordered logit likelihood
function rank_ordered_logit(ordered_skills::Vector{<:Real})
    ll = 0.0
    for m in 1:(length(ordered_skills) - 1)
        ll += ordered_skills[m] - logsumexp(ordered_skills[m:end])

## idealpoint.R
# Ideal point model in stan / R
# example taken & simplified from https://medewitt.github.io/resources/stan_ideal_point.html
library(tidyverse)
library(cmdstanr)

# simulate data: 50 legislators, 150 votes
set.seed(1834)
N_legislators <- 50
N_bills <- 150

## elbo.jl
# Let's figure out this ELBO thing
using Distributions, StatsPlots, Optim, Random
Random.seed!(45)

# The target distribution. Assume we don't know it but
# we can compute the (unnormalized) logpdf and sample
# from it. For illustration, let's make it a weird mixture
comps = [Normal(2, 3), Normal(-3, 1.5), LogNormal(3, 0.4)]
probs = [.1, .1, .8]
p = MixtureModel(comps, probs)

## drplot_marginal.R
#' Marginal density ratio plot
#'
#' A plot to compare two (continuous) distributions in the
#' relative number of occurrences on a particular variable.
#' The transparency of the background histogram indicates
#' how much data is available at that location.
#'
#' @param dr_fit fitted model from the densityratio package
#' @param var <[`data-masked`][dplyr::dplyr_data_masking]> variable from the data
#'

## penalized_synthetic_control.R
#' Penalized synthetic control estimator
#'
#' Estimate synthetic control with penalization
#' according to Abadie & L'Hour.
#'
#' @param X1 treated unit covariates
#' @param X0 donor units covariates
#' @param v variable weights
#' @param lambda penalization parameter
#' @param ... osqp settings using osqp::osqpSettings()

## proportion_intervals.R
# Different 95% uncertainty intervals for a proportion
dat <- c(rep(0, 38), rep(1, 2))

# normal approximation on probability scale
ci_normal <- function(dat) {
  # point estimate: sample proportion of the 0/1 data
  mu <- mean(dat)
  # standard error of a proportion under the normal approximation
  se <- sqrt(mu * (1 - mu) / length(dat))
  # both bounds use the same standard error `se`
  return(c(
    "2.5 %"  = mu + qnorm(0.025)*se,
    "97.5 %" = mu + qnorm(0.975)*se

## network_autocorrelation.R
# simulate and estimate a network autocorrelation model
# fix the RNG seed so the simulated networks are reproducible
set.seed(45)
N <- 200
# N x N binary adjacency matrix: every entry is an independent Bernoulli(0.2) draw
A <- matrix(rbinom(N*N, 1, 0.2), N)
# zero the diagonal so that no node is tied to itself
diag(A) <- 0
# second adjacency matrix, drawn the same way as A
A2 <- matrix(rbinom(N*N, 1, 0.2), N)
diag(A2) <- 0
# Row-normalize A: rowSums(A) has length N = nrow(A), and R recycles it down
# each column, so entry [i, j] is divided by the sum of row i.
# NOTE(review): a row with no ties would give 0 / 0 = NaN here.
An <- A / rowSums(A)

# params
	# Comparing lintsampler to basic uniform importance sampling
	from scipy.stats import norm, uniform
	import numpy as np
	import matplotlib.pyplot as plt
	from lintsampler import LintSampler

	NSAMPLES = 1000000

	# GMM example
	def gmm_pdf(x):
	# Simple probabilistic simulation script

	# Causal graph:
	# NetIncome -> + CulturalActivities
	# NetIncome -> + SportsActivities
	# NetIncome -> - Debts
	# NetIncome -> + SocialComparison
	# SportsActivities -> + Health
	# SportsActivities -> + Partnership
	# CulturalActivities -> + SocialComparison
	using StatsBase: sample, mean, cor
	using LinearAlgebra: norm
	using Plots, Random

	"""
	permutefun!(x::Vector, y::Vector, rule::Function, score::Real; tol::Number = 1e-3, max_iter::Int = 10_000, max_search::Number = 100, verbose::Bool = true)

	Permute y values to approximate a correlation between x and y of ρ.

	# Arguments
	using Turing
	using LogExpFunctions: logsumexp
	using DataFrames

	# The rank ordered logit model in Turing
	# The rank-ordered logit likelihood
	function rank_ordered_logit(ordered_skills::Vector{<:Real})
	ll = 0.0
	for m in 1:(length(ordered_skills) - 1)
	ll += ordered_skills[m] - logsumexp(ordered_skills[m:end])
	# Ideal point model in stan / R
	# example taken & simplified from https://medewitt.github.io/resources/stan_ideal_point.html
	library(tidyverse)
	library(cmdstanr)

	# simulate data: 50 legislators, 150 votes
	set.seed(1834)
	N_legislators <- 50
	N_bills <- 150
	# Let's figure out this ELBO thing
	using Distributions, StatsPlots, Optim, Random
	Random.seed!(45)

	# The target distribution. Assume we don't know it but
	# we can compute the (unnormalized) logpdf and sample
	# from it. For illustration, let's make it a weird mixture
	comps = [Normal(2, 3), Normal(-3, 1.5), LogNormal(3, 0.4)]
	probs = [.1, .1, .8]
	p = MixtureModel(comps, probs)
	#' Marginal density ratio plot
	#'
	#' A plot to compare two (continuous) distributions in the
	#' relative number of occurrences on a particular variable.
	#' The transparency of the background histogram indicates
	#' how much data is available at that location.
	#'
	#' @param dr_fit fitted model from the densityratio package
	#' @param var <[`data-masked`][dplyr::dplyr_data_masking]> variable from the data
	#'
	#' Penalized synthetic control estimator
	#'
	#' Estimate synthetic control with penalization
	#' according to Abadie & L'Hour.
	#'
	#' @param X1 treated unit covariates
	#' @param X0 donor units covariates
	#' @param v variable weights
	#' @param lambda penalization parameter
	#' @param ... osqp settings using osqp::osqpSettings()
	# Different 95% uncertainty intervals for a proportion
	dat <- c(rep(0, 38), rep(1, 2))

	# normal approximation on probability scale
	ci_normal <- function(dat) {
	# point estimate: sample proportion of the 0/1 data
	mu <- mean(dat)
	# standard error of a proportion under the normal approximation
	se <- sqrt(mu * (1 - mu) / length(dat))
	# both bounds use the same standard error `se`
	return(c(
	"2.5 %" = mu + qnorm(0.025)*se,
	"97.5 %" = mu + qnorm(0.975)*se
	# simulate and estimate a network autocorrelation model
	# fix the RNG seed so the simulated networks are reproducible
	set.seed(45)
	N <- 200
	# N x N binary adjacency matrix: every entry is an independent Bernoulli(0.2) draw
	A <- matrix(rbinom(N*N, 1, 0.2), N)
	# zero the diagonal so that no node is tied to itself
	diag(A) <- 0
	# second adjacency matrix, drawn the same way as A
	A2 <- matrix(rbinom(N*N, 1, 0.2), N)
	diag(A2) <- 0
	# Row-normalize A: rowSums(A) has length N = nrow(A), and R recycles it down
	# each column, so entry [i, j] is divided by the sum of row i.
	# NOTE(review): a row with no ties would give 0 / 0 = NaN here.
	An <- A / rowSums(A)

	# params