# Simulating drift-processes on the GPU using PyCuda
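#
# A random-walk approximation to the drift diffusion model is run in parallel
# on the GPU: each CUDA thread simulates one particle until it crosses a
# decision boundary and writes back the signed first-passage time (negative for
# the lower boundary). For collapsing (Weibull-shaped) thresholds no closed-form
# likelihood exists, so inference uses a synthetic likelihood over summary
# statistics (cf. Wood, 2010).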
from __future__ import division
import pycuda.compiler
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import pycuda.curandom
from pycuda.cumath import exp as pycuda_exp
from pycuda.compiler import SourceModule
import matplotlib.pyplot as plt
from kabuki.utils import scipy_stochastic
import numpy as np
import scipy as sp
import scipy.stats
import pymc as pm
import hddm
code = """
#include <curand_kernel.h>
extern "C"
{
__global__ void sim_drift(curandState *global_state, float const v, float const V, float const a, float const z, float const Z, float const t, float const T, float const dt, float const intra_sv, int const n, float *out)
{
float start_delay, start_point, drift_rate, rand, prob_up, position, step_size, time;
int idx = blockIdx.x*blockDim.x + threadIdx.x;
/* Guard against the extra threads launched when n is not a multiple of the block size. */
if (idx >= n) return;
curandState local_state = global_state[idx];
/* Sample variability parameters. */
start_delay = curand_uniform(&local_state)*T + (t-T/2);
start_point = (curand_uniform(&local_state)*Z + (z-Z/2))*a;
drift_rate = curand_normal(&local_state)*V + v;
/* Set up drift variables. */
prob_up = .5f*(1+sqrtf(dt)/intra_sv*drift_rate);
step_size = sqrtf(dt)*intra_sv;
time = start_delay;
position = start_point;
/* Simulate particle movement until threshold is crossed. */
while (position > 0 && position < a) {
rand = curand_uniform(&local_state);
position += ((rand < prob_up)*2 - 1) * step_size;
time += dt;
}
/* Save back state. */
global_state[idx] = local_state;
/* Figure out boundary, save result. */
if (position <= 0) {
out[idx] = -time;
}
else {
out[idx] = time;
}
}
__global__ void sim_drift_var_thresh(curandState *global_state, float const v, float const V, float const *a, float const z, float const Z, float const t, float const T, float const dt, float const intra_sv, float const max_time, int const n, float *out)
{
float start_delay, start_point, drift_rate, rand, prob_up, position, step_size, time;
int idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx >= n) return;
int x_pos = 0;
curandState local_state = global_state[idx];
start_delay = curand_uniform(&local_state)*T + (t-T/2);
start_point = curand_uniform(&local_state)*Z + (z-Z/2);
drift_rate = curand_normal(&local_state)*V + v;
prob_up = .5f*(1+sqrtf(dt)/intra_sv*drift_rate);
step_size = sqrtf(dt)*intra_sv;
time = 0;
position = start_point;
while (fabs(position) < a[x_pos] && time < max_time) {
rand = curand_uniform(&local_state);
position += ((rand < prob_up)*2 - 1) * step_size;
time += dt;
x_pos++;
}
time += start_delay;
global_state[idx] = local_state;
if (position <= 0) {
out[idx] = -time;
}
else {
out[idx] = time;
}
}
__global__ void fill_normal(curandState *global_state, int const n, float *out)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx >= n) return;
curandState local_state = global_state[idx];
out[idx] = curand_normal(&local_state);
global_state[idx] = local_state;
}
/* __global__ void sim_drift_switch(curandState *global_state, float const vpp, float const vcc, float const a, float const z, float const t, float const tcc, float const dt, float const intra_sv, float *out)
{
float start_delay, start_point, rand, prob_up_pp, prob_up_cc, position, step_size, time;
int idx = blockIdx.x*blockDim.x + threadIdx.x;
curandState local_state = global_state[idx];
start_delay = t;
start_point = z;
prob_up_pp = .5f*(1+sqrtf(dt)/intra_sv*vpp);
prob_up_cc = .5f*(1+sqrtf(dt)/intra_sv*vcc);
step_size = sqrtf(dt)*intra_sv;
time = 0;
position = start_point;
while (fabs(position) < a) {
rand = curand_uniform(&local_state);
if (time < tcc) {
position += ((rand < prob_up_pp)*2 - 1) * step_size;
}
else {
position += ((rand < prob_up_cc)*2 - 1) * step_size;
}
time += dt;
}
time += start_delay;
global_state[idx] = local_state;
if (position <= 0) {
out[idx] = -time;
}
else {
out[idx] = time;
}
}
*/
}
"""
mod = SourceModule(code, keep=False, no_extern_c=True)
_size = 512
_sim_drift_cuda = mod.get_function("sim_drift")
_sim_drift_var_thresh_cuda = mod.get_function("sim_drift_var_thresh")
fill_normal = mod.get_function("fill_normal")
_generator = None
_thresh = []
_thresh_gpu = None
_out = None
def sim_drift(v, V, a, z, Z, t, T, size=512, dt=1e-4, update=False, return_gpu=False):
    global _generator, _out, _size
    size = int(size)
    if _generator is None or update:
        _generator = pycuda.curandom.XORWOWRandomNumberGenerator()
        max_size = _generator.generators_per_block
        if size > max_size:
            print "too big"
    if _out is None or update:
        _out = gpuarray.empty(size, dtype=np.float32)
    _sim_drift_cuda(_generator.state, np.float32(v), np.float32(V), np.float32(a), np.float32(z), np.float32(Z), np.float32(t), np.float32(T), np.float32(dt), np.float32(1), np.int32(size), _out, block=(64, 1, 1), grid=(size // 64 + 1, 1))
    if return_gpu:
        return _out
    else:
        return _out.get()
def sim_drift_var_thresh(v, V, a, z, Z, t, T, max_time, size=512, dt=1e-4, update=False, return_gpu=False):
    global _generator, _thresh, _thresh_gpu, _out
    # Init
    if _generator is None or update:
        _generator = pycuda.curandom.XORWOWRandomNumberGenerator()
        max_size = _generator.generators_per_block
        if size > max_size:
            print "too big"
    if _thresh_gpu is None or update or np.any(_thresh != a):
        if isinstance(a, pycuda.gpuarray.GPUArray):
            _thresh_gpu = a
        else:
            _thresh_gpu = gpuarray.to_gpu(a)
        _thresh = a
    if _out is None or update:
        _out = gpuarray.empty(size, dtype=np.float32)
    _sim_drift_var_thresh_cuda(_generator.state, np.float32(v), np.float32(V), _thresh_gpu, np.float32(z), np.float32(Z), np.float32(t), np.float32(T), np.float32(dt), np.float32(1), np.float32(max_time), np.int32(size), _out, block=(64, 1, 1), grid=(size // 64 + 1, 1))
    if return_gpu:
        return _out
    else:
        return _out.get()
def gen_weibull_gpu(a, k, l, max_time=5, dt=1e-4):
    max_time = np.float32(max_time)
    x = pycuda.gpuarray.arange(0., max_time, dt, dtype=np.float32)
    # Weibull pdf
    thresh_func_gpu = k / l * (x / l)**(k - 1) * pycuda_exp(-(x / l)**k)
    thresh_func_gpu *= a
    return thresh_func_gpu
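# Sanity check (an added sketch, assuming scipy's Weibull parameterization):
# the expression above equals a * weibull_min(c=k, scale=l).pdf(x), so the GPU
# threshold can be verified against a CPU reference:
def check_weibull_thresh(a, k, l, max_time=5, dt=1e-4):
    x = np.arange(0., max_time, dt, dtype=np.float32)
    cpu = a * sp.stats.weibull_min(c=k, scale=l).pdf(x)
    return np.allclose(gen_weibull_gpu(a, k, l, max_time, dt).get(), cpu, atol=1e-3)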
sum_stats_len = 2 * 10 + 1
def compute_stats_vec(data):
    stats_vec = np.empty(sum_stats_len)
    lower = np.abs(data[data < 0])
    upper = data[data > 0]
    for i, data_resp in enumerate([lower, upper]):
        # mean
        stats_vec[i * 10 + 0] = np.mean(data_resp)
        # std
        stats_vec[i * 10 + 1] = np.std(data_resp)
        # skew
        stats_vec[i * 10 + 2] = sp.stats.skew(data_resp)
        # 7 quantiles
        stats_vec[i * 10 + 3:i * 10 + 10] = sp.stats.mstats.mquantiles(data_resp, np.linspace(0, 1, 8, endpoint=False)[1:])
    # Ratio of upper to lower boundary responses
    stats_vec[-1] = len(upper) / len(lower)
    return stats_vec
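# Layout of the 21-element summary vector: slots 0-9 describe lower-boundary
# RTs and 10-19 upper-boundary RTs (mean, std, skew, 7 quantiles each); the
# last slot holds the ratio of upper to lower responses. For example:
#   rts = sim_drift(.5, .1, 2., .5, .1, .3, .1)
#   print compute_stats_vec(rts)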
def synth_likelihood(data, v, V, a, a_k, a_l, z, Z, t, T, samples=50, drifts_per_sample=512):
    """Compute synthetic log-likelihood for the collapsing-threshold wfpt."""
    def mv_normal_like(s, mu, cov):
        s = np.asmatrix(s)
        mu = np.asmatrix(mu)
        cov = np.asmatrix(cov)
        return float(-.5 * (s - mu) * (cov**-1) * (s - mu).T - .5 * np.log(np.linalg.det(cov)))
    summary_vecs = np.empty((samples, sum_stats_len))
    max_time = 5.
    thresh = gen_weibull_gpu(a, a_k, a_l, max_time=max_time)
    for sample in range(samples):
        rts = sim_drift_var_thresh(v, V, thresh, z, Z, t, T, max_time, size=drifts_per_sample)
        summary_vecs[sample, :] = compute_stats_vec(rts)
    summary_data = compute_stats_vec(data)
    mean = np.mean(summary_vecs, axis=0)
    cov = np.cov(summary_vecs.T)
    # Evaluate synth likelihood
    logp = mv_normal_like(summary_data, mean, cov)
    return logp
WienerVarThresh = pm.stochastic_from_dist(name="Wiener synthetic likelihood",
logp=synth_likelihood,
dtype=np.float32,
mv=False)
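# A sketch (added; priors and data are placeholders) of how WienerVarThresh
# could be plugged into a PyMC 2 model, mirroring the toy example at the end
# of this gist:
#   v = pm.Uniform('v', lower=-5, upper=5, value=0)
#   a = pm.Uniform('a', lower=.5, upper=4, value=2)
#   wfpt = WienerVarThresh('wfpt', value=observed_rts, v=v, V=.1, a=a,
#                          a_k=1., a_l=1., z=.5, Z=.1, t=.3, T=.1,
#                          observed=True)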
class wfpt_gpu_gen(hddm.likelihoods.wfpt_gen):
    sampling_method = 'gpu'
    def _rvs(self, v, V, a, z, Z, t, T):
        param_dict = {'v': v, 'z': z, 't': t, 'a': a, 'Z': Z, 'V': V, 'T': T}
        print self._size
        if self.sampling_method == 'gpu':
            size = 100
            rts = []
            # Draw samples in batches of `size` until self._size is reached.
            for i in range(0, int(self._size), size):
                sampled_rts = sim_drift(v, V, a, z, Z, t, T, size=size, dt=self.dt)
                rts.append(sampled_rts)
            sampled_rts = np.concatenate(rts)
        else:
            sampled_rts = hddm.generate.gen_rts(param_dict, method=self.sampling_method, samples=self._size, dt=self.dt)
        return sampled_rts[:self._size]
wfpt_gpu_like = scipy_stochastic(wfpt_gpu_gen, name='wfpt_gpu')
def main():
    thresh_func = gen_weibull_gpu(3, 1, 1)
    max_time = 5.
    dt = 1e-4
    thresh_const = np.ones(int(max_time / dt), dtype=np.float32)
    thresh_func_const = pycuda.gpuarray.to_gpu(thresh_const)
    print thresh_func_const.get()
    plt.plot(np.arange(0, max_time, dt), thresh_func.get())
    plt.plot(np.arange(0, max_time, dt), -thresh_func.get())
    #thresh_func = np.array(a*np.exp(-rate*np.linspace(0, max_time, steps)), dtype=np.float32)
    size = 412
    out = sim_drift_var_thresh(.5, .1, thresh_func, 0., .1, .3, .1, max_time, size=size, update=True)
    plt.figure()
    plt.hist(out, bins=40)
    out = sim_drift(1, .1, 2, .5, .1, .3, .1, size)
    plt.figure()
    plt.hist(out, bins=40)
    plt.show()
if __name__ == '__main__':
    size = 512
    generator = pycuda.curandom.XORWOWRandomNumberGenerator()
    max_size = generator.generators_per_block
    out = gpuarray.empty(size, dtype=np.float32)
    fill_normal(generator.state, np.int32(size), out, block=(64, 1, 1), grid=(size // 64 + 1, 1))
    data = out.get()
    print data.mean()
    print data.std()
    if size > max_size:
        print data[:max_size].mean()
        print data[:max_size].std()
    plt.hist(data, bins=50)
    plt.show()
    #main()

    x = np.linspace(-5, 5, 100)
    sampler = wfpt_gpu_like.rv
    params = hddm.generate.gen_rand_params()
    data = sampler.rvs(params['v'], params['V'], params['a'], params['z'],
                       params['Z'], params['t'], params['T'], size=size)
    hist = np.histogram(data, range=(-5, 5), bins=100, density=True)[0]
    cumsum = np.cumsum(data)
    plt.plot(x, sampler.cdf(x, params['v'], params['V'], params['a'], params['z'],
                            params['Z'], params['t'], params['T']))
    plt.plot(x, hist)
    print cumsum.shape
    plt.show()

# --- Second file: testing the synthetic likelihood on a simple normal model ---
from __future__ import division
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats
import pymc as pm
samples = np.random.randn(1000)
def plot_synth_samples(data, lower_samples=10, upper_samples=200, lower_dps=10, upper_dps=200, interval=50):
    x, y = np.meshgrid(np.arange(lower_samples, upper_samples, interval), np.arange(lower_dps, upper_dps, interval))
    #plt.figure()
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            plt.subplot(x.shape[0], x.shape[1], i * x.shape[1] + j + 1)
            if i == x.shape[0] - 1:
                plt.xlabel('%d' % x[i, j])
            if j == 0:
                plt.ylabel('%d' % y[i, j])
            #plot_synth_like(data, samples=np.round(x[i, j]), dataset_samples=np.round(y[i, j]))
def plot_synth_like(data, lower_mu=-1, upper_mu=1, lower_sig=.8, upper_sig=1.5, steps=50, samples=100, dataset_samples=100, plot=True):
    x, y = np.meshgrid(np.linspace(lower_mu, upper_mu, steps), np.linspace(lower_sig, upper_sig, steps))
    z = np.empty_like(x)
    for i in range(len(x)):
        for j in range(len(y)):
            z[i, j] = synth_likelihood(data, x[i, j], y[i, j], samples=samples, dataset_samples=dataset_samples)
    plt.contourf(x, y, z)
def plot_erroranalysis(data_samples=(10,), dataset_samples=(10,), datasets=200):
    x = np.asarray(dataset_samples)
    y = np.empty(x.shape, dtype=float)
    for data_sample in data_samples:
        data = np.random.randn(data_sample)
        for i, dataset_sample in enumerate(dataset_samples):
            errors = []
            sl_sum = 0
            pt_sum = 0
            for rep in range(1, 400):
                # Choose two random mu points
                mu1 = 0
                mu2 = (np.random.rand() - .5)
                # Evaluate true log-likelihood
                pt1 = pm.normal_like(data, mu=mu1, tau=1**-2)
                pt2 = pm.normal_like(data, mu=mu2, tau=1**-2)
                # Log-likelihood ratio is a difference in log space
                ptr = pt1 - pt2
                pt_sum += pt1
                pt_sum += pt2
                #print ptr
                # Evaluate synthetic log-likelihood
                ps1 = synth_likelihood(data, mu1, 1, dataset_samples=x[i], samples=datasets)
                ps2 = synth_likelihood(data, mu2, 1, dataset_samples=x[i], samples=datasets)
                sl_sum += ps1
                sl_sum += ps2
                pts = ps1 - ps2
                #print pts
                errors.append((pts - ptr)**2)
            print pt_sum
            print sl_sum
            y[i] = np.mean(errors)
        plt.plot(x, y, label='%i' % data_sample)
    plt.xlabel('Number of samples per dataset')
    plt.ylabel('MSE')
    plt.legend()
def plot_ratio_analysis(data_samples=(100,), dataset_samples=(100,), datasets=100):
    x, y = np.meshgrid(data_samples, dataset_samples)
    z = np.empty(x.shape, dtype=float)
    for i, data_sample in enumerate(data_samples):
        for j, dataset_sample in enumerate(dataset_samples):
            data = np.random.randn(x[j, i])
            errors = []
            sl_sum = 0
            pt_sum = 0
            for rep in range(1, 200):
                # Choose two random mu points
                mu1 = (np.random.rand() - .5) * 3
                mu2 = (np.random.rand() - .5) * 3
                # Evaluate true log-likelihood
                pt1 = pm.normal_like(data, mu=mu1, tau=1)
                pt2 = pm.normal_like(data, mu=mu2, tau=1)
                # Log-likelihood ratio is a difference in log space
                ptr = pt1 - pt2
                pt_sum += pt1
                pt_sum += pt2
                #print ptr
                # Evaluate synthetic log-likelihood
                ps1 = synth_likelihood(data, mu1, 1, dataset_samples=y[j, i], samples=datasets)
                ps2 = synth_likelihood(data, mu2, 1, dataset_samples=y[j, i], samples=datasets)
                sl_sum += ps1
                sl_sum += ps2
                pts = ps1 - ps2
                #print pts
                errors.append((pts - ptr)**2)
            print pt_sum
            print sl_sum
            z[j, i] = np.mean(errors)
            print x[j, i], y[j, i], z[j, i]
    print x
    print y
    print z
    cont = plt.contourf(x, y, z)
    plt.colorbar(cont)
    plt.xlabel('Number of samples per dataset')
    plt.ylabel('Size of input data')
def synth_likelihood(data, mu, std, samples=100, dataset_samples=100):
    """Compute synthetic log-likelihood of normally distributed data."""
    def mv_normal_like(s, mu, cov):
        s = np.asmatrix(s)
        mu = np.asmatrix(mu)
        cov = np.asmatrix(cov)
        return float(-.5 * (s - mu) * (cov**-1) * (s - mu).T - .5 * np.log(np.linalg.det(cov)))
    true_sum = np.array((data.mean(), data.std())) #, np.sum(data), data.var()))
    sum_stats = np.empty((samples, 2))
    for sample in range(samples):
        s = np.random.randn(dataset_samples) * std + mu
        sum_stats[sample, :] = s.mean(), s.std() #, np.sum(s), s.var()
    mean = np.mean(sum_stats, axis=0)
    cov = np.cov(sum_stats.T)
    # Evaluate synth likelihood
    logp = mv_normal_like(true_sum, mean, cov)
    return logp
synth = pm.stochastic_from_dist(name="Wiener synthetic likelihood",
logp=synth_likelihood,
dtype=np.float32,
mv=False)
mu = pm.Uniform('mu', lower=-5, upper=5, value=0)
std = pm.Uniform('std', lower=.1, upper=2, value=1)
sl = synth('Synthetic likelihood', value=samples, mu=mu, std=std, observed=True)
#rl = pm.Normal('Regular likelihood', value=samples, mu=mu, tau=std**-2, observed=True)
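# To draw posterior samples (an added sketch using the standard PyMC 2 API):
#   m = pm.MCMC([mu, std, sl])
#   m.sample(iter=10000, burn=5000)
#   pm.Matplot.plot(m)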