# Simulating drift-processes on the GPU using PyCuda
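#
# A random-walk approximation to the drift diffusion model is run in parallel
# on the GPU: each CUDA thread simulates one particle until it crosses a
# decision boundary and writes back the signed first-passage time (negative for
# the lower boundary). For collapsing (Weibull-shaped) thresholds no closed-form
# likelihood exists, so inference uses a synthetic likelihood over summary
# statistics (cf. Wood, 2010).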
from __future__ import division
import pycuda.compiler
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import pycuda.curandom
from pycuda.cumath import exp as pycuda_exp
from pycuda.compiler import SourceModule
import matplotlib.pyplot as plt
from kabuki.utils import scipy_stochastic
import numpy as np
import scipy as sp
import scipy.stats
import pymc as pm
import hddm
code = """
#include <curand_kernel.h>
extern "C"
{
__global__ void sim_drift(curandState *global_state, float const v, float const V, float const a, float const z, float const Z, float const t, float const T, float const dt, float const intra_sv, int const n, float *out)
{
float start_delay, start_point, drift_rate, rand, prob_up, position, step_size, time;
int idx = blockIdx.x*blockDim.x + threadIdx.x;
/* Guard against the extra threads launched when n is not a multiple of the block size. */
if (idx >= n) return;
curandState local_state = global_state[idx];
/* Sample variability parameters. */
start_delay = curand_uniform(&local_state)*T + (t-T/2);
start_point = (curand_uniform(&local_state)*Z + (z-Z/2))*a;
drift_rate = curand_normal(&local_state)*V + v;
/* Set up drift variables. */
prob_up = .5f*(1+sqrtf(dt)/intra_sv*drift_rate);
step_size = sqrtf(dt)*intra_sv;
time = start_delay;
position = start_point;
/* Simulate particle movement until threshold is crossed. */
while (position > 0 && position < a) {
rand = curand_uniform(&local_state);
position += ((rand < prob_up)*2 - 1) * step_size;
time += dt;
}
/* Save back state. */
global_state[idx] = local_state;
/* Figure out boundary, save result. */
if (position <= 0) {
out[idx] = -time;
}
else {
out[idx] = time;
}
}
__global__ void sim_drift_var_thresh(curandState *global_state, float const v, float const V, float const *a, float const z, float const Z, float const t, float const T, float const dt, float const intra_sv, float const max_time, int const n, float *out)
{
float start_delay, start_point, drift_rate, rand, prob_up, position, step_size, time;
int idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx >= n) return;
int x_pos = 0;
curandState local_state = global_state[idx];
start_delay = curand_uniform(&local_state)*T + (t-T/2);
start_point = curand_uniform(&local_state)*Z + (z-Z/2);
drift_rate = curand_normal(&local_state)*V + v;
prob_up = .5f*(1+sqrtf(dt)/intra_sv*drift_rate);
step_size = sqrtf(dt)*intra_sv;
time = 0;
position = start_point;
while (fabs(position) < a[x_pos] && time < max_time) {
rand = curand_uniform(&local_state);
position += ((rand < prob_up)*2 - 1) * step_size;
time += dt;
x_pos++;
}
time += start_delay;
global_state[idx] = local_state;
if (position <= 0) {
out[idx] = -time;
}
else {
out[idx] = time;
}
}
__global__ void fill_normal(curandState *global_state, int const n, float *out)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx >= n) return;
curandState local_state = global_state[idx];
out[idx] = curand_normal(&local_state);
global_state[idx] = local_state;
}
/* __global__ void sim_drift_switch(curandState *global_state, float const vpp, float const vcc, float const a, float const z, float const t, float const tcc, float const dt, float const intra_sv, float *out)
{
float start_delay, start_point, rand, prob_up_pp, prob_up_cc, position, step_size, time;
int idx = blockIdx.x*blockDim.x + threadIdx.x;
curandState local_state = global_state[idx];
start_delay = t;
start_point = z;
prob_up_pp = .5f*(1+sqrtf(dt)/intra_sv*vpp);
prob_up_cc = .5f*(1+sqrtf(dt)/intra_sv*vcc);
step_size = sqrtf(dt)*intra_sv;
time = 0;
position = start_point;
while (fabs(position) < a) {
rand = curand_uniform(&local_state);
if (time < tcc) {
position += ((rand < prob_up_pp)*2 - 1) * step_size;
}
else {
position += ((rand < prob_up_cc)*2 - 1) * step_size;
}
time += dt;
}
time += start_delay;
global_state[idx] = local_state;
if (position <= 0) {
out[idx] = -time;
}
else {
out[idx] = time;
}
}
*/
}
"""
mod = SourceModule(code, keep=False, no_extern_c=True)
_size = 512
_sim_drift_cuda = mod.get_function("sim_drift")
_sim_drift_var_thresh_cuda = mod.get_function("sim_drift_var_thresh")
fill_normal = mod.get_function("fill_normal")
_generator = None
_thresh = []
_thresh_gpu = None
_out = None
def sim_drift(v, V, a, z, Z, t, T, size=512, dt=1e-4, update=False, return_gpu=False):
    global _generator, _out, _size
    size = int(size)
    if _generator is None or update:
        _generator = pycuda.curandom.XORWOWRandomNumberGenerator()
        max_size = _generator.generators_per_block
        if size > max_size:
            print "too big"
    if _out is None or update:
        _out = gpuarray.empty(size, dtype=np.float32)
    _sim_drift_cuda(_generator.state, np.float32(v), np.float32(V), np.float32(a), np.float32(z), np.float32(Z), np.float32(t), np.float32(T), np.float32(dt), np.float32(1), np.int32(size), _out, block=(64, 1, 1), grid=(size // 64 + 1, 1))
    if return_gpu:
        return _out
    else:
        return _out.get()
def sim_drift_var_thresh(v, V, a, z, Z, t, T, max_time, size=512, dt=1e-4, update=False, return_gpu=False):
    global _generator, _thresh, _thresh_gpu, _out
    # Init
    if _generator is None or update:
        _generator = pycuda.curandom.XORWOWRandomNumberGenerator()
        max_size = _generator.generators_per_block
        if size > max_size:
            print "too big"
    if _thresh_gpu is None or update or np.any(_thresh != a):
        if isinstance(a, pycuda.gpuarray.GPUArray):
            _thresh_gpu = a
        else:
            _thresh_gpu = gpuarray.to_gpu(a)
        _thresh = a
    if _out is None or update:
        _out = gpuarray.empty(size, dtype=np.float32)
    _sim_drift_var_thresh_cuda(_generator.state, np.float32(v), np.float32(V), _thresh_gpu, np.float32(z), np.float32(Z), np.float32(t), np.float32(T), np.float32(dt), np.float32(1), np.float32(max_time), np.int32(size), _out, block=(64, 1, 1), grid=(size // 64 + 1, 1))
    if return_gpu:
        return _out
    else:
        return _out.get()
def gen_weibull_gpu(a, k, l, max_time=5, dt=1e-4):
    max_time = np.float32(max_time)
    x = pycuda.gpuarray.arange(0., max_time, dt, dtype=np.float32)
    # Weibull pdf
    thresh_func_gpu = k / l * (x / l)**(k - 1) * pycuda_exp(-(x / l)**k)
    thresh_func_gpu *= a
    return thresh_func_gpu
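# Sanity check (an added sketch, assuming scipy's Weibull parameterization):
# the expression above equals a * weibull_min(c=k, scale=l).pdf(x), so the GPU
# threshold can be verified against a CPU reference:
def check_weibull_thresh(a, k, l, max_time=5, dt=1e-4):
    x = np.arange(0., max_time, dt, dtype=np.float32)
    cpu = a * sp.stats.weibull_min(c=k, scale=l).pdf(x)
    return np.allclose(gen_weibull_gpu(a, k, l, max_time, dt).get(), cpu, atol=1e-3)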
sum_stats_len = 2 * 10 + 1
def compute_stats_vec(data):
    stats_vec = np.empty(sum_stats_len)
    lower = np.abs(data[data < 0])
    upper = data[data > 0]
    for i, data_resp in enumerate([lower, upper]):
        # mean
        stats_vec[i * 10 + 0] = np.mean(data_resp)
        # std
        stats_vec[i * 10 + 1] = np.std(data_resp)
        # skew
        stats_vec[i * 10 + 2] = sp.stats.skew(data_resp)
        # 7 quantiles
        stats_vec[i * 10 + 3:i * 10 + 10] = sp.stats.mstats.mquantiles(data_resp, np.linspace(0, 1, 8, endpoint=False)[1:])
    # Ratio of upper to lower boundary responses
    stats_vec[-1] = len(upper) / len(lower)
    return stats_vec
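# Layout of the 21-element summary vector: slots 0-9 describe lower-boundary
# RTs and 10-19 upper-boundary RTs (mean, std, skew, 7 quantiles each); the
# last slot holds the ratio of upper to lower responses. For example:
#   rts = sim_drift(.5, .1, 2., .5, .1, .3, .1)
#   print compute_stats_vec(rts)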
def synth_likelihood(data, v, V, a, a_k, a_l, z, Z, t, T, samples=50, drifts_per_sample=512):
    """Compute synthetic log-likelihood for the collapsing-threshold wfpt."""
    def mv_normal_like(s, mu, cov):
        s = np.asmatrix(s)
        mu = np.asmatrix(mu)
        cov = np.asmatrix(cov)
        return float(-.5 * (s - mu) * (cov**-1) * (s - mu).T - .5 * np.log(np.linalg.det(cov)))
    summary_vecs = np.empty((samples, sum_stats_len))
    max_time = 5.
    thresh = gen_weibull_gpu(a, a_k, a_l, max_time=max_time)
    for sample in range(samples):
        rts = sim_drift_var_thresh(v, V, thresh, z, Z, t, T, max_time, size=drifts_per_sample)
        summary_vecs[sample, :] = compute_stats_vec(rts)
    summary_data = compute_stats_vec(data)
    mean = np.mean(summary_vecs, axis=0)
    cov = np.cov(summary_vecs.T)
    # Evaluate synth likelihood
    logp = mv_normal_like(summary_data, mean, cov)
    return logp
WienerVarThresh = pm.stochastic_from_dist(name="Wiener synthetic likelihood",
logp=synth_likelihood,
dtype=np.float32,
mv=False)
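# A sketch (added; priors and data are placeholders) of how WienerVarThresh
# could be plugged into a PyMC 2 model, mirroring the toy example at the end
# of this gist:
#   v = pm.Uniform('v', lower=-5, upper=5, value=0)
#   a = pm.Uniform('a', lower=.5, upper=4, value=2)
#   wfpt = WienerVarThresh('wfpt', value=observed_rts, v=v, V=.1, a=a,
#                          a_k=1., a_l=1., z=.5, Z=.1, t=.3, T=.1,
#                          observed=True)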
class wfpt_gpu_gen(hddm.likelihoods.wfpt_gen):
    sampling_method = 'gpu'
    def _rvs(self, v, V, a, z, Z, t, T):
        param_dict = {'v': v, 'z': z, 't': t, 'a': a, 'Z': Z, 'V': V, 'T': T}
        print self._size
        if self.sampling_method == 'gpu':
            size = 100
            rts = []
            # Draw samples in batches of `size` until self._size is reached.
            for i in range(0, int(self._size), size):
                sampled_rts = sim_drift(v, V, a, z, Z, t, T, size=size, dt=self.dt)
                rts.append(sampled_rts)
            sampled_rts = np.concatenate(rts)
        else:
            sampled_rts = hddm.generate.gen_rts(param_dict, method=self.sampling_method, samples=self._size, dt=self.dt)
        return sampled_rts[:self._size]
wfpt_gpu_like = scipy_stochastic(wfpt_gpu_gen, name='wfpt_gpu')
def main():
    thresh_func = gen_weibull_gpu(3, 1, 1)
    max_time = 5.
    dt = 1e-4
    thresh_const = np.ones(int(max_time / dt), dtype=np.float32)
    thresh_func_const = pycuda.gpuarray.to_gpu(thresh_const)
    print thresh_func_const.get()
    plt.plot(np.arange(0, max_time, dt), thresh_func.get())
    plt.plot(np.arange(0, max_time, dt), -thresh_func.get())
    #thresh_func = np.array(a*np.exp(-rate*np.linspace(0, max_time, steps)), dtype=np.float32)
    size = 412
    out = sim_drift_var_thresh(.5, .1, thresh_func, 0., .1, .3, .1, max_time, size=size, update=True)
    plt.figure()
    plt.hist(out, bins=40)
    out = sim_drift(1, .1, 2, .5, .1, .3, .1, size)
    plt.figure()
    plt.hist(out, bins=40)
    plt.show()
if __name__ == '__main__':
    size = 512
    generator = pycuda.curandom.XORWOWRandomNumberGenerator()
    max_size = generator.generators_per_block
    out = gpuarray.empty(size, dtype=np.float32)
    fill_normal(generator.state, np.int32(size), out, block=(64, 1, 1), grid=(size // 64 + 1, 1))
    data = out.get()
    print data.mean()
    print data.std()
    if size > max_size:
        print data[:max_size].mean()
        print data[:max_size].std()
    plt.hist(data, bins=50)
    plt.show()
    #main()

    x = np.linspace(-5, 5, 100)
    sampler = wfpt_gpu_like.rv
    params = hddm.generate.gen_rand_params()
    data = sampler.rvs(params['v'], params['V'], params['a'], params['z'],
                       params['Z'], params['t'], params['T'], size=size)
    hist = np.histogram(data, range=(-5, 5), bins=100, density=True)[0]
    cumsum = np.cumsum(data)
    plt.plot(x, sampler.cdf(x, params['v'], params['V'], params['a'], params['z'],
                            params['Z'], params['t'], params['T']))
    plt.plot(x, hist)
    print cumsum.shape
    plt.show()

# --- Second file: testing the synthetic likelihood on a simple normal model ---
from __future__ import division
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats
import pymc as pm
samples = np.random.randn(1000)
def plot_synth_samples(data, lower_samples=10, upper_samples=200, lower_dps=10, upper_dps=200, interval=50):
    x, y = np.meshgrid(np.arange(lower_samples, upper_samples, interval), np.arange(lower_dps, upper_dps, interval))
    #plt.figure()
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            plt.subplot(x.shape[0], x.shape[1], i * x.shape[1] + j + 1)
            if i == x.shape[0] - 1:
                plt.xlabel('%d' % x[i, j])
            if j == 0:
                plt.ylabel('%d' % y[i, j])
            #plot_synth_like(data, samples=np.round(x[i, j]), dataset_samples=np.round(y[i, j]))
def plot_synth_like(data, lower_mu=-1, upper_mu=1, lower_sig=.8, upper_sig=1.5, steps=50, samples=100, dataset_samples=100, plot=True):
    x, y = np.meshgrid(np.linspace(lower_mu, upper_mu, steps), np.linspace(lower_sig, upper_sig, steps))
    z = np.empty_like(x)
    for i in range(len(x)):
        for j in range(len(y)):
            z[i, j] = synth_likelihood(data, x[i, j], y[i, j], samples=samples, dataset_samples=dataset_samples)
    plt.contourf(x, y, z)
def plot_erroranalysis(data_samples=(10,), dataset_samples=(10,), datasets=200):
    x = np.asarray(dataset_samples)
    y = np.empty(x.shape, dtype=float)
    for data_sample in data_samples:
        data = np.random.randn(data_sample)
        for i, dataset_sample in enumerate(dataset_samples):
            errors = []
            sl_sum = 0
            pt_sum = 0
            for rep in range(1, 400):
                # Choose two random mu points
                mu1 = 0
                mu2 = (np.random.rand() - .5)
                # Evaluate true log-likelihood
                pt1 = pm.normal_like(data, mu=mu1, tau=1**-2)
                pt2 = pm.normal_like(data, mu=mu2, tau=1**-2)
                # Log-likelihood ratio is a difference in log space
                ptr = pt1 - pt2
                pt_sum += pt1
                pt_sum += pt2
                #print ptr
                # Evaluate synthetic log-likelihood
                ps1 = synth_likelihood(data, mu1, 1, dataset_samples=x[i], samples=datasets)
                ps2 = synth_likelihood(data, mu2, 1, dataset_samples=x[i], samples=datasets)
                sl_sum += ps1
                sl_sum += ps2
                pts = ps1 - ps2
                #print pts
                errors.append((pts - ptr)**2)
            print pt_sum
            print sl_sum
            y[i] = np.mean(errors)
        plt.plot(x, y, label='%i' % data_sample)
    plt.xlabel('Number of samples per dataset')
    plt.ylabel('MSE')
    plt.legend()
def plot_ratio_analysis(data_samples=(100,), dataset_samples=(100,), datasets=100):
    x, y = np.meshgrid(data_samples, dataset_samples)
    z = np.empty(x.shape, dtype=float)
    for i, data_sample in enumerate(data_samples):
        for j, dataset_sample in enumerate(dataset_samples):
            data = np.random.randn(x[j, i])
            errors = []
            sl_sum = 0
            pt_sum = 0
            for rep in range(1, 200):
                # Choose two random mu points
                mu1 = (np.random.rand() - .5) * 3
                mu2 = (np.random.rand() - .5) * 3
                # Evaluate true log-likelihood
                pt1 = pm.normal_like(data, mu=mu1, tau=1)
                pt2 = pm.normal_like(data, mu=mu2, tau=1)
                # Log-likelihood ratio is a difference in log space
                ptr = pt1 - pt2
                pt_sum += pt1
                pt_sum += pt2
                #print ptr
                # Evaluate synthetic log-likelihood
                ps1 = synth_likelihood(data, mu1, 1, dataset_samples=y[j, i], samples=datasets)
                ps2 = synth_likelihood(data, mu2, 1, dataset_samples=y[j, i], samples=datasets)
                sl_sum += ps1
                sl_sum += ps2
                pts = ps1 - ps2
                #print pts
                errors.append((pts - ptr)**2)
            print pt_sum
            print sl_sum
            z[j, i] = np.mean(errors)
            print x[j, i], y[j, i], z[j, i]
    print x
    print y
    print z
    cont = plt.contourf(x, y, z)
    plt.colorbar(cont)
    plt.xlabel('Number of samples per dataset')
    plt.ylabel('Size of input data')
def synth_likelihood(data, mu, std, samples=100, dataset_samples=100):
    """Compute synthetic log-likelihood of normally distributed data."""
    def mv_normal_like(s, mu, cov):
        s = np.asmatrix(s)
        mu = np.asmatrix(mu)
        cov = np.asmatrix(cov)
        return float(-.5 * (s - mu) * (cov**-1) * (s - mu).T - .5 * np.log(np.linalg.det(cov)))
    true_sum = np.array((data.mean(), data.std())) #, np.sum(data), data.var()))
    sum_stats = np.empty((samples, 2))
    for sample in range(samples):
        s = np.random.randn(dataset_samples) * std + mu
        sum_stats[sample, :] = s.mean(), s.std() #, np.sum(s), s.var()
    mean = np.mean(sum_stats, axis=0)
    cov = np.cov(sum_stats.T)
    # Evaluate synth likelihood
    logp = mv_normal_like(true_sum, mean, cov)
    return logp
synth = pm.stochastic_from_dist(name="Wiener synthetic likelihood",
logp=synth_likelihood,
dtype=np.float32,
mv=False)
mu = pm.Uniform('mu', lower=-5, upper=5, value=0)
std = pm.Uniform('std', lower=.1, upper=2, value=1)
sl = synth('Synthetic likelihood', value=samples, mu=mu, std=std, observed=True)
#rl = pm.Normal('Regular likelihood', value=samples, mu=mu, tau=std**-2, observed=True)
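# To draw posterior samples (an added sketch using the standard PyMC 2 API):
#   m = pm.MCMC([mu, std, sl])
#   m.sample(iter=10000, burn=5000)
#   pm.Matplot.plot(m)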