Robadob/FLAMEGPU.cu

## FLAMEGPU.cu

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
//#include <cuda_gl_interop.h>
#include "FLAMEGPU.h"
#include <cstdio>
#include "header.h"
//#include "visualisation/GlobalsController.h"
#include "navigationhost.h"
#include <mutex>
#include "dynamic/uploading.h"
#include <random>
#include <glm/gtx/norm.hpp>
#include <QtCore/QString>


//Per-station scale factors; they multiply the door offsets in DOOR_OFFSETS below
//These defines are duplicated in navigation.cu
#define SCALE_FACTOR0 1.0f //1.0f is underground station
#define SCALE_FACTOR1 1.5f //unfinished station
#define SCALE_FACTOR2 2.305291f //tunnel station

//Timestamp last stored by setCurrentTimestamp(), read back by getCurrentTimestamp()
int CURRENT_TIMESTAMP = 0;
//NOTE(review): presumably frames per second; not referenced in the visible part of this file
const int FPS = 60;

//Host side model state, allocated in initFLAMEcu() and deleted in cleanupFLAMEcu()
ModelData *md;
//NOTE(review): not referenced in the visible part of this file
ScalingData *sd[3];
//Host random number generator; rng is seeded from rd, so rd must be declared before it
std::random_device rd;
std::mt19937 rng(rd());
//Per station distribution of entrance ids, configured in initFLAMEcu()
std::uniform_int_distribution<int> entranceDistribution[3];
//Configured in initFLAMEcu() to cover 0.0-1.0
std::uniform_real_distribution<float> normalised;
//Configured in initFLAMEcu() to cover 1.3-1.65 (metres per second)
std::uniform_real_distribution<float> speedModPercent;
//Door offsets indexed as [station][door]: four offsets per station, each multiplied by that station's SCALE_FACTOR
const float DOOR_OFFSETS[3][4] = { { SCALE_FACTOR0 *-0.0462, SCALE_FACTOR0 *-0.0324, SCALE_FACTOR0 *0.0324, SCALE_FACTOR0 *0.0462 },
									{ SCALE_FACTOR1 *-0.0462, SCALE_FACTOR1 *-0.0324, SCALE_FACTOR1 *0.0324, SCALE_FACTOR1 *0.0462 },
									{ SCALE_FACTOR2 *-0.0462, SCALE_FACTOR2 *-0.0324, SCALE_FACTOR2 *0.0324, SCALE_FACTOR2 *0.0462 } };//For tram model, in FLAME units
/**
 * Stores the supplied timestamp in the global CURRENT_TIMESTAMP
 * @param ct The timestamp to store
 */
void setCurrentTimestamp(int ct)
{
	CURRENT_TIMESTAMP = ct;
}
/**
 * @return The value currently held in the global CURRENT_TIMESTAMP
 */
int getCurrentTimestamp()
{
	return CURRENT_TIMESTAMP;
}

void setCUDADevice()
{
	cudaDeviceProp devProp;

	//int deviceCount = 0;
	//cudaGetDeviceCount(&deviceCount);

	//for (int dc = 0; dc < deviceCount; ++dc)
	//{
		//cudaGetDeviceProperties(&devProp, dc);
		//printf("%d:%s\n", dc, devProp.name);
	//}
	//#include <cuda_gl_interop.h>
	//unsigned int devCount;
	//int devices[2];
	//devices[0] = -1; devices[1] = -1;
	//cudaGLGetDevices(&devCount, &devices[0], 2, cudaGLDeviceListAll);
	//printf("CUDAGL devices: %d, %d\n", devices[0], devices[1]);
	//if (devices[0] == -1)
	//{
	//	printf("err\n");
	//	devices[0] = 1;
	//}
	cudaError_t cudaStatus = cudaSetDevice(0);//Set GL Device 0 (Need it to be the graphics device)
	if (cudaStatus != cudaSuccess || cudaGetLastError() != cudaSuccess) {
		fprintf(stderr, "Error setting CUDA device!");
		getchar();
		//exit(0);
	}
}
void initFLAMEcu(std::string modelPath, unsigned int stationCount, unsigned int trainCount, ScalingData *returnScaling[])
{
	setCUDADevice();
	md = new ModelData(stationCount, trainCount);
	//Load each stations data
	{
		std::string undergroundPath = modelPath + std::string("/0underground.scx");
		returnScaling[0] = initNavigation(undergroundPath.c_str(), 0);
		std::string unfinishedPath = modelPath + std::string("/1unfinished.scx");
		returnScaling[1] = initNavigation(unfinishedPath.c_str(), 1);
		std::string tunnelPath = modelPath + std::string("/2tunnel.scx");
		returnScaling[2] = initNavigation(tunnelPath.c_str(), 2);
	}
	//Eventually write custom init, so it isn't loaded from a basically empty file.
	initialise("iterations\\0.xml");
	//Count entrances and stations
	for (int j = 0; j < 3; ++j)
	{
		md->platformCount[j] = navDat[j].platformVec.size();
		for (auto &&s : navDat[j].startsVec)
		{
			unsigned int a = abs(s.startId);//Don't correct them to 0-index, we want size
			if (a>md->entranceCount[j])
			{
				md->entranceCount[j] = a;
			}
		}
		entranceDistribution[j] = std::uniform_int_distribution<int>(-md->entranceCount[j], -1);
	}
	normalised = std::uniform_real_distribution<float>(0.0, 1.0);
	//speedModPercent = std::uniform_real_distribution<float>(rtn->toFlameScale(1.3f), rtn->toFlameScale(1.65f));//1.3-1.65 metres per second, scaled to FLAME scale
	speedModPercent = std::uniform_real_distribution<float>(1.3f, 1.65f);//1.3-1.65 metres per second
	//Initialise seeding from this info.
	//return rtn;
	cudaDeviceSynchronize();
}
/**
 * Releases the global ModelData allocated by initFLAMEcu()
 * @note Safe to call more than once, the pointer is cleared after deletion
 */
void cleanupFLAMEcu()
{
	delete md;
	//Clear the global so a repeated cleanup or a late access does not touch freed memory
	md = nullptr;
}
/**
 * Advances the simulation by one step
 * Calls singleIteration() first, then runNavIteration()
 */
void stepSim()
{
	//FLAME iteration
	singleIteration();
	//Navigation iteration
	runNavIteration();
}

/**
 * Pushes the pedestrian settings to the model via the set_* functions
 * @param data Settings to apply; the pedestrian diameter is derived from its pedestrianRadius
 */
void initFLAMEConst(PedSettings::Data data)
{
	//Twice the radius, plus 0.05
	float pedestrianDiameter = 0.05f + (2 * data.pedestrianRadius);
	//Clear cuda error stack
	cudaGetLastError();
	setCUDADevice();
	//Weights
	set_COLLISION_WEIGHT(&data.collisionWeight);
	set_GOAL_WEIGHT(&data.goalWeight);
	//Radii
	set_INTERACTION_RADIUS(&data.interactionRadius);
	set_SEPARATION_RADIUS(&data.separationRadius);
	set_PED_RADIUS(&data.pedestrianRadius);
	set_PED_DIAMETER(&pedestrianDiameter);
	//Mass
	set_AGENT_MASS(&data.agentMass);
}

/**
 * Queues a train arrival for a station
 * The arrival is inserted into md->arrivals[stationId] before the first queued arrival with a later timestamp,
 * so arrivals with equal timestamps keep their insertion order
 * @param timestamp Time of the arrival
 * @param trainId Id of the arriving train
 * @param stationId Station the train arrives at; values >= 4 are rejected with a message on stderr
 * @param passengers Stored as the arrival's pedDests
 */
void notifyArrival(const int timestamp, const unsigned int trainId, const unsigned int stationId, std::shared_ptr<std::list<int>> passengers)
{
	if (stationId >= 4)//temp md->stationCount)
	{
		//stationId is unsigned, so it is printed with %u
		fprintf(stderr, "Unexpected arrival station %u.\n", stationId);
		return;
	}
	//Insert the arrival into the relevant stations buffer in chronological order, so it can be processed within the FLAME model
	auto &stationArrivals = *md->arrivals[stationId];
	auto it = stationArrivals.begin();
	while (it != stationArrivals.end() && !(it->timestamp > timestamp))
		++it;
	ArrivingTrain at;
	at.timestamp = timestamp;
	at.trainId = trainId;
	at.pedDests = passengers;
	stationArrivals.insert(it, at);
}
//Spawn
/**
 * Searches the route starts of a station for one matching the given origin/destination pair
 * @param stationId Index into navDat
 * @param origin Required startId; when negative (entrance) the first match is returned
 * @param destination Required endId
 * @param agentLoc When origin is a platform (>= 0), the match whose firstBranchpt is nearest to this location is returned
 * @return Pointer to the selected RouteStart, or nullptr when no route matches
 */
static RouteStart *findRouteStart(unsigned int stationId, int origin, int destination, const glm::vec2 &agentLoc)
{
	RouteStart *nearest = nullptr;
	float bestDist = FLT_MAX;
	for (auto &&start : navDat[stationId].startsVec)
	{
		if (start.startId != origin || start.endId != destination)
			continue;
		//Not starting at a platform, any matching route will do
		if (origin < 0)
			return &start;
		//If we are starting at a platform, find the nearest applicable route
		float d = glm::distance2(agentLoc, start.firstBranchpt);
		if (d<bestDist)
		{
			bestDist = d;
			nearest = &start;
		}
	}
	return nearest;
}
/**
* Returns a suitable RouteStart for the giving origin/destination
* @param stationId Station to search, must be less than 3
* @param origin Origin id; ignored (treated as entrance -1) when isBus is set
* @param destination Destination id
* @param agentLoc Location used to pick the nearest route when the origin is a platform
* @param isBus When set, the origin is forced to entrance -1
* @note This method assumes there is exactly one route between each OD pair
* @note If either param is negative, a uniform random entrance is chosen
* @note If no route exists, negative ids are retried as -1; if that also fails the process exits
*/
RouteStart &getRouteStart(unsigned int stationId, int origin, int destination, glm::vec2 agentLoc, bool isBus)
{
	//entranceDistribution only holds one entry per station, refuse to index past it
	if (stationId >= sizeof(entranceDistribution) / sizeof(entranceDistribution[0]))
	{
		fprintf(stderr, "Fatal Error: Station %u out of bounds! @ getRouteStart()\n", stationId);
		getchar();
		exit(EXIT_FAILURE);
	}
	if (isBus)
		origin = -1;
	else
		origin = origin<0 ? entranceDistribution[stationId](rng) : origin;
	destination = destination<0 ? entranceDistribution[stationId](rng) : destination;
#ifdef _DEBUG
	if (origin < -(int)md->entranceCount[stationId] || origin >= (int)md->platformCount[stationId])
		fprintf(stderr, "Error: Origin out of bounds @ getStartIndex() %d vs p%d e%d\n", origin, md->platformCount[stationId], md->entranceCount[stationId]);
	if (destination < -(int)md->entranceCount[stationId] || destination >= (int)md->platformCount[stationId])
		fprintf(stderr, "Error: Destination out of bounds @ getStartIndex() %d vs p%d e%d\n", destination, md->platformCount[stationId], md->entranceCount[stationId]);
#endif
	//First attempt, with the ids as requested/drawn
	RouteStart *rs = findRouteStart(stationId, origin, destination, agentLoc);
	if (rs)
		return *rs;
#ifdef _DEBUG
	fprintf(stderr, "Error: Route %d->%d not found! @ getStartIndex()\n", origin, destination);
#endif
	//Second attempt, with every entrance id collapsed to -1
	origin = origin<0 ? -1 : origin;
	destination = destination<0 ? -1 : destination;
	rs = findRouteStart(stationId, origin, destination, agentLoc);
	if (rs)
		return *rs;
	fprintf(stderr, "Fatal Error: Route %d->%d still not found! @ getStartIndex()\n", origin, destination);
	getchar();
	exit(EXIT_FAILURE);
}
/**
 * Creates a station 0 agent with default values
 * Position and forces are zero, navigation is reset, and animate, animate_dir and speed_mod are drawn from rng
 * @return The agent; callers are expected to fill in position, navigation and destination
 */
inline xmachine_memory_Boid0 emptyAgent0()
{
	//Value-initialise so members not assigned below (e.g. destination) are zeroed rather than indeterminate
	xmachine_memory_Boid0 agent = xmachine_memory_Boid0();
	//Locate agent
	agent.x = 0;
	agent.y = 0;
	agent.z = 0;
	//Init motion forces
	agent.dx = 0;
	agent.dy = 0;
	agent.fx = 0;
	agent.fy = 0;
	agent.fz = 0;
	agent.gx = 0;
	agent.gy = 0;
	agent.id = 0;
	//Config navigation
	agent.wp1 = -1;
	agent.wp2 = -3;//-3 states next node is required
	agent.wp3 = -3;
	agent.state = 0;
	//Config animation (the order of the rng draws is kept: animate, animate_dir, speed_mod)
	agent.animate = (normalised(rng));
	agent.animate_dir = (normalised(rng))*0.5f + 1.0f;
	agent.speed_mod = speedModPercent(rng);
	return agent;
}
/**
 * Creates a station 1 agent with default values
 * Position and forces are zero, navigation is reset, and animate, animate_dir and speed_mod are drawn from rng
 * @return The agent; callers are expected to fill in position, navigation and destination
 */
inline xmachine_memory_Boid1 emptyAgent1()
{
	//Value-initialise so members not assigned below (e.g. destination) are zeroed rather than indeterminate
	xmachine_memory_Boid1 agent = xmachine_memory_Boid1();
	//Locate agent
	agent.x = 0;
	agent.y = 0;
	agent.z = 0;
	//Init motion forces
	agent.dx = 0;
	agent.dy = 0;
	agent.fx = 0;
	agent.fy = 0;
	agent.fz = 0;
	agent.gx = 0;
	agent.gy = 0;
	agent.id = 0;
	//Config navigation
	agent.wp1 = -1;
	agent.wp2 = -3;//-3 states next node is required
	agent.wp3 = -3;
	agent.state = 0;
	//Config animation (the order of the rng draws is kept: animate, animate_dir, speed_mod)
	agent.animate = (normalised(rng));
	agent.animate_dir = (normalised(rng))*0.5f + 1.0f;
	agent.speed_mod = speedModPercent(rng);
	return agent;
}
/**
 * Creates a station 2 agent with default values
 * Position and forces are zero, navigation is reset, and animate, animate_dir and speed_mod are drawn from rng
 * @return The agent; callers are expected to fill in position, navigation and destination
 */
inline xmachine_memory_Boid2 emptyAgent2()
{
	//Value-initialise so members not assigned below (e.g. destination) are zeroed rather than indeterminate
	xmachine_memory_Boid2 agent = xmachine_memory_Boid2();
	//Locate agent
	agent.x = 0;
	agent.y = 0;
	agent.z = 0;
	//Init motion forces
	agent.dx = 0;
	agent.dy = 0;
	agent.fx = 0;
	agent.fy = 0;
	agent.fz = 0;
	agent.gx = 0;
	agent.gy = 0;
	agent.id = 0;
	//Config navigation
	agent.wp1 = -1;
	agent.wp2 = -3;//-3 states next node is required
	agent.wp3 = -3;
	agent.state = 0;
	//Config animation (the order of the rng draws is kept: animate, animate_dir, speed_mod)
	agent.animate = (normalised(rng));
	agent.animate_dir = (normalised(rng))*0.5f + 1.0f;
	agent.speed_mod = speedModPercent(rng);
	return agent;
}
void createPassengerEntrance(unsigned int stationId, unsigned int platformId, bool isBus)
{
	RouteStart &rs = getRouteStart(stationId, - 1, platformId, glm::vec2(), isBus);//Currently spawn rng entrance, we dont really care, there's probably only one
	RouteItem &ri = navDat[stationId].itemsVec[rs.startItem];
	NodeEnclosure &ne = navDat[stationId].enclosuresVec[ri.enclosure];
	//Create agent
	if (stationId==0)
	{
		xmachine_memory_Boid0 agent = emptyAgent0();
		//Randomly locate agent at starting enclosure
		agent.x = ne.vx[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
		agent.y = ne.vy[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
		//Init nav
		agent.wp1 = rs.startItem;//-2 states agent is on platform
		agent.destination = platformId;
		addXAgent_Boid0(&agent);
	}
	else if (stationId == 1)
	{
		xmachine_memory_Boid1 agent = emptyAgent1();
		//Randomly locate agent at starting enclosure
		agent.x = ne.vx[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
		agent.y = ne.vy[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
		//Init nav
		agent.wp1 = rs.startItem;//-2 states agent is on platform
		agent.destination = platformId;
		addXAgent_Boid1(&agent);
	}
	else if (stationId == 2)
	{
		xmachine_memory_Boid2 agent = emptyAgent2();
		//Randomly locate agent at starting enclosure
		agent.x = ne.vx[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
		agent.y = ne.vy[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
		//Init nav
		agent.wp1 = rs.startItem;//-2 states agent is on platform
		agent.destination = platformId;
		addXAgent_Boid2(&agent);
	}
	else
	{
		assert(false);
	}
}
/**
 * Calculates a unit vector perpendicular to a platform
 * The platform's start->end direction is lifted to (x, 0, y), crossed with the up axis (0, 1, 0), and the x/z of the result is returned
 * @param stationId Index into navDat
 * @param platformId Index into the station's platformVec
 * @return The 2D perpendicular direction
 */
glm::vec2 pformDir2(unsigned int stationId, unsigned int platformId)
{
	const auto &platform = navDat[stationId].platformVec[platformId];
	glm::vec2 direction = normalize(platform.end - platform.start);
	glm::vec3 _platformDir = normalize(cross(glm::vec3(0, 1, 0), glm::vec3(direction.x, 0, direction.y)));
	return glm::vec2(_platformDir.x, _platformDir.z);
}
void createPassengerPlatform(unsigned int stationId, unsigned int platformId, unsigned int trainId, int destination, int d_index)
{
	//int startItem = rs.startItem;//First item is bad, gives us path from platform center to waypoint
	//int secondItem = navDat.itemsVec[startItem].nextNode;//this should point to the waypoint
	//Iterate till we find the branching point
	//while (secondItem>=0)
	//{
	//	secondItem = navDat.itemsVec[secondItem].nextNode;
	//	printf("Start:%d, Next:%d, Target: %d\n", startItem, secondItem, navDat.itemsVec[secondItem].target);
	//}
	//printf("Pfm%d, Dst%d, wp1%d, wp2%d,\n", platformId, destination, startItem, secondItem);
	//This node shall be our target
	//Locate passenger at platform
	//This math should be done once at init
	glm::vec2 center = (navDat[stationId].platformVec[platformId].start + ((navDat[stationId].platformVec[platformId].end - navDat[stationId].platformVec[platformId].start) / 2.0f)) - (0.00391604984f*pformDir2(stationId, platformId));
	glm::vec2 direction = normalize(navDat[stationId].platformVec[platformId].end*100.0f - navDat[stationId].platformVec[platformId].start*100.0f);
	//Convert doorOffset to model units (from metres?), need to get scale factor out of scx gen
	glm::vec2 pos = center + direction*(DOOR_OFFSETS[stationId][d_index]);
	//Calc agent direction
	glm::vec3 _platformDir = normalize(cross(glm::vec3(0, 1, 0), glm::vec3(direction.x,0,direction.y)));
	glm::vec2 platformDir = glm::vec2(_platformDir.x, _platformDir.z);
	//Parse destination (-1 = home, 0+ = new platform)
	RouteStart &rs = getRouteStart(stationId, platformId, destination, pos, false);
	int startItem = rs.startItem;//First item is actually the waypoint
	//Create agent
	if (stationId==0)
	{
		xmachine_memory_Boid0 agent = emptyAgent0();
		agent.x = pos.x;
		agent.y = pos.y;
		agent.dx = platformDir.x;
		agent.dy = platformDir.y;
		agent.fx = platformDir.x * 2;
		agent.fy = platformDir.y * 2;
		//Init nav
		agent.wp1 = -12;//-12 states agent is on platform
		agent.wp2 = startItem;
		agent.wp3 = platformId;
		agent.state = 1;//We will use 1 to say agent is walking towards platform
		agent.destination = destination;
		addXAgent_Boid0(&agent);
	}
	else if (stationId == 1)
	{
		xmachine_memory_Boid1 agent = emptyAgent1();
		agent.x = pos.x;
		agent.y = pos.y;
		agent.dx = platformDir.x;
		agent.dy = platformDir.y;
		agent.fx = platformDir.x * 2;
		agent.fy = platformDir.y * 2;
		//Init nav
		agent.wp1 = -12;//-12 states agent is on platform
		agent.wp2 = startItem;
		agent.wp3 = platformId;
		agent.state = 1;//We will use 1 to say agent is walking towards platform
		agent.destination = destination;
		addXAgent_Boid1(&agent);
	}
	else if (stationId == 2)
	{
		xmachine_memory_Boid2 agent = emptyAgent2();
		agent.x = pos.x;
		agent.y = pos.y;
		agent.dx = platformDir.x;
		agent.dy = platformDir.y;
		agent.fx = platformDir.x * 2;
		agent.fy = platformDir.y * 2;
		//Init nav
		agent.wp1 = -12;//-12 states agent is on platform
		agent.wp2 = startItem;
		agent.wp3 = platformId;
		agent.state = 1;//We will use 1 to say agent is walking towards platform
		agent.destination = destination;
		addXAgent_Boid2(&agent);
	}
	else
		assert(false);
}

//util

/**
 * Forwards to cleanup()
 */
void cleanupFLAME()
{
	//All of the work is done by cleanup()
	cleanup();
}
int cleanupCUDA()
{
	printf("cleanup cuda\n");
	cudaError_t cudaStatus = cudaDeviceReset();
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "Error resetting the device!");
		return 1;
	}
	return 0;
}
/**
 * Looks up the MAX agent count reported for a station's agent type
 * @param stationId The station id (0, 1 or 2)
 * @return The station's get_agent_BoidN_MAX_count() value; any other id asserts and returns 0
 */
int getAgentMax(int stationId)
{
	switch (stationId)
	{
	case 0:
		return get_agent_Boid0_MAX_count();
	case 1:
		return get_agent_Boid1_MAX_count();
	case 2:
		return get_agent_Boid2_MAX_count();
	default:
		//Unknown station
		assert(false);
		return 0;
	}
}
/**
 * Looks up the number of agents in a station's default state
 * @param stationId The station id (0, 1 or 2)
 * @return The station's get_agent_BoidN_defaultN_count() value; any other id asserts and returns 0
 */
int getAgentCount(int stationId)
{
	switch (stationId)
	{
	case 0:
		return get_agent_Boid0_default0_count();
	case 1:
		return get_agent_Boid1_default1_count();
	case 2:
		return get_agent_Boid2_default2_count();
	default:
		//Unknown station
		assert(false);
		return 0;
	}
}
__global__ void output_to_TBO0(xmachine_memory_Boid0_list* agents, glm::vec4* data1, glm::vec4* data2)
{
	//global thread index
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	data1[index].x = agents->x[index];
	data1[index].y = agents->z[index];// agents->animate[index];
	data1[index].z = agents->y[index];
	//data1[index].w = agents->height[index];
	data2[index].x = agents->fx[index];//gx
	data2[index].y = agents->fz[index];
	data2[index].z = agents->fy[index];//gy //(float)agents->exit_no[index];
	data2[index].w = agents->animate[index];
}
__global__ void output_to_TBO1(xmachine_memory_Boid1_list* agents, glm::vec4* data1, glm::vec4* data2)
{
	//global thread index
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	data1[index].x = agents->x[index];
	data1[index].y = agents->z[index];// agents->animate[index];
	data1[index].z = agents->y[index];
	//data1[index].w = agents->height[index];
	data2[index].x = agents->fx[index];//gx
	data2[index].y = agents->fz[index];
	data2[index].z = agents->fy[index];//gy //(float)agents->exit_no[index];
	data2[index].w = agents->animate[index];
}
__global__ void output_to_TBO2(xmachine_memory_Boid2_list* agents, glm::vec4* data1, glm::vec4* data2)
{
	//global thread index
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	data1[index].x = agents->x[index];
	data1[index].y = agents->z[index];// agents->animate[index];
	data1[index].z = agents->y[index];
	//data1[index].w = agents->height[index];
	data2[index].x = agents->fx[index];//gx
	data2[index].y = agents->fz[index];
	data2[index].z = agents->fy[index];//gy //(float)agents->exit_no[index];
	data2[index].w = agents->animate[index];
}
void fillBuffers(std::pair<glm::vec4*, glm::vec4*> buffs, int stationId)
{
	cudaDeviceSynchronize();
	cudaError_t status = cudaGetLastError();
	if (status != CUDA_SUCCESS || (status = cudaGetLastError()) != CUDA_SUCCESS)
	{
		if (status == cudaErrorUnknown)
		{
			printf("An Unknown CUDA Error Occurred :(\n");
			printf("Perhaps performing the same operation under the CUDA debugger with Memory Checker enabled could help!\n");
			printf("If this error only occurs outside of NSight debugging sessions, or causes the system to lock up. It may be caused by not passing the required amount of shared memory to a kernal launch that uses runtime sized shared memory.\n");
			printf("Also possible you have forgotten to allocate texture memory you are trying to read\n");
			printf("Passing a buffer to 'cudaGraphicsSubResourceGetMappedArray' or a texture to 'cudaGraphicsResourceGetMappedPointer'.\n");
			getchar();
			exit(1);
		}
		printf(" CUDA Error Occurred @ output_to_TBO2;\n%s\n", cudaGetErrorString(status));
		getchar();
		exit(1);
	}
	int threads_per_tile = 128;
	int tile_size;
	dim3 grid;
	dim3 threads;

	int ct = getAgentCount(stationId);

	if (ct > 0)
	{
		// map OpenGL buffer object for writing from CUDA
		//gpuErrchk(cudaGLMapBufferObject((void**)&dptr_1, *instances_data1_tbo));
		//gpuErrchk(cudaGLMapBufferObject((void**)&dptr_2, *instances_data2_tbo));
		//cuda block size
		tile_size = (int)ceil((float)ct / threads_per_tile);
		grid = dim3(tile_size, 1, 1);
		threads = dim3(threads_per_tile, 1, 1);
		//kernel
		if (stationId==0)
		{
			output_to_TBO0 <<<grid, threads >>>(get_device_Boid0_default0_agents(), std::get<0>(buffs), std::get<1>(buffs));
		}
		else if (stationId == 1)
		{
			output_to_TBO1 <<<grid, threads >>>(get_device_Boid1_default1_agents(), std::get<0>(buffs), std::get<1>(buffs));
		}
		else if (stationId == 2)
		{
			output_to_TBO2 <<<grid, threads >>>(get_device_Boid2_default2_agents(), std::get<0>(buffs), std::get<1>(buffs));
		}
		else
		{
			assert(false);
		}
		cudaDeviceSynchronize();
		//cudaError_t
			status = cudaGetLastError();
		if (status != CUDA_SUCCESS || (status = cudaGetLastError()) != CUDA_SUCCESS)
		{
			if (status == cudaErrorUnknown)
			{
				printf("An Unknown CUDA Error Occurred :(\n");
				printf("Perhaps performing the same operation under the CUDA debugger with Memory Checker enabled could help!\n");
				printf("If this error only occurs outside of NSight debugging sessions, or causes the system to lock up. It may be caused by not passing the required amount of shared memory to a kernal launch that uses runtime sized shared memory.\n");
				printf("Also possible you have forgotten to allocate texture memory you are trying to read\n");
				printf("Passing a buffer to 'cudaGraphicsSubResourceGetMappedArray' or a texture to 'cudaGraphicsResourceGetMappedPointer'.\n");
				getchar();
				exit(1);
			}
			printf(" CUDA Error Occurred @ output_to_TBO2;\n%s\n", cudaGetErrorString(status));
			getchar();
			exit(1);
		}
		// unmap buffer object
		//gpuErrchk(cudaGLUnmapBufferObject(*instances_data1_tbo));
		//gpuErrchk(cudaGLUnmapBufferObject(*instances_data2_tbo));
	}
}

	#include <cuda.h>
	#include <cuda_runtime.h>
	#include <device_launch_parameters.h>
	//#include <cuda_gl_interop.h>
	#include "FLAMEGPU.h"
	#include <cstdio>
	#include "header.h"
	//#include "visualisation/GlobalsController.h"
	#include "navigationhost.h"
	#include <mutex>
	#include "dynamic/uploading.h"
	#include <random>
	#include <glm/gtx/norm.hpp>
	#include <QtCore/QString>


	//Per-station scale factors; they multiply the door offsets in DOOR_OFFSETS below
	//These defines are duplicated in navigation.cu
	#define SCALE_FACTOR0 1.0f //1.0f is underground station
	#define SCALE_FACTOR1 1.5f //unfinished station
	#define SCALE_FACTOR2 2.305291f //tunnel station

	//Timestamp last stored by setCurrentTimestamp(), read back by getCurrentTimestamp()
	int CURRENT_TIMESTAMP = 0;
	//NOTE(review): presumably frames per second; not referenced in the visible part of this file
	const int FPS = 60;

	//Host side model state, allocated in initFLAMEcu() and deleted in cleanupFLAMEcu()
	ModelData *md;
	//NOTE(review): not referenced in the visible part of this file
	ScalingData *sd[3];
	//Host random number generator; rng is seeded from rd, so rd must be declared before it
	std::random_device rd;
	std::mt19937 rng(rd());
	//Per station distribution of entrance ids, configured in initFLAMEcu()
	std::uniform_int_distribution<int> entranceDistribution[3];
	//Configured in initFLAMEcu() to cover 0.0-1.0
	std::uniform_real_distribution<float> normalised;
	//Configured in initFLAMEcu() to cover 1.3-1.65 (metres per second)
	std::uniform_real_distribution<float> speedModPercent;
	//Door offsets indexed as [station][door]: four offsets per station, each multiplied by that station's SCALE_FACTOR
	//(the multiplication operators had been lost from these initialisers)
	const float DOOR_OFFSETS[3][4] = { { SCALE_FACTOR0 *-0.0462, SCALE_FACTOR0 *-0.0324, SCALE_FACTOR0 *0.0324, SCALE_FACTOR0 *0.0462 },
										{ SCALE_FACTOR1 *-0.0462, SCALE_FACTOR1 *-0.0324, SCALE_FACTOR1 *0.0324, SCALE_FACTOR1 *0.0462 },
										{ SCALE_FACTOR2 *-0.0462, SCALE_FACTOR2 *-0.0324, SCALE_FACTOR2 *0.0324, SCALE_FACTOR2 *0.0462 } };//For tram model, in FLAME units
	/**
	 * Stores the supplied timestamp in the global CURRENT_TIMESTAMP
	 * @param ct The timestamp to store
	 */
	void setCurrentTimestamp(int ct)
	{
		CURRENT_TIMESTAMP = ct;
	}
	/**
	 * @return The value currently held in the global CURRENT_TIMESTAMP
	 */
	int getCurrentTimestamp()
	{
		return CURRENT_TIMESTAMP;
	}

	void setCUDADevice()
	{
	cudaDeviceProp devProp;

	//int deviceCount = 0;
	//cudaGetDeviceCount(&deviceCount);

	//for (int dc = 0; dc < deviceCount; ++dc)
	//{
	//cudaGetDeviceProperties(&devProp, dc);
	//printf("%d:%s\n", dc, devProp.name);
	//}
	//#include <cuda_gl_interop.h>
	//unsigned int devCount;
	//int devices[2];
	//devices[0] = -1; devices[1] = -1;
	//cudaGLGetDevices(&devCount, &devices[0], 2, cudaGLDeviceListAll);
	//printf("CUDAGL devices: %d, %d\n", devices[0], devices[1]);
	//if (devices[0] == -1)
	//{
	// printf("err\n");
	// devices[0] = 1;
	//}
	cudaError_t cudaStatus = cudaSetDevice(0);//Set GL Device 0 (Need it to be the graphics device)
	if (cudaStatus != cudaSuccess \|\| cudaGetLastError() != cudaSuccess) {
	fprintf(stderr, "Error setting CUDA device!");
	getchar();
	//exit(0);
	}
	}
	void initFLAMEcu(std::string modelPath, unsigned int stationCount, unsigned int trainCount, ScalingData *returnScaling[])
	{
	setCUDADevice();
	md = new ModelData(stationCount, trainCount);
	//Load each stations data
	{
	std::string undergroundPath = modelPath + std::string("/0underground.scx");
	returnScaling[0] = initNavigation(undergroundPath.c_str(), 0);
	std::string unfinishedPath = modelPath + std::string("/1unfinished.scx");
	returnScaling[1] = initNavigation(unfinishedPath.c_str(), 1);
	std::string tunnelPath = modelPath + std::string("/2tunnel.scx");
	returnScaling[2] = initNavigation(tunnelPath.c_str(), 2);
	}
	//Eventually write custom init, so it isn't loaded from a basically empty file.
	initialise("iterations\\0.xml");
	//Count entrances and stations
	for (int j = 0; j < 3; ++j)
	{
	md->platformCount[j] = navDat[j].platformVec.size();
	for (auto &&s : navDat[j].startsVec)
	{
	unsigned int a = abs(s.startId);//Don't correct them to 0-index, we want size
	if (a>md->entranceCount[j])
	{
	md->entranceCount[j] = a;
	}
	}
	entranceDistribution[j] = std::uniform_int_distribution<int>(-md->entranceCount[j], -1);
	}
	normalised = std::uniform_real_distribution<float>(0.0, 1.0);
	//speedModPercent = std::uniform_real_distribution<float>(rtn->toFlameScale(1.3f), rtn->toFlameScale(1.65f));//1.3-1.65 metres per second, scaled to FLAME scale
	speedModPercent = std::uniform_real_distribution<float>(1.3f, 1.65f);//1.3-1.65 metres per second
	//Initialise seeding from this info.
	//return rtn;
	cudaDeviceSynchronize();
	}
	/**
	 * Releases the global ModelData allocated by initFLAMEcu()
	 * @note Safe to call more than once, the pointer is cleared after deletion
	 */
	void cleanupFLAMEcu()
	{
		delete md;
		//Clear the global so a repeated cleanup or a late access does not touch freed memory
		md = nullptr;
	}
	/**
	 * Advances the simulation by one step
	 * Calls singleIteration() first, then runNavIteration()
	 */
	void stepSim()
	{
		//FLAME iteration
		singleIteration();
		//Navigation iteration
		runNavIteration();
	}

	/**
	 * Pushes the pedestrian settings to the model via the set_* functions
	 * @param data Settings to apply; the pedestrian diameter is derived from its pedestrianRadius
	 */
	void initFLAMEConst(PedSettings::Data data)
	{
		//Twice the radius, plus 0.05
		float pedestrianDiameter = 0.05f + (2 * data.pedestrianRadius);
		//Clear cuda error stack
		cudaGetLastError();
		setCUDADevice();
		//Weights
		set_COLLISION_WEIGHT(&data.collisionWeight);
		set_GOAL_WEIGHT(&data.goalWeight);
		//Radii
		set_INTERACTION_RADIUS(&data.interactionRadius);
		set_SEPARATION_RADIUS(&data.separationRadius);
		set_PED_RADIUS(&data.pedestrianRadius);
		set_PED_DIAMETER(&pedestrianDiameter);
		//Mass
		set_AGENT_MASS(&data.agentMass);
	}

	/**
	 * Queues a train arrival for a station
	 * The arrival is inserted into md->arrivals[stationId] before the first queued arrival with a later timestamp,
	 * so arrivals with equal timestamps keep their insertion order
	 * @param timestamp Time of the arrival
	 * @param trainId Id of the arriving train
	 * @param stationId Station the train arrives at; values >= 4 are rejected with a message on stderr
	 * @param passengers Stored as the arrival's pedDests
	 */
	void notifyArrival(const int timestamp, const unsigned int trainId, const unsigned int stationId, std::shared_ptr<std::list<int>> passengers)
	{
		if (stationId >= 4)//temp md->stationCount)
		{
			//stationId is unsigned, so it is printed with %u
			fprintf(stderr, "Unexpected arrival station %u.\n", stationId);
			return;
		}
		//Insert the arrival into the relevant stations buffer in chronological order, so it can be processed within the FLAME model
		auto &stationArrivals = *md->arrivals[stationId];
		auto it = stationArrivals.begin();
		while (it != stationArrivals.end() && !(it->timestamp > timestamp))
			++it;
		ArrivingTrain at;
		at.timestamp = timestamp;
		at.trainId = trainId;
		at.pedDests = passengers;
		stationArrivals.insert(it, at);
	}
	//Spawn
	/**
	 * Searches the route starts of a station for one matching the given origin/destination pair
	 * @param stationId Index into navDat
	 * @param origin Required startId; when negative (entrance) the first match is returned
	 * @param destination Required endId
	 * @param agentLoc When origin is a platform (>= 0), the match whose firstBranchpt is nearest to this location is returned
	 * @return Pointer to the selected RouteStart, or nullptr when no route matches
	 */
	static RouteStart *findRouteStart(unsigned int stationId, int origin, int destination, const glm::vec2 &agentLoc)
	{
		RouteStart *nearest = nullptr;
		float bestDist = FLT_MAX;
		for (auto &&start : navDat[stationId].startsVec)
		{
			if (start.startId != origin || start.endId != destination)
				continue;
			//Not starting at a platform, any matching route will do
			if (origin < 0)
				return &start;
			//If we are starting at a platform, find the nearest applicable route
			float d = glm::distance2(agentLoc, start.firstBranchpt);
			if (d<bestDist)
			{
				bestDist = d;
				nearest = &start;
			}
		}
		return nearest;
	}
	/**
	* Returns a suitable RouteStart for the giving origin/destination
	* @param stationId Station to search, must be less than 3
	* @param origin Origin id; ignored (treated as entrance -1) when isBus is set
	* @param destination Destination id
	* @param agentLoc Location used to pick the nearest route when the origin is a platform
	* @param isBus When set, the origin is forced to entrance -1
	* @note This method assumes there is exactly one route between each OD pair
	* @note If either param is negative, a uniform random entrance is chosen
	* @note If no route exists, negative ids are retried as -1; if that also fails the process exits
	*/
	RouteStart &getRouteStart(unsigned int stationId, int origin, int destination, glm::vec2 agentLoc, bool isBus)
	{
		//entranceDistribution only holds one entry per station, refuse to index past it
		if (stationId >= sizeof(entranceDistribution) / sizeof(entranceDistribution[0]))
		{
			fprintf(stderr, "Fatal Error: Station %u out of bounds! @ getRouteStart()\n", stationId);
			getchar();
			exit(EXIT_FAILURE);
		}
		if (isBus)
			origin = -1;
		else
			origin = origin<0 ? entranceDistribution[stationId](rng) : origin;
		destination = destination<0 ? entranceDistribution[stationId](rng) : destination;
	#ifdef _DEBUG
		if (origin < -(int)md->entranceCount[stationId] || origin >= (int)md->platformCount[stationId])
			fprintf(stderr, "Error: Origin out of bounds @ getStartIndex() %d vs p%d e%d\n", origin, md->platformCount[stationId], md->entranceCount[stationId]);
		if (destination < -(int)md->entranceCount[stationId] || destination >= (int)md->platformCount[stationId])
			fprintf(stderr, "Error: Destination out of bounds @ getStartIndex() %d vs p%d e%d\n", destination, md->platformCount[stationId], md->entranceCount[stationId]);
	#endif
		//First attempt, with the ids as requested/drawn
		RouteStart *rs = findRouteStart(stationId, origin, destination, agentLoc);
		if (rs)
			return *rs;
	#ifdef _DEBUG
		fprintf(stderr, "Error: Route %d->%d not found! @ getStartIndex()\n", origin, destination);
	#endif
		//Second attempt, with every entrance id collapsed to -1
		origin = origin<0 ? -1 : origin;
		destination = destination<0 ? -1 : destination;
		rs = findRouteStart(stationId, origin, destination, agentLoc);
		if (rs)
			return *rs;
		fprintf(stderr, "Fatal Error: Route %d->%d still not found! @ getStartIndex()\n", origin, destination);
		getchar();
		exit(EXIT_FAILURE);
	}
	/**
	 * Creates a station 0 agent with default values
	 * Position and forces are zero, navigation is reset, and animate, animate_dir and speed_mod are drawn from rng
	 * @return The agent; callers are expected to fill in position, navigation and destination
	 */
	inline xmachine_memory_Boid0 emptyAgent0()
	{
		//Value-initialise so members not assigned below (e.g. destination) are zeroed rather than indeterminate
		xmachine_memory_Boid0 agent = xmachine_memory_Boid0();
		//Locate agent
		agent.x = 0;
		agent.y = 0;
		agent.z = 0;
		//Init motion forces
		agent.dx = 0;
		agent.dy = 0;
		agent.fx = 0;
		agent.fy = 0;
		agent.fz = 0;
		agent.gx = 0;
		agent.gy = 0;
		agent.id = 0;
		//Config navigation
		agent.wp1 = -1;
		agent.wp2 = -3;//-3 states next node is required
		agent.wp3 = -3;
		agent.state = 0;
		//Config animation (the order of the rng draws is kept: animate, animate_dir, speed_mod)
		agent.animate = (normalised(rng));
		agent.animate_dir = (normalised(rng))*0.5f + 1.0f;
		agent.speed_mod = speedModPercent(rng);
		return agent;
	}
	/**
	 * Creates a station 1 agent with default values
	 * Position and forces are zero, navigation is reset, and animate, animate_dir and speed_mod are drawn from rng
	 * @return The agent; callers are expected to fill in position, navigation and destination
	 */
	inline xmachine_memory_Boid1 emptyAgent1()
	{
		//Value-initialise so members not assigned below (e.g. destination) are zeroed rather than indeterminate
		xmachine_memory_Boid1 agent = xmachine_memory_Boid1();
		//Locate agent
		agent.x = 0;
		agent.y = 0;
		agent.z = 0;
		//Init motion forces
		agent.dx = 0;
		agent.dy = 0;
		agent.fx = 0;
		agent.fy = 0;
		agent.fz = 0;
		agent.gx = 0;
		agent.gy = 0;
		agent.id = 0;
		//Config navigation
		agent.wp1 = -1;
		agent.wp2 = -3;//-3 states next node is required
		agent.wp3 = -3;
		agent.state = 0;
		//Config animation (the order of the rng draws is kept: animate, animate_dir, speed_mod)
		agent.animate = (normalised(rng));
		agent.animate_dir = (normalised(rng))*0.5f + 1.0f;
		agent.speed_mod = speedModPercent(rng);
		return agent;
	}
	/**
	 * Creates a station 2 agent with default values
	 * Position and forces are zero, navigation is reset, and animate, animate_dir and speed_mod are drawn from rng
	 * @return The agent; callers are expected to fill in position, navigation and destination
	 */
	inline xmachine_memory_Boid2 emptyAgent2()
	{
		//Value-initialise so members not assigned below (e.g. destination) are zeroed rather than indeterminate
		xmachine_memory_Boid2 agent = xmachine_memory_Boid2();
		//Locate agent
		agent.x = 0;
		agent.y = 0;
		agent.z = 0;
		//Init motion forces
		agent.dx = 0;
		agent.dy = 0;
		agent.fx = 0;
		agent.fy = 0;
		agent.fz = 0;
		agent.gx = 0;
		agent.gy = 0;
		agent.id = 0;
		//Config navigation
		agent.wp1 = -1;
		agent.wp2 = -3;//-3 states next node is required
		agent.wp3 = -3;
		agent.state = 0;
		//Config animation (the order of the rng draws is kept: animate, animate_dir, speed_mod)
		agent.animate = (normalised(rng));
		agent.animate_dir = (normalised(rng))*0.5f + 1.0f;
		agent.speed_mod = speedModPercent(rng);
		return agent;
	}
	void createPassengerEntrance(unsigned int stationId, unsigned int platformId, bool isBus)
	{
	RouteStart &rs = getRouteStart(stationId, - 1, platformId, glm::vec2(), isBus);//Currently spawn rng entrance, we dont really care, there's probably only one
	RouteItem &ri = navDat[stationId].itemsVec[rs.startItem];
	NodeEnclosure &ne = navDat[stationId].enclosuresVec[ri.enclosure];
	//Create agent
	if (stationId==0)
	{
	xmachine_memory_Boid0 agent = emptyAgent0();
	//Randomly locate agent at starting enclosure
	agent.x = ne.vx[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
	agent.y = ne.vy[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
	//Init nav
	agent.wp1 = rs.startItem;//-2 states agent is on platform
	agent.destination = platformId;
	addXAgent_Boid0(&agent);
	}
	else if (stationId == 1)
	{
	xmachine_memory_Boid1 agent = emptyAgent1();
	//Randomly locate agent at starting enclosure
	agent.x = ne.vx[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
	agent.y = ne.vy[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
	//Init nav
	agent.wp1 = rs.startItem;//-2 states agent is on platform
	agent.destination = platformId;
	addXAgent_Boid1(&agent);
	}
	else if (stationId == 2)
	{
	xmachine_memory_Boid2 agent = emptyAgent2();
	//Randomly locate agent at starting enclosure
	agent.x = ne.vx[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
	agent.y = ne.vy[0] + (-0.01f + 0.02f * (rand() % 100) / 100.0f);
	//Init nav
	agent.wp1 = rs.startItem;//-2 states agent is on platform
	agent.destination = platformId;
	addXAgent_Boid2(&agent);
	}
	else
	{
	assert(false);
	}
	}
	/**
	 * Calculates a unit vector perpendicular to a platform
	 * The platform's start->end direction is lifted to (x, 0, y), crossed with the up axis (0, 1, 0), and the x/z of the result is returned
	 * @param stationId Index into navDat
	 * @param platformId Index into the station's platformVec
	 * @return The 2D perpendicular direction
	 */
	glm::vec2 pformDir2(unsigned int stationId, unsigned int platformId)
	{
		const auto &platform = navDat[stationId].platformVec[platformId];
		glm::vec2 direction = normalize(platform.end - platform.start);
		glm::vec3 _platformDir = normalize(cross(glm::vec3(0, 1, 0), glm::vec3(direction.x, 0, direction.y)));
		return glm::vec2(_platformDir.x, _platformDir.z);
	}
	/**
	 * Fills in the spawn state shared by every Boid agent type for a passenger placed on a platform.
	 *
	 * @tparam Agent One of the xmachine_memory_Boid* structs (all expose the members written below)
	 * @param agent Agent to initialise
	 * @param pos Spawn position
	 * @param facing Direction written to dx/dy, and doubled into fx/fy
	 * @param startItem Route item stored as wp2
	 * @param platformId Platform stored as wp3
	 * @param destination Destination stored on the agent
	 */
	template <typename Agent>
	static void initPlatformPassenger(Agent &agent, const glm::vec2 &pos, const glm::vec2 &facing, int startItem, unsigned int platformId, int destination)
	{
		agent.x = pos.x;
		agent.y = pos.y;
		agent.dx = facing.x;
		agent.dy = facing.y;
		agent.fx = facing.x * 2;
		agent.fy = facing.y * 2;
		//Init nav
		agent.wp1 = -12;//-12 states agent is on platform
		agent.wp2 = startItem;
		agent.wp3 = platformId;
		agent.state = 1;//We will use 1 to say agent is walking towards platform
		agent.destination = destination;
	}
	/**
	 * Creates a single passenger agent beside one of the doors along a platform.
	 *
	 * The spawn point is the platform midpoint, shifted by a fixed amount against pformDir2(),
	 * then moved along the platform by DOOR_OFFSETS[stationId][d_index].
	 *
	 * @param stationId Station id (0, 1 or 2); selects the navigation data and the Boid agent type
	 * @param platformId Index into navDat[stationId].platformVec (not range checked)
	 * @param trainId Currently unused
	 * @param destination Passed to getRouteStart() and stored on the agent (-1 = home, 0+ = new platform)
	 * @param d_index Door index, 0..3, selecting a column of DOOR_OFFSETS
	 * @note An out of range stationId or d_index trips an assert in debug builds and is ignored otherwise.
	 */
	void createPassengerPlatform(unsigned int stationId, unsigned int platformId, unsigned int trainId, int destination, int d_index)
	{
		(void)trainId;//Kept for interface compatibility
		//Both values index fixed size tables below, so reject bad ones before touching them
		if (stationId > 2 || d_index < 0 || d_index > 3)
		{
			assert(false);
			return;
		}
		//Locate passenger at platform
		//This math should be done once at init
		const glm::vec2 edgeStart = navDat[stationId].platformVec[platformId].start;
		const glm::vec2 edgeEnd = navDat[stationId].platformVec[platformId].end;
		//NOTE(review): 0.00391604984f looks like a stand-off distance from the platform edge in model units - confirm
		const glm::vec2 center = (edgeStart + ((edgeEnd - edgeStart) / 2.0f)) - (0.00391604984f*pformDir2(stationId, platformId));
		//End points are scaled before subtracting; presumably to keep the difference well away from zero - TODO confirm
		const glm::vec2 direction = glm::normalize(edgeEnd * 100.0f - edgeStart * 100.0f);
		//Convert doorOffset to model units (from metres?), need to get scale factor out of scx gen
		const glm::vec2 pos = center + direction*(DOOR_OFFSETS[stationId][d_index]);
		//Calc agent direction
		const glm::vec3 facing3 = glm::normalize(glm::cross(glm::vec3(0, 1, 0), glm::vec3(direction.x, 0, direction.y)));
		const glm::vec2 platformDir(facing3.x, facing3.z);
		//Parse destination (-1 = home, 0+ = new platform)
		//Only the start item is needed, so read it straight off the returned route
		const int startItem = getRouteStart(stationId, platformId, destination, pos, false).startItem;//First item is actually the waypoint
		//Create agent of the type belonging to this station
		switch (stationId)
		{
		case 0:
		{
			xmachine_memory_Boid0 agent = emptyAgent0();
			initPlatformPassenger(agent, pos, platformDir, startItem, platformId, destination);
			addXAgent_Boid0(&agent);
			break;
		}
		case 1:
		{
			xmachine_memory_Boid1 agent = emptyAgent1();
			initPlatformPassenger(agent, pos, platformDir, startItem, platformId, destination);
			addXAgent_Boid1(&agent);
			break;
		}
		case 2:
		{
			xmachine_memory_Boid2 agent = emptyAgent2();
			initPlatformPassenger(agent, pos, platformDir, startItem, platformId, destination);
			addXAgent_Boid2(&agent);
			break;
		}
		default:
			assert(false);
			break;
		}
	}

	//util

	/**
	 * Tears down the simulation by forwarding to cleanup(), which is defined outside this section.
	 */
	void cleanupFLAME()
	{
		cleanup();
	}
	/**
	 * Resets the current CUDA device.
	 *
	 * @return 0 if cudaDeviceReset() succeeded, 1 otherwise (a message is written to stderr)
	 */
	int cleanupCUDA()
	{
		printf("cleanup cuda\n");
		if (cudaDeviceReset() == cudaSuccess)
		{
			return 0;
		}
		fprintf(stderr, "Error resetting the device!");
		return 1;
	}
	/**
	 * Looks up the maximum agent count of the Boid agent type used by a station.
	 *
	 * @param stationId The station id (0, 1 or 2)
	 * @return The matching get_agent_Boid*_MAX_count() value; any other id trips an assert and yields 0
	 */
	int getAgentMax(int stationId)
	{
		switch (stationId)
		{
		case 0:
			return get_agent_Boid0_MAX_count();
		case 1:
			return get_agent_Boid1_MAX_count();
		case 2:
			return get_agent_Boid2_MAX_count();
		default:
			break;
		}
		//Unknown station
		assert(false);
		return 0;
	}
	/**
	 * Looks up how many agents are currently in the default state list of a station's Boid agent type.
	 *
	 * @param stationId The station id (0, 1 or 2)
	 * @return The matching get_agent_Boid*_default*_count() value; any other id trips an assert and yields 0
	 */
	int getAgentCount(int stationId)
	{
		switch (stationId)
		{
		case 0:
			return get_agent_Boid0_default0_count();
		case 1:
			return get_agent_Boid1_default1_count();
		case 2:
			return get_agent_Boid2_default2_count();
		default:
			break;
		}
		//Unknown station
		assert(false);
		return 0;
	}
	/**
	 * Writes the render state of one agent into the two vec4 output buffers.
	 *
	 * data1[index] receives (x, z, y) with w left untouched, data2[index] receives (fx, fz, fy, animate);
	 * note that y and z swap places on the way out.
	 *
	 * @tparam AgentList One of the xmachine_memory_Boid*_list structs (all expose the members read below)
	 * @param agents Agent list to read from
	 * @param data1 Output buffer for positions
	 * @param data2 Output buffer for the f* vector and the animate value
	 * @param index Slot to copy; the caller guarantees it is in range
	 */
	template <typename AgentList>
	__device__ __forceinline__ void writeAgentToTBO(const AgentList* agents, glm::vec4* data1, glm::vec4* data2, int index)
	{
		data1[index].x = agents->x[index];
		data1[index].y = agents->z[index];
		data1[index].z = agents->y[index];
		data2[index].x = agents->fx[index];
		data2[index].y = agents->fz[index];
		data2[index].z = agents->fy[index];
		data2[index].w = agents->animate[index];
	}
	/**
	 * Copies Boid0 agents into the output buffers, one thread per agent (1D grid of 1D blocks).
	 *
	 * @param agents Agent list to read from
	 * @param data1 Output buffer for positions
	 * @param data2 Output buffer for the f* vector and the animate value
	 * @param agentCount Threads whose index is >= agentCount do nothing. The default disables
	 *                   the guard so a 3 argument launch behaves as it did before the guard existed.
	 */
	__global__ void output_to_TBO0(xmachine_memory_Boid0_list* agents, glm::vec4* data1, glm::vec4* data2, int agentCount = 0x7FFFFFFF)
	{
		//global thread index
		const int index = blockIdx.x * blockDim.x + threadIdx.x;
		//The grid is rounded up to whole blocks, so the last block may hold surplus threads
		if (index < agentCount)
		{
			writeAgentToTBO(agents, data1, data2, index);
		}
	}
	/**
	 * Copies Boid1 agents into the output buffers, one thread per agent (1D grid of 1D blocks).
	 *
	 * @param agents Agent list to read from
	 * @param data1 Output buffer for positions
	 * @param data2 Output buffer for the f* vector and the animate value
	 * @param agentCount Threads whose index is >= agentCount do nothing. The default disables
	 *                   the guard so a 3 argument launch behaves as it did before the guard existed.
	 */
	__global__ void output_to_TBO1(xmachine_memory_Boid1_list* agents, glm::vec4* data1, glm::vec4* data2, int agentCount = 0x7FFFFFFF)
	{
		//global thread index
		const int index = blockIdx.x * blockDim.x + threadIdx.x;
		//The grid is rounded up to whole blocks, so the last block may hold surplus threads
		if (index < agentCount)
		{
			writeAgentToTBO(agents, data1, data2, index);
		}
	}
	/**
	 * Copies Boid2 agents into the output buffers, one thread per agent (1D grid of 1D blocks).
	 *
	 * @param agents Agent list to read from
	 * @param data1 Output buffer for positions
	 * @param data2 Output buffer for the f* vector and the animate value
	 * @param agentCount Threads whose index is >= agentCount do nothing. The default disables
	 *                   the guard so a 3 argument launch behaves as it did before the guard existed.
	 */
	__global__ void output_to_TBO2(xmachine_memory_Boid2_list* agents, glm::vec4* data1, glm::vec4* data2, int agentCount = 0x7FFFFFFF)
	{
		//global thread index
		const int index = blockIdx.x * blockDim.x + threadIdx.x;
		//The grid is rounded up to whole blocks, so the last block may hold surplus threads
		if (index < agentCount)
		{
			writeAgentToTBO(agents, data1, data2, index);
		}
	}
	/**
	 * Waits for the device to go idle and terminates the program if any CUDA error is pending.
	 *
	 * On error a diagnostic is printed, the process waits for a key press and then exits with status 1.
	 *
	 * @param where Text naming the work being checked; included in the diagnostic
	 */
	static void syncAndCheckCuda(const char *where)
	{
		const cudaError_t syncStatus = cudaDeviceSynchronize();
		//Always query (and thereby clear) the last error, this also catches failed kernel launches
		const cudaError_t lastStatus = cudaGetLastError();
		const cudaError_t status = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
		if (status == cudaSuccess)
		{
			return;
		}
		if (status == cudaErrorUnknown)
		{
			printf("An Unknown CUDA Error Occurred :(\n");
			printf("Perhaps performing the same operation under the CUDA debugger with Memory Checker enabled could help!\n");
			printf("If this error only occurs outside of NSight debugging sessions, or causes the system to lock up. It may be caused by not passing the required amount of shared memory to a kernal launch that uses runtime sized shared memory.\n");
			printf("Also possible you have forgotten to allocate texture memory you are trying to read\n");
			printf("Passing a buffer to 'cudaGraphicsSubResourceGetMappedArray' or a texture to 'cudaGraphicsResourceGetMappedPointer'.\n");
			getchar();
			exit(1);
		}
		printf(" CUDA Error Occurred @ %s;\n%s\n", where, cudaGetErrorString(status));
		getchar();
		exit(1);
	}
	/**
	 * Copies the render state of every agent in a station's default state list into two output buffers.
	 *
	 * Blocks until the copy has finished. Any pending or resulting CUDA error terminates the program.
	 *
	 * @param buffs Pointers to the two output buffers (first = data1, second = data2). They are written
	 *              by device code, so they must be device accessible - presumably mapped buffer objects,
	 *              confirm against the caller. Each must hold at least getAgentCount(stationId) elements.
	 * @param stationId The station id (0, 1 or 2); selects the agent list and kernel
	 */
	void fillBuffers(std::pair<glm::vec4*, glm::vec4*> buffs, int stationId)
	{
		//Surface errors left behind by earlier asynchronous work before adding our own launch
		syncAndCheckCuda("fillBuffers (work pending before launch)");

		const int agentCount = getAgentCount(stationId);
		if (agentCount <= 0)
		{
			return;
		}

		//cuda block size
		const int threadsPerBlock = 128;
		const dim3 threads(threadsPerBlock, 1, 1);
		//Integer ceil-division: enough blocks to give every agent a thread
		const dim3 grid((agentCount + threadsPerBlock - 1) / threadsPerBlock, 1, 1);

		//kernel
		const char *kernelName = "";
		switch (stationId)
		{
		case 0:
			kernelName = "output_to_TBO0";
			output_to_TBO0 <<<grid, threads>>>(get_device_Boid0_default0_agents(), buffs.first, buffs.second, agentCount);
			break;
		case 1:
			kernelName = "output_to_TBO1";
			output_to_TBO1 <<<grid, threads>>>(get_device_Boid1_default1_agents(), buffs.first, buffs.second, agentCount);
			break;
		case 2:
			kernelName = "output_to_TBO2";
			output_to_TBO2 <<<grid, threads>>>(get_device_Boid2_default2_agents(), buffs.first, buffs.second, agentCount);
			break;
		default:
			assert(false);
			return;
		}
		//Wait for the copy so the caller can use the buffers as soon as we return
		syncAndCheckCuda(kernelName);
	}