Skip to content

Instantly share code, notes, and snippets.

@mmajewsk
Created December 14, 2015 14:39
Show Gist options
  • Save mmajewsk/1b3ed249c8c7f7d2581a to your computer and use it in GitHub Desktop.
Save mmajewsk/1b3ed249c8c7f7d2581a to your computer and use it in GitHub Desktop.
#include "logger.hpp"
Logger::Logger(double *x,
double *y,
double *total_distance,
int n,
std::string datapath_hist,
std::string datapath_time)
:x(x),
y(y),
total_distance(total_distance),
size(n),
datapath_hist(datapath_hist),
datapath_time(datapath_time)
{
hist_file.open(datapath_hist);
time_file.open(datapath_time);
hist_file<<"i"<<" "<<"x"<<" "<<"y"<<" "<<"d"<<"\n";
time_file<<"t"<<"\n";
}
void Logger::log_histograms(){
for(int i =0 ;i < size; ++i){
hist_file<<i<<" "<<x[i]<<" "<<y[i]<<" "<<total_distance[i]<<"\n";
}
}
void Logger::log_time(int t){
time_file<<t<<"\n";
}
void Logger::stop(){
hist_file.close();
time_file.close();
//a
}
Logger::~Logger(){
this->stop();
}
#include <fstream>
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <chrono>
using namespace std::chrono;
class Logger{
public:
double *x;
double *y;
double * total_distance;
int size;
std::ofstream hist_file;
std::ofstream time_file;
std::string datapath_hist;
std::string datapath_time;
Logger(double *x,
double *y,
double *total_distance,
int n,
std::string datapath_hist,
std::string datapath_time);
~Logger();
void log_histograms();
void log_time(int t);
void stop();
};
#include "processor.hpp"
#include "logger.hpp"
#include <string>
#include <chrono>
using namespace std::chrono;
int main(int args, char **vargs){
std::string cuda(vargs[1]);
std::string datapath(vargs[2]);
std::string s_steps(vargs[3]);
std::string s_size(vargs[4]);
std::string datapath_text = datapath + cuda +"_"+ s_steps +"_"+ s_size;
std::string datapath_time = datapath_text + "_time.txt";
std::string datapath_hist = datapath_text + ".txt";
bool processing_on_gpu = cuda=="gpu"?true:false;
int number_of_particles = std::stoi(s_size);
int startx = 0;
int starty = 0;
int _time = 0;
int dt = 1;
double *vx_d;
auto vx_h = new double[number_of_particles];
auto vy_h = new double[number_of_particles];
auto X = new double[number_of_particles];
auto Y = new double[number_of_particles];
auto total_distance = new double[number_of_particles];
auto steps = std::stoi(s_steps);
Processor * processor;
if (processing_on_gpu){
processor = (Processor *)(new GPU(number_of_particles));
}
else{
processor = (Processor *)(new CPU(number_of_particles));
}
processor->setIinitialPosition(X,Y);
processor->setInitialVelocity(vx_h,vy_h);
Logger logger(X,Y,total_distance,number_of_particles,datapath_hist,datapath_time);
//std::ofstream fs(datapath);
for(int i = 0; i<steps; ++i ){
//std::cout<<"petla"<<std::endl;
high_resolution_clock::time_point t1 = high_resolution_clock::now();
_time = processor->evolveSystem(X,Y,vx_h,vy_h,total_distance,number_of_particles,_time,dt);
high_resolution_clock::time_point t2 = high_resolution_clock::now();
auto duration = duration_cast<microseconds>( t2 - t1 ).count();
//print_to_file(X, Y, number_of_particles, step, fs);
logger.log_time(duration);
}
logger.log_histograms();
logger.stop();
}
#include <cstdio>
#include <cstdlib>
#include <time.h>
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <fstream>
#include <random>
void setInitialVelocity(double *vx_h, double *vy_h, int size){
std::default_random_engine generator;
std::normal_distribution<double> distribution(1.0,2.5);
for(auto i = 0; i<size; ++i) {
vx_h[i] = distribution(generator);
vy_h[i] = distribution(generator);
}
}
void setIinitialPosition(double *x, double *y , int size){
for(int i=0;i<size;++i){
x[i] = 0.0;
y[i] = 0.0;
}
}
void randomizeVelocity(double * vx_h, double * vy_h, int size){
std::default_random_engine generator;
std::uniform_real_distribution<double> distribution(0.0,1.0);
for(auto i = 0; i<size; ++i) {
if(distribution(generator) > 0.5){
vx_h[i] = -vx_h[i];
vy_h[i] = -vy_h[i];
}
}
}
void print_to_file(double *x, double *y, int n, int step, std::ofstream & fs){
for(int i =0 ;i < n; ++i){
fs<<x[i]<<" "<<y[i]<<" "<<step<<"\n";
}
}
struct DATA{
double *x;
double *y;
double *vx_h;
double *vy_h;
double *tmpa;
double *tmpb;
int t;
int dt;
int size;
};
void CPUaddVectors(double * a, double * b, double * result, int size){
for(auto i = 0; i<size; ++i) result[i] = a[i] + b[i];
}
void CPUaddToVector(double * a, double* result, int size, double value){
for(auto i = 0; i<size; ++i) result[i] = a[i] + value;
}
void CPUmultiplyVector(double * a, double * result, int size, double value){
for(auto i = 0; i<size; ++i) result[i] = a[i] * value;
}
int CPUevolveSystem(DATA d){
int t = d.dt + d.t;
CPUaddVectors(d.x, d.vx_h, d.x, d.size);
CPUaddVectors(d.y, d.vy_h, d.y, d.size);
CPUmultiplyVector(d.x, d.tmpa, d.size, 0.025);
CPUmultiplyVector(d.y, d.tmpb, d.size, 0.025);
CPUaddVectors(d.tmpa, d.vx_h, d.vx_h, d.size);
CPUaddVectors(d.tmpb, d.vy_h, d.vy_h, d.size);
randomizeVelocity(d.vx_h, d.vy_h, d.size);
return t;
}
__global__ void _AddVectors(double* a, double* b, double * c, int size){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
if(threadId<size){
c[threadId] = a[threadId] + b[threadId];
}
}
__global__ void _AddToVector(double* a, double* result, int size, double value){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
if(threadId<size){
result[threadId] = a[threadId] + value;
}
}
__global__ void _MultiplyVector(double* a, double * result, int size, double value){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
if(threadId<size){
result[threadId] = a[threadId] * value;
}
}
void GPUaddVectors(double * a, double * b, double * result, double * a_d, double * b_d, double * result_d, int size){
cudaMemcpy(a, a_d, size*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(b, b_d, size*sizeof(double), cudaMemcpyHostToDevice);
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_AddVectors<<<gridSize, blockSize>>>(a_d,b_d,result_d,size);
cudaMemcpy( result, result_d, size*sizeof(double), cudaMemcpyDeviceToHost);
}
void GPUaddToVector(double * a, double* result, double * a_d, double * result_d, int size, double value){
cudaMemcpy(a, a_d, size*sizeof(double), cudaMemcpyHostToDevice);
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_AddToVector<<<gridSize, blockSize>>>(a_d, result_d, size, value);
cudaMemcpy( a, a_d, size*sizeof(double), cudaMemcpyDeviceToHost);
cudaMemcpy( result, result_d, size*sizeof(double), cudaMemcpyDeviceToHost);
}
void GPUmultiplyVector(double * a, double * result, double * a_d, double * result_d, int size, double value){
cudaMemcpy(a, a_d, size*sizeof(double), cudaMemcpyHostToDevice);
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_MultiplyVector<<<gridSize, blockSize>>>(a_d, result_d, size, value);
cudaMemcpy( a, a_d, size*sizeof(double), cudaMemcpyDeviceToHost);
cudaMemcpy( result, result_d, size*sizeof(double), cudaMemcpyDeviceToHost);
}
int GPUevolveSystem(DATA d, double * a_d, double * b_d, double * result_d){
int t = d.t + d.dt;
GPUaddVectors(d.x, d.vx_h, d.x, a_d, b_d, result_d, d.size);
GPUaddVectors(d.y, d.vy_h, d.y, a_d, b_d, result_d, d.size);
GPUmultiplyVector(d.x, d.tmpa, d.size, 0.025);
GPUmultiplyVector(d.y, d.tmpb, a_d, result_d, d.size, 0.025);
GPUaddVectors(d.tmpa, d.vx_h, d.vx_h, a_d, b_d, result_d, d.size);
GPUaddVectors(d.tmpb, d.vy_h, d.vy_h, a_d, b_d, result_d, d.size);
randomizeVelocity(d.vx_h, d.vy_h, d.size);
return t;
}
int main(int args, char **vargs){
std::string cuda(vargs[0]);
std::string datapath(vargs[1]);
bool processing_on_gpu = cuda=="gpu"?true:false;
int number_of_particles = 4000;
int startx = 0;
int starty = 0;
int time = 0;
int dt = 1;
double *vx_d;
double vx_h = new double[number_of_particles];
double vy_h = new double[number_of_particles];
double x = new double[number_of_particles];
double y = new double[number_of_particles];
double tmpa = new double[number_of_particles];
double tmpb = new double[number_of_particles];
double steps = 3;
dim3 gridSize(16,16);
dim3 blockSize(8,8);
double * a_d,* b_d, * result_d;
DATA d;
d.x = x;
d.y = y;
d.vx_h = vx_h;
d.vy_h = vy_h;
d.t = t;
d.dt = dt;
d.size = number_of_particles;
d.tmpa = tmpa;
d.tmpb = tmpb
setIinitialPosition(x,y,number_of_particles);
setInitialVelocity(vx_h,vy_h,number_of_particles);
std::ofstream fs(datapath);
for(int i = 0; i<steps; ++i ){
if (processing_on_gpu){
cudaMalloc(&a_d, number_of_particles*sizeof(double));
cudaMalloc(&b_d, number_of_particles*sizeof(double));
cudaMalloc(&result_d, number_of_particles*sizeof(double));
d.t = evolveSystem(d,a_d,b_d,result_d);
}
else{
d.t = evolveSystem(d);
}
print_to_file(x, y, number_of_particles, 0, fs);
}
fs.close();
}
CFLAGS = -std=c++11 -g
all: main.cu logger.cu processor.cu
nvcc $(CFLAGS) main.cu logger.cu processor.cu
# -*- coding: utf-8 -*-
# coding: utf-8
# In[1]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# In[2]:
datapath = r"C:\Users\hawker\Desktop\studia\cuda\miniproject1\data\inc_step_1_50"
os.chdir(datapath)
# In[3]:
class AttrDict(dict):
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__ = self
files_list = os.listdir(datapath)
_DATA_DICT = { filename[:-4]: pd.read_csv(filename,sep=' ') for filename in files_list}
DATA = AttrDict(_DATA_DICT)
# In[5]:
def plotlist(i):
return "datagpu_"+str(i+1)+"_50"
# ###położenie ze zmiana ilosci kroków
# In[14]:
from matplotlib import animation
def animate(nframe):
plotname = plotlist(nframe%25)
plt.cla()
df = DATA[plotname]
plt.plot(df.x,df.y,"ro",alpha=0.5)
plt.xlim(-60,60)
plt.ylim(-60,60)
plt.title("Ewolucja w czasie ")
fig = plt.figure(figsize=(5,4))
anim = animation.FuncAnimation(fig, animate, frames=25)
anim.save(u"demo.gif", writer='imagemagick', fps=4);
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#include "processor.hpp"
Processor::Processor(int size){
this->size = size;
this->tmpa= new double[size];
this->tmpb= new double[size];
this->generator = new std::default_random_engine();
this->distribution = new std::uniform_real_distribution<double>(0.0,1.0);
}
void Processor::setInitialVelocity(double *vx_h, double *vy_h){
std::default_random_engine generator;
std::normal_distribution<double> distribution(1.0,2.5);
for(auto i = 0; i<size; ++i) {
vx_h[i] = distribution(generator);
vy_h[i] = distribution(generator);
}
}
void Processor::setIinitialPosition(double *x, double *y ){
for(int i=0;i<size;++i){
x[i] = 0.0;
y[i] = 0.0;
}
}
void Processor::randomizeVelocity(double * vx_h, double * vy_h){
for(auto i = 0; i<size; ++i) {
if((*distribution)((*generator)) > 0.5){
vx_h[i] = -vx_h[i];
vy_h[i] = -vy_h[i];
}
}
}
void Processor::countTotalDistance(double * vx_h, double * vy_h, double * total_distance){
this->euclidianDistance(vx_h,vy_h,this->tmpa);
//std::cout<<vx_h[0]<<" "<<vy_h[0]<<" "<<tmpa[0]<<" "<<std::endl;
this->addVectors(this->tmpa,total_distance,total_distance);
}
//ssss//ssss
int Processor::evolveSystem(double *x,
double *y,
double *vx_h,
double *vy_h,
double *total_distance,
int l,
int t,
int dt ){
t += dt;
this->countTotalDistance(vx_h, vy_h, total_distance);
//std::cout<<"time------------"<<t<<std::endl;
this->addVectors(x, vx_h, x);
this->addVectors(y, vy_h, y);
//std::cout<<"\n"<<std::endl;
this->euclidianDistance(x,y,tmpa); // x^2 + y^2 = d^2 ; tmpa=d
this->divideVectors(x,tmpa,tmpb); // d0 = 0.025; x0= d0 * x/d
this->multiplyVector(tmpb,tmpb,0.025); // x0= d0 * tmpb
//std::cout<<vx_h[0];
this->addVectors(vx_h,tmpb,vx_h); // x0 = tmpb vx_h = vx_h +x0
//std::cout<<vx_h[0];
this->divideVectors(y,tmpa,tmpb);
this->multiplyVector(tmpb,tmpb,0.025);
this->addVectors(vy_h,tmpb,vy_h);
this->randomizeVelocity(vx_h,vy_h);
return t;
}
CPU::CPU(int size): Processor(size){
}
void CPU::addVectors(double * a, double * b, double * result){
//double * a_ = new double[this->size];
//double * b_ = new double[this->size];
//memcpy(a, a_, this->size);
//memcpy(b, b_, this->size);
for(auto i = 0; i<size; ++i) {
result[i] = a[i] + b[i];
//std::cout<<result[i]<<" "<<a[i]<<" "<<b[i]<<std::endl;
}
//std::cout<<std::endl;
}
void CPU::divideVectors(double * a, double * b, double * result){
//double * a_ = new double[this->size];
//double * b_ = new double[this->size];
//memcpy(a_, a, this->size);
//memcpy(b_, b, this->size);
for(auto i = 0; i<size; ++i) {
if(b[i] == 0){
result[i] = 0;
}
else{
result[i] = a[i] / b[i];
}
//std::cout<<result[i]<<" "<<a[i]<<" "<<b[i]<<std::endl;
}
//std::cout<<std::endl;
}
void CPU::addToVector(double * a, double* result, double value){
for(auto i = 0; i<size; ++i) result[i] = a[i] + value;
}
void CPU::multiplyVector(double * a, double * result, double value){
for(auto i = 0; i<size; ++i) result[i] = a[i] * value;
}
void CPU::euclidianDistance(double *a, double *b, double *result){
for(auto i = 0; i<size; ++i) result[i] = sqrt(pow(a[i],2)+pow(b[i],2));
}
__global__ void _AddVectors(double* a, double* b, double * c, int size){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
//if(threadId<size){
c[threadId] = a[threadId] + b[threadId];
//}
}
__global__ void _DivideVectors(double* a, double* b, double * c, int size){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
//if(threadId<size){
if (b[threadId] == 0){
c[threadId] = 0;
}
else{
c[threadId] = a[threadId] / b[threadId];
}
//}
}
__global__ void _AddToVector(double* a, double* result, int size, double value){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
if(threadId<size){
result[threadId] = a[threadId] + value;
}
}
__global__ void _MultiplyVector(double* a, double * result, int size, double value){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
if(threadId<size){
result[threadId] = a[threadId] * value;
}
}
__global__ void _EuclideanDistanceVectors(double* a, double* b, double * result, int size){
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
if(threadId<size){
result[threadId] = sqrt(pow(a[threadId],2)+pow(b[threadId],2));
}
}
GPU::GPU(int size): Processor(size){
cudaMalloc((void **)&a, size*sizeof(double));
cudaMalloc((void **)&b, size*sizeof(double));
cudaMalloc((void **)&c, size*sizeof(double));
}
void GPU::addVectors(double * a, double * b, double * result){
cudaError_t error1 = cudaMemcpy(this->a, a, size*sizeof(double), cudaMemcpyHostToDevice);
cudaError_t error2 = cudaMemcpy(this->b, b, size*sizeof(double), cudaMemcpyHostToDevice);
if(error1 != cudaSuccess){
printf("cudaMemcpy returned error %s (code %d), line(%d)\n", cudaGetErrorString(error1), error1, __LINE__);
exit(EXIT_FAILURE);
}
if(error2 != cudaSuccess){
printf("cudaMemcpy returned error %s (code %d), line(%d)\n", cudaGetErrorString(error2), error2, __LINE__);
exit(EXIT_FAILURE);
}
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_AddVectors<<<gridSize, blockSize>>>(this->a,this->b,this->c,this->size);
cudaMemcpy( result, this->c, size*sizeof(double), cudaMemcpyDeviceToHost);
}
void GPU::divideVectors(double * a, double * b, double * result){
cudaError_t error1 = cudaMemcpy(this->a, a, size*sizeof(double), cudaMemcpyHostToDevice);
cudaError_t error2 = cudaMemcpy(this->b, b, size*sizeof(double), cudaMemcpyHostToDevice);
if(error1 != cudaSuccess){
printf("cudaMemcpy returned error %s (code %d), line(%d)\n", cudaGetErrorString(error1), error1, __LINE__);
exit(EXIT_FAILURE);
}
if(error2 != cudaSuccess){
printf("cudaMemcpy returned error %s (code %d), line(%d)\n", cudaGetErrorString(error2), error2, __LINE__);
exit(EXIT_FAILURE);
}
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_DivideVectors<<<gridSize, blockSize>>>(this->a,this->b,this->c,this->size);
cudaMemcpy( result, this->c, size*sizeof(double), cudaMemcpyDeviceToHost);
}
void GPU::addToVector(double * a, double* result, double value){
cudaMemcpy(this->a, a, size*sizeof(double), cudaMemcpyHostToDevice);
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_AddToVector<<<gridSize, blockSize>>>(this->a, this->c, this->size, value);
cudaMemcpy( result, this->c, size*sizeof(double), cudaMemcpyDeviceToHost);
}
void GPU::multiplyVector(double * a, double * result, double value){
cudaMemcpy(this->a, a, size*sizeof(double), cudaMemcpyHostToDevice);
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_MultiplyVector<<<gridSize, blockSize>>>(this->a, this->c, this->size, value);
cudaMemcpy( result, this->c, size*sizeof(double), cudaMemcpyDeviceToHost);
}
void GPU::euclidianDistance(double * a, double * b, double *result){
cudaMemcpy(this->a, a, size*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(this->b, b, size*sizeof(double), cudaMemcpyHostToDevice);
dim3 gridSize(16,16);
dim3 blockSize(8,8);
_EuclideanDistanceVectors<<<gridSize, blockSize>>>(this->a,this->b,this->c,this->size);
cudaMemcpy( result, this->c, size*sizeof(double), cudaMemcpyDeviceToHost);
}
#include <time.h>
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <iostream>
#include <cmath>
#include <random>
class Processor{
public:
double *tmpa, *tmpb;
int size;
std::default_random_engine *generator;
std::uniform_real_distribution<double> *distribution;
Processor(int size);
virtual void addVectors(double * a, double * b, double * result) = 0;
virtual void addToVector(double * a, double * result, double value) = 0;
virtual void multiplyVector(double * a, double * result, double value) = 0;
virtual void divideVectors(double *, double *, double *)=0;
virtual void euclidianDistance(double *a, double *b, double *result) = 0;
void countTotalDistance(double * vx_h, double * vy_h, double * total_distance);
void setInitialVelocity(double *vx_h, double *vy_h);
void setIinitialPosition(double *x, double *y );
void randomizeVelocity(double * vx_h, double * vy_h);
int evolveSystem(double *x,
double *y,
double *vx_h,
double *vy_h,
double* total_distance,
int l,
int t,
int dt);
};
class CPU: public Processor
{
public:
CPU(int size);
void addVectors(double * a, double * b, double * result);
void addToVector(double * a, double * result, double value);
void multiplyVector(double * a, double * result, double value);
void divideVectors(double *, double *, double *);
void euclidianDistance(double *a, double *b, double *result);
};
class GPU: public Processor
{
public:
double *a, *b, *c;
GPU(int size);
void addVectors(double * a, double * b, double * result);
void addToVector(double * a, double* result, double value);
void multiplyVector(double * a, double * result, double value);
void divideVectors(double *, double *, double *);
void euclidianDistance(double *a, double *b, double *result);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment