machinaut/Makefile

## bench.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              bench.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Makefile
# Builds vadd.so, a shared library made from the CUDA source vadd.cu.
#
# Overridable knobs (environment or command line, e.g. `make CUDA_ARCH=sm_80`):
#   CUDA_PATH  - CUDA toolkit root; used only for the header search paths below.
#   NVCC       - CUDA compiler driver to invoke.
#   CUDA_ARCH  - value handed to nvcc's -arch option.
CUDA_PATH ?= /usr/local/cuda
NVCC ?= nvcc
CUDA_ARCH ?= sm_70

.PHONY: all clean

# Delete a half-written target when its recipe fails, so a broken file is
# never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Default goal: the shared library.
all: vadd.so

# Link the object file into the shared library.
vadd.so: vadd.o
	$(NVCC) -shared $^ -o $@ -lcuda

# Compile the CUDA source as position-independent code for use in a shared
# library. -c takes no argument: the source is the input ($<) and the object
# file has to be named with -o ($@).
vadd.o: vadd.cu
	$(NVCC) -I $(CUDA_PATH)/include -I$(CUDA_PATH)/samples/common/inc -arch=$(CUDA_ARCH) --compiler-options '-fPIC' -c $< -o $@

# Remove every object file and shared library in this directory.
clean:
	rm -f *.o *.so

## vadd.cu
// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaMalloc)
// #include <cuda.h>
#include <cuda_runtime.h>

namespace
{
    // Element-wise addition kernel (internal linkage via the anonymous namespace).
    // Each thread derives one flat index from its block and thread coordinates
    // and, when that index is below n, stores A[idx] + B[idx] into C[idx].
    // Threads whose index is n or greater do nothing.
    __global__ void _vadd(const float *A, const float *B, float *C, int n)
    {
        const int idx = blockDim.x * blockIdx.x + threadIdx.x;
        if (idx >= n)
        {
            return;
        }
        C[idx] = A[idx] + B[idx];
    }
}

// C-linkage entry point that launches the _vadd kernel so that
// C[i] = A[i] + B[i] for every i in [0, n).
//
//   A, B, C  - forwarded unchanged to the kernel, which indexes them on the
//              device (presumably device-accessible buffers of at least n
//              floats -- TODO confirm against callers).
//   n        - number of elements to add.
//   threads  - threads per block for the launch.
//
// Nothing is launched when n or threads is not positive. The function
// returns void, so no launch status is reported to the caller.
extern "C" void vadd(const float *A, const float *B, float *C, int n, int threads)
{
    // threads == 0 would divide by zero below, and a non-positive n or
    // threads cannot form a valid launch configuration.
    if (n <= 0 || threads <= 0)
    {
        return;
    }

    // Ceiling of n / threads, computed without forming n + threads - 1,
    // which can overflow int for large n.
    const int blocks = n / threads + (n % threads != 0 ? 1 : 0);
    _vadd<<<blocks, threads>>>(A, B, C, n);
}
	# Builds vadd.so, a shared library made from the CUDA source vadd.cu.
	# CUDA_PATH is the CUDA toolkit root used for the header search paths;
	# override it from the environment or the command line.
	CUDA_PATH ?= /usr/local/cuda

	.PHONY: clean

	# Link the object file into the shared library.
	vadd.so: vadd.o
		nvcc -shared $^ -o $@ -lcuda

	# Compile the CUDA source as position-independent code. -c takes no
	# argument: the source is the input ($<) and the object is named with -o ($@).
	vadd.o: vadd.cu
		nvcc -I $(CUDA_PATH)/include -I$(CUDA_PATH)/samples/common/inc -arch=sm_70 --compiler-options '-fPIC' -c $< -o $@

	# Remove every object file and shared library in this directory.
	clean:
		rm -f *.o *.so
	// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaMalloc)
	// #include <cuda.h>
	#include <cuda_runtime.h>

	namespace
	{
	    // Element-wise addition kernel (internal linkage via the anonymous namespace).
	    // Each thread derives one flat index from its block and thread coordinates
	    // and, when that index is below n, stores A[i] + B[i] into C[i].
	    // A and B must be pointers: the body indexes them.
	    __global__ void _vadd(const float *A, const float *B, float *C, int n)
	    {
	        int i = blockDim.x * blockIdx.x + threadIdx.x;
	        if (i < n)
	        {
	            C[i] = A[i] + B[i];
	        }
	    }
	}

	// C-linkage entry point that launches the _vadd kernel so that
	// C[i] = A[i] + B[i] for every i in [0, n).
	//
	//   A, B, C  - forwarded unchanged to the kernel, which indexes them on the
	//              device (presumably device-accessible buffers of at least n
	//              floats -- TODO confirm against callers).
	//   n        - number of elements to add.
	//   threads  - threads per block for the launch.
	//
	// Nothing is launched when n or threads is not positive. The function
	// returns void, so no launch status is reported to the caller.
	extern "C" void vadd(const float *A, const float *B, float *C, int n, int threads)
	{
	    // threads == 0 would divide by zero below, and a non-positive n or
	    // threads cannot form a valid launch configuration.
	    if (n <= 0 || threads <= 0)
	    {
	        return;
	    }

	    // Ceiling of n / threads, computed without forming n + threads - 1,
	    // which can overflow int for large n.
	    const int blocks = n / threads + (n % threads != 0 ? 1 : 0);
	    _vadd<<<blocks, threads>>>(A, B, C, n);
	}