Last active
August 16, 2018 13:18
-
-
Save robertmaynard/8e39dba15b9e3232f7bcf52c1df63f9f to your computer and use it in GitHub Desktop.
cuRand and thrust example error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nvcc -std=c++11 -arch=sm_30 -c main.cu [works] | |
nvcc -std=c++11 -arch=sm_32 -c main.cu [works] | |
nvcc -std=c++11 -arch=sm_35 -c main.cu [works] | |
nvcc -std=c++11 -arch=sm_50 -c main.cu [works] | |
nvcc -std=c++11 -arch=sm_60 -c main.cu [works] | |
nvcc -std=c++11 -arch=sm_70 -c main.cu [works] | |
nvcc -std=c++11 -arch=sm_30 -dc main.cu [works] | |
nvcc -std=c++11 -arch=sm_32 -dc main.cu [works] | |
nvcc -std=c++11 -arch=sm_35 -dc main.cu [fails] | |
nvcc -std=c++11 -arch=sm_50 -dc main.cu [fails] | |
nvcc -std=c++11 -arch=sm_60 -dc main.cu [fails] | |
nvcc -std=c++11 -arch=sm_70 -dc main.cu [fails] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh(446): error: cannot pass an argument with a user-provided copy-constructor to a device-side kernel launch | |
detected during: | |
instantiation of "cudaError_t thrust::cuda_cub::cub::DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT, OutputT>::InvokeSingleTile<ActivePolicyT,SingleTileKernelT>(SingleTileKernelT) [with InputIteratorT=thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, OutputIteratorT=float *, OffsetT=int, ReductionOpT=thrust::plus<float>, OutputT=float, ActivePolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy130, SingleTileKernelT=void (*)(thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, float *, int, thrust::plus<float>, float)]" | |
(599): here | |
instantiation of "cudaError_t thrust::cuda_cub::cub::DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT, OutputT>::Invoke<ActivePolicyT>() [with InputIteratorT=thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, OutputIteratorT=float *, OffsetT=int, ReductionOpT=thrust::plus<float>, OutputT=float, ActivePolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy130]" | |
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/cub/block/../iterator/../util_device.cuh(332): here | |
instantiation of "cudaError_t thrust::cuda_cub::cub::ChainedPolicy<PTX_VERSION, PolicyT, PolicyT>::Invoke(int, FunctorT &) [with PTX_VERSION=130, PolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy130, FunctorT=thrust::cuda_cub::cub::DispatchReduce<thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, float *, int, thrust::plus<float>, float>]" | |
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/cub/block/../iterator/../util_device.cuh(315): here | |
instantiation of "cudaError_t thrust::cuda_cub::cub::ChainedPolicy<PTX_VERSION, PolicyT, PrevPolicyT>::Invoke(int, FunctorT &) [with PTX_VERSION=200, PolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy200, PrevPolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy130, FunctorT=thrust::cuda_cub::cub::DispatchReduce<thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, float *, int, thrust::plus<float>, float>]" | |
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/cub/block/../iterator/../util_device.cuh(315): here | |
instantiation of "cudaError_t thrust::cuda_cub::cub::ChainedPolicy<PTX_VERSION, PolicyT, PrevPolicyT>::Invoke(int, FunctorT &) [with PTX_VERSION=300, PolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy300, PrevPolicyT=thrust::cuda_cub::cub::DeviceReducePolicy<float, int, thrust::plus<float>>::Policy200, FunctorT=thrust::cuda_cub::cub::DispatchReduce<thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, float *, int, thrust::plus<float>, float>]" | |
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/cub/block/../iterator/../util_device.cuh(315): here | |
[ 3 instantiation contexts not shown ] | |
instantiation of "cudaError_t thrust::cuda_cub::cub::DeviceReduce::Reduce(void *, size_t &, InputIteratorT, OutputIteratorT, int, ReductionOpT, T, cudaStream_t, __nv_bool) [with InputIteratorT=thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, OutputIteratorT=float *, ReductionOpT=thrust::plus<float>, T=float]" | |
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/reduce.h(950): here | |
instantiation of "T thrust::cuda_cub::reduce_n(thrust::cuda_cub::execution_policy<Derived> &, InputIt, Size, T, BinaryOp) [with Derived=thrust::cuda_cub::tag, InputIt=thrust::cuda_cub::transform_input_iterator_t<float, thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, estimate_pi>, Size=signed long, T=float, BinaryOp=thrust::plus<float>]" | |
/usr/local/cuda/bin/..//include/thrust/system/cuda/detail/transform_reduce.h(62): here | |
instantiation of "T thrust::cuda_cub::transform_reduce(thrust::cuda_cub::execution_policy<Derived> &, InputIt, InputIt, TransformOp, T, ReduceOp) [with Derived=thrust::cuda_cub::tag, InputIt=thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, TransformOp=estimate_pi, T=float, ReduceOp=thrust::plus<float>]" | |
/usr/local/cuda/bin/..//include/thrust/detail/transform_reduce.inl(47): here | |
instantiation of "OutputType thrust::transform_reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, UnaryFunction, OutputType, BinaryFunction) [with DerivedPolicy=thrust::cuda_cub::tag, InputIterator=thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, UnaryFunction=estimate_pi, OutputType=float, BinaryFunction=thrust::plus<float>]" | |
/usr/local/cuda/bin/..//include/thrust/detail/transform_reduce.inl(67): here | |
instantiation of "OutputType thrust::transform_reduce(InputIterator, InputIterator, UnaryFunction, OutputType, BinaryFunction) [with InputIterator=thrust::counting_iterator<int, thrust::use_default, thrust::use_default, thrust::use_default>, UnaryFunction=estimate_pi, OutputType=float, BinaryFunction=thrust::plus<float>]" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <curand_kernel.h> | |
#include <thrust/functional.h> | |
#include <thrust/iterator/counting_iterator.h> | |
#include <thrust/transform_reduce.h> | |
#include <iomanip> | |
#include <iostream> | |
// Example of using cuRAND in device code to try out device linking
// Thrust unary functor that produces one Monte Carlo estimate of pi per call.
//
// Each invocation seeds its own cuRAND generator with `thread_id`, draws
// 10000 points from the unit square and returns 4 * (fraction of the points
// whose distance from the origin is <= 1).
//
// No constructors are declared on purpose (Rule of Zero): the implicitly
// generated ones are sufficient for a stateless functor.
// NOTE(review): nvcc -dc builds for sm_35 and newer were observed to fail
// with "cannot pass an argument with a user-provided copy-constructor to a
// device-side kernel launch" while this type declared
// `estimate_pi(const estimate_pi&) = default;` -- confirm that the implicit
// members build cleanly on the toolchains in use.
struct estimate_pi : public thrust::unary_function<unsigned int, float>
{
  // thread_id: value used as the cuRAND seed for this invocation.
  // Returns the pi estimate computed from this invocation's samples.
  // The call operator is const because the functor carries no state, so it
  // can also be invoked through const copies.
  __device__ float operator()(unsigned int thread_id) const
  {
    float sum = 0;
    const unsigned int N = 10000; // samples per thread
    const unsigned int seed = thread_id;

    curandState s;

    // seed a random number generator
    curand_init(seed, 0, 0, &s);

    // take N samples in a quarter circle
    for (unsigned int i = 0; i < N; ++i) {
      // draw a sample from the unit square
      const float x = curand_uniform(&s);
      const float y = curand_uniform(&s);

      // measure distance from the origin
      const float dist = sqrtf(x * x + y * y);

      // add 1.0f if (x,y) is inside the quarter circle
      if (dist <= 1.0f)
        sum += 1.0f;
    }

    // multiply by 4 to get the area of the whole circle
    sum *= 4.0f;

    // divide by N
    return sum / N;
  }
};
// Makes the first CUDA device with compute capability >= 3.5 the current
// device.
//
// Returns 0 once cudaSetDevice succeeds for such a device. Returns 1 when the
// device count or any device's properties cannot be queried, or when no
// qualifying device could be selected.
int choose_cuda_device()
{
  int device_count = 0;
  if (cudaGetDeviceCount(&device_count) != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int device = 0; device < device_count; ++device) {
    cudaDeviceProp props;
    if (cudaGetDeviceProperties(&props, device) != cudaSuccess) {
      // A failed property query aborts the whole search.
      std::cerr << "Could not retrieve properties from CUDA device " << device
                << std::endl;
      return 1;
    }

    // Skip devices older than compute capability 3.5.
    const bool at_least_sm35 =
      props.major > 3 || (props.major == 3 && props.minor >= 5);
    if (!at_least_sm35) {
      continue;
    }

    if (cudaSetDevice(device) == cudaSuccess) {
      return 0;
    }

    // Selection failed: report it and keep looking at the remaining devices.
    std::cout << "Could not select CUDA device " << device << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
            << std::endl;
  return 1;
}
// Entry point: estimates pi by averaging estimate_pi over 30000 seeds with
// thrust::transform_reduce and prints the result with 3 significant digits.
//
// Always exits with status 0, including when choose_cuda_device() reports
// that no usable device was selected (nothing is computed in that case).
int main(int argc, char** argv)
{
  if (choose_cuda_device() != 0) {
    return 0;
  }

  // use 30K independent seeds
  constexpr int M = 30000;
  const thrust::counting_iterator<int> first_seed(0);
  const thrust::counting_iterator<int> last_seed(M);

  // Sum the per-seed estimates on the device, then average on the host.
  const float total = thrust::transform_reduce(
    first_seed, last_seed, estimate_pi(), 0.0f, thrust::plus<float>());
  const float estimate = total / M;

  std::cout << std::setprecision(3) << "pi is approximately " << estimate
            << std::endl;
  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment