The first round of reviews of submissions to technical conferences should be double-blind (i.e., reviewers don't know who the submitters are, and submitters don't know who the reviewers are).
Non-double-blind submissions:
- Contribute to Hero Culture: Hero culture is the tendency within technical
#include <cassert> | |
int current_device() | |
{ | |
int device = 0; | |
cudaError_t const error = cudaGetDevice(&device); | |
assert(cudaSuccess == error); | |
return device; | |
} |
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill. All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
// This is how we run libc++ tests on the GPU without modification. | |
// We force include this header into each test with `-include`. | |
__host__ __device__ | |
int fake_main(int, char**); | |
__global__ | |
void fake_main_kernel(int * ret) | |
{ | |
*ret = fake_main(0, NULL); |
// I have this code: | |
struct thread_group { | |
private: | |
std::vector<std::thread> members; | |
public: | |
thread_group(thread_group const&) = delete; | |
thread_group& operator=(thread_group const&) = delete; |
// Sort the sequence of integers by the Nth bit. | |
template <typename ExecutionPolicy, | |
std::ranges::random_access_range InputRange, random_access_iterator OutputIt> | |
requires std::integral<typename std::ranges_value_t<InputRange>> | |
unique_future<std::uint64_t> async_radix_sort_pass(ExecutionPolicy&& exec, | |
InputRange input, OutputRange output, | |
std::uint64_t bit) | |
{ | |
auto const elements = std::distance(input); |
template <typename InputIt, typename OutputIt> | |
OutputIt | |
radix_sort_split(InputIt first, InputIt last, OutputIt output, std::uint64_t bit) | |
{ | |
std::vector<std::uint64_t> e(std::distance(first, last)); | |
// Count 0s. | |
std::transform(first, last, e.begin(), | |
[=] (auto t) { return !(t & (1 << bit)); }); |
template <typename InputIterator, typename OutputIterator, typename T, typename BinaryOp> | |
OutputIterator exclusive_scan(InputIterator first, InputIterator last, | |
OutputIterator result, T init, BinaryOp op) | |
{ | |
if (first != last) { | |
T saved = init; | |
do { | |
init = op(init, *first); | |
*result = saved; | |
saved = init; |
template <typename InputIt, typename OutputIt, typename BinaryOp, typename T, typename Size> | |
unique_future<OutputIt> | |
async_inclusive_scan(InputIt first, InputIt last, OutputIt output,BinaryOp op, T init, Size chunk_size) | |
{ | |
Size const elements = std::distance(first, last); | |
Size const chunks = (1 + ((elements - 1) / chunk_size)); // Round up. | |
std::vector<unique_future<T>> sweep; | |
sweep.reserve(chunks); |
template <typename InputIt, typename OutputIt, typename BinaryOp, typename T, typename Size> | |
unique_future<OutputIt> | |
async_inclusive_scan(InputIt first, InputIt last, OutputIt output,BinaryOp op, T init, Size chunk_size) | |
{ | |
Size const elements = std::distance(first, last); | |
Size const chunks = (1 + ((elements - 1) / chunk_size)); // Round up. | |
std::vector<unique_future<T>> sweep; | |
sweep.reserve(chunks); |