This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// | |
// | |
// | |
static | |
void | |
cuda_assert(const cudaError_t code, const char* const file, const int line, const bool abort) | |
{ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- compile-command: "nvcc -m 32 -arch sm_35 -Xptxas=-v,-abi=no -cubin short4.cu"; -*- | |
#include <stdint.h> | |
typedef uint32_t u32; | |
typedef uint64_t u64; | |
typedef union | |
{ | |
short4 s16v4; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- compile-command: "nvcc -m 32 -arch sm_35 -Xptxas=-v,-abi=no -cubin sha256.cu"; -*- | |
// | |
// Copyright 2013 Allan MacKinnon <allanmac@alum.mit.edu> | |
// | |
// Permission is hereby granted, free of charge, to any person obtaining | |
// a copy of this software and associated documentation files (the | |
// "Software"), to deal in the Software without restriction, including | |
// without limitation the rights to use, copy, modify, merge, publish, | |
// distribute, sublicense, and/or sell copies of the Software, and to |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=============================================================================================== | |
Load three arrays (x, y and z) in SoA order, repack them and store them in AoS order. | |
Strategy: each warp permutes its load lane with: | |
(rowNum + (laneId() * 3)) & 31 | |
This will convert SoA into AoS but with x/y/z staggered across rows of registers. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
// | |
// | |
// | |
#define PXL_WARP_SCAN_SHFL(_op,_vT,_opT,_regC,_exc,_exc0,_excP) | |
//////////////////////////////////////////////////////////////////////// | |
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <cuda.h> | |
int main(int argc, char** argv) | |
{ | |
cuInit(0); | |
int count; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// | |
// | |
#define KERNEL_QUALIFIERS extern "C" __global__ | |
// | |
// | |
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// | |
// | |
// | |
#define WARP_SIZE 32 | |
#define VOLATILE volatile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define KERNEL_QUALIFIERS __global__ | |
#define VOLATILE volatile | |
#define DEVICE_INTRINSIC_QUALIFIERS __device__ __forceinline__ | |
#define DEVICE_STATIC_FUNCTION_QUALIFIERS static DEVICE_FUNCTION_QUALIFIERS | |
#define DEVICE_STATIC_INTRINSIC_QUALIFIERS static DEVICE_INTRINSIC_QUALIFIERS | |
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// | |
// | |
#define KERNEL_QUALIFIERS extern "C" __global__ | |
// | |
// | |
// |