This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Super Dumb Kernel | |
__device__ __forceinline__ long calculateOffset( | |
long index, // index to calculate offset for | |
int ndim, // number of dimensions in Tensor | |
long sizes[8], // sizes for Tensor dims (either from the Tensor, or the size of the adv indexer at that dim) | |
long strides[8], // strides for Tensor | |
bool adv[8], // which Tensors are advanced indexers | |
long *advIndexTensors[8], // Adv Indexing Tensors | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [1]: import torch | |
In [2]: x = torch.arange(0, 64).view(8, 8) | |
In [3]: x | |
Out[3]: | |
0 1 2 3 4 5 6 7 | |
8 9 10 11 12 13 14 15 | |
16 17 18 19 20 21 22 23 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Case 1: arg is a non-tuple sequence object | |
if (PySequence_Check(arg) && !PyTuple_Check(arg)) return true; | |
#ifdef WITH_NUMPY | |
// Case 2: arg is an nd-array whose dtype is int64 or bool | |
if (PyArray_Check(arg) && (PyArray_TYPE((PyArrayObject*)arg) == NPY_INT64 || PyArray_TYPE((PyArrayObject*)arg) == NPY_BOOL)) return true; | |
#endif | |
// Case 3: arg is a tuple containing at least one sequence object, ndarray, or LongTensor | |
if (PyTuple_Check(arg)) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef TH_GENERIC_FILE | |
#define TH_GENERIC_FILE "generic/Tensor.cpp" | |
#else | |
#ifdef WITH_NUMPY | |
#ifdef TH_REAL_IS_DOUBLE | |
#define NUMPY_TYPE_ENUM NPY_DOUBLE | |
#endif | |
#ifdef TH_REAL_IS_FLOAT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bool THPUtils_checkAdvancedIndexing(PyObject *arg) { | |
// Checks whether the specified selection object should trigger advanced | |
// indexing | |
// Case 1: arg is a list or a range object | |
if (PyList_Check(arg) || PyRange_Check(arg)) return true; | |
#ifdef WITH_NUMPY | |
// Case 2: arg is an nd-array whose dtype is int64 or bool | |
if (PyArray_Check(arg) && (PyArray_TYPE((PyArrayObject*)arg) == NPY_INT64 || PyArray_TYPE((PyArrayObject*)arg) == NPY_BOOL)) return true; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef THC_REDUCE_APPLY_UTILS_INC | |
#define THC_REDUCE_APPLY_UTILS_INC | |
#include <algorithm> | |
#include <cuda.h> | |
#include <assert.h> | |
#include "THCGeneral.h" | |
#include "THCTensor.h" | |
#include "THCDeviceUtils.cuh" | |
#include "THCTensorInfo.cuh" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Testing average duration for 10 loops | |
Testing 1D Tensor of size 8: 6 usec (TH), 51 usec (THC) | |
Testing 1D Tensor of size 16: 1 usec (TH), 43 usec (THC) | |
Testing 1D Tensor of size 32: 1 usec (TH), 42 usec (THC) | |
Testing 1D Tensor of size 64: 1 usec (TH), 57 usec (THC) | |
Testing 1D Tensor of size 128: 3 usec (TH), 60 usec (THC) | |
Testing 1D Tensor of size 256: 4 usec (TH), 99 usec (THC) | |
Testing 1D Tensor of size 512: 10 usec (TH), 128 usec (THC) | |
Testing 1D Tensor of size 1024: 24 usec (TH), 130 usec (THC) | |
Testing 1D Tensor of size 2048: 52 usec (TH), 1723 usec (THC) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Block-wide reduction in shared memory helper; only threadIdx.x == 0 will | |
// return the reduced value | |
template <typename T, typename ReduceOp> | |
__device__ T reduceBlock(T* smem, | |
int numVals, | |
T threadVal, | |
ReduceOp reduceOp, | |
T init) { | |
if (numVals == 0) { | |
return init; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Block-wide reduction where each thread locally reduces N | |
// values before letting a single warp take over | |
template <typename T, typename ReduceOp, int N> | |
__device__ T reduceBlockN(T *smem, | |
int numVals, | |
ReduceOp reduceOp, | |
T init) { | |
T local = threadIdx.x < numVals ? smem[threadIdx.x] : init; | |
#pragma unroll |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Testing average duration for 10 loops | |
Testing 1D Tensor of size 8: 6 usec (TH), 49 usec (THC) | |
Testing 1D Tensor of size 16: 1 usec (TH), 39 usec (THC) | |
Testing 1D Tensor of size 32: 1 usec (TH), 41 usec (THC) | |
Testing 1D Tensor of size 64: 1 usec (TH), 51 usec (THC) | |
Testing 1D Tensor of size 128: 2 usec (TH), 53 usec (THC) | |
Testing 1D Tensor of size 256: 6 usec (TH), 83 usec (THC) | |
Testing 1D Tensor of size 512: 10 usec (TH), 108 usec (THC) | |
Testing 1D Tensor of size 1024: 23 usec (TH), 109 usec (THC) | |
Testing 1D Tensor of size 2048: 48 usec (TH), 1370 usec (THC) |