Skip to content

Instantly share code, notes, and snippets.

Avatar

Akifumi Imanishi asi1024

View GitHub Profile
View compare_cub_cutensor.py
import numpy
import cupy
from cupy import cutensor
import cupyx
assert cupy.cuda.cub_enabled
assert cupy.cuda.cutensor_enabled
shape_axes = [
View cupy_pr2822.py
import itertools
import numpy
import cupy
import cupyx
shapes = [(4096, 4096), (64, 64, 64, 64)]
View bench.py
import numpy
import cupy
import cupyx
shapes = [(10, 10), (4096, 4096)]
# cupy.add
View measure_reductions.py
import time
import cupy
import numpy
class _PerfCaseResult(object):
def __init__(self, name, ts):
assert ts.ndim == 2 and ts.shape[0] == 2 and ts.shape[1] > 0
self.name = name
View 3c2117dcf
cupy.sum (shape = ( 1, 16777216), axis=0): 13.978 us +/- 0.560 (min: 13.467 / max: 21.584) us 186.376 us +/- 0.719 (min: 177.312 / max: 193.696) us
cupy.sum (shape = ( 2, 8388608), axis=0): 17.943 us +/-76.250 (min: 13.828 / max: 1720.384) us 181.624 us +/-73.942 (min: 168.544 / max: 1832.480) us
cupy.sum (shape = ( 4, 4194304), axis=0): 14.479 us +/- 0.561 (min: 13.962 / max: 22.706) us 187.462 us +/- 0.741 (min: 177.504 / max: 195.456) us
cupy.sum (shape = ( 8, 2097152), axis=0): 14.375 us +/- 0.522 (min: 13.851 / max: 21.145) us 199.112 us +/- 0.744 (min: 188.768 / max: 205.216) us
cupy.sum (shape = ( 16, 1048576), axis=0): 14.507 us +/- 0.473 (min: 13.974 / max: 19.830) us 213.054 us +/- 0.664 (min: 203.136 / max: 218.048) us
cupy.sum (shape = ( 32, 524288), axis=0): 14.273 us +/- 0.384 (min: 13.735 / max: 18.258) us 142.936 us +/- 0.606 (min: 133.408 / max: 146.912) us
cupy
View 8d6317313
cupy.sum (shape = ( 1, 16777216), axis=0): 14.256 us +/- 0.705 (min: 13.582 / max: 24.281) us 188.497 us +/- 1.241 (min: 179.552 / max: 198.432) us
cupy.sum (shape = ( 2, 8388608), axis=0): 14.646 us +/- 0.579 (min: 14.041 / max: 20.914) us 178.568 us +/- 0.724 (min: 168.384 / max: 184.640) us
cupy.sum (shape = ( 4, 4194304), axis=0): 14.906 us +/- 0.518 (min: 14.368 / max: 20.649) us 187.801 us +/- 0.709 (min: 177.792 / max: 193.312) us
cupy.sum (shape = ( 8, 2097152), axis=0): 14.831 us +/- 0.871 (min: 14.219 / max: 25.082) us 199.579 us +/- 0.969 (min: 189.184 / max: 209.696) us
cupy.sum (shape = ( 16, 1048576), axis=0): 14.718 us +/- 0.541 (min: 14.216 / max: 22.016) us 213.216 us +/- 0.704 (min: 203.104 / max: 220.000) us
cupy.sum (shape = ( 32, 524288), axis=0): 14.761 us +/- 1.614 (min: 14.130 / max: 48.321) us 143.380 us +/- 1.984 (min: 133.248 / max: 182.848) us
cupy
View AsiaYokohama2019J.cpp
#include <bits/stdc++.h>
using ld = long double;
using P = std::complex<ld>;
const ld eps = 1e-9;
// Dot product of a and b viewed as 2D vectors: the real part of conj(a) * b.
ld dot (P a, P b) {
    const P prod = conj(a) * b;
    return prod.real();
}
// 2D cross product (z-component) of a and b: the imaginary part of conj(a) * b.
ld cross (P a, P b) {
    const P prod = conj(a) * b;
    return prod.imag();
}
View time.sh
#!/bin/bash
# set -eu
# export CUDA_VISIBLE_DEVICES=-1
DIR=$1
HASH=`git rev-parse --short HEAD`
echo "DIR=${DIR}"
View numpy_class.txt
<class 'int'> numpy.ALLOW_THREADS
<class 'type'> numpy.AxisError
<class 'int'> numpy.BUFSIZE
<class 'int'> numpy.CLIP
<class 'type'> numpy.ComplexWarning
<class 'type'> numpy.DataSource
<class 'int'> numpy.ERR_CALL
<class 'int'> numpy.ERR_DEFAULT
<class 'int'> numpy.ERR_LOG
<class 'int'> numpy.ERR_PRINT
View fallbacks_nanargmin.py
# Demo: call numpy.nanargmin through cupy.fallback_mode, with and without axis.
# import numpy
from cupy.fallback_mode import numpy
x = numpy.array([[float('nan'), -1], [float('nan'), 1]]) # cupy.array
result1 = numpy.nanargmin(x, axis=1) # Converts to numpy.array, calls numpy.argmin and returns cupy.array
result2 = numpy.nanargmin(x) # Returns primitive int
print(result1) # Output: [1 1]
print(result2) # Output: 1
You can’t perform that action at this time.