Skip to content

Instantly share code, notes, and snippets.

View asi1024's full-sized avatar

Akifumi Imanishi asi1024

  • Preferred Networks Inc.
  • Tokyo, Japan
View GitHub Profile
sizes = [
128,
512,
2 * 1024,
8 * 1024,
32 * 1024,
128 * 1024,
512 * 1024,
2 * 1024 * 1024,
8 * 1024 * 1024,
import numpy
import cupy
from cupy import cutensor
import cupyx
assert cupy.cuda.cub_enabled
assert cupy.cuda.cutensor_enabled
shape_axes = [
import itertools
import numpy
import cupy
import cupyx
shapes = [(4096, 4096), (64, 64, 64, 64)]
import numpy
import cupy
import cupyx
shapes = [(10, 10), (4096, 4096)]
# cupy.add
import time
import cupy
import numpy
class _PerfCaseResult(object):
def __init__(self, name, ts):
assert ts.ndim == 2 and ts.shape[0] == 2 and ts.shape[1] > 0
self.name = name
cupy.sum (shape = ( 1, 16777216), axis=0): 13.978 us +/- 0.560 (min: 13.467 / max: 21.584) us 186.376 us +/- 0.719 (min: 177.312 / max: 193.696) us
cupy.sum (shape = ( 2, 8388608), axis=0): 17.943 us +/-76.250 (min: 13.828 / max: 1720.384) us 181.624 us +/-73.942 (min: 168.544 / max: 1832.480) us
cupy.sum (shape = ( 4, 4194304), axis=0): 14.479 us +/- 0.561 (min: 13.962 / max: 22.706) us 187.462 us +/- 0.741 (min: 177.504 / max: 195.456) us
cupy.sum (shape = ( 8, 2097152), axis=0): 14.375 us +/- 0.522 (min: 13.851 / max: 21.145) us 199.112 us +/- 0.744 (min: 188.768 / max: 205.216) us
cupy.sum (shape = ( 16, 1048576), axis=0): 14.507 us +/- 0.473 (min: 13.974 / max: 19.830) us 213.054 us +/- 0.664 (min: 203.136 / max: 218.048) us
cupy.sum (shape = ( 32, 524288), axis=0): 14.273 us +/- 0.384 (min: 13.735 / max: 18.258) us 142.936 us +/- 0.606 (min: 133.408 / max: 146.912) us
cupy
cupy.sum (shape = ( 1, 16777216), axis=0): 14.256 us +/- 0.705 (min: 13.582 / max: 24.281) us 188.497 us +/- 1.241 (min: 179.552 / max: 198.432) us
cupy.sum (shape = ( 2, 8388608), axis=0): 14.646 us +/- 0.579 (min: 14.041 / max: 20.914) us 178.568 us +/- 0.724 (min: 168.384 / max: 184.640) us
cupy.sum (shape = ( 4, 4194304), axis=0): 14.906 us +/- 0.518 (min: 14.368 / max: 20.649) us 187.801 us +/- 0.709 (min: 177.792 / max: 193.312) us
cupy.sum (shape = ( 8, 2097152), axis=0): 14.831 us +/- 0.871 (min: 14.219 / max: 25.082) us 199.579 us +/- 0.969 (min: 189.184 / max: 209.696) us
cupy.sum (shape = ( 16, 1048576), axis=0): 14.718 us +/- 0.541 (min: 14.216 / max: 22.016) us 213.216 us +/- 0.704 (min: 203.104 / max: 220.000) us
cupy.sum (shape = ( 32, 524288), axis=0): 14.761 us +/- 1.614 (min: 14.130 / max: 48.321) us 143.380 us +/- 1.984 (min: 133.248 / max: 182.848) us
cupy
#include <bits/stdc++.h>
using ld = long double;
using P = std::complex<ld>;
const ld eps = 1e-9;
// Dot product of a and b treated as 2-D vectors: the real part of conj(a) * b,
// i.e. a.x * b.x + a.y * b.y.
ld dot (P a, P b) {
  const P prod = conj(a) * b;
  return prod.real();
}
// 2-D cross product of a and b treated as vectors: the imaginary part of
// conj(a) * b, i.e. a.x * b.y - a.y * b.x.
ld cross (P a, P b) {
  const P prod = conj(a) * b;
  return prod.imag();
}
@asi1024
asi1024 / time.sh
Last active September 4, 2019 08:41
#!/bin/bash
# set -eu
# export CUDA_VISIBLE_DEVICES=-1
# Directory to operate on, taken from the first positional argument.
DIR="$1"
# Abbreviated object name of the commit HEAD currently points to.
HASH="$(git rev-parse --short HEAD)"
echo "DIR=${DIR}"
<class 'int'> numpy.ALLOW_THREADS
<class 'type'> numpy.AxisError
<class 'int'> numpy.BUFSIZE
<class 'int'> numpy.CLIP
<class 'type'> numpy.ComplexWarning
<class 'type'> numpy.DataSource
<class 'int'> numpy.ERR_CALL
<class 'int'> numpy.ERR_DEFAULT
<class 'int'> numpy.ERR_LOG
<class 'int'> numpy.ERR_PRINT