
@takagi
takagi / test.cu
Created January 30, 2023 01:12
small kernels
#include <cassert>
#include <iostream>
#include <thread>
__global__ void vecAddOne(float *a, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id < n)
        a[id] += 1.0f;
}
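As a quick reference for what the kernel computes (each thread with `id < n` increments one element, so the whole launch is an element-wise +1), here is an equivalent NumPy sketch; the function name is hypothetical and not part of the gist:

```python
import numpy as np

# Host-side reference for vecAddOne: the CUDA grid covers indices
# 0..n-1, and every in-range thread adds 1.0f to its element.
def vec_add_one_reference(a: np.ndarray) -> np.ndarray:
    return a + np.float32(1.0)

a = np.zeros(8, dtype=np.float32)
out = vec_add_one_reference(a)
```

Such a reference is handy for asserting the device result after copying it back to the host.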
takagi / diff.patch
Created December 21, 2022 01:11
Eliminate D2H sync on ascending flag
diff --git a/cupyx/scipy/interpolate/_interpolate.py b/cupyx/scipy/interpolate/_interpolate.py
index bab74671e..ec3c5bcac 100644
--- a/cupyx/scipy/interpolate/_interpolate.py
+++ b/cupyx/scipy/interpolate/_interpolate.py
@@ -22,7 +22,7 @@ INTERVAL_KERNEL = r'''
extern "C" {
__global__ void find_breakpoint_position(
const double* breakpoints, const double* x, long long* out,
- bool extrapolate, int total_x, int total_breakpoints, bool asc) {
+ bool extrapolate, int total_x, int total_breakpoints, const bool* pasc) {
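The patch replaces the host-side `bool asc` argument with a device pointer `const bool* pasc`, so the ascending flag never has to be copied to the host before the kernel launch. For reference, the interval lookup the kernel performs can be sketched in NumPy; the names and the simplified descending handling below are assumptions for illustration, not CuPy's implementation:

```python
import numpy as np

# Sketch of find_breakpoint_position: for each query x[i], find j such
# that breakpoints[j] <= x[i] < breakpoints[j + 1] (ascending case).
def find_breakpoint_position(breakpoints, x):
    # In the patched kernel this flag is read from device memory (pasc),
    # avoiding a device-to-host synchronization before launch.
    asc = breakpoints[-1] >= breakpoints[0]
    bp = breakpoints if asc else breakpoints[::-1]
    idx = np.searchsorted(bp, x, side='right') - 1
    return np.clip(idx, 0, len(breakpoints) - 2)

bp = np.array([0.0, 1.0, 2.0, 3.0])
x = np.array([0.5, 1.5, 2.5])
pos = find_breakpoint_position(bp, x)
```

The point of the change is scheduling: a host `bool` computed from device data forces the CPU to wait for the GPU, while a pointer read inside the kernel keeps the whole pipeline asynchronous.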
takagi / README.md
Last active February 1, 2021 23:56

A tool to generate files for C extensions of CUDA-related libraries for CuPy. It currently covers cuBLAS, cuSPARSE, and cuSOLVER, which have too many APIs to write their extensions by hand.

Usage

Generate files for all of the libraries
./gen.sh
takagi / out
Created November 29, 2020 04:30
This file has been truncated.
{
  "_nodetype": "FileAST",
  "coord": null,
  "ext": [
    {
      "_nodetype": "Pragma",
      "coord": "../utils/fake_libc_include/_fake_typedefs.h:56:9",
      "string": "GCC diagnostic ignored \"-Wunused-function\""
    },
    {
takagi / nccl_broadcast.py
Created July 22, 2019 09:10
Test code for cupy.cuda.nccl.NcclCommunicator's broadcast method.
import multiprocessing
import cupy
from cupy import cuda
from cupy.cuda import nccl
from cupy import testing
def f(n_devices, device, comm_id, rank):
    device.use()
    comm = nccl.NcclCommunicator(n_devices, comm_id, rank)
$ CHAINER_DTYPE=float16 python train_ptb.py -d 0 -e 10
#vocab = 10000
epoch iteration perplexity val_perplexity
0 500 326440
0 1000 301342
1 1500 298940 inf
1 2000 334369
1 2500 334369
2 3000 306202 inf
2 3500 339762
$ CHAINER_DTYPE=float16 python postagging.py -d 0
[nltk_data] Downloading package brown to /home/ext-
[nltk_data] mtakagi/nltk_data...
[nltk_data] Package brown is already up-to-date!
# of sentences: 57340
# of words: 56057
# of pos: 472
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time
0 244.875 18.3736
0 373.75 34.9924
takagi / memnn_fp16
Last active June 14, 2019 02:14
Comparison of Chainer's memnn example in FP32 mode and in FP16 mode
$ CHAINER_DTYPE=float16 python train_memnn.py tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_train.txt tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt -d 0
Training data: tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_train.txt: 2000
Test data: tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt: 200
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy
1 nan nan 0.0017004 0
2 nan nan 0 0
3 nan nan 0 0
4 nan nan 0 0
5 nan nan 0 0
6 nan nan 0 0
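The immediate nan losses (and the inf perplexities above) are consistent with float16's narrow range: values overflow to inf just past 65504, and inf - inf then propagates nan. A small NumPy illustration of that failure mode (an assumption about the cause, not taken from the gist):

```python
import numpy as np

# float16 overflows to inf just above its largest finite value, 65504;
# subtracting two such infinities yields nan, which then poisons the loss.
with np.errstate(over='ignore', invalid='ignore'):
    largest = np.float16(65504)            # largest finite float16
    over = largest * np.float16(2)         # overflows to inf
    poisoned = over - over                 # inf - inf -> nan
```

This is why Chainer's FP16 runs typically need loss scaling or mixed-precision accumulation rather than a plain dtype switch.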
takagi / mnist_fp16
Created June 10, 2019 08:41
Comparison of Chainer's mnist example in FP32 mode and in FP16 mode
$ CHAINER_DTYPE=float16 python train_mnist.py -d 0
Device: @cupy:0
# unit: 1000
# Minibatch-size: 100
# epoch: 20
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time
1 nan nan 0.0994271 0.0980225 3.91818
2 nan nan 0.0997917 0.0980225 6.22553
3 nan nan 0.0995833 0.0980225 8.72424
takagi / dcgan_fp16
Last active June 10, 2019 07:25
Comparison of Chainer's dcgan example in FP32 mode and in FP16 mode
$ CHAINER_DTYPE=float16 python train_dcgan.py -d 0
Device: @cupy:0
# Minibatch-size: 50
# n_hidden: 100
# epoch: 1000
epoch iteration gen/loss dis/loss
0 100 nan nan
0 200 nan nan
0 300 nan nan