Skip to content

Instantly share code, notes, and snippets.

@shackenberg
Last active December 20, 2015 16:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shackenberg/6164487 to your computer and use it in GitHub Desktop.
Save shackenberg/6164487 to your computer and use it in GitHub Desktop.
>>> theano.test()
Theano version 0.6.0rc3
theano is installed in /usr/local/lib/python2.7/dist-packages/theano
NumPy version 1.6.1
NumPy is installed in /usr/lib/python2.7/dist-packages/numpy
Python version 2.7.3 (default, Aug 1 2012, 05:14:39) [GCC 4.6.3]
nose version 1.1.2
Using gpu device 0: GeForce GTX 670
.........................................K.............../usr/local/lib/python2.7/dist-packages/theano/compile/tests/test_inplace_opt_for_value.py:170: UserWarning: theano modules are deprecated and will be removed in release 0.7
super(ExampleRNN, self).__init__()
...............................................................................................WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((1,), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.gof.tests.test_compute_test_value.IncOneC object at 0x10317410>, ((Scalar(int32), ((-1, 0), False)),), (1, (False,)))))])
................................................................................................................................................................................................................................................................................................................................................................./usr/lib/python2.7/dist-packages/scipy/signal/signaltools.py:408: ComplexWarning: Casting complex values to real discards the imaginary part
return sigtools._convolve2d(in1,in2,1,val,bval,fillvalue)
..............................................................................SS...SSSSS...............1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 void free_generator(void *_gen)
14 {
15 curandGenerator_t * gen = (curandGenerator_t*)_gen;
16 curandStatus_t err = curandDestroyGenerator(*gen);
17 if (err != CURAND_STATUS_SUCCESS)
18 {
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator",
20 (int)err);
21 }
22 free(_gen);
23 }
24
25
26 struct __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964 {
27 PyObject* __ERROR;
28
29 PyObject* storage_V3;
30 PyObject* storage_V5;
31 PyObject* storage_V7;
32 PyObject* storage_V1;
33
34
35 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964() {}
36 ~__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(void) {
37 cleanup();
38 }
39
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
41 Py_XINCREF(storage_V3);
42 Py_XINCREF(storage_V5);
43 Py_XINCREF(storage_V7);
44 Py_XINCREF(storage_V1);
45 this->storage_V3 = storage_V3;
46 this->storage_V5 = storage_V5;
47 this->storage_V7 = storage_V7;
48 this->storage_V1 = storage_V1;
49 int __failure = 0;
50
51 {
52
53 {
54
55 {
56
57 {
58
59 this->__ERROR = __ERROR;
60 return 0;
61 __label_7:
62
63 double __DUMMY_7;
64
65 }
66 __label_5:
67
68 double __DUMMY_5;
69
70 }
71 __label_3:
72
73 double __DUMMY_3;
74
75 }
76 __label_1:
77
78 double __DUMMY_1;
79
80 }
81
82 Py_XDECREF(this->storage_V3);
83 Py_XDECREF(this->storage_V5);
84 Py_XDECREF(this->storage_V7);
85 Py_XDECREF(this->storage_V1);
86
87 if (__failure) {
88 // When there is a failure, this code puts the exception
89 // in __ERROR.
90 PyObject* err_type = NULL;
91 PyObject* err_msg = NULL;
92 PyObject* err_traceback = NULL;
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
100 PyList_SET_ITEM(__ERROR, 0, err_type);
101 PyList_SET_ITEM(__ERROR, 1, err_msg);
102 PyList_SET_ITEM(__ERROR, 2, err_traceback);
103 {Py_XDECREF(old_err_type);}
104 {Py_XDECREF(old_err_msg);}
105 {Py_XDECREF(old_err_traceback);}
106 }
107 // The failure code is returned to index what code block failed.
108 return __failure;
109
110 }
111 void cleanup(void) {
112 __label_1:
113
114 double __DUMMY_1;
115 __label_3:
116
117 double __DUMMY_3;
118 __label_5:
119
120 double __DUMMY_5;
121 __label_7:
122
123 double __DUMMY_7;
124
125 Py_XDECREF(this->storage_V3);
126 Py_XDECREF(this->storage_V5);
127 Py_XDECREF(this->storage_V7);
128 Py_XDECREF(this->storage_V1);
129 }
130 int run(void) {
131 int __failure = 0;
132
133 PyObject* py_V1;
134 CudaNdarray * V1;
135 PyObject* py_V3;
136
137 PyObject* V3;
138
139 PyObject* py_V5;
140
141 PyArrayObject* V5;
142 int type_num_V5;
143 typedef npy_int32 dtype_V5;
144
145 PyObject* py_V7;
146
147 PyObject* V7;
148
149 {
150
151 py_V1 = PyList_GET_ITEM(storage_V1, 0);
152 {Py_XINCREF(py_V1);}
153
154 if (py_V1 == Py_None)
155 {
156 V1 = NULL;
157 }
158 else
159 {
160
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
162 // and one ref from the local scope.
163
164 if (CudaNdarray_Check(py_V1))
165 {
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
167 V1 = (CudaNdarray*)py_V1;
168 //std::cerr << "c_extract " << V1 << '\n';
169 if (V1->nd != 2)
170 {
171 PyErr_Format(PyExc_RuntimeError,
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
173 V1->nd);
174 V1 = NULL;
175 {__failure = 2; goto __label_2;};
176 }
177 //std::cerr << "c_extract " << V1 << " nd check passed\n";
178
179
180 assert(V1);
181 Py_INCREF(py_V1);
182 }
183 else if (py_V1 == Py_None)
184 {
185 PyErr_SetString(PyExc_TypeError,
186 "expected a CudaNdarray, not None");
187 V1 = NULL;
188 {__failure = 2; goto __label_2;};
189 }
190 else
191 {
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
194 V1 = NULL;
195 {__failure = 2; goto __label_2;};
196 }
197 //std::cerr << "c_extract done " << V1 << '\n';
198
199
200 }
201
202 {
203
204 py_V3 = PyList_GET_ITEM(storage_V3, 0);
205 {Py_XINCREF(py_V3);}
206
207 Py_INCREF(py_V3);
208 V3 = py_V3;
209
210 {
211
212 py_V5 = PyList_GET_ITEM(storage_V5, 0);
213 {Py_XINCREF(py_V5);}
214
215 V5 = NULL;
216 if (py_V5 == Py_None) {
217 // We can either fail here or set V5 to NULL and rely on Ops
218 // using tensors to handle the NULL case, but if they fail to do so
219 // they'll end up with nasty segfaults, so this is public service.
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
221 {__failure = 6; goto __label_6;}
222 }
223 if (!PyArray_Check(py_V5)) {
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
225 {__failure = 6; goto __label_6;}
226 }
227 // We expect NPY_INT32
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
229 if (!PyArray_ISALIGNED(py_V5)) {
230 PyErr_Format(PyExc_NotImplementedError,
231 "expected an aligned array of type %d "
232 "(NPY_INT32), got non-aligned array of type %d"
233 " with %d dimensions, with 3 last dims %d, %d, %d"
234 " and 3 last strides %d %d, %d.",
235 NPY_INT32, type_num_V5,
236 PyArray_NDIM(py_V5),
237 PyArray_NDIM(py_V5) >= 3 ?
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
239 PyArray_NDIM(py_V5) >= 2 ?
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
241 PyArray_NDIM(py_V5) >= 1 ?
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
243 PyArray_NDIM(py_V5) >= 2 ?
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
245 PyArray_NDIM(py_V5) >= 3 ?
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
247 PyArray_NDIM(py_V5) >= 1 ?
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
249 );
250 {__failure = 6; goto __label_6;}
251 }
252 // This is a TypeError to be consistent with DEBUG_MODE
253 // Note: DEBUG_MODE also tells the name of the container
254 if (type_num_V5 != NPY_INT32) {
255 PyErr_Format(PyExc_TypeError,
256 "expected type_num %d (NPY_INT32) got %d",
257 NPY_INT32, type_num_V5);
258 {__failure = 6; goto __label_6;}
259 }
260 V5 = (PyArrayObject*)(py_V5);
261 Py_XINCREF(V5);
262
263 {
264
265 py_V7 = Py_None;
266 {Py_XINCREF(py_V7);}
267
268 V7 = NULL;
269
270 {
271
272 //////// <code generated by CURAND_Base>
273
274 int odims[2];
275 int n_elements = 1;
276 int must_alloc_sample = ((NULL == V1)
277 || !CudaNdarray_Check(py_V1)
278 || (V1->nd != 2));
279
280 if (V5->nd != 1)
281 {
282 PyErr_SetString(PyExc_ValueError, "size must be vector");
283 {__failure = 9; goto __label_9;}
284 }
285 if (V5->dimensions[0] != 2)
286 {
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
288 2, V5->dimensions[0]);
289 {__failure = 9; goto __label_9;}
290 }
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
292 {
293 PyErr_SetString(PyExc_ValueError, "size must be int32");
294 {__failure = 9; goto __label_9;}
295 }
296 for (int i = 0; i < 2; ++i)
297 {
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
299 n_elements *= odims[i];
300 must_alloc_sample = (must_alloc_sample
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
302 }
303 if (must_alloc_sample)
304 {
305 Py_XDECREF(V1);
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
307 if(!V1)
308 {
309 {__failure = 9; goto __label_9;};
310 }
311 }
312 if (!PyCObject_Check(V3))
313 {
314 // allocate a new generator for o_generator
315 Py_XDECREF(V7);
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
317 assert(gen);
318 if (CURAND_STATUS_SUCCESS !=
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
321 {__failure = 9; goto __label_9;};
322 }
323 if (CURAND_STATUS_SUCCESS !=
324 curandSetPseudoRandomGeneratorSeed(*gen,234))
325 {
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
327 {__failure = 9; goto __label_9;};
328 }
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
330 assert (V3 == Py_False);
331 }
332 else if (1)
333 {
334 // use i_generator for o_generator
335 Py_XDECREF(V7);
336 Py_INCREF(V3);
337 V7 = V3;
338 }
339 else
340 {
341 // copy i_generator for o_generator
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
343 {__failure = 9; goto __label_9;};
344 }
345 {
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
347 curandStatus_t err = curandGenerateUniform(*gen,
348 CudaNdarray_DEV_DATA(V1),
349 n_elements);
350
351
352 if (err != CURAND_STATUS_SUCCESS)
353 {
354 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
355 {__failure = 9; goto __label_9;};
356 }
357 cudaThreadSynchronize();
358 }
359 //////// </ code generated by CURAND_Base>
360 __label_9:
361
362 double __DUMMY_9;
363
364 }
365 __label_8:
366
367 if (!__failure) {
368
369 assert(py_V7->ob_refcnt > 1);
370 Py_DECREF(py_V7);
371 py_V7 = V7 ? V7 : Py_None;
372 Py_INCREF(py_V7);
373
374 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
375 {Py_XINCREF(py_V7);}
376 PyList_SET_ITEM(storage_V7, 0, py_V7);
377 {Py_XDECREF(old);}
378 }
379
380 Py_XDECREF(V7);
381
382 {Py_XDECREF(py_V7);}
383
384 double __DUMMY_8;
385
386 }
387 __label_6:
388
389 if (V5) {
390 Py_XDECREF(V5);
391 }
392
393 {Py_XDECREF(py_V5);}
394
395 double __DUMMY_6;
396
397 }
398 __label_4:
399
400 Py_XDECREF(V3);
401
402 {Py_XDECREF(py_V3);}
403
404 double __DUMMY_4;
405
406 }
407 __label_2:
408
409 if (!__failure) {
410
411 //std::cerr << "sync\n";
412 if (NULL == V1) {
413 // failure: sync None to storage
414 Py_XDECREF(py_V1);
415 py_V1 = Py_None;
416 Py_INCREF(py_V1);
417 }
418 else
419 {
420 if (py_V1 != (PyObject*)V1)
421 {
422 Py_XDECREF(py_V1);
423 py_V1 = (PyObject*)V1;
424 Py_INCREF(py_V1);
425 }
426 assert(py_V1->ob_refcnt);
427 }
428
429 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
430 {Py_XINCREF(py_V1);}
431 PyList_SET_ITEM(storage_V1, 0, py_V1);
432 {Py_XDECREF(old);}
433 }
434
435 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
436 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
437 if (V1)
438 {
439 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
440 Py_XDECREF(V1);
441 }
442 //std::cerr << "cleanup done" << py_V1 << "\n";
443
444 {Py_XDECREF(py_V1);}
445
446 double __DUMMY_2;
447
448 }
449
450
451 if (__failure) {
452 // When there is a failure, this code puts the exception
453 // in __ERROR.
454 PyObject* err_type = NULL;
455 PyObject* err_msg = NULL;
456 PyObject* err_traceback = NULL;
457 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
458 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
459 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
460 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
461 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
462 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
463 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
464 PyList_SET_ITEM(__ERROR, 0, err_type);
465 PyList_SET_ITEM(__ERROR, 1, err_msg);
466 PyList_SET_ITEM(__ERROR, 2, err_traceback);
467 {Py_XDECREF(old_err_type);}
468 {Py_XDECREF(old_err_msg);}
469 {Py_XDECREF(old_err_traceback);}
470 }
471 // The failure code is returned to index what code block failed.
472 return __failure;
473
474 }
475 };
476
477
478 int __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor(__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* self) {
479 return self->run();
480 }
481
482 void __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor(void* executor, void* self) {
483 //printf("doing cleanup\n");
484 //fflush(stdout);
485 // ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self)->cleanup();
486 // free(self);
487 delete ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self);
488 //printf("done cleanup\n");
489 //fflush(stdout);
490 }
491
492 //////////////////////
493 //// Functions
494 //////////////////////
495 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
496 assert(PyTuple_Check(argtuple));
497 if (5 != PyTuple_Size(argtuple)){
498 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
499 return NULL;
500 }
501 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* struct_ptr = new __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964();
502 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
503 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor), struct_ptr, __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor);
504 return thunk; }
505
506 //////////////////////
507 //// Module init
508 //////////////////////
509 static PyMethodDef MyMethods[] = {
510 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
511 {NULL, NULL, 0, NULL}
512 };
513 PyMODINIT_FUNC init7a7573cd1a887cbf5d8946c487571964(void){
514 import_array();
515 (void) Py_InitModule("7a7573cd1a887cbf5d8946c487571964", MyMethods);
516 }
517
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 void free_generator(void *_gen)
14 {
15 curandGenerator_t * gen = (curandGenerator_t*)_gen;
16 curandStatus_t err = curandDestroyGenerator(*gen);
17 if (err != CURAND_STATUS_SUCCESS)
18 {
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator",
20 (int)err);
21 }
22 free(_gen);
23 }
24
25
26 struct __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964 {
27 PyObject* __ERROR;
28
29 PyObject* storage_V3;
30 PyObject* storage_V5;
31 PyObject* storage_V7;
32 PyObject* storage_V1;
33
34
35 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964() {}
36 ~__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(void) {
37 cleanup();
38 }
39
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
41 Py_XINCREF(storage_V3);
42 Py_XINCREF(storage_V5);
43 Py_XINCREF(storage_V7);
44 Py_XINCREF(storage_V1);
45 this->storage_V3 = storage_V3;
46 this->storage_V5 = storage_V5;
47 this->storage_V7 = storage_V7;
48 this->storage_V1 = storage_V1;
49 int __failure = 0;
50
51 {
52
53 {
54
55 {
56
57 {
58
59 this->__ERROR = __ERROR;
60 return 0;
61 __label_7:
62
63 double __DUMMY_7;
64
65 }
66 __label_5:
67
68 double __DUMMY_5;
69
70 }
71 __label_3:
72
73 double __DUMMY_3;
74
75 }
76 __label_1:
77
78 double __DUMMY_1;
79
80 }
81
82 Py_XDECREF(this->storage_V3);
83 Py_XDECREF(this->storage_V5);
84 Py_XDECREF(this->storage_V7);
85 Py_XDECREF(this->storage_V1);
86
87 if (__failure) {
88 // When there is a failure, this code puts the exception
89 // in __ERROR.
90 PyObject* err_type = NULL;
91 PyObject* err_msg = NULL;
92 PyObject* err_traceback = NULL;
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
100 PyList_SET_ITEM(__ERROR, 0, err_type);
101 PyList_SET_ITEM(__ERROR, 1, err_msg);
102 PyList_SET_ITEM(__ERROR, 2, err_traceback);
103 {Py_XDECREF(old_err_type);}
104 {Py_XDECREF(old_err_msg);}
105 {Py_XDECREF(old_err_traceback);}
106 }
107 // The failure code is returned to index what code block failed.
108 return __failure;
109
110 }
111 void cleanup(void) {
112 __label_1:
113
114 double __DUMMY_1;
115 __label_3:
116
117 double __DUMMY_3;
118 __label_5:
119
120 double __DUMMY_5;
121 __label_7:
122
123 double __DUMMY_7;
124
125 Py_XDECREF(this->storage_V3);
126 Py_XDECREF(this->storage_V5);
127 Py_XDECREF(this->storage_V7);
128 Py_XDECREF(this->storage_V1);
129 }
130 int run(void) {
131 int __failure = 0;
132
133 PyObject* py_V1;
134 CudaNdarray * V1;
135 PyObject* py_V3;
136
137 PyObject* V3;
138
139 PyObject* py_V5;
140
141 PyArrayObject* V5;
142 int type_num_V5;
143 typedef npy_int32 dtype_V5;
144
145 PyObject* py_V7;
146
147 PyObject* V7;
148
149 {
150
151 py_V1 = PyList_GET_ITEM(storage_V1, 0);
152 {Py_XINCREF(py_V1);}
153
154 if (py_V1 == Py_None)
155 {
156 V1 = NULL;
157 }
158 else
159 {
160
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
162 // and one ref from the local scope.
163
164 if (CudaNdarray_Check(py_V1))
165 {
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
167 V1 = (CudaNdarray*)py_V1;
168 //std::cerr << "c_extract " << V1 << '\n';
169 if (V1->nd != 2)
170 {
171 PyErr_Format(PyExc_RuntimeError,
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
173 V1->nd);
174 V1 = NULL;
175 {__failure = 2; goto __label_2;};
176 }
177 //std::cerr << "c_extract " << V1 << " nd check passed\n";
178
179
180 assert(V1);
181 Py_INCREF(py_V1);
182 }
183 else if (py_V1 == Py_None)
184 {
185 PyErr_SetString(PyExc_TypeError,
186 "expected a CudaNdarray, not None");
187 V1 = NULL;
188 {__failure = 2; goto __label_2;};
189 }
190 else
191 {
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
194 V1 = NULL;
195 {__failure = 2; goto __label_2;};
196 }
197 //std::cerr << "c_extract done " << V1 << '\n';
198
199
200 }
201
202 {
203
204 py_V3 = PyList_GET_ITEM(storage_V3, 0);
205 {Py_XINCREF(py_V3);}
206
207 Py_INCREF(py_V3);
208 V3 = py_V3;
209
210 {
211
212 py_V5 = PyList_GET_ITEM(storage_V5, 0);
213 {Py_XINCREF(py_V5);}
214
215 V5 = NULL;
216 if (py_V5 == Py_None) {
217 // We can either fail here or set V5 to NULL and rely on Ops
218 // using tensors to handle the NULL case, but if they fail to do so
219 // they'll end up with nasty segfaults, so this is public service.
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
221 {__failure = 6; goto __label_6;}
222 }
223 if (!PyArray_Check(py_V5)) {
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
225 {__failure = 6; goto __label_6;}
226 }
227 // We expect NPY_INT32
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
229 if (!PyArray_ISALIGNED(py_V5)) {
230 PyErr_Format(PyExc_NotImplementedError,
231 "expected an aligned array of type %d "
232 "(NPY_INT32), got non-aligned array of type %d"
233 " with %d dimensions, with 3 last dims %d, %d, %d"
234 " and 3 last strides %d %d, %d.",
235 NPY_INT32, type_num_V5,
236 PyArray_NDIM(py_V5),
237 PyArray_NDIM(py_V5) >= 3 ?
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
239 PyArray_NDIM(py_V5) >= 2 ?
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
241 PyArray_NDIM(py_V5) >= 1 ?
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
243 PyArray_NDIM(py_V5) >= 2 ?
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
245 PyArray_NDIM(py_V5) >= 3 ?
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
247 PyArray_NDIM(py_V5) >= 1 ?
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
249 );
250 {__failure = 6; goto __label_6;}
251 }
252 // This is a TypeError to be consistent with DEBUG_MODE
253 // Note: DEBUG_MODE also tells the name of the container
254 if (type_num_V5 != NPY_INT32) {
255 PyErr_Format(PyExc_TypeError,
256 "expected type_num %d (NPY_INT32) got %d",
257 NPY_INT32, type_num_V5);
258 {__failure = 6; goto __label_6;}
259 }
260 V5 = (PyArrayObject*)(py_V5);
261 Py_XINCREF(V5);
262
263 {
264
265 py_V7 = Py_None;
266 {Py_XINCREF(py_V7);}
267
268 V7 = NULL;
269
270 {
271
272 //////// <code generated by CURAND_Base>
273
274 int odims[2];
275 int n_elements = 1;
276 int must_alloc_sample = ((NULL == V1)
277 || !CudaNdarray_Check(py_V1)
278 || (V1->nd != 2));
279
280 if (V5->nd != 1)
281 {
282 PyErr_SetString(PyExc_ValueError, "size must be vector");
283 {__failure = 9; goto __label_9;}
284 }
285 if (V5->dimensions[0] != 2)
286 {
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
288 2, V5->dimensions[0]);
289 {__failure = 9; goto __label_9;}
290 }
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
292 {
293 PyErr_SetString(PyExc_ValueError, "size must be int32");
294 {__failure = 9; goto __label_9;}
295 }
296 for (int i = 0; i < 2; ++i)
297 {
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
299 n_elements *= odims[i];
300 must_alloc_sample = (must_alloc_sample
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
302 }
303 if (must_alloc_sample)
304 {
305 Py_XDECREF(V1);
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
307 if(!V1)
308 {
309 {__failure = 9; goto __label_9;};
310 }
311 }
312 if (!PyCObject_Check(V3))
313 {
314 // allocate a new generator for o_generator
315 Py_XDECREF(V7);
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
317 assert(gen);
318 if (CURAND_STATUS_SUCCESS !=
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
321 {__failure = 9; goto __label_9;};
322 }
323 if (CURAND_STATUS_SUCCESS !=
324 curandSetPseudoRandomGeneratorSeed(*gen,234))
325 {
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
327 {__failure = 9; goto __label_9;};
328 }
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
330 assert (V3 == Py_False);
331 }
332 else if (1)
333 {
334 // use i_generator for o_generator
335 Py_XDECREF(V7);
336 Py_INCREF(V3);
337 V7 = V3;
338 }
339 else
340 {
341 // copy i_generator for o_generator
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
343 {__failure = 9; goto __label_9;};
344 }
345 {
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
347 curandStatus_t err = curandGenerateUniform(*gen,
348 CudaNdarray_DEV_DATA(V1),
349 n_elements);
350
351
352 if (err != CURAND_STATUS_SUCCESS)
353 {
354 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
355 {__failure = 9; goto __label_9;};
356 }
357 cudaThreadSynchronize();
358 }
359 //////// </ code generated by CURAND_Base>
360 __label_9:
361
362 double __DUMMY_9;
363
364 }
365 __label_8:
366
367 if (!__failure) {
368
369 assert(py_V7->ob_refcnt > 1);
370 Py_DECREF(py_V7);
371 py_V7 = V7 ? V7 : Py_None;
372 Py_INCREF(py_V7);
373
374 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
375 {Py_XINCREF(py_V7);}
376 PyList_SET_ITEM(storage_V7, 0, py_V7);
377 {Py_XDECREF(old);}
378 }
379
380 Py_XDECREF(V7);
381
382 {Py_XDECREF(py_V7);}
383
384 double __DUMMY_8;
385
386 }
387 __label_6:
388
389 if (V5) {
390 Py_XDECREF(V5);
391 }
392
393 {Py_XDECREF(py_V5);}
394
395 double __DUMMY_6;
396
397 }
398 __label_4:
399
400 Py_XDECREF(V3);
401
402 {Py_XDECREF(py_V3);}
403
404 double __DUMMY_4;
405
406 }
407 __label_2:
408
409 if (!__failure) {
410
411 //std::cerr << "sync\n";
412 if (NULL == V1) {
413 // failure: sync None to storage
414 Py_XDECREF(py_V1);
415 py_V1 = Py_None;
416 Py_INCREF(py_V1);
417 }
418 else
419 {
420 if (py_V1 != (PyObject*)V1)
421 {
422 Py_XDECREF(py_V1);
423 py_V1 = (PyObject*)V1;
424 Py_INCREF(py_V1);
425 }
426 assert(py_V1->ob_refcnt);
427 }
428
429 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
430 {Py_XINCREF(py_V1);}
431 PyList_SET_ITEM(storage_V1, 0, py_V1);
432 {Py_XDECREF(old);}
433 }
434
435 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
436 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
437 if (V1)
438 {
439 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
440 Py_XDECREF(V1);
441 }
442 //std::cerr << "cleanup done" << py_V1 << "\n";
443
444 {Py_XDECREF(py_V1);}
445
446 double __DUMMY_2;
447
448 }
449
450
451 if (__failure) {
452 // When there is a failure, this code puts the exception
453 // in __ERROR.
454 PyObject* err_type = NULL;
455 PyObject* err_msg = NULL;
456 PyObject* err_traceback = NULL;
457 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
458 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
459 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
460 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
461 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
462 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
463 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
464 PyList_SET_ITEM(__ERROR, 0, err_type);
465 PyList_SET_ITEM(__ERROR, 1, err_msg);
466 PyList_SET_ITEM(__ERROR, 2, err_traceback);
467 {Py_XDECREF(old_err_type);}
468 {Py_XDECREF(old_err_msg);}
469 {Py_XDECREF(old_err_traceback);}
470 }
471 // The failure code is returned to index what code block failed.
472 return __failure;
473
474 }
475 };
476
477
478 int __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor(__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* self) {
479 return self->run();
480 }
481
482 void __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor(void* executor, void* self) {
483 //printf("doing cleanup\n");
484 //fflush(stdout);
485 // ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self)->cleanup();
486 // free(self);
487 delete ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self);
488 //printf("done cleanup\n");
489 //fflush(stdout);
490 }
491
492 //////////////////////
493 //// Functions
494 //////////////////////
495 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
496 assert(PyTuple_Check(argtuple));
497 if (5 != PyTuple_Size(argtuple)){
498 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
499 return NULL;
500 }
501 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* struct_ptr = new __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964();
502 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
503 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor), struct_ptr, __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor);
504 return thunk; }
505
506 //////////////////////
507 //// Module init
508 //////////////////////
509 static PyMethodDef MyMethods[] = {
510 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
511 {NULL, NULL, 0, NULL}
512 };
513 PyMODINIT_FUNC init7a7573cd1a887cbf5d8946c487571964(void){
514 import_array();
515 (void) Py_InitModule("7a7573cd1a887cbf5d8946c487571964", MyMethods);
516 }
517
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 void free_generator(void *_gen)
14 {
15 curandGenerator_t * gen = (curandGenerator_t*)_gen;
16 curandStatus_t err = curandDestroyGenerator(*gen);
17 if (err != CURAND_STATUS_SUCCESS)
18 {
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator",
20 (int)err);
21 }
22 free(_gen);
23 }
24
25
26 struct __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964 {
27 PyObject* __ERROR;
28
29 PyObject* storage_V3;
30 PyObject* storage_V5;
31 PyObject* storage_V7;
32 PyObject* storage_V1;
33
34
35 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964() {}
36 ~__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(void) {
37 cleanup();
38 }
39
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
41 Py_XINCREF(storage_V3);
42 Py_XINCREF(storage_V5);
43 Py_XINCREF(storage_V7);
44 Py_XINCREF(storage_V1);
45 this->storage_V3 = storage_V3;
46 this->storage_V5 = storage_V5;
47 this->storage_V7 = storage_V7;
48 this->storage_V1 = storage_V1;
49 int __failure = 0;
50
51 {
52
53 {
54
55 {
56
57 {
58
59 this->__ERROR = __ERROR;
60 return 0;
61 __label_7:
62
63 double __DUMMY_7;
64
65 }
66 __label_5:
67
68 double __DUMMY_5;
69
70 }
71 __label_3:
72
73 double __DUMMY_3;
74
75 }
76 __label_1:
77
78 double __DUMMY_1;
79
80 }
81
82 Py_XDECREF(this->storage_V3);
83 Py_XDECREF(this->storage_V5);
84 Py_XDECREF(this->storage_V7);
85 Py_XDECREF(this->storage_V1);
86
87 if (__failure) {
88 // When there is a failure, this code puts the exception
89 // in __ERROR.
90 PyObject* err_type = NULL;
91 PyObject* err_msg = NULL;
92 PyObject* err_traceback = NULL;
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
100 PyList_SET_ITEM(__ERROR, 0, err_type);
101 PyList_SET_ITEM(__ERROR, 1, err_msg);
102 PyList_SET_ITEM(__ERROR, 2, err_traceback);
103 {Py_XDECREF(old_err_type);}
104 {Py_XDECREF(old_err_msg);}
105 {Py_XDECREF(old_err_traceback);}
106 }
107 // The failure code is returned to index what code block failed.
108 return __failure;
109
110 }
111 void cleanup(void) {
112 __label_1:
113
114 double __DUMMY_1;
115 __label_3:
116
117 double __DUMMY_3;
118 __label_5:
119
120 double __DUMMY_5;
121 __label_7:
122
123 double __DUMMY_7;
124
125 Py_XDECREF(this->storage_V3);
126 Py_XDECREF(this->storage_V5);
127 Py_XDECREF(this->storage_V7);
128 Py_XDECREF(this->storage_V1);
129 }
130 int run(void) {
131 int __failure = 0;
132
133 PyObject* py_V1;
134 CudaNdarray * V1;
135 PyObject* py_V3;
136
137 PyObject* V3;
138
139 PyObject* py_V5;
140
141 PyArrayObject* V5;
142 int type_num_V5;
143 typedef npy_int32 dtype_V5;
144
145 PyObject* py_V7;
146
147 PyObject* V7;
148
149 {
150
151 py_V1 = PyList_GET_ITEM(storage_V1, 0);
152 {Py_XINCREF(py_V1);}
153
154 if (py_V1 == Py_None)
155 {
156 V1 = NULL;
157 }
158 else
159 {
160
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
162 // and one ref from the local scope.
163
164 if (CudaNdarray_Check(py_V1))
165 {
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
167 V1 = (CudaNdarray*)py_V1;
168 //std::cerr << "c_extract " << V1 << '\n';
169 if (V1->nd != 2)
170 {
171 PyErr_Format(PyExc_RuntimeError,
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
173 V1->nd);
174 V1 = NULL;
175 {__failure = 2; goto __label_2;};
176 }
177 //std::cerr << "c_extract " << V1 << " nd check passed\n";
178
179
180 assert(V1);
181 Py_INCREF(py_V1);
182 }
183 else if (py_V1 == Py_None)
184 {
185 PyErr_SetString(PyExc_TypeError,
186 "expected a CudaNdarray, not None");
187 V1 = NULL;
188 {__failure = 2; goto __label_2;};
189 }
190 else
191 {
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
194 V1 = NULL;
195 {__failure = 2; goto __label_2;};
196 }
197 //std::cerr << "c_extract done " << V1 << '\n';
198
199
200 }
201
202 {
203
204 py_V3 = PyList_GET_ITEM(storage_V3, 0);
205 {Py_XINCREF(py_V3);}
206
207 Py_INCREF(py_V3);
208 V3 = py_V3;
209
210 {
211
212 py_V5 = PyList_GET_ITEM(storage_V5, 0);
213 {Py_XINCREF(py_V5);}
214
215 V5 = NULL;
216 if (py_V5 == Py_None) {
217 // We can either fail here or set V5 to NULL and rely on Ops
218 // using tensors to handle the NULL case, but if they fail to do so
219 // they'll end up with nasty segfaults, so this is public service.
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
221 {__failure = 6; goto __label_6;}
222 }
223 if (!PyArray_Check(py_V5)) {
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
225 {__failure = 6; goto __label_6;}
226 }
227 // We expect NPY_INT32
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
229 if (!PyArray_ISALIGNED(py_V5)) {
230 PyErr_Format(PyExc_NotImplementedError,
231 "expected an aligned array of type %d "
232 "(NPY_INT32), got non-aligned array of type %d"
233 " with %d dimensions, with 3 last dims %d, %d, %d"
234 " and 3 last strides %d %d, %d.",
235 NPY_INT32, type_num_V5,
236 PyArray_NDIM(py_V5),
237 PyArray_NDIM(py_V5) >= 3 ?
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
239 PyArray_NDIM(py_V5) >= 2 ?
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
241 PyArray_NDIM(py_V5) >= 1 ?
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
243 PyArray_NDIM(py_V5) >= 2 ?
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
245 PyArray_NDIM(py_V5) >= 3 ?
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
247 PyArray_NDIM(py_V5) >= 1 ?
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
249 );
250 {__failure = 6; goto __label_6;}
251 }
252 // This is a TypeError to be consistent with DEBUG_MODE
253 // Note: DEBUG_MODE also tells the name of the container
254 if (type_num_V5 != NPY_INT32) {
255 PyErr_Format(PyExc_TypeError,
256 "expected type_num %d (NPY_INT32) got %d",
257 NPY_INT32, type_num_V5);
258 {__failure = 6; goto __label_6;}
259 }
260 V5 = (PyArrayObject*)(py_V5);
261 Py_XINCREF(V5);
262
263 {
264
265 py_V7 = Py_None;
266 {Py_XINCREF(py_V7);}
267
268 V7 = NULL;
269
270 {
271
272 //////// <code generated by CURAND_Base>
273
274 int odims[2];
275 int n_elements = 1;
276 int must_alloc_sample = ((NULL == V1)
277 || !CudaNdarray_Check(py_V1)
278 || (V1->nd != 2));
279
280 if (V5->nd != 1)
281 {
282 PyErr_SetString(PyExc_ValueError, "size must be vector");
283 {__failure = 9; goto __label_9;}
284 }
285 if (V5->dimensions[0] != 2)
286 {
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
288 2, V5->dimensions[0]);
289 {__failure = 9; goto __label_9;}
290 }
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
292 {
293 PyErr_SetString(PyExc_ValueError, "size must be int32");
294 {__failure = 9; goto __label_9;}
295 }
296 for (int i = 0; i < 2; ++i)
297 {
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
299 n_elements *= odims[i];
300 must_alloc_sample = (must_alloc_sample
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
302 }
303 if (must_alloc_sample)
304 {
305 Py_XDECREF(V1);
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
307 if(!V1)
308 {
309 {__failure = 9; goto __label_9;};
310 }
311 }
312 if (!PyCObject_Check(V3))
313 {
314 // allocate a new generator for o_generator
315 Py_XDECREF(V7);
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
317 assert(gen);
318 if (CURAND_STATUS_SUCCESS !=
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
321 {__failure = 9; goto __label_9;};
322 }
323 if (CURAND_STATUS_SUCCESS !=
324 curandSetPseudoRandomGeneratorSeed(*gen,234))
325 {
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
327 {__failure = 9; goto __label_9;};
328 }
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
330 assert (V3 == Py_False);
331 }
332 else if (1)
333 {
334 // use i_generator for o_generator
335 Py_XDECREF(V7);
336 Py_INCREF(V3);
337 V7 = V3;
338 }
339 else
340 {
341 // copy i_generator for o_generator
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
343 {__failure = 9; goto __label_9;};
344 }
345 {
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
347 curandStatus_t err = curandGenerateUniform(*gen,
348 CudaNdarray_DEV_DATA(V1),
349 n_elements);
350
351
352 if (err != CURAND_STATUS_SUCCESS)
353 {
354 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
355 {__failure = 9; goto __label_9;};
356 }
357 cudaThreadSynchronize();
358 }
359 //////// </ code generated by CURAND_Base>
360 __label_9:
361
362 double __DUMMY_9;
363
364 }
365 __label_8:
366
367 if (!__failure) {
368
369 assert(py_V7->ob_refcnt > 1);
370 Py_DECREF(py_V7);
371 py_V7 = V7 ? V7 : Py_None;
372 Py_INCREF(py_V7);
373
374 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
375 {Py_XINCREF(py_V7);}
376 PyList_SET_ITEM(storage_V7, 0, py_V7);
377 {Py_XDECREF(old);}
378 }
379
380 Py_XDECREF(V7);
381
382 {Py_XDECREF(py_V7);}
383
384 double __DUMMY_8;
385
386 }
387 __label_6:
388
389 if (V5) {
390 Py_XDECREF(V5);
391 }
392
393 {Py_XDECREF(py_V5);}
394
395 double __DUMMY_6;
396
397 }
398 __label_4:
399
400 Py_XDECREF(V3);
401
402 {Py_XDECREF(py_V3);}
403
404 double __DUMMY_4;
405
406 }
407 __label_2:
408
409 if (!__failure) {
410
411 //std::cerr << "sync\n";
412 if (NULL == V1) {
413 // failure: sync None to storage
414 Py_XDECREF(py_V1);
415 py_V1 = Py_None;
416 Py_INCREF(py_V1);
417 }
418 else
419 {
420 if (py_V1 != (PyObject*)V1)
421 {
422 Py_XDECREF(py_V1);
423 py_V1 = (PyObject*)V1;
424 Py_INCREF(py_V1);
425 }
426 assert(py_V1->ob_refcnt);
427 }
428
429 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
430 {Py_XINCREF(py_V1);}
431 PyList_SET_ITEM(storage_V1, 0, py_V1);
432 {Py_XDECREF(old);}
433 }
434
435 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
436 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
437 if (V1)
438 {
439 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
440 Py_XDECREF(V1);
441 }
442 //std::cerr << "cleanup done" << py_V1 << "\n";
443
444 {Py_XDECREF(py_V1);}
445
446 double __DUMMY_2;
447
448 }
449
450
451 if (__failure) {
452 // When there is a failure, this code puts the exception
453 // in __ERROR.
454 PyObject* err_type = NULL;
455 PyObject* err_msg = NULL;
456 PyObject* err_traceback = NULL;
457 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
458 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
459 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
460 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
461 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
462 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
463 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
464 PyList_SET_ITEM(__ERROR, 0, err_type);
465 PyList_SET_ITEM(__ERROR, 1, err_msg);
466 PyList_SET_ITEM(__ERROR, 2, err_traceback);
467 {Py_XDECREF(old_err_type);}
468 {Py_XDECREF(old_err_msg);}
469 {Py_XDECREF(old_err_traceback);}
470 }
471 // The failure code is returned to index what code block failed.
472 return __failure;
473
474 }
475 };
476
477
478 int __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor(__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* self) {
479 return self->run();
480 }
481
482 void __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor(void* executor, void* self) {
483 //printf("doing cleanup\n");
484 //fflush(stdout);
485 // ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self)->cleanup();
486 // free(self);
487 delete ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self);
488 //printf("done cleanup\n");
489 //fflush(stdout);
490 }
491
492 //////////////////////
493 //// Functions
494 //////////////////////
495 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
496 assert(PyTuple_Check(argtuple));
497 if (5 != PyTuple_Size(argtuple)){
498 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
499 return NULL;
500 }
501 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* struct_ptr = new __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964();
502 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
503 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor), struct_ptr, __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor);
504 return thunk; }
505
506 //////////////////////
507 //// Module init
508 //////////////////////
509 static PyMethodDef MyMethods[] = {
510 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
511 {NULL, NULL, 0, NULL}
512 };
513 PyMODINIT_FUNC init7a7573cd1a887cbf5d8946c487571964(void){
514 import_array();
515 (void) Py_InitModule("7a7573cd1a887cbf5d8946c487571964", MyMethods);
516 }
517
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 void free_generator(void *_gen)
14 {
15 curandGenerator_t * gen = (curandGenerator_t*)_gen;
16 curandStatus_t err = curandDestroyGenerator(*gen);
17 if (err != CURAND_STATUS_SUCCESS)
18 {
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator",
20 (int)err);
21 }
22 free(_gen);
23 }
24
25
26 struct __struct_compiled_op_889e175e75159a3e61d065caf0802126 {
27 PyObject* __ERROR;
28
29 PyObject* storage_V3;
30 PyObject* storage_V5;
31 PyObject* storage_V7;
32 PyObject* storage_V1;
33
34
35 __struct_compiled_op_889e175e75159a3e61d065caf0802126() {}
36 ~__struct_compiled_op_889e175e75159a3e61d065caf0802126(void) {
37 cleanup();
38 }
39
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
41 Py_XINCREF(storage_V3);
42 Py_XINCREF(storage_V5);
43 Py_XINCREF(storage_V7);
44 Py_XINCREF(storage_V1);
45 this->storage_V3 = storage_V3;
46 this->storage_V5 = storage_V5;
47 this->storage_V7 = storage_V7;
48 this->storage_V1 = storage_V1;
49 int __failure = 0;
50
51 {
52
53 {
54
55 {
56
57 {
58
59 this->__ERROR = __ERROR;
60 return 0;
61 __label_7:
62
63 double __DUMMY_7;
64
65 }
66 __label_5:
67
68 double __DUMMY_5;
69
70 }
71 __label_3:
72
73 double __DUMMY_3;
74
75 }
76 __label_1:
77
78 double __DUMMY_1;
79
80 }
81
82 Py_XDECREF(this->storage_V3);
83 Py_XDECREF(this->storage_V5);
84 Py_XDECREF(this->storage_V7);
85 Py_XDECREF(this->storage_V1);
86
87 if (__failure) {
88 // When there is a failure, this code puts the exception
89 // in __ERROR.
90 PyObject* err_type = NULL;
91 PyObject* err_msg = NULL;
92 PyObject* err_traceback = NULL;
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
100 PyList_SET_ITEM(__ERROR, 0, err_type);
101 PyList_SET_ITEM(__ERROR, 1, err_msg);
102 PyList_SET_ITEM(__ERROR, 2, err_traceback);
103 {Py_XDECREF(old_err_type);}
104 {Py_XDECREF(old_err_msg);}
105 {Py_XDECREF(old_err_traceback);}
106 }
107 // The failure code is returned to index what code block failed.
108 return __failure;
109
110 }
111 void cleanup(void) {
112 __label_1:
113
114 double __DUMMY_1;
115 __label_3:
116
117 double __DUMMY_3;
118 __label_5:
119
120 double __DUMMY_5;
121 __label_7:
122
123 double __DUMMY_7;
124
125 Py_XDECREF(this->storage_V3);
126 Py_XDECREF(this->storage_V5);
127 Py_XDECREF(this->storage_V7);
128 Py_XDECREF(this->storage_V1);
129 }
130 int run(void) {
131 int __failure = 0;
132
133 PyObject* py_V1;
134 CudaNdarray * V1;
135 PyObject* py_V3;
136
137 PyObject* V3;
138
139 PyObject* py_V5;
140
141 PyArrayObject* V5;
142 int type_num_V5;
143 typedef npy_int32 dtype_V5;
144
145 PyObject* py_V7;
146
147 PyObject* V7;
148
149 {
150
151 py_V1 = PyList_GET_ITEM(storage_V1, 0);
152 {Py_XINCREF(py_V1);}
153
154 if (py_V1 == Py_None)
155 {
156 V1 = NULL;
157 }
158 else
159 {
160
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
162 // and one ref from the local scope.
163
164 if (CudaNdarray_Check(py_V1))
165 {
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
167 V1 = (CudaNdarray*)py_V1;
168 //std::cerr << "c_extract " << V1 << '\n';
169 if (V1->nd != 2)
170 {
171 PyErr_Format(PyExc_RuntimeError,
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
173 V1->nd);
174 V1 = NULL;
175 {__failure = 2; goto __label_2;};
176 }
177 //std::cerr << "c_extract " << V1 << " nd check passed\n";
178
179
180 assert(V1);
181 Py_INCREF(py_V1);
182 }
183 else if (py_V1 == Py_None)
184 {
185 PyErr_SetString(PyExc_TypeError,
186 "expected a CudaNdarray, not None");
187 V1 = NULL;
188 {__failure = 2; goto __label_2;};
189 }
190 else
191 {
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
194 V1 = NULL;
195 {__failure = 2; goto __label_2;};
196 }
197 //std::cerr << "c_extract done " << V1 << '\n';
198
199
200 }
201
202 {
203
204 py_V3 = PyList_GET_ITEM(storage_V3, 0);
205 {Py_XINCREF(py_V3);}
206
207 Py_INCREF(py_V3);
208 V3 = py_V3;
209
210 {
211
212 py_V5 = PyList_GET_ITEM(storage_V5, 0);
213 {Py_XINCREF(py_V5);}
214
215 V5 = NULL;
216 if (py_V5 == Py_None) {
217 // We can either fail here or set V5 to NULL and rely on Ops
218 // using tensors to handle the NULL case, but if they fail to do so
219 // they'll end up with nasty segfaults, so this is public service.
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
221 {__failure = 6; goto __label_6;}
222 }
223 if (!PyArray_Check(py_V5)) {
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
225 {__failure = 6; goto __label_6;}
226 }
227 // We expect NPY_INT32
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
229 if (!PyArray_ISALIGNED(py_V5)) {
230 PyErr_Format(PyExc_NotImplementedError,
231 "expected an aligned array of type %d "
232 "(NPY_INT32), got non-aligned array of type %d"
233 " with %d dimensions, with 3 last dims %d, %d, %d"
234 " and 3 last strides %d %d, %d.",
235 NPY_INT32, type_num_V5,
236 PyArray_NDIM(py_V5),
237 PyArray_NDIM(py_V5) >= 3 ?
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
239 PyArray_NDIM(py_V5) >= 2 ?
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
241 PyArray_NDIM(py_V5) >= 1 ?
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
243 PyArray_NDIM(py_V5) >= 2 ?
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
245 PyArray_NDIM(py_V5) >= 3 ?
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
247 PyArray_NDIM(py_V5) >= 1 ?
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
249 );
250 {__failure = 6; goto __label_6;}
251 }
252 // This is a TypeError to be consistent with DEBUG_MODE
253 // Note: DEBUG_MODE also tells the name of the container
254 if (type_num_V5 != NPY_INT32) {
255 PyErr_Format(PyExc_TypeError,
256 "expected type_num %d (NPY_INT32) got %d",
257 NPY_INT32, type_num_V5);
258 {__failure = 6; goto __label_6;}
259 }
260 V5 = (PyArrayObject*)(py_V5);
261 Py_XINCREF(V5);
262
263 {
264
265 py_V7 = Py_None;
266 {Py_XINCREF(py_V7);}
267
268 V7 = NULL;
269
270 {
271
272 //////// <code generated by CURAND_Base>
273
274 int odims[2];
275 int n_elements = 1;
276 int must_alloc_sample = ((NULL == V1)
277 || !CudaNdarray_Check(py_V1)
278 || (V1->nd != 2));
279
280 if (V5->nd != 1)
281 {
282 PyErr_SetString(PyExc_ValueError, "size must be vector");
283 {__failure = 9; goto __label_9;}
284 }
285 if (V5->dimensions[0] != 2)
286 {
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
288 2, V5->dimensions[0]);
289 {__failure = 9; goto __label_9;}
290 }
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
292 {
293 PyErr_SetString(PyExc_ValueError, "size must be int32");
294 {__failure = 9; goto __label_9;}
295 }
296 for (int i = 0; i < 2; ++i)
297 {
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
299 n_elements *= odims[i];
300 must_alloc_sample = (must_alloc_sample
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
302 }
303 if (must_alloc_sample)
304 {
305 Py_XDECREF(V1);
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
307 if(!V1)
308 {
309 {__failure = 9; goto __label_9;};
310 }
311 }
312 if (!PyCObject_Check(V3))
313 {
314 // allocate a new generator for o_generator
315 Py_XDECREF(V7);
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
317 assert(gen);
318 if (CURAND_STATUS_SUCCESS !=
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
321 {__failure = 9; goto __label_9;};
322 }
323 if (CURAND_STATUS_SUCCESS !=
324 curandSetPseudoRandomGeneratorSeed(*gen,234))
325 {
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
327 {__failure = 9; goto __label_9;};
328 }
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
330 assert (V3 == Py_False);
331 }
332 else if (1)
333 {
334 // use i_generator for o_generator
335 Py_XDECREF(V7);
336 Py_INCREF(V3);
337 V7 = V3;
338 }
339 else
340 {
341 // copy i_generator for o_generator
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
343 {__failure = 9; goto __label_9;};
344 }
345 {
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
347 curandStatus_t err = curandGenerateNormal(*gen,
348 CudaNdarray_DEV_DATA(V1),
349 n_elements,
350 0.0, 1.0);
351
352
353 if (err != CURAND_STATUS_SUCCESS)
354 {
355 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
356 {__failure = 9; goto __label_9;};
357 }
358 cudaThreadSynchronize();
359 }
360 //////// </ code generated by CURAND_Base>
361 __label_9:
362
363 double __DUMMY_9;
364
365 }
366 __label_8:
367
368 if (!__failure) {
369
370 assert(py_V7->ob_refcnt > 1);
371 Py_DECREF(py_V7);
372 py_V7 = V7 ? V7 : Py_None;
373 Py_INCREF(py_V7);
374
375 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
376 {Py_XINCREF(py_V7);}
377 PyList_SET_ITEM(storage_V7, 0, py_V7);
378 {Py_XDECREF(old);}
379 }
380
381 Py_XDECREF(V7);
382
383 {Py_XDECREF(py_V7);}
384
385 double __DUMMY_8;
386
387 }
388 __label_6:
389
390 if (V5) {
391 Py_XDECREF(V5);
392 }
393
394 {Py_XDECREF(py_V5);}
395
396 double __DUMMY_6;
397
398 }
399 __label_4:
400
401 Py_XDECREF(V3);
402
403 {Py_XDECREF(py_V3);}
404
405 double __DUMMY_4;
406
407 }
408 __label_2:
409
410 if (!__failure) {
411
412 //std::cerr << "sync\n";
413 if (NULL == V1) {
414 // failure: sync None to storage
415 Py_XDECREF(py_V1);
416 py_V1 = Py_None;
417 Py_INCREF(py_V1);
418 }
419 else
420 {
421 if (py_V1 != (PyObject*)V1)
422 {
423 Py_XDECREF(py_V1);
424 py_V1 = (PyObject*)V1;
425 Py_INCREF(py_V1);
426 }
427 assert(py_V1->ob_refcnt);
428 }
429
430 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
431 {Py_XINCREF(py_V1);}
432 PyList_SET_ITEM(storage_V1, 0, py_V1);
433 {Py_XDECREF(old);}
434 }
435
436 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
437 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
438 if (V1)
439 {
440 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
441 Py_XDECREF(V1);
442 }
443 //std::cerr << "cleanup done" << py_V1 << "\n";
444
445 {Py_XDECREF(py_V1);}
446
447 double __DUMMY_2;
448
449 }
450
451
452 if (__failure) {
453 // When there is a failure, this code puts the exception
454 // in __ERROR.
455 PyObject* err_type = NULL;
456 PyObject* err_msg = NULL;
457 PyObject* err_traceback = NULL;
458 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
459 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
460 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
461 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
462 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
463 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
464 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
465 PyList_SET_ITEM(__ERROR, 0, err_type);
466 PyList_SET_ITEM(__ERROR, 1, err_msg);
467 PyList_SET_ITEM(__ERROR, 2, err_traceback);
468 {Py_XDECREF(old_err_type);}
469 {Py_XDECREF(old_err_msg);}
470 {Py_XDECREF(old_err_traceback);}
471 }
472 // The failure code is returned to index what code block failed.
473 return __failure;
474
475 }
476 };
477
478
479 int __struct_compiled_op_889e175e75159a3e61d065caf0802126_executor(__struct_compiled_op_889e175e75159a3e61d065caf0802126* self) {
480 return self->run();
481 }
482
483 void __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor(void* executor, void* self) {
484 //printf("doing cleanup\n");
485 //fflush(stdout);
486 // ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self)->cleanup();
487 // free(self);
488 delete ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self);
489 //printf("done cleanup\n");
490 //fflush(stdout);
491 }
492
493 //////////////////////
494 //// Functions
495 //////////////////////
496 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
497 assert(PyTuple_Check(argtuple));
498 if (5 != PyTuple_Size(argtuple)){
499 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
500 return NULL;
501 }
502 __struct_compiled_op_889e175e75159a3e61d065caf0802126* struct_ptr = new __struct_compiled_op_889e175e75159a3e61d065caf0802126();
503 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
504 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_889e175e75159a3e61d065caf0802126_executor), struct_ptr, __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor);
505 return thunk; }
506
507 //////////////////////
508 //// Module init
509 //////////////////////
510 static PyMethodDef MyMethods[] = {
511 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
512 {NULL, NULL, 0, NULL}
513 };
514 PyMODINIT_FUNC init889e175e75159a3e61d065caf0802126(void){
515 import_array();
516 (void) Py_InitModule("889e175e75159a3e61d065caf0802126", MyMethods);
517 }
518
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 void free_generator(void *_gen)
14 {
15 curandGenerator_t * gen = (curandGenerator_t*)_gen;
16 curandStatus_t err = curandDestroyGenerator(*gen);
17 if (err != CURAND_STATUS_SUCCESS)
18 {
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator",
20 (int)err);
21 }
22 free(_gen);
23 }
24
25
26 struct __struct_compiled_op_889e175e75159a3e61d065caf0802126 {
27 PyObject* __ERROR;
28
29 PyObject* storage_V3;
30 PyObject* storage_V5;
31 PyObject* storage_V7;
32 PyObject* storage_V1;
33
34
35 __struct_compiled_op_889e175e75159a3e61d065caf0802126() {}
36 ~__struct_compiled_op_889e175e75159a3e61d065caf0802126(void) {
37 cleanup();
38 }
39
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
41 Py_XINCREF(storage_V3);
42 Py_XINCREF(storage_V5);
43 Py_XINCREF(storage_V7);
44 Py_XINCREF(storage_V1);
45 this->storage_V3 = storage_V3;
46 this->storage_V5 = storage_V5;
47 this->storage_V7 = storage_V7;
48 this->storage_V1 = storage_V1;
49 int __failure = 0;
50
51 {
52
53 {
54
55 {
56
57 {
58
59 this->__ERROR = __ERROR;
60 return 0;
61 __label_7:
62
63 double __DUMMY_7;
64
65 }
66 __label_5:
67
68 double __DUMMY_5;
69
70 }
71 __label_3:
72
73 double __DUMMY_3;
74
75 }
76 __label_1:
77
78 double __DUMMY_1;
79
80 }
81
82 Py_XDECREF(this->storage_V3);
83 Py_XDECREF(this->storage_V5);
84 Py_XDECREF(this->storage_V7);
85 Py_XDECREF(this->storage_V1);
86
87 if (__failure) {
88 // When there is a failure, this code puts the exception
89 // in __ERROR.
90 PyObject* err_type = NULL;
91 PyObject* err_msg = NULL;
92 PyObject* err_traceback = NULL;
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
100 PyList_SET_ITEM(__ERROR, 0, err_type);
101 PyList_SET_ITEM(__ERROR, 1, err_msg);
102 PyList_SET_ITEM(__ERROR, 2, err_traceback);
103 {Py_XDECREF(old_err_type);}
104 {Py_XDECREF(old_err_msg);}
105 {Py_XDECREF(old_err_traceback);}
106 }
107 // The failure code is returned to index what code block failed.
108 return __failure;
109
110 }
111 void cleanup(void) {
112 __label_1:
113
114 double __DUMMY_1;
115 __label_3:
116
117 double __DUMMY_3;
118 __label_5:
119
120 double __DUMMY_5;
121 __label_7:
122
123 double __DUMMY_7;
124
125 Py_XDECREF(this->storage_V3);
126 Py_XDECREF(this->storage_V5);
127 Py_XDECREF(this->storage_V7);
128 Py_XDECREF(this->storage_V1);
129 }
130 int run(void) {
131 int __failure = 0;
132
133 PyObject* py_V1;
134 CudaNdarray * V1;
135 PyObject* py_V3;
136
137 PyObject* V3;
138
139 PyObject* py_V5;
140
141 PyArrayObject* V5;
142 int type_num_V5;
143 typedef npy_int32 dtype_V5;
144
145 PyObject* py_V7;
146
147 PyObject* V7;
148
149 {
150
151 py_V1 = PyList_GET_ITEM(storage_V1, 0);
152 {Py_XINCREF(py_V1);}
153
154 if (py_V1 == Py_None)
155 {
156 V1 = NULL;
157 }
158 else
159 {
160
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
162 // and one ref from the local scope.
163
164 if (CudaNdarray_Check(py_V1))
165 {
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
167 V1 = (CudaNdarray*)py_V1;
168 //std::cerr << "c_extract " << V1 << '\n';
169 if (V1->nd != 2)
170 {
171 PyErr_Format(PyExc_RuntimeError,
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
173 V1->nd);
174 V1 = NULL;
175 {__failure = 2; goto __label_2;};
176 }
177 //std::cerr << "c_extract " << V1 << " nd check passed\n";
178
179
180 assert(V1);
181 Py_INCREF(py_V1);
182 }
183 else if (py_V1 == Py_None)
184 {
185 PyErr_SetString(PyExc_TypeError,
186 "expected a CudaNdarray, not None");
187 V1 = NULL;
188 {__failure = 2; goto __label_2;};
189 }
190 else
191 {
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
194 V1 = NULL;
195 {__failure = 2; goto __label_2;};
196 }
197 //std::cerr << "c_extract done " << V1 << '\n';
198
199
200 }
201
202 {
203
204 py_V3 = PyList_GET_ITEM(storage_V3, 0);
205 {Py_XINCREF(py_V3);}
206
207 Py_INCREF(py_V3);
208 V3 = py_V3;
209
210 {
211
212 py_V5 = PyList_GET_ITEM(storage_V5, 0);
213 {Py_XINCREF(py_V5);}
214
215 V5 = NULL;
216 if (py_V5 == Py_None) {
217 // We can either fail here or set V5 to NULL and rely on Ops
218 // using tensors to handle the NULL case, but if they fail to do so
219 // they'll end up with nasty segfaults, so this is public service.
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
221 {__failure = 6; goto __label_6;}
222 }
223 if (!PyArray_Check(py_V5)) {
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
225 {__failure = 6; goto __label_6;}
226 }
227 // We expect NPY_INT32
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
229 if (!PyArray_ISALIGNED(py_V5)) {
230 PyErr_Format(PyExc_NotImplementedError,
231 "expected an aligned array of type %d "
232 "(NPY_INT32), got non-aligned array of type %d"
233 " with %d dimensions, with 3 last dims %d, %d, %d"
234 " and 3 last strides %d %d, %d.",
235 NPY_INT32, type_num_V5,
236 PyArray_NDIM(py_V5),
237 PyArray_NDIM(py_V5) >= 3 ?
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
239 PyArray_NDIM(py_V5) >= 2 ?
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
241 PyArray_NDIM(py_V5) >= 1 ?
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
243 PyArray_NDIM(py_V5) >= 2 ?
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
245 PyArray_NDIM(py_V5) >= 3 ?
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
247 PyArray_NDIM(py_V5) >= 1 ?
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
249 );
250 {__failure = 6; goto __label_6;}
251 }
252 // This is a TypeError to be consistent with DEBUG_MODE
253 // Note: DEBUG_MODE also tells the name of the container
254 if (type_num_V5 != NPY_INT32) {
255 PyErr_Format(PyExc_TypeError,
256 "expected type_num %d (NPY_INT32) got %d",
257 NPY_INT32, type_num_V5);
258 {__failure = 6; goto __label_6;}
259 }
260 V5 = (PyArrayObject*)(py_V5);
261 Py_XINCREF(V5);
262
263 {
264
265 py_V7 = Py_None;
266 {Py_XINCREF(py_V7);}
267
268 V7 = NULL;
269
270 {
271
272 //////// <code generated by CURAND_Base>
273
274 int odims[2];
275 int n_elements = 1;
276 int must_alloc_sample = ((NULL == V1)
277 || !CudaNdarray_Check(py_V1)
278 || (V1->nd != 2));
279
280 if (V5->nd != 1)
281 {
282 PyErr_SetString(PyExc_ValueError, "size must be vector");
283 {__failure = 9; goto __label_9;}
284 }
285 if (V5->dimensions[0] != 2)
286 {
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
288 2, V5->dimensions[0]);
289 {__failure = 9; goto __label_9;}
290 }
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
292 {
293 PyErr_SetString(PyExc_ValueError, "size must be int32");
294 {__failure = 9; goto __label_9;}
295 }
296 for (int i = 0; i < 2; ++i)
297 {
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
299 n_elements *= odims[i];
300 must_alloc_sample = (must_alloc_sample
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
302 }
303 if (must_alloc_sample)
304 {
305 Py_XDECREF(V1);
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
307 if(!V1)
308 {
309 {__failure = 9; goto __label_9;};
310 }
311 }
312 if (!PyCObject_Check(V3))
313 {
314 // allocate a new generator for o_generator
315 Py_XDECREF(V7);
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
317 assert(gen);
318 if (CURAND_STATUS_SUCCESS !=
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
321 {__failure = 9; goto __label_9;};
322 }
323 if (CURAND_STATUS_SUCCESS !=
324 curandSetPseudoRandomGeneratorSeed(*gen,234))
325 {
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
327 {__failure = 9; goto __label_9;};
328 }
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
330 assert (V3 == Py_False);
331 }
332 else if (1)
333 {
334 // use i_generator for o_generator
335 Py_XDECREF(V7);
336 Py_INCREF(V3);
337 V7 = V3;
338 }
339 else
340 {
341 // copy i_generator for o_generator
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
343 {__failure = 9; goto __label_9;};
344 }
345 {
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
347 curandStatus_t err = curandGenerateNormal(*gen,
348 CudaNdarray_DEV_DATA(V1),
349 n_elements,
350 0.0, 1.0);
351
352
353 if (err != CURAND_STATUS_SUCCESS)
354 {
355 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
356 {__failure = 9; goto __label_9;};
357 }
358 cudaThreadSynchronize();
359 }
360 //////// </ code generated by CURAND_Base>
361 __label_9:
362
363 double __DUMMY_9;
364
365 }
366 __label_8:
367
368 if (!__failure) {
369
370 assert(py_V7->ob_refcnt > 1);
371 Py_DECREF(py_V7);
372 py_V7 = V7 ? V7 : Py_None;
373 Py_INCREF(py_V7);
374
375 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
376 {Py_XINCREF(py_V7);}
377 PyList_SET_ITEM(storage_V7, 0, py_V7);
378 {Py_XDECREF(old);}
379 }
380
381 Py_XDECREF(V7);
382
383 {Py_XDECREF(py_V7);}
384
385 double __DUMMY_8;
386
387 }
388 __label_6:
389
390 if (V5) {
391 Py_XDECREF(V5);
392 }
393
394 {Py_XDECREF(py_V5);}
395
396 double __DUMMY_6;
397
398 }
399 __label_4:
400
401 Py_XDECREF(V3);
402
403 {Py_XDECREF(py_V3);}
404
405 double __DUMMY_4;
406
407 }
408 __label_2:
409
410 if (!__failure) {
411
412 //std::cerr << "sync\n";
413 if (NULL == V1) {
414 // failure: sync None to storage
415 Py_XDECREF(py_V1);
416 py_V1 = Py_None;
417 Py_INCREF(py_V1);
418 }
419 else
420 {
421 if (py_V1 != (PyObject*)V1)
422 {
423 Py_XDECREF(py_V1);
424 py_V1 = (PyObject*)V1;
425 Py_INCREF(py_V1);
426 }
427 assert(py_V1->ob_refcnt);
428 }
429
430 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
431 {Py_XINCREF(py_V1);}
432 PyList_SET_ITEM(storage_V1, 0, py_V1);
433 {Py_XDECREF(old);}
434 }
435
436 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
437 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
438 if (V1)
439 {
440 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
441 Py_XDECREF(V1);
442 }
443 //std::cerr << "cleanup done" << py_V1 << "\n";
444
445 {Py_XDECREF(py_V1);}
446
447 double __DUMMY_2;
448
449 }
450
451
452 if (__failure) {
453 // When there is a failure, this code puts the exception
454 // in __ERROR.
455 PyObject* err_type = NULL;
456 PyObject* err_msg = NULL;
457 PyObject* err_traceback = NULL;
458 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
459 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
460 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
461 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
462 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
463 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
464 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
465 PyList_SET_ITEM(__ERROR, 0, err_type);
466 PyList_SET_ITEM(__ERROR, 1, err_msg);
467 PyList_SET_ITEM(__ERROR, 2, err_traceback);
468 {Py_XDECREF(old_err_type);}
469 {Py_XDECREF(old_err_msg);}
470 {Py_XDECREF(old_err_traceback);}
471 }
472 // The failure code is returned to index what code block failed.
473 return __failure;
474
475 }
476 };
477
478
479 int __struct_compiled_op_889e175e75159a3e61d065caf0802126_executor(__struct_compiled_op_889e175e75159a3e61d065caf0802126* self) {
480 return self->run();
481 }
482
483 void __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor(void* executor, void* self) {
484 //printf("doing cleanup\n");
485 //fflush(stdout);
486 // ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self)->cleanup();
487 // free(self);
488 delete ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self);
489 //printf("done cleanup\n");
490 //fflush(stdout);
491 }
492
493 //////////////////////
494 //// Functions
495 //////////////////////
496 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
497 assert(PyTuple_Check(argtuple));
498 if (5 != PyTuple_Size(argtuple)){
499 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
500 return NULL;
501 }
502 __struct_compiled_op_889e175e75159a3e61d065caf0802126* struct_ptr = new __struct_compiled_op_889e175e75159a3e61d065caf0802126();
503 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
504 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_889e175e75159a3e61d065caf0802126_executor), struct_ptr, __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor);
505 return thunk; }
506
507 //////////////////////
508 //// Module init
509 //////////////////////
510 static PyMethodDef MyMethods[] = {
511 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
512 {NULL, NULL, 0, NULL}
513 };
514 PyMODINIT_FUNC init889e175e75159a3e61d065caf0802126(void){
515 import_array();
516 (void) Py_InitModule("889e175e75159a3e61d065caf0802126", MyMethods);
517 }
518
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 void free_generator(void *_gen)
14 {
15 curandGenerator_t * gen = (curandGenerator_t*)_gen;
16 curandStatus_t err = curandDestroyGenerator(*gen);
17 if (err != CURAND_STATUS_SUCCESS)
18 {
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator",
20 (int)err);
21 }
22 free(_gen);
23 }
24
25
26 struct __struct_compiled_op_889e175e75159a3e61d065caf0802126 {
27 PyObject* __ERROR;
28
29 PyObject* storage_V3;
30 PyObject* storage_V5;
31 PyObject* storage_V7;
32 PyObject* storage_V1;
33
34
35 __struct_compiled_op_889e175e75159a3e61d065caf0802126() {}
36 ~__struct_compiled_op_889e175e75159a3e61d065caf0802126(void) {
37 cleanup();
38 }
39
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
41 Py_XINCREF(storage_V3);
42 Py_XINCREF(storage_V5);
43 Py_XINCREF(storage_V7);
44 Py_XINCREF(storage_V1);
45 this->storage_V3 = storage_V3;
46 this->storage_V5 = storage_V5;
47 this->storage_V7 = storage_V7;
48 this->storage_V1 = storage_V1;
49 int __failure = 0;
50
51 {
52
53 {
54
55 {
56
57 {
58
59 this->__ERROR = __ERROR;
60 return 0;
61 __label_7:
62
63 double __DUMMY_7;
64
65 }
66 __label_5:
67
68 double __DUMMY_5;
69
70 }
71 __label_3:
72
73 double __DUMMY_3;
74
75 }
76 __label_1:
77
78 double __DUMMY_1;
79
80 }
81
82 Py_XDECREF(this->storage_V3);
83 Py_XDECREF(this->storage_V5);
84 Py_XDECREF(this->storage_V7);
85 Py_XDECREF(this->storage_V1);
86
87 if (__failure) {
88 // When there is a failure, this code puts the exception
89 // in __ERROR.
90 PyObject* err_type = NULL;
91 PyObject* err_msg = NULL;
92 PyObject* err_traceback = NULL;
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
100 PyList_SET_ITEM(__ERROR, 0, err_type);
101 PyList_SET_ITEM(__ERROR, 1, err_msg);
102 PyList_SET_ITEM(__ERROR, 2, err_traceback);
103 {Py_XDECREF(old_err_type);}
104 {Py_XDECREF(old_err_msg);}
105 {Py_XDECREF(old_err_traceback);}
106 }
107 // The failure code is returned to index what code block failed.
108 return __failure;
109
110 }
111 void cleanup(void) {
112 __label_1:
113
114 double __DUMMY_1;
115 __label_3:
116
117 double __DUMMY_3;
118 __label_5:
119
120 double __DUMMY_5;
121 __label_7:
122
123 double __DUMMY_7;
124
125 Py_XDECREF(this->storage_V3);
126 Py_XDECREF(this->storage_V5);
127 Py_XDECREF(this->storage_V7);
128 Py_XDECREF(this->storage_V1);
129 }
130 int run(void) {
131 int __failure = 0;
132
133 PyObject* py_V1;
134 CudaNdarray * V1;
135 PyObject* py_V3;
136
137 PyObject* V3;
138
139 PyObject* py_V5;
140
141 PyArrayObject* V5;
142 int type_num_V5;
143 typedef npy_int32 dtype_V5;
144
145 PyObject* py_V7;
146
147 PyObject* V7;
148
149 {
150
151 py_V1 = PyList_GET_ITEM(storage_V1, 0);
152 {Py_XINCREF(py_V1);}
153
154 if (py_V1 == Py_None)
155 {
156 V1 = NULL;
157 }
158 else
159 {
160
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
162 // and one ref from the local scope.
163
164 if (CudaNdarray_Check(py_V1))
165 {
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
167 V1 = (CudaNdarray*)py_V1;
168 //std::cerr << "c_extract " << V1 << '\n';
169 if (V1->nd != 2)
170 {
171 PyErr_Format(PyExc_RuntimeError,
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
173 V1->nd);
174 V1 = NULL;
175 {__failure = 2; goto __label_2;};
176 }
177 //std::cerr << "c_extract " << V1 << " nd check passed\n";
178
179
180 assert(V1);
181 Py_INCREF(py_V1);
182 }
183 else if (py_V1 == Py_None)
184 {
185 PyErr_SetString(PyExc_TypeError,
186 "expected a CudaNdarray, not None");
187 V1 = NULL;
188 {__failure = 2; goto __label_2;};
189 }
190 else
191 {
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
194 V1 = NULL;
195 {__failure = 2; goto __label_2;};
196 }
197 //std::cerr << "c_extract done " << V1 << '\n';
198
199
200 }
201
202 {
203
204 py_V3 = PyList_GET_ITEM(storage_V3, 0);
205 {Py_XINCREF(py_V3);}
206
207 Py_INCREF(py_V3);
208 V3 = py_V3;
209
210 {
211
212 py_V5 = PyList_GET_ITEM(storage_V5, 0);
213 {Py_XINCREF(py_V5);}
214
215 V5 = NULL;
216 if (py_V5 == Py_None) {
217 // We can either fail here or set V5 to NULL and rely on Ops
218 // using tensors to handle the NULL case, but if they fail to do so
219 // they'll end up with nasty segfaults, so this is public service.
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
221 {__failure = 6; goto __label_6;}
222 }
223 if (!PyArray_Check(py_V5)) {
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
225 {__failure = 6; goto __label_6;}
226 }
227 // We expect NPY_INT32
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
229 if (!PyArray_ISALIGNED(py_V5)) {
230 PyErr_Format(PyExc_NotImplementedError,
231 "expected an aligned array of type %d "
232 "(NPY_INT32), got non-aligned array of type %d"
233 " with %d dimensions, with 3 last dims %d, %d, %d"
234 " and 3 last strides %d %d, %d.",
235 NPY_INT32, type_num_V5,
236 PyArray_NDIM(py_V5),
237 PyArray_NDIM(py_V5) >= 3 ?
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
239 PyArray_NDIM(py_V5) >= 2 ?
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
241 PyArray_NDIM(py_V5) >= 1 ?
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
243 PyArray_NDIM(py_V5) >= 2 ?
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
245 PyArray_NDIM(py_V5) >= 3 ?
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
247 PyArray_NDIM(py_V5) >= 1 ?
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
249 );
250 {__failure = 6; goto __label_6;}
251 }
252 // This is a TypeError to be consistent with DEBUG_MODE
253 // Note: DEBUG_MODE also tells the name of the container
254 if (type_num_V5 != NPY_INT32) {
255 PyErr_Format(PyExc_TypeError,
256 "expected type_num %d (NPY_INT32) got %d",
257 NPY_INT32, type_num_V5);
258 {__failure = 6; goto __label_6;}
259 }
260 V5 = (PyArrayObject*)(py_V5);
261 Py_XINCREF(V5);
262
263 {
264
265 py_V7 = Py_None;
266 {Py_XINCREF(py_V7);}
267
268 V7 = NULL;
269
270 {
271
272 //////// <code generated by CURAND_Base>
273
274 int odims[2];
275 int n_elements = 1;
276 int must_alloc_sample = ((NULL == V1)
277 || !CudaNdarray_Check(py_V1)
278 || (V1->nd != 2));
279
280 if (V5->nd != 1)
281 {
282 PyErr_SetString(PyExc_ValueError, "size must be vector");
283 {__failure = 9; goto __label_9;}
284 }
285 if (V5->dimensions[0] != 2)
286 {
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
288 2, V5->dimensions[0]);
289 {__failure = 9; goto __label_9;}
290 }
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
292 {
293 PyErr_SetString(PyExc_ValueError, "size must be int32");
294 {__failure = 9; goto __label_9;}
295 }
296 for (int i = 0; i < 2; ++i)
297 {
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
299 n_elements *= odims[i];
300 must_alloc_sample = (must_alloc_sample
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
302 }
303 if (must_alloc_sample)
304 {
305 Py_XDECREF(V1);
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
307 if(!V1)
308 {
309 {__failure = 9; goto __label_9;};
310 }
311 }
312 if (!PyCObject_Check(V3))
313 {
314 // allocate a new generator for o_generator
315 Py_XDECREF(V7);
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
317 assert(gen);
318 if (CURAND_STATUS_SUCCESS !=
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
321 {__failure = 9; goto __label_9;};
322 }
323 if (CURAND_STATUS_SUCCESS !=
324 curandSetPseudoRandomGeneratorSeed(*gen,234))
325 {
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
327 {__failure = 9; goto __label_9;};
328 }
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
330 assert (V3 == Py_False);
331 }
332 else if (1)
333 {
334 // use i_generator for o_generator
335 Py_XDECREF(V7);
336 Py_INCREF(V3);
337 V7 = V3;
338 }
339 else
340 {
341 // copy i_generator for o_generator
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
343 {__failure = 9; goto __label_9;};
344 }
345 {
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
347 curandStatus_t err = curandGenerateNormal(*gen,
348 CudaNdarray_DEV_DATA(V1),
349 n_elements,
350 0.0, 1.0);
351
352
353 if (err != CURAND_STATUS_SUCCESS)
354 {
355 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
356 {__failure = 9; goto __label_9;};
357 }
358 cudaThreadSynchronize();
359 }
360 //////// </ code generated by CURAND_Base>
361 __label_9:
362
363 double __DUMMY_9;
364
365 }
366 __label_8:
367
368 if (!__failure) {
369
370 assert(py_V7->ob_refcnt > 1);
371 Py_DECREF(py_V7);
372 py_V7 = V7 ? V7 : Py_None;
373 Py_INCREF(py_V7);
374
375 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
376 {Py_XINCREF(py_V7);}
377 PyList_SET_ITEM(storage_V7, 0, py_V7);
378 {Py_XDECREF(old);}
379 }
380
381 Py_XDECREF(V7);
382
383 {Py_XDECREF(py_V7);}
384
385 double __DUMMY_8;
386
387 }
388 __label_6:
389
390 if (V5) {
391 Py_XDECREF(V5);
392 }
393
394 {Py_XDECREF(py_V5);}
395
396 double __DUMMY_6;
397
398 }
399 __label_4:
400
401 Py_XDECREF(V3);
402
403 {Py_XDECREF(py_V3);}
404
405 double __DUMMY_4;
406
407 }
408 __label_2:
409
410 if (!__failure) {
411
412 //std::cerr << "sync\n";
413 if (NULL == V1) {
414 // failure: sync None to storage
415 Py_XDECREF(py_V1);
416 py_V1 = Py_None;
417 Py_INCREF(py_V1);
418 }
419 else
420 {
421 if (py_V1 != (PyObject*)V1)
422 {
423 Py_XDECREF(py_V1);
424 py_V1 = (PyObject*)V1;
425 Py_INCREF(py_V1);
426 }
427 assert(py_V1->ob_refcnt);
428 }
429
430 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
431 {Py_XINCREF(py_V1);}
432 PyList_SET_ITEM(storage_V1, 0, py_V1);
433 {Py_XDECREF(old);}
434 }
435
436 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
437 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
438 if (V1)
439 {
440 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
441 Py_XDECREF(V1);
442 }
443 //std::cerr << "cleanup done" << py_V1 << "\n";
444
445 {Py_XDECREF(py_V1);}
446
447 double __DUMMY_2;
448
449 }
450
451
452 if (__failure) {
453 // When there is a failure, this code puts the exception
454 // in __ERROR.
455 PyObject* err_type = NULL;
456 PyObject* err_msg = NULL;
457 PyObject* err_traceback = NULL;
458 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
459 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
460 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
461 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
462 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
463 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
464 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
465 PyList_SET_ITEM(__ERROR, 0, err_type);
466 PyList_SET_ITEM(__ERROR, 1, err_msg);
467 PyList_SET_ITEM(__ERROR, 2, err_traceback);
468 {Py_XDECREF(old_err_type);}
469 {Py_XDECREF(old_err_msg);}
470 {Py_XDECREF(old_err_traceback);}
471 }
472 // The failure code is returned to index what code block failed.
473 return __failure;
474
475 }
476 };
477
478
479 int __struct_compiled_op_889e175e75159a3e61d065caf0802126_executor(__struct_compiled_op_889e175e75159a3e61d065caf0802126* self) {
480 return self->run();
481 }
482
483 void __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor(void* executor, void* self) {
484 //printf("doing cleanup\n");
485 //fflush(stdout);
486 // ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self)->cleanup();
487 // free(self);
488 delete ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self);
489 //printf("done cleanup\n");
490 //fflush(stdout);
491 }
492
493 //////////////////////
494 //// Functions
495 //////////////////////
496 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
497 assert(PyTuple_Check(argtuple));
498 if (5 != PyTuple_Size(argtuple)){
499 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
500 return NULL;
501 }
502 __struct_compiled_op_889e175e75159a3e61d065caf0802126* struct_ptr = new __struct_compiled_op_889e175e75159a3e61d065caf0802126();
503 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
504 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_889e175e75159a3e61d065caf0802126_executor), struct_ptr, __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor);
505 return thunk; }
506
507 //////////////////////
508 //// Module init
509 //////////////////////
510 static PyMethodDef MyMethods[] = {
511 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
512 {NULL, NULL, 0, NULL}
513 };
514 PyMODINIT_FUNC init889e175e75159a3e61d065caf0802126(void){
515 import_array();
516 (void) Py_InitModule("889e175e75159a3e61d065caf0802126", MyMethods);
517 }
518
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
E..............KK....................................................WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((3, (4,), (4,), (4,), (4,), (4,)), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.scalar.basic.Composite object at 0x1224add0>, ((Scalar(float64), ((-1, 0), False)), (Scalar(float64), ((-1, 1), False)), (Scalar(float64), ((-1, 2), False))), (1, (False, False, False)))))])
.WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((3, (4,), (4,), (4,)), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.scalar.basic.Composite object at 0x125546d0>, ((Scalar(float64), ((-1, 0), False)), (Scalar(float64), ((-1, 1), False))), (1, (False,)))))])
.WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((3, (4,), (4,), (4,)), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.scalar.basic.Composite object at 0x11c8c990>, ((Scalar(float64), ((-1, 0), False)), (Scalar(float64), ((-1, 1), False))), (1, (False,)))))])
.............................../usr/lib/python2.7/dist-packages/scipy/sparse/data.py:54: ComplexWarning: Casting complex values to real discards the imaginary part
return self._with_data(self.data.astype(t))
/usr/local/lib/python2.7/dist-packages/theano/sparse/tests/test_basic.py:2021: ComplexWarning: Casting complex values to real discards the imaginary part
expected = data.toarray().astype(o_dtype)
...../usr/lib/python2.7/dist-packages/scipy/sparse/compressed.py:486: SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning)
/usr/lib/python2.7/dist-packages/scipy/sparse/compressed.py:486: SparseEfficiencyWarning: changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning)
.....................................................S.......S...........................................................................................................................................^[^[^[........................S.SS............................................................................................................................................................................................................................................................................................................^[................................................................................................................................................................................................................................................................................................................................................................................K..............................................................................................................................S..................................................................................................................................................................................K..KKKK.K.....................................................K......................../usr/local/lib/python2.7/dist-packages/theano/tensor/tests/test_naacl09.py:69: UserWarning: RandomStreams is deprecated and will be removed in release 0.7. Use shared_randomstreams.RandomStreams or MRG_RandomStreams instead.
self.random = T.RandomStreams()
.....................................................S.S..S................................................K......K..................................................................................................................................................................SS...SSSSS..........K.............................................
======================================================================
ERROR: Run the tests for `uniform` with different settings for the
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 59, in check_uniform_basic
f0 = theano.function([], u0, mode=mode_with_gpu)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function
profile=profile)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc
on_unused_input=on_unused_input)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk
output_storage = output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all
for node in order]
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key
module = compile_steps.next()
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpsYHM22/7a7573cd1a887cbf5d8946c487571964.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Uniform{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]')
-------------------- >> begin captured stdout << ---------------------
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpsYHM22/7a7573cd1a887cbf5d8946c487571964.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
--------------------- >> end captured stdout << ----------------------
======================================================================
ERROR: Run the tests for `uniform` with different settings for the
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 59, in check_uniform_basic
f0 = theano.function([], u0, mode=mode_with_gpu)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function
profile=profile)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc
on_unused_input=on_unused_input)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk
output_storage = output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all
for node in order]
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key
module = compile_steps.next()
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp9zHeYu/7a7573cd1a887cbf5d8946c487571964.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Uniform{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]')
-------------------- >> begin captured stdout << ---------------------
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp9zHeYu/7a7573cd1a887cbf5d8946c487571964.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
--------------------- >> end captured stdout << ----------------------
======================================================================
ERROR: Run the tests for `uniform` with different settings for the
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 59, in check_uniform_basic
f0 = theano.function([], u0, mode=mode_with_gpu)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function
profile=profile)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc
on_unused_input=on_unused_input)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk
output_storage = output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all
for node in order]
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key
module = compile_steps.next()
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp4H_5Ej/7a7573cd1a887cbf5d8946c487571964.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Uniform{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]')
-------------------- >> begin captured stdout << ---------------------
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp4H_5Ej/7a7573cd1a887cbf5d8946c487571964.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
--------------------- >> end captured stdout << ----------------------
======================================================================
ERROR: Run the tests for `normal` with different settings for the
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 127, in check_normal_basic
f0 = theano.function([], u0, mode=mode_with_gpu)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function
profile=profile)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc
on_unused_input=on_unused_input)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk
output_storage = output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all
for node in order]
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key
module = compile_steps.next()
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpCuTurr/889e175e75159a3e61d065caf0802126.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Normal{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]')
-------------------- >> begin captured stdout << ---------------------
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpCuTurr/889e175e75159a3e61d065caf0802126.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
--------------------- >> end captured stdout << ----------------------
======================================================================
ERROR: Run the tests for `normal` with different settings for the
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 127, in check_normal_basic
f0 = theano.function([], u0, mode=mode_with_gpu)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function
profile=profile)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc
on_unused_input=on_unused_input)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk
output_storage = output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all
for node in order]
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key
module = compile_steps.next()
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpjF7IY9/889e175e75159a3e61d065caf0802126.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Normal{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]')
-------------------- >> begin captured stdout << ---------------------
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpjF7IY9/889e175e75159a3e61d065caf0802126.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
--------------------- >> end captured stdout << ----------------------
======================================================================
ERROR: Run the tests for `normal` with different settings for the
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 127, in check_normal_basic
f0 = theano.function([], u0, mode=mode_with_gpu)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function
profile=profile)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc
on_unused_input=on_unused_input)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk
output_storage = output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all
for node in order]
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key
module = compile_steps.next()
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp6Uc8gr/889e175e75159a3e61d065caf0802126.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Normal{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]')
-------------------- >> begin captured stdout << ---------------------
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp6Uc8gr/889e175e75159a3e61d065caf0802126.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
--------------------- >> end captured stdout << ----------------------
----------------------------------------------------------------------
Ran 2353 tests in 6431.965s
FAILED (KNOWNFAIL=14, SKIP=23, errors=6)
<nose.result.TextTestResult run=2353 errors=6 failures=0>
>>>
.Xauthority .bashrc .hgrc .nv/ .selected_editor Theano.dev/ playing_with_theano/
.bash_history .cache/ .local/ .pip/ .ssh/ cuda5.5/ temp/
.bash_logout .config/ .matplotlib/ .profile .theano/ ocropus/
>>> theano.__version__
'0.6.0rc3'
>>> theano.test()
Theano version 0.6.0rc3.dev-6ecd6c6cfb8c06d8fd0bb73cf6924e1171e4d069
theano is installed in /home/ludwig/Theano/theano
NumPy version 1.6.1
NumPy is installed in /usr/lib/python2.7/dist-packages/numpy
Python version 2.7.3 (default, Aug 1 2012, 05:14:39) [GCC 4.6.3]
nose version 1.1.2
/home/ludwig/Theano/theano/misc/pycuda_init.py:34: UserWarning: PyCUDA import failed in theano.misc.pycuda_init
warnings.warn("PyCUDA import failed in theano.misc.pycuda_init")
Using gpu device 0: GeForce GTX 670
.........................................E.............../home/ludwig/Theano/theano/compile/tests/test_inplace_opt_for_value.py:170: UserWarning: theano modules are deprecated and will be removed in release 0.7
super(ExampleRNN, self).__init__()
..............................*** NaN detected ***
Elemwise{Composite{[mul(log(i0), i0)]}} [@A] ''
|x [@B]
Inputs : [array(0.0)]
Outputs: [array(nan)]
.*** NaN detected ***
Elemwise{Mul{output_types_preference=transfer_type{0}}}[(0, 0)] [@A] ''
|Elemwise{log,no_inplace} [@B] ''
| |x [@C]
|x [@C]
Inputs : [array(nan), array(0.0)]
Outputs: [array(nan)]
.*** NaN detected ***
Elemwise{mul,no_inplace} [@A] ''
|Elemwise{log,no_inplace} [@B] ''
| |CGer{destructive} [@C] ''
| |Alloc [@D] ''
| | |TensorConstant{0.0} [@E]
| | |Shape_i{0} [@F] ''
| | | |x [@G]
| | |Shape_i{0} [@F] ''
| |TensorConstant{1.0} [@H]
| |x [@G]
| |x [@G]
|CGer{destructive} [@C] ''
Inputs : [array([[-inf, -inf],
[-inf, -inf]]), array([[ 0., 0.],
[ 0., 0.]])]
Outputs: [array([[ nan, nan],
[ nan, nan]])]
...........................................................Yay, TEST PASSED
.0.591525729001
0.591525729001
0.591525729001
0.591525729001
.......WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((1,), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fPIC', '-fno-math-errno', '-m64'), (), (), 'NPY_ABI_VERSION=0x1000009', u'c_compiler_str=g++ 4.6', 'md5:2efdfc3f30b8f0efb4ed894c4c08f52f', (<theano.gof.tests.test_compute_test_value.IncOneC object at 0xacd29d0>, ((Scalar(int32), ((-1, 0), False)),), (1, (False,)))))])
...................................................case 1
... passed
case 2
b
... passed
case 1
... passed
case 2
b
... passed
case 1
... passed
case 2
b
... passed
case 1
... passed
case 2
b
... passed
.20.5
... passed
.........................[Op3(Op4(x, y))]
[Op2(x, y)]
.[Op1(Op1(Op3(x, y)))]
.before [Op3(Op4(x, y))]
after [Op1(x, y)]
........................................c|py takes 0.001862 s/Kop
vmLinker takes 0.001792 s/Kop
vmLinker_nogc takes 0.001305 s/Kop
vmLinker_CLOOP takes 0.000271 s/Kop
numpy takes 0.000000 s/Kop
.vmLinker takes 0.072410 s/Kop
vmLinker_nogc takes 0.062354 s/Kop
vmLinker_C takes 0.017431 s/Kop
.................................................................................................................................................................................................................................................(3, 10, 11, 11) (2, 10, 10, 10) (10, 11, 11) (10, 10)
HostFromGpu [@A] '' 4
|GpuConv{full, (1, 1), None, (10, 10), True, (10, 11, 11), (10, 10)} [@B] '' 3
|GpuFromHost [@C] '' 1
| |<TensorType(float32, 4D)> [@D]
|GpuDimShuffle{1,0,2,3} [@E] '' 2
|GpuFromHost [@F] '' 0
|<TensorType(float32, 4D)> [@G]
(3, 10, 11, 11) (2, 10, 10, 10) (10, 22, 22) (10, 10)
HostFromGpu [@A] '' 8
|GpuConv{full, (1, 1), None, (10, 10), True, (10, 11, 11), (10, 10)} [@B] '' 7
|GpuIncSubtensor{InplaceSet;::, ::, ::2, ::2} [@C] '' 6
| |GpuAlloc{memset_0=True} [@D] '' 5
| | |CudaNdarrayConstant{[[[[ 0.]]]]} [@E]
| | |Subtensor{0} [@F] '' 4
| | | |GpuShape [@G] '' 3
| | | |GpuFromHost [@H] '' 1
| | | |<TensorType(float32, 4D)> [@I]
| | |TensorConstant{10} [@J]
| | |TensorConstant{22} [@K]
| | |TensorConstant{22} [@K]
| |GpuFromHost [@H] '' 1
|GpuDimShuffle{1,0,2,3} [@L] '' 2
|GpuFromHost [@M] '' 0
|<TensorType(float32, 4D)> [@N]
(3, 10, 11, 11) (2, 10, 10, 10) (10, 33, 33) (10, 10)
HostFromGpu [@A] '' 8
|GpuConv{full, (1, 1), None, (10, 10), True, (10, 11, 11), (10, 10)} [@B] '' 7
|GpuIncSubtensor{InplaceSet;::, ::, ::3, ::3} [@C] '' 6
| |GpuAlloc{memset_0=True} [@D] '' 5
| | |CudaNdarrayConstant{[[[[ 0.]]]]} [@E]
| | |Subtensor{0} [@F] '' 4
| | | |GpuShape [@G] '' 3
| | | |GpuFromHost [@H] '' 1
| | | |<TensorType(float32, 4D)> [@I]
| | |TensorConstant{10} [@J]
| | |TensorConstant{33} [@K]
| | |TensorConstant{33} [@K]
| |GpuFromHost [@H] '' 1
|GpuDimShuffle{1,0,2,3} [@L] '' 2
|GpuFromHost [@M] '' 0
|<TensorType(float32, 4D)> [@N]
./usr/lib/python2.7/dist-packages/scipy/signal/signaltools.py:408: ComplexWarning: Casting complex values to real discards the imaginary part
return sigtools._convolve2d(in1,in2,1,val,bval,fillvalue)
Executed 284 different shapes
.Executed 1460 different shapes
.Executed 143 different shapes
.Executed 368 different shapes
.Executed 1098 different shapes
.Executed 736 different shapes
.Executed 1107 different shapes
.Executed 2936 different shapes
.Executed 1374 different shapes
Executed 458 different shapes
......................................float32
Before shared variable ('n malloc on the gpu', 1)
Shared took 0 kB
Before compilation ('n malloc on the gpu', 2)
After function compilation 1 ('n malloc on the gpu', 3)
After function compilation 2 ('n malloc on the gpu', 6)
After function evaluation 1 ('n malloc on the gpu', 6)
After function evaluation 2 ('n malloc on the gpu', 6)
After function evaluation 1 ('n malloc on the gpu', 6)
After function evaluation 2 ('n malloc on the gpu', 6)
After function evaluation 1 ('n malloc on the gpu', 6)
After function evaluation 2 ('n malloc on the gpu', 6)
After deleting function 2 ('n malloc on the gpu', 2)
After deleting shared variable and ref to it ('n malloc on the gpu', 1)
float64
Before shared variable ('n malloc on the gpu', 1)
Shared took 0 kB
Before compilation ('n malloc on the gpu', 2)
After function compilation 1 ('n malloc on the gpu', 2)
After function compilation 2 ('n malloc on the gpu', 2)
After function evaluation 1 ('n malloc on the gpu', 2)
After function evaluation 2 ('n malloc on the gpu', 2)
After function evaluation 1 ('n malloc on the gpu', 2)
After function evaluation 2 ('n malloc on the gpu', 2)
After function evaluation 1 ('n malloc on the gpu', 2)
After function evaluation 2 ('n malloc on the gpu', 2)
After deleting function 2 ('n malloc on the gpu', 2)
After deleting shared variable and ref to it ('n malloc on the gpu', 1)
.float32
Before shared variable ('n malloc on the gpu', 1)
Shared took 0 kB
Before compilation ('n malloc on the gpu', 2)
After function compilation 1 ('n malloc on the gpu', 4)
After function evaluation branch true ('n malloc on the gpu', 4)
After function evaluation branch false ('n malloc on the gpu', 4)
After function evaluation branch true ('n malloc on the gpu', 4)
After function evaluation branch false ('n malloc on the gpu', 4)
After function evaluation branch true ('n malloc on the gpu', 4)
After function evaluation branch false ('n malloc on the gpu', 4)
After deleting function 1 ('n malloc on the gpu', 2)
After deleting shared variable and ref to it ('n malloc on the gpu', 1)
float64
Before shared variable ('n malloc on the gpu', 1)
Shared took 0 kB
Before compilation ('n malloc on the gpu', 2)
After function compilation 1 ('n malloc on the gpu', 2)
After function evaluation branch true ('n malloc on the gpu', 2)
After function evaluation branch false ('n malloc on the gpu', 2)
After function evaluation branch true ('n malloc on the gpu', 2)
After function evaluation branch false ('n malloc on the gpu', 2)
After function evaluation branch true ('n malloc on the gpu', 2)
After function evaluation branch false ('n malloc on the gpu', 2)
After deleting function 1 ('n malloc on the gpu', 2)
After deleting shared variable and ref to it ('n malloc on the gpu', 1)
...................................................... __str__ = [[ 0.41844133 0.42187652 0.41898602 0.91393226 0.2673761 ]
[ 0.43664843 0.95744944 0.87686652 0.56096673 0.20658322]
[ 0.18596491 0.4768765 0.82073575 0.22224128 0.80181545]
[ 0.82680261 0.51986992 0.56643027 0.00883366 0.15047923]
[ 0.39132819 0.62885153 0.2296776 0.30525967 0.92171496]]
...0 GpuFromHost(<TensorType(float32, matrix)>)
1 GpuFromHost(<TensorType(float32, matrix)>)
2 GpuElemwise{Composite{[add(add(i0, i1), i2)]}}[(0, 1)](a, GpuFromHost.0, GpuFromHost.0)
3 HostFromGpu(GpuElemwise{Composite{[add(add(i0, i1), i2)]}}[(0, 1)].0)
.Elemwise{Cast{float32}}(<TensorType(float64, matrix)>)
..1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 #if PY_MAJOR_VERSION >= 3
14 void free_generator(PyObject *_gen)
15 {
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen);
17 #else
18 void free_generator(void *_gen)
19 {
20 curandGenerator_t * gen = (curandGenerator_t*)_gen;
21 #endif
22
23 curandStatus_t err = curandDestroyGenerator(*gen);
24 if (err != CURAND_STATUS_SUCCESS)
25 {
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n",
27 (int)err);
28 }
29 free(gen);
30 }
31
32
33 struct __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4 {
34 PyObject* __ERROR;
35
36 PyObject* storage_V3;
37 PyObject* storage_V5;
38 PyObject* storage_V7;
39 PyObject* storage_V1;
40
41
42 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4() {}
43 ~__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(void) {
44 cleanup();
45 }
46
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
48 Py_XINCREF(storage_V3);
49 Py_XINCREF(storage_V5);
50 Py_XINCREF(storage_V7);
51 Py_XINCREF(storage_V1);
52 this->storage_V3 = storage_V3;
53 this->storage_V5 = storage_V5;
54 this->storage_V7 = storage_V7;
55 this->storage_V1 = storage_V1;
56 int __failure = 0;
57
58 {
59
60 {
61
62 {
63
64 {
65
66 this->__ERROR = __ERROR;
67 return 0;
68 __label_7:
69
70 double __DUMMY_7;
71
72 }
73 __label_5:
74
75 double __DUMMY_5;
76
77 }
78 __label_3:
79
80 double __DUMMY_3;
81
82 }
83 __label_1:
84
85 double __DUMMY_1;
86
87 }
88
89 Py_XDECREF(this->storage_V3);
90 Py_XDECREF(this->storage_V5);
91 Py_XDECREF(this->storage_V7);
92 Py_XDECREF(this->storage_V1);
93
94 if (__failure) {
95 // When there is a failure, this code puts the exception
96 // in __ERROR.
97 PyObject* err_type = NULL;
98 PyObject* err_msg = NULL;
99 PyObject* err_traceback = NULL;
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
107 PyList_SET_ITEM(__ERROR, 0, err_type);
108 PyList_SET_ITEM(__ERROR, 1, err_msg);
109 PyList_SET_ITEM(__ERROR, 2, err_traceback);
110 {Py_XDECREF(old_err_type);}
111 {Py_XDECREF(old_err_msg);}
112 {Py_XDECREF(old_err_traceback);}
113 }
114 // The failure code is returned to index what code block failed.
115 return __failure;
116
117 }
118 void cleanup(void) {
119 __label_1:
120
121 double __DUMMY_1;
122 __label_3:
123
124 double __DUMMY_3;
125 __label_5:
126
127 double __DUMMY_5;
128 __label_7:
129
130 double __DUMMY_7;
131
132 Py_XDECREF(this->storage_V3);
133 Py_XDECREF(this->storage_V5);
134 Py_XDECREF(this->storage_V7);
135 Py_XDECREF(this->storage_V1);
136 }
137 int run(void) {
138 int __failure = 0;
139
140 PyObject* py_V1;
141 CudaNdarray * V1;
142 PyObject* py_V3;
143
144 PyObject* V3;
145
146 PyObject* py_V5;
147
148 PyArrayObject* V5;
149 int type_num_V5;
150 typedef npy_int32 dtype_V5;
151
152 PyObject* py_V7;
153
154 PyObject* V7;
155
156 {
157
158 py_V1 = PyList_GET_ITEM(storage_V1, 0);
159 {Py_XINCREF(py_V1);}
160
161 if (py_V1 == Py_None)
162 {
163 V1 = NULL;
164 }
165 else
166 {
167
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
169 // and one ref from the local scope.
170
171 if (CudaNdarray_Check(py_V1))
172 {
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
174 V1 = (CudaNdarray*)py_V1;
175 //std::cerr << "c_extract " << V1 << '\n';
176 if (V1->nd != 2)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 Py_INCREF(py_V3);
236 V3 = py_V3;
237
238 {
239
240 py_V5 = PyList_GET_ITEM(storage_V5, 0);
241 {Py_XINCREF(py_V5);}
242
243 V5 = NULL;
244 if (py_V5 == Py_None) {
245 // We can either fail here or set V5 to NULL and rely on Ops
246 // using tensors to handle the NULL case, but if they fail to do so
247 // they'll end up with nasty segfaults, so this is public service.
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
249 {
250 __failure = 6;
251 if (!PyErr_Occurred()) {
252 PyErr_SetString(PyExc_RuntimeError,
253 "Unexpected error in an Op's C code. "
254 "No Python exception was set.");
255 }
256 goto __label_6;}
257 }
258 if (!PyArray_Check(py_V5)) {
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
260 {
261 __failure = 6;
262 if (!PyErr_Occurred()) {
263 PyErr_SetString(PyExc_RuntimeError,
264 "Unexpected error in an Op's C code. "
265 "No Python exception was set.");
266 }
267 goto __label_6;}
268 }
269 // We expect NPY_INT32
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
271 if (!PyArray_ISALIGNED(py_V5)) {
272 PyErr_Format(PyExc_NotImplementedError,
273 "expected an aligned array of type %ld "
274 "(NPY_INT32), got non-aligned array of type %ld"
275 " with %ld dimensions, with 3 last dims "
276 "%ld, %ld, %ld"
277 " and 3 last strides %ld %ld, %ld.",
278 (long int) NPY_INT32,
279 (long int) type_num_V5,
280 (long int) PyArray_NDIM(py_V5),
281 (long int) PyArray_NDIM(py_V5) >= 3 ?
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
283 (long int) PyArray_NDIM(py_V5) >= 2 ?
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
285 (long int) PyArray_NDIM(py_V5) >= 1 ?
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
287 (long int) PyArray_NDIM(py_V5) >= 3 ?
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
289 (long int) PyArray_NDIM(py_V5) >= 2 ?
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
291 (long int) PyArray_NDIM(py_V5) >= 1 ?
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
293 );
294 {
295 __failure = 6;
296 if (!PyErr_Occurred()) {
297 PyErr_SetString(PyExc_RuntimeError,
298 "Unexpected error in an Op's C code. "
299 "No Python exception was set.");
300 }
301 goto __label_6;}
302 }
303 // This is a TypeError to be consistent with DEBUG_MODE
304 // Note: DEBUG_MODE also tells the name of the container
305 if (type_num_V5 != NPY_INT32) {
306 PyErr_Format(PyExc_TypeError,
307 "expected type_num %d (NPY_INT32) got %d",
308 NPY_INT32, type_num_V5);
309 {
310 __failure = 6;
311 if (!PyErr_Occurred()) {
312 PyErr_SetString(PyExc_RuntimeError,
313 "Unexpected error in an Op's C code. "
314 "No Python exception was set.");
315 }
316 goto __label_6;}
317 }
318 V5 = (PyArrayObject*)(py_V5);
319 Py_XINCREF(V5);
320
321 {
322
323 py_V7 = Py_None;
324 {Py_XINCREF(py_V7);}
325
326 V7 = NULL;
327
328 {
329
330 //////// <code generated by CURAND_Base>
331 int odims[2];
332 int n_elements = 1;
333 int must_alloc_sample = ((NULL == V1)
334 || !CudaNdarray_Check(py_V1)
335 || (V1->nd != 2));
336
337 if (V5->nd != 1)
338 {
339 PyErr_SetString(PyExc_ValueError, "size must be vector");
340 {
341 __failure = 9;
342 if (!PyErr_Occurred()) {
343 PyErr_SetString(PyExc_RuntimeError,
344 "Unexpected error in an Op's C code. "
345 "No Python exception was set.");
346 }
347 goto __label_9;}
348 }
349 if (V5->dimensions[0] != 2)
350 {
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
352 2, V5->dimensions[0]);
353 {
354 __failure = 9;
355 if (!PyErr_Occurred()) {
356 PyErr_SetString(PyExc_RuntimeError,
357 "Unexpected error in an Op's C code. "
358 "No Python exception was set.");
359 }
360 goto __label_9;}
361 }
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
363 {
364 PyErr_SetString(PyExc_ValueError, "size must be int32");
365 {
366 __failure = 9;
367 if (!PyErr_Occurred()) {
368 PyErr_SetString(PyExc_RuntimeError,
369 "Unexpected error in an Op's C code. "
370 "No Python exception was set.");
371 }
372 goto __label_9;}
373 }
374 for (int i = 0; i < 2; ++i)
375 {
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
377 n_elements *= odims[i];
378 must_alloc_sample = (must_alloc_sample
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
380 }
381 if (must_alloc_sample)
382 {
383 Py_XDECREF(V1);
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
385 if(!V1)
386 {
387 {
388 __failure = 9;
389 if (!PyErr_Occurred()) {
390 PyErr_SetString(PyExc_RuntimeError,
391 "Unexpected error in an Op's C code. "
392 "No Python exception was set.");
393 }
394 goto __label_9;};
395 }
396 }
397 if (!PyCObject_Check(V3))
398 {
399 // allocate a new generator for o_generator
400 Py_XDECREF(V7);
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
402 assert(gen);
403 if (CURAND_STATUS_SUCCESS !=
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
406 {
407 __failure = 9;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_9;};
414 }
415 if (CURAND_STATUS_SUCCESS !=
416 curandSetPseudoRandomGeneratorSeed(*gen,234))
417 {
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
419 {
420 __failure = 9;
421 if (!PyErr_Occurred()) {
422 PyErr_SetString(PyExc_RuntimeError,
423 "Unexpected error in an Op's C code. "
424 "No Python exception was set.");
425 }
426 goto __label_9;};
427 }
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
429 assert (V3 == Py_False);
430 }
431 else if (1)
432 {
433 // use i_generator for o_generator
434 Py_XDECREF(V7);
435 Py_INCREF(V3);
436 V7 = V3;
437 }
438 else
439 {
440 // copy i_generator for o_generator
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
442 {
443 __failure = 9;
444 if (!PyErr_Occurred()) {
445 PyErr_SetString(PyExc_RuntimeError,
446 "Unexpected error in an Op's C code. "
447 "No Python exception was set.");
448 }
449 goto __label_9;};
450 }
451 {
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
453 curandStatus_t err = curandGenerateUniform(*gen,
454 CudaNdarray_DEV_DATA(V1),
455 n_elements);
456
457
458 if (err != CURAND_STATUS_SUCCESS)
459 {
460 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
461 {
462 __failure = 9;
463 if (!PyErr_Occurred()) {
464 PyErr_SetString(PyExc_RuntimeError,
465 "Unexpected error in an Op's C code. "
466 "No Python exception was set.");
467 }
468 goto __label_9;};
469 }
470 cudaThreadSynchronize();
471 }
472 //////// </ code generated by CURAND_Base>
473 __label_9:
474
475 double __DUMMY_9;
476
477 }
478 __label_8:
479
480 if (!__failure) {
481
482 assert(py_V7->ob_refcnt > 1);
483 Py_DECREF(py_V7);
484 py_V7 = V7 ? V7 : Py_None;
485 Py_INCREF(py_V7);
486
487 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
488 {Py_XINCREF(py_V7);}
489 PyList_SET_ITEM(storage_V7, 0, py_V7);
490 {Py_XDECREF(old);}
491 }
492
493 Py_XDECREF(V7);
494
495 {Py_XDECREF(py_V7);}
496
497 double __DUMMY_8;
498
499 }
500 __label_6:
501
502 if (V5) {
503 Py_XDECREF(V5);
504 }
505
506 {Py_XDECREF(py_V5);}
507
508 double __DUMMY_6;
509
510 }
511 __label_4:
512
513 Py_XDECREF(V3);
514
515 {Py_XDECREF(py_V3);}
516
517 double __DUMMY_4;
518
519 }
520 __label_2:
521
522 if (!__failure) {
523
524 //std::cerr << "sync\n";
525 if (NULL == V1) {
526 // failure: sync None to storage
527 Py_XDECREF(py_V1);
528 py_V1 = Py_None;
529 Py_INCREF(py_V1);
530 }
531 else
532 {
533 if (py_V1 != (PyObject*)V1)
534 {
535 Py_XDECREF(py_V1);
536 py_V1 = (PyObject*)V1;
537 Py_INCREF(py_V1);
538 }
539 assert(py_V1->ob_refcnt);
540 }
541
542 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
543 {Py_XINCREF(py_V1);}
544 PyList_SET_ITEM(storage_V1, 0, py_V1);
545 {Py_XDECREF(old);}
546 }
547
548 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
549 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
550 if (V1)
551 {
552 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
553 Py_XDECREF(V1);
554 }
555 //std::cerr << "cleanup done" << py_V1 << "\n";
556
557 {Py_XDECREF(py_V1);}
558
559 double __DUMMY_2;
560
561 }
562
563
564 if (__failure) {
565 // When there is a failure, this code puts the exception
566 // in __ERROR.
567 PyObject* err_type = NULL;
568 PyObject* err_msg = NULL;
569 PyObject* err_traceback = NULL;
570 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
571 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
572 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
573 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
574 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
575 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
576 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
577 PyList_SET_ITEM(__ERROR, 0, err_type);
578 PyList_SET_ITEM(__ERROR, 1, err_msg);
579 PyList_SET_ITEM(__ERROR, 2, err_traceback);
580 {Py_XDECREF(old_err_type);}
581 {Py_XDECREF(old_err_msg);}
582 {Py_XDECREF(old_err_traceback);}
583 }
584 // The failure code is returned to index what code block failed.
585 return __failure;
586
587 }
588 };
589
590
591 int __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor(__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* self) {
592 return self->run();
593 }
594
595 void __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor(void* executor, void* self) {
596 delete ((__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4*)self);
597 }
598
599 //////////////////////
600 //// Functions
601 //////////////////////
602 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
603 assert(PyTuple_Check(argtuple));
604 if (5 != PyTuple_Size(argtuple)){
605 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
606 return NULL;
607 }
608 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* struct_ptr = new __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4();
609 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
610 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor), struct_ptr, __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor);
611 return thunk; }
612
613 //////////////////////
614 //// Module init
615 //////////////////////
616 static PyMethodDef MyMethods[] = {
617 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
618 {NULL, NULL, 0, NULL}
619 };
620 PyMODINIT_FUNC inite89e1fed0e21a65d4b9fbb16fea234f4(void){
621 import_array();
622 (void) Py_InitModule("e89e1fed0e21a65d4b9fbb16fea234f4", MyMethods);
623 }
624
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp7caAcY/e89e1fed0e21a65d4b9fbb16fea234f4.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 #if PY_MAJOR_VERSION >= 3
14 void free_generator(PyObject *_gen)
15 {
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen);
17 #else
18 void free_generator(void *_gen)
19 {
20 curandGenerator_t * gen = (curandGenerator_t*)_gen;
21 #endif
22
23 curandStatus_t err = curandDestroyGenerator(*gen);
24 if (err != CURAND_STATUS_SUCCESS)
25 {
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n",
27 (int)err);
28 }
29 free(gen);
30 }
31
32
33 struct __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4 {
34 PyObject* __ERROR;
35
36 PyObject* storage_V3;
37 PyObject* storage_V5;
38 PyObject* storage_V7;
39 PyObject* storage_V1;
40
41
42 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4() {}
43 ~__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(void) {
44 cleanup();
45 }
46
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
48 Py_XINCREF(storage_V3);
49 Py_XINCREF(storage_V5);
50 Py_XINCREF(storage_V7);
51 Py_XINCREF(storage_V1);
52 this->storage_V3 = storage_V3;
53 this->storage_V5 = storage_V5;
54 this->storage_V7 = storage_V7;
55 this->storage_V1 = storage_V1;
56 int __failure = 0;
57
58 {
59
60 {
61
62 {
63
64 {
65
66 this->__ERROR = __ERROR;
67 return 0;
68 __label_7:
69
70 double __DUMMY_7;
71
72 }
73 __label_5:
74
75 double __DUMMY_5;
76
77 }
78 __label_3:
79
80 double __DUMMY_3;
81
82 }
83 __label_1:
84
85 double __DUMMY_1;
86
87 }
88
89 Py_XDECREF(this->storage_V3);
90 Py_XDECREF(this->storage_V5);
91 Py_XDECREF(this->storage_V7);
92 Py_XDECREF(this->storage_V1);
93
94 if (__failure) {
95 // When there is a failure, this code puts the exception
96 // in __ERROR.
97 PyObject* err_type = NULL;
98 PyObject* err_msg = NULL;
99 PyObject* err_traceback = NULL;
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
107 PyList_SET_ITEM(__ERROR, 0, err_type);
108 PyList_SET_ITEM(__ERROR, 1, err_msg);
109 PyList_SET_ITEM(__ERROR, 2, err_traceback);
110 {Py_XDECREF(old_err_type);}
111 {Py_XDECREF(old_err_msg);}
112 {Py_XDECREF(old_err_traceback);}
113 }
114 // The failure code is returned to index what code block failed.
115 return __failure;
116
117 }
118 void cleanup(void) {
119 __label_1:
120
121 double __DUMMY_1;
122 __label_3:
123
124 double __DUMMY_3;
125 __label_5:
126
127 double __DUMMY_5;
128 __label_7:
129
130 double __DUMMY_7;
131
132 Py_XDECREF(this->storage_V3);
133 Py_XDECREF(this->storage_V5);
134 Py_XDECREF(this->storage_V7);
135 Py_XDECREF(this->storage_V1);
136 }
137 int run(void) {
138 int __failure = 0;
139
140 PyObject* py_V1;
141 CudaNdarray * V1;
142 PyObject* py_V3;
143
144 PyObject* V3;
145
146 PyObject* py_V5;
147
148 PyArrayObject* V5;
149 int type_num_V5;
150 typedef npy_int32 dtype_V5;
151
152 PyObject* py_V7;
153
154 PyObject* V7;
155
156 {
157
158 py_V1 = PyList_GET_ITEM(storage_V1, 0);
159 {Py_XINCREF(py_V1);}
160
161 if (py_V1 == Py_None)
162 {
163 V1 = NULL;
164 }
165 else
166 {
167
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
169 // and one ref from the local scope.
170
171 if (CudaNdarray_Check(py_V1))
172 {
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
174 V1 = (CudaNdarray*)py_V1;
175 //std::cerr << "c_extract " << V1 << '\n';
176 if (V1->nd != 2)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 Py_INCREF(py_V3);
236 V3 = py_V3;
237
238 {
239
240 py_V5 = PyList_GET_ITEM(storage_V5, 0);
241 {Py_XINCREF(py_V5);}
242
243 V5 = NULL;
244 if (py_V5 == Py_None) {
245 // We can either fail here or set V5 to NULL and rely on Ops
246 // using tensors to handle the NULL case, but if they fail to do so
247 // they'll end up with nasty segfaults, so this is public service.
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
249 {
250 __failure = 6;
251 if (!PyErr_Occurred()) {
252 PyErr_SetString(PyExc_RuntimeError,
253 "Unexpected error in an Op's C code. "
254 "No Python exception was set.");
255 }
256 goto __label_6;}
257 }
258 if (!PyArray_Check(py_V5)) {
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
260 {
261 __failure = 6;
262 if (!PyErr_Occurred()) {
263 PyErr_SetString(PyExc_RuntimeError,
264 "Unexpected error in an Op's C code. "
265 "No Python exception was set.");
266 }
267 goto __label_6;}
268 }
269 // We expect NPY_INT32
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
271 if (!PyArray_ISALIGNED(py_V5)) {
272 PyErr_Format(PyExc_NotImplementedError,
273 "expected an aligned array of type %ld "
274 "(NPY_INT32), got non-aligned array of type %ld"
275 " with %ld dimensions, with 3 last dims "
276 "%ld, %ld, %ld"
277 " and 3 last strides %ld %ld, %ld.",
278 (long int) NPY_INT32,
279 (long int) type_num_V5,
280 (long int) PyArray_NDIM(py_V5),
281 (long int) PyArray_NDIM(py_V5) >= 3 ?
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
283 (long int) PyArray_NDIM(py_V5) >= 2 ?
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
285 (long int) PyArray_NDIM(py_V5) >= 1 ?
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
287 (long int) PyArray_NDIM(py_V5) >= 3 ?
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
289 (long int) PyArray_NDIM(py_V5) >= 2 ?
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
291 (long int) PyArray_NDIM(py_V5) >= 1 ?
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
293 );
294 {
295 __failure = 6;
296 if (!PyErr_Occurred()) {
297 PyErr_SetString(PyExc_RuntimeError,
298 "Unexpected error in an Op's C code. "
299 "No Python exception was set.");
300 }
301 goto __label_6;}
302 }
303 // This is a TypeError to be consistent with DEBUG_MODE
304 // Note: DEBUG_MODE also tells the name of the container
305 if (type_num_V5 != NPY_INT32) {
306 PyErr_Format(PyExc_TypeError,
307 "expected type_num %d (NPY_INT32) got %d",
308 NPY_INT32, type_num_V5);
309 {
310 __failure = 6;
311 if (!PyErr_Occurred()) {
312 PyErr_SetString(PyExc_RuntimeError,
313 "Unexpected error in an Op's C code. "
314 "No Python exception was set.");
315 }
316 goto __label_6;}
317 }
318 V5 = (PyArrayObject*)(py_V5);
319 Py_XINCREF(V5);
320
321 {
322
323 py_V7 = Py_None;
324 {Py_XINCREF(py_V7);}
325
326 V7 = NULL;
327
328 {
329
330 //////// <code generated by CURAND_Base>
331 int odims[2];
332 int n_elements = 1;
333 int must_alloc_sample = ((NULL == V1)
334 || !CudaNdarray_Check(py_V1)
335 || (V1->nd != 2));
336
337 if (V5->nd != 1)
338 {
339 PyErr_SetString(PyExc_ValueError, "size must be vector");
340 {
341 __failure = 9;
342 if (!PyErr_Occurred()) {
343 PyErr_SetString(PyExc_RuntimeError,
344 "Unexpected error in an Op's C code. "
345 "No Python exception was set.");
346 }
347 goto __label_9;}
348 }
349 if (V5->dimensions[0] != 2)
350 {
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
352 2, V5->dimensions[0]);
353 {
354 __failure = 9;
355 if (!PyErr_Occurred()) {
356 PyErr_SetString(PyExc_RuntimeError,
357 "Unexpected error in an Op's C code. "
358 "No Python exception was set.");
359 }
360 goto __label_9;}
361 }
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
363 {
364 PyErr_SetString(PyExc_ValueError, "size must be int32");
365 {
366 __failure = 9;
367 if (!PyErr_Occurred()) {
368 PyErr_SetString(PyExc_RuntimeError,
369 "Unexpected error in an Op's C code. "
370 "No Python exception was set.");
371 }
372 goto __label_9;}
373 }
374 for (int i = 0; i < 2; ++i)
375 {
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
377 n_elements *= odims[i];
378 must_alloc_sample = (must_alloc_sample
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
380 }
381 if (must_alloc_sample)
382 {
383 Py_XDECREF(V1);
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
385 if(!V1)
386 {
387 {
388 __failure = 9;
389 if (!PyErr_Occurred()) {
390 PyErr_SetString(PyExc_RuntimeError,
391 "Unexpected error in an Op's C code. "
392 "No Python exception was set.");
393 }
394 goto __label_9;};
395 }
396 }
397 if (!PyCObject_Check(V3))
398 {
399 // allocate a new generator for o_generator
400 Py_XDECREF(V7);
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
402 assert(gen);
403 if (CURAND_STATUS_SUCCESS !=
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
406 {
407 __failure = 9;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_9;};
414 }
415 if (CURAND_STATUS_SUCCESS !=
416 curandSetPseudoRandomGeneratorSeed(*gen,234))
417 {
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
419 {
420 __failure = 9;
421 if (!PyErr_Occurred()) {
422 PyErr_SetString(PyExc_RuntimeError,
423 "Unexpected error in an Op's C code. "
424 "No Python exception was set.");
425 }
426 goto __label_9;};
427 }
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
429 assert (V3 == Py_False);
430 }
431 else if (1)
432 {
433 // use i_generator for o_generator
434 Py_XDECREF(V7);
435 Py_INCREF(V3);
436 V7 = V3;
437 }
438 else
439 {
440 // copy i_generator for o_generator
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
442 {
443 __failure = 9;
444 if (!PyErr_Occurred()) {
445 PyErr_SetString(PyExc_RuntimeError,
446 "Unexpected error in an Op's C code. "
447 "No Python exception was set.");
448 }
449 goto __label_9;};
450 }
451 {
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
453 curandStatus_t err = curandGenerateUniform(*gen,
454 CudaNdarray_DEV_DATA(V1),
455 n_elements);
456
457
458 if (err != CURAND_STATUS_SUCCESS)
459 {
460 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
461 {
462 __failure = 9;
463 if (!PyErr_Occurred()) {
464 PyErr_SetString(PyExc_RuntimeError,
465 "Unexpected error in an Op's C code. "
466 "No Python exception was set.");
467 }
468 goto __label_9;};
469 }
470 cudaThreadSynchronize();
471 }
472 //////// </ code generated by CURAND_Base>
473 __label_9:
474
475 double __DUMMY_9;
476
477 }
478 __label_8:
479
480 if (!__failure) {
481
482 assert(py_V7->ob_refcnt > 1);
483 Py_DECREF(py_V7);
484 py_V7 = V7 ? V7 : Py_None;
485 Py_INCREF(py_V7);
486
487 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
488 {Py_XINCREF(py_V7);}
489 PyList_SET_ITEM(storage_V7, 0, py_V7);
490 {Py_XDECREF(old);}
491 }
492
493 Py_XDECREF(V7);
494
495 {Py_XDECREF(py_V7);}
496
497 double __DUMMY_8;
498
499 }
500 __label_6:
501
502 if (V5) {
503 Py_XDECREF(V5);
504 }
505
506 {Py_XDECREF(py_V5);}
507
508 double __DUMMY_6;
509
510 }
511 __label_4:
512
513 Py_XDECREF(V3);
514
515 {Py_XDECREF(py_V3);}
516
517 double __DUMMY_4;
518
519 }
520 __label_2:
521
522 if (!__failure) {
523
524 //std::cerr << "sync\n";
525 if (NULL == V1) {
526 // failure: sync None to storage
527 Py_XDECREF(py_V1);
528 py_V1 = Py_None;
529 Py_INCREF(py_V1);
530 }
531 else
532 {
533 if (py_V1 != (PyObject*)V1)
534 {
535 Py_XDECREF(py_V1);
536 py_V1 = (PyObject*)V1;
537 Py_INCREF(py_V1);
538 }
539 assert(py_V1->ob_refcnt);
540 }
541
542 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
543 {Py_XINCREF(py_V1);}
544 PyList_SET_ITEM(storage_V1, 0, py_V1);
545 {Py_XDECREF(old);}
546 }
547
548 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
549 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
550 if (V1)
551 {
552 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
553 Py_XDECREF(V1);
554 }
555 //std::cerr << "cleanup done" << py_V1 << "\n";
556
557 {Py_XDECREF(py_V1);}
558
559 double __DUMMY_2;
560
561 }
562
563
564 if (__failure) {
565 // When there is a failure, this code puts the exception
566 // in __ERROR.
567 PyObject* err_type = NULL;
568 PyObject* err_msg = NULL;
569 PyObject* err_traceback = NULL;
570 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
571 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
572 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
573 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
574 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
575 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
576 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
577 PyList_SET_ITEM(__ERROR, 0, err_type);
578 PyList_SET_ITEM(__ERROR, 1, err_msg);
579 PyList_SET_ITEM(__ERROR, 2, err_traceback);
580 {Py_XDECREF(old_err_type);}
581 {Py_XDECREF(old_err_msg);}
582 {Py_XDECREF(old_err_traceback);}
583 }
584 // The failure code is returned to index what code block failed.
585 return __failure;
586
587 }
588 };
589
590
591 int __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor(__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* self) {
592 return self->run();
593 }
594
595 void __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor(void* executor, void* self) {
596 delete ((__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4*)self);
597 }
598
599 //////////////////////
600 //// Functions
601 //////////////////////
602 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
603 assert(PyTuple_Check(argtuple));
604 if (5 != PyTuple_Size(argtuple)){
605 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
606 return NULL;
607 }
608 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* struct_ptr = new __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4();
609 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
610 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor), struct_ptr, __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor);
611 return thunk; }
612
613 //////////////////////
614 //// Module init
615 //////////////////////
616 static PyMethodDef MyMethods[] = {
617 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
618 {NULL, NULL, 0, NULL}
619 };
620 PyMODINIT_FUNC inite89e1fed0e21a65d4b9fbb16fea234f4(void){
621 import_array();
622 (void) Py_InitModule("e89e1fed0e21a65d4b9fbb16fea234f4", MyMethods);
623 }
624
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp2_748k/e89e1fed0e21a65d4b9fbb16fea234f4.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 #if PY_MAJOR_VERSION >= 3
14 void free_generator(PyObject *_gen)
15 {
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen);
17 #else
18 void free_generator(void *_gen)
19 {
20 curandGenerator_t * gen = (curandGenerator_t*)_gen;
21 #endif
22
23 curandStatus_t err = curandDestroyGenerator(*gen);
24 if (err != CURAND_STATUS_SUCCESS)
25 {
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n",
27 (int)err);
28 }
29 free(gen);
30 }
31
32
33 struct __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4 {
34 PyObject* __ERROR;
35
36 PyObject* storage_V3;
37 PyObject* storage_V5;
38 PyObject* storage_V7;
39 PyObject* storage_V1;
40
41
42 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4() {}
43 ~__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(void) {
44 cleanup();
45 }
46
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
48 Py_XINCREF(storage_V3);
49 Py_XINCREF(storage_V5);
50 Py_XINCREF(storage_V7);
51 Py_XINCREF(storage_V1);
52 this->storage_V3 = storage_V3;
53 this->storage_V5 = storage_V5;
54 this->storage_V7 = storage_V7;
55 this->storage_V1 = storage_V1;
56 int __failure = 0;
57
58 {
59
60 {
61
62 {
63
64 {
65
66 this->__ERROR = __ERROR;
67 return 0;
68 __label_7:
69
70 double __DUMMY_7;
71
72 }
73 __label_5:
74
75 double __DUMMY_5;
76
77 }
78 __label_3:
79
80 double __DUMMY_3;
81
82 }
83 __label_1:
84
85 double __DUMMY_1;
86
87 }
88
89 Py_XDECREF(this->storage_V3);
90 Py_XDECREF(this->storage_V5);
91 Py_XDECREF(this->storage_V7);
92 Py_XDECREF(this->storage_V1);
93
94 if (__failure) {
95 // When there is a failure, this code puts the exception
96 // in __ERROR.
97 PyObject* err_type = NULL;
98 PyObject* err_msg = NULL;
99 PyObject* err_traceback = NULL;
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
107 PyList_SET_ITEM(__ERROR, 0, err_type);
108 PyList_SET_ITEM(__ERROR, 1, err_msg);
109 PyList_SET_ITEM(__ERROR, 2, err_traceback);
110 {Py_XDECREF(old_err_type);}
111 {Py_XDECREF(old_err_msg);}
112 {Py_XDECREF(old_err_traceback);}
113 }
114 // The failure code is returned to index what code block failed.
115 return __failure;
116
117 }
118 void cleanup(void) {
119 __label_1:
120
121 double __DUMMY_1;
122 __label_3:
123
124 double __DUMMY_3;
125 __label_5:
126
127 double __DUMMY_5;
128 __label_7:
129
130 double __DUMMY_7;
131
132 Py_XDECREF(this->storage_V3);
133 Py_XDECREF(this->storage_V5);
134 Py_XDECREF(this->storage_V7);
135 Py_XDECREF(this->storage_V1);
136 }
137 int run(void) {
138 int __failure = 0;
139
140 PyObject* py_V1;
141 CudaNdarray * V1;
142 PyObject* py_V3;
143
144 PyObject* V3;
145
146 PyObject* py_V5;
147
148 PyArrayObject* V5;
149 int type_num_V5;
150 typedef npy_int32 dtype_V5;
151
152 PyObject* py_V7;
153
154 PyObject* V7;
155
156 {
157
158 py_V1 = PyList_GET_ITEM(storage_V1, 0);
159 {Py_XINCREF(py_V1);}
160
161 if (py_V1 == Py_None)
162 {
163 V1 = NULL;
164 }
165 else
166 {
167
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
169 // and one ref from the local scope.
170
171 if (CudaNdarray_Check(py_V1))
172 {
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
174 V1 = (CudaNdarray*)py_V1;
175 //std::cerr << "c_extract " << V1 << '\n';
176 if (V1->nd != 2)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 Py_INCREF(py_V3);
236 V3 = py_V3;
237
238 {
239
240 py_V5 = PyList_GET_ITEM(storage_V5, 0);
241 {Py_XINCREF(py_V5);}
242
243 V5 = NULL;
244 if (py_V5 == Py_None) {
245 // We can either fail here or set V5 to NULL and rely on Ops
246 // using tensors to handle the NULL case, but if they fail to do so
247 // they'll end up with nasty segfaults, so this is public service.
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
249 {
250 __failure = 6;
251 if (!PyErr_Occurred()) {
252 PyErr_SetString(PyExc_RuntimeError,
253 "Unexpected error in an Op's C code. "
254 "No Python exception was set.");
255 }
256 goto __label_6;}
257 }
258 if (!PyArray_Check(py_V5)) {
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
260 {
261 __failure = 6;
262 if (!PyErr_Occurred()) {
263 PyErr_SetString(PyExc_RuntimeError,
264 "Unexpected error in an Op's C code. "
265 "No Python exception was set.");
266 }
267 goto __label_6;}
268 }
269 // We expect NPY_INT32
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
271 if (!PyArray_ISALIGNED(py_V5)) {
272 PyErr_Format(PyExc_NotImplementedError,
273 "expected an aligned array of type %ld "
274 "(NPY_INT32), got non-aligned array of type %ld"
275 " with %ld dimensions, with 3 last dims "
276 "%ld, %ld, %ld"
277 " and 3 last strides %ld %ld, %ld.",
278 (long int) NPY_INT32,
279 (long int) type_num_V5,
280 (long int) PyArray_NDIM(py_V5),
281 (long int) PyArray_NDIM(py_V5) >= 3 ?
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
283 (long int) PyArray_NDIM(py_V5) >= 2 ?
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
285 (long int) PyArray_NDIM(py_V5) >= 1 ?
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
287 (long int) PyArray_NDIM(py_V5) >= 3 ?
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
289 (long int) PyArray_NDIM(py_V5) >= 2 ?
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
291 (long int) PyArray_NDIM(py_V5) >= 1 ?
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
293 );
294 {
295 __failure = 6;
296 if (!PyErr_Occurred()) {
297 PyErr_SetString(PyExc_RuntimeError,
298 "Unexpected error in an Op's C code. "
299 "No Python exception was set.");
300 }
301 goto __label_6;}
302 }
303 // This is a TypeError to be consistent with DEBUG_MODE
304 // Note: DEBUG_MODE also tells the name of the container
305 if (type_num_V5 != NPY_INT32) {
306 PyErr_Format(PyExc_TypeError,
307 "expected type_num %d (NPY_INT32) got %d",
308 NPY_INT32, type_num_V5);
309 {
310 __failure = 6;
311 if (!PyErr_Occurred()) {
312 PyErr_SetString(PyExc_RuntimeError,
313 "Unexpected error in an Op's C code. "
314 "No Python exception was set.");
315 }
316 goto __label_6;}
317 }
318 V5 = (PyArrayObject*)(py_V5);
319 Py_XINCREF(V5);
320
321 {
322
323 py_V7 = Py_None;
324 {Py_XINCREF(py_V7);}
325
326 V7 = NULL;
327
328 {
329
330 //////// <code generated by CURAND_Base>
331 int odims[2];
332 int n_elements = 1;
333 int must_alloc_sample = ((NULL == V1)
334 || !CudaNdarray_Check(py_V1)
335 || (V1->nd != 2));
336
337 if (V5->nd != 1)
338 {
339 PyErr_SetString(PyExc_ValueError, "size must be vector");
340 {
341 __failure = 9;
342 if (!PyErr_Occurred()) {
343 PyErr_SetString(PyExc_RuntimeError,
344 "Unexpected error in an Op's C code. "
345 "No Python exception was set.");
346 }
347 goto __label_9;}
348 }
349 if (V5->dimensions[0] != 2)
350 {
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
352 2, V5->dimensions[0]);
353 {
354 __failure = 9;
355 if (!PyErr_Occurred()) {
356 PyErr_SetString(PyExc_RuntimeError,
357 "Unexpected error in an Op's C code. "
358 "No Python exception was set.");
359 }
360 goto __label_9;}
361 }
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
363 {
364 PyErr_SetString(PyExc_ValueError, "size must be int32");
365 {
366 __failure = 9;
367 if (!PyErr_Occurred()) {
368 PyErr_SetString(PyExc_RuntimeError,
369 "Unexpected error in an Op's C code. "
370 "No Python exception was set.");
371 }
372 goto __label_9;}
373 }
374 for (int i = 0; i < 2; ++i)
375 {
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
377 n_elements *= odims[i];
378 must_alloc_sample = (must_alloc_sample
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
380 }
381 if (must_alloc_sample)
382 {
383 Py_XDECREF(V1);
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
385 if(!V1)
386 {
387 {
388 __failure = 9;
389 if (!PyErr_Occurred()) {
390 PyErr_SetString(PyExc_RuntimeError,
391 "Unexpected error in an Op's C code. "
392 "No Python exception was set.");
393 }
394 goto __label_9;};
395 }
396 }
397 if (!PyCObject_Check(V3))
398 {
399 // allocate a new generator for o_generator
400 Py_XDECREF(V7);
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
402 assert(gen);
403 if (CURAND_STATUS_SUCCESS !=
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
406 {
407 __failure = 9;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_9;};
414 }
415 if (CURAND_STATUS_SUCCESS !=
416 curandSetPseudoRandomGeneratorSeed(*gen,234))
417 {
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
419 {
420 __failure = 9;
421 if (!PyErr_Occurred()) {
422 PyErr_SetString(PyExc_RuntimeError,
423 "Unexpected error in an Op's C code. "
424 "No Python exception was set.");
425 }
426 goto __label_9;};
427 }
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
429 assert (V3 == Py_False);
430 }
431 else if (1)
432 {
433 // use i_generator for o_generator
434 Py_XDECREF(V7);
435 Py_INCREF(V3);
436 V7 = V3;
437 }
438 else
439 {
440 // copy i_generator for o_generator
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
442 {
443 __failure = 9;
444 if (!PyErr_Occurred()) {
445 PyErr_SetString(PyExc_RuntimeError,
446 "Unexpected error in an Op's C code. "
447 "No Python exception was set.");
448 }
449 goto __label_9;};
450 }
451 {
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
453 curandStatus_t err = curandGenerateUniform(*gen,
454 CudaNdarray_DEV_DATA(V1),
455 n_elements);
456
457
458 if (err != CURAND_STATUS_SUCCESS)
459 {
460 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
461 {
462 __failure = 9;
463 if (!PyErr_Occurred()) {
464 PyErr_SetString(PyExc_RuntimeError,
465 "Unexpected error in an Op's C code. "
466 "No Python exception was set.");
467 }
468 goto __label_9;};
469 }
470 cudaThreadSynchronize();
471 }
472 //////// </ code generated by CURAND_Base>
473 __label_9:
474
475 double __DUMMY_9;
476
477 }
478 __label_8:
479
480 if (!__failure) {
481
482 assert(py_V7->ob_refcnt > 1);
483 Py_DECREF(py_V7);
484 py_V7 = V7 ? V7 : Py_None;
485 Py_INCREF(py_V7);
486
487 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
488 {Py_XINCREF(py_V7);}
489 PyList_SET_ITEM(storage_V7, 0, py_V7);
490 {Py_XDECREF(old);}
491 }
492
493 Py_XDECREF(V7);
494
495 {Py_XDECREF(py_V7);}
496
497 double __DUMMY_8;
498
499 }
500 __label_6:
501
502 if (V5) {
503 Py_XDECREF(V5);
504 }
505
506 {Py_XDECREF(py_V5);}
507
508 double __DUMMY_6;
509
510 }
511 __label_4:
512
513 Py_XDECREF(V3);
514
515 {Py_XDECREF(py_V3);}
516
517 double __DUMMY_4;
518
519 }
520 __label_2:
521
522 if (!__failure) {
523
524 //std::cerr << "sync\n";
525 if (NULL == V1) {
526 // failure: sync None to storage
527 Py_XDECREF(py_V1);
528 py_V1 = Py_None;
529 Py_INCREF(py_V1);
530 }
531 else
532 {
533 if (py_V1 != (PyObject*)V1)
534 {
535 Py_XDECREF(py_V1);
536 py_V1 = (PyObject*)V1;
537 Py_INCREF(py_V1);
538 }
539 assert(py_V1->ob_refcnt);
540 }
541
542 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
543 {Py_XINCREF(py_V1);}
544 PyList_SET_ITEM(storage_V1, 0, py_V1);
545 {Py_XDECREF(old);}
546 }
547
548 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
549 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
550 if (V1)
551 {
552 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
553 Py_XDECREF(V1);
554 }
555 //std::cerr << "cleanup done" << py_V1 << "\n";
556
557 {Py_XDECREF(py_V1);}
558
559 double __DUMMY_2;
560
561 }
562
563
564 if (__failure) {
565 // When there is a failure, this code puts the exception
566 // in __ERROR.
567 PyObject* err_type = NULL;
568 PyObject* err_msg = NULL;
569 PyObject* err_traceback = NULL;
570 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
571 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
572 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
573 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
574 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
575 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
576 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
577 PyList_SET_ITEM(__ERROR, 0, err_type);
578 PyList_SET_ITEM(__ERROR, 1, err_msg);
579 PyList_SET_ITEM(__ERROR, 2, err_traceback);
580 {Py_XDECREF(old_err_type);}
581 {Py_XDECREF(old_err_msg);}
582 {Py_XDECREF(old_err_traceback);}
583 }
584 // The failure code is returned to index what code block failed.
585 return __failure;
586
587 }
588 };
589
590
591 int __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor(__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* self) {
592 return self->run();
593 }
594
595 void __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor(void* executor, void* self) {
596 delete ((__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4*)self);
597 }
598
599 //////////////////////
600 //// Functions
601 //////////////////////
602 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
603 assert(PyTuple_Check(argtuple));
604 if (5 != PyTuple_Size(argtuple)){
605 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
606 return NULL;
607 }
608 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* struct_ptr = new __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4();
609 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
610 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor), struct_ptr, __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor);
611 return thunk; }
612
613 //////////////////////
614 //// Module init
615 //////////////////////
616 static PyMethodDef MyMethods[] = {
617 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
618 {NULL, NULL, 0, NULL}
619 };
620 PyMODINIT_FUNC inite89e1fed0e21a65d4b9fbb16fea234f4(void){
621 import_array();
622 (void) Py_InitModule("e89e1fed0e21a65d4b9fbb16fea234f4", MyMethods);
623 }
624
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmptY4XQ8/e89e1fed0e21a65d4b9fbb16fea234f4.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 #if PY_MAJOR_VERSION >= 3
14 void free_generator(PyObject *_gen)
15 {
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen);
17 #else
18 void free_generator(void *_gen)
19 {
20 curandGenerator_t * gen = (curandGenerator_t*)_gen;
21 #endif
22
23 curandStatus_t err = curandDestroyGenerator(*gen);
24 if (err != CURAND_STATUS_SUCCESS)
25 {
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n",
27 (int)err);
28 }
29 free(gen);
30 }
31
32
33 struct __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6 {
34 PyObject* __ERROR;
35
36 PyObject* storage_V3;
37 PyObject* storage_V5;
38 PyObject* storage_V7;
39 PyObject* storage_V1;
40
41
42 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6() {}
43 ~__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(void) {
44 cleanup();
45 }
46
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
48 Py_XINCREF(storage_V3);
49 Py_XINCREF(storage_V5);
50 Py_XINCREF(storage_V7);
51 Py_XINCREF(storage_V1);
52 this->storage_V3 = storage_V3;
53 this->storage_V5 = storage_V5;
54 this->storage_V7 = storage_V7;
55 this->storage_V1 = storage_V1;
56 int __failure = 0;
57
58 {
59
60 {
61
62 {
63
64 {
65
66 this->__ERROR = __ERROR;
67 return 0;
68 __label_7:
69
70 double __DUMMY_7;
71
72 }
73 __label_5:
74
75 double __DUMMY_5;
76
77 }
78 __label_3:
79
80 double __DUMMY_3;
81
82 }
83 __label_1:
84
85 double __DUMMY_1;
86
87 }
88
89 Py_XDECREF(this->storage_V3);
90 Py_XDECREF(this->storage_V5);
91 Py_XDECREF(this->storage_V7);
92 Py_XDECREF(this->storage_V1);
93
94 if (__failure) {
95 // When there is a failure, this code puts the exception
96 // in __ERROR.
97 PyObject* err_type = NULL;
98 PyObject* err_msg = NULL;
99 PyObject* err_traceback = NULL;
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
107 PyList_SET_ITEM(__ERROR, 0, err_type);
108 PyList_SET_ITEM(__ERROR, 1, err_msg);
109 PyList_SET_ITEM(__ERROR, 2, err_traceback);
110 {Py_XDECREF(old_err_type);}
111 {Py_XDECREF(old_err_msg);}
112 {Py_XDECREF(old_err_traceback);}
113 }
114 // The failure code is returned to index what code block failed.
115 return __failure;
116
117 }
118 void cleanup(void) {
119 __label_1:
120
121 double __DUMMY_1;
122 __label_3:
123
124 double __DUMMY_3;
125 __label_5:
126
127 double __DUMMY_5;
128 __label_7:
129
130 double __DUMMY_7;
131
132 Py_XDECREF(this->storage_V3);
133 Py_XDECREF(this->storage_V5);
134 Py_XDECREF(this->storage_V7);
135 Py_XDECREF(this->storage_V1);
136 }
137 int run(void) {
138 int __failure = 0;
139
140 PyObject* py_V1;
141 CudaNdarray * V1;
142 PyObject* py_V3;
143
144 PyObject* V3;
145
146 PyObject* py_V5;
147
148 PyArrayObject* V5;
149 int type_num_V5;
150 typedef npy_int32 dtype_V5;
151
152 PyObject* py_V7;
153
154 PyObject* V7;
155
156 {
157
158 py_V1 = PyList_GET_ITEM(storage_V1, 0);
159 {Py_XINCREF(py_V1);}
160
161 if (py_V1 == Py_None)
162 {
163 V1 = NULL;
164 }
165 else
166 {
167
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
169 // and one ref from the local scope.
170
171 if (CudaNdarray_Check(py_V1))
172 {
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
174 V1 = (CudaNdarray*)py_V1;
175 //std::cerr << "c_extract " << V1 << '\n';
176 if (V1->nd != 2)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 Py_INCREF(py_V3);
236 V3 = py_V3;
237
238 {
239
240 py_V5 = PyList_GET_ITEM(storage_V5, 0);
241 {Py_XINCREF(py_V5);}
242
243 V5 = NULL;
244 if (py_V5 == Py_None) {
245 // We can either fail here or set V5 to NULL and rely on Ops
246 // using tensors to handle the NULL case, but if they fail to do so
247 // they'll end up with nasty segfaults, so this is public service.
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
249 {
250 __failure = 6;
251 if (!PyErr_Occurred()) {
252 PyErr_SetString(PyExc_RuntimeError,
253 "Unexpected error in an Op's C code. "
254 "No Python exception was set.");
255 }
256 goto __label_6;}
257 }
258 if (!PyArray_Check(py_V5)) {
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
260 {
261 __failure = 6;
262 if (!PyErr_Occurred()) {
263 PyErr_SetString(PyExc_RuntimeError,
264 "Unexpected error in an Op's C code. "
265 "No Python exception was set.");
266 }
267 goto __label_6;}
268 }
269 // We expect NPY_INT32
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
271 if (!PyArray_ISALIGNED(py_V5)) {
272 PyErr_Format(PyExc_NotImplementedError,
273 "expected an aligned array of type %ld "
274 "(NPY_INT32), got non-aligned array of type %ld"
275 " with %ld dimensions, with 3 last dims "
276 "%ld, %ld, %ld"
277 " and 3 last strides %ld %ld, %ld.",
278 (long int) NPY_INT32,
279 (long int) type_num_V5,
280 (long int) PyArray_NDIM(py_V5),
281 (long int) PyArray_NDIM(py_V5) >= 3 ?
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
283 (long int) PyArray_NDIM(py_V5) >= 2 ?
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
285 (long int) PyArray_NDIM(py_V5) >= 1 ?
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
287 (long int) PyArray_NDIM(py_V5) >= 3 ?
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
289 (long int) PyArray_NDIM(py_V5) >= 2 ?
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
291 (long int) PyArray_NDIM(py_V5) >= 1 ?
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
293 );
294 {
295 __failure = 6;
296 if (!PyErr_Occurred()) {
297 PyErr_SetString(PyExc_RuntimeError,
298 "Unexpected error in an Op's C code. "
299 "No Python exception was set.");
300 }
301 goto __label_6;}
302 }
303 // This is a TypeError to be consistent with DEBUG_MODE
304 // Note: DEBUG_MODE also tells the name of the container
305 if (type_num_V5 != NPY_INT32) {
306 PyErr_Format(PyExc_TypeError,
307 "expected type_num %d (NPY_INT32) got %d",
308 NPY_INT32, type_num_V5);
309 {
310 __failure = 6;
311 if (!PyErr_Occurred()) {
312 PyErr_SetString(PyExc_RuntimeError,
313 "Unexpected error in an Op's C code. "
314 "No Python exception was set.");
315 }
316 goto __label_6;}
317 }
318 V5 = (PyArrayObject*)(py_V5);
319 Py_XINCREF(V5);
320
321 {
322
323 py_V7 = Py_None;
324 {Py_XINCREF(py_V7);}
325
326 V7 = NULL;
327
328 {
329
330 //////// <code generated by CURAND_Base>
331 int odims[2];
332 int n_elements = 1;
333 int must_alloc_sample = ((NULL == V1)
334 || !CudaNdarray_Check(py_V1)
335 || (V1->nd != 2));
336
337 if (V5->nd != 1)
338 {
339 PyErr_SetString(PyExc_ValueError, "size must be vector");
340 {
341 __failure = 9;
342 if (!PyErr_Occurred()) {
343 PyErr_SetString(PyExc_RuntimeError,
344 "Unexpected error in an Op's C code. "
345 "No Python exception was set.");
346 }
347 goto __label_9;}
348 }
349 if (V5->dimensions[0] != 2)
350 {
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
352 2, V5->dimensions[0]);
353 {
354 __failure = 9;
355 if (!PyErr_Occurred()) {
356 PyErr_SetString(PyExc_RuntimeError,
357 "Unexpected error in an Op's C code. "
358 "No Python exception was set.");
359 }
360 goto __label_9;}
361 }
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
363 {
364 PyErr_SetString(PyExc_ValueError, "size must be int32");
365 {
366 __failure = 9;
367 if (!PyErr_Occurred()) {
368 PyErr_SetString(PyExc_RuntimeError,
369 "Unexpected error in an Op's C code. "
370 "No Python exception was set.");
371 }
372 goto __label_9;}
373 }
374 for (int i = 0; i < 2; ++i)
375 {
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
377 n_elements *= odims[i];
378 must_alloc_sample = (must_alloc_sample
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
380 }
381 if (must_alloc_sample)
382 {
383 Py_XDECREF(V1);
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
385 if(!V1)
386 {
387 {
388 __failure = 9;
389 if (!PyErr_Occurred()) {
390 PyErr_SetString(PyExc_RuntimeError,
391 "Unexpected error in an Op's C code. "
392 "No Python exception was set.");
393 }
394 goto __label_9;};
395 }
396 }
397 if (!PyCObject_Check(V3))
398 {
399 // allocate a new generator for o_generator
400 Py_XDECREF(V7);
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
402 assert(gen);
403 if (CURAND_STATUS_SUCCESS !=
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
406 {
407 __failure = 9;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_9;};
414 }
415 if (CURAND_STATUS_SUCCESS !=
416 curandSetPseudoRandomGeneratorSeed(*gen,234))
417 {
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
419 {
420 __failure = 9;
421 if (!PyErr_Occurred()) {
422 PyErr_SetString(PyExc_RuntimeError,
423 "Unexpected error in an Op's C code. "
424 "No Python exception was set.");
425 }
426 goto __label_9;};
427 }
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
429 assert (V3 == Py_False);
430 }
431 else if (1)
432 {
433 // use i_generator for o_generator
434 Py_XDECREF(V7);
435 Py_INCREF(V3);
436 V7 = V3;
437 }
438 else
439 {
440 // copy i_generator for o_generator
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
442 {
443 __failure = 9;
444 if (!PyErr_Occurred()) {
445 PyErr_SetString(PyExc_RuntimeError,
446 "Unexpected error in an Op's C code. "
447 "No Python exception was set.");
448 }
449 goto __label_9;};
450 }
451 {
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
453 curandStatus_t err = curandGenerateNormal(*gen,
454 CudaNdarray_DEV_DATA(V1),
455 n_elements,
456 0.0, 1.0);
457
458
459 if (err != CURAND_STATUS_SUCCESS)
460 {
461 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
462 {
463 __failure = 9;
464 if (!PyErr_Occurred()) {
465 PyErr_SetString(PyExc_RuntimeError,
466 "Unexpected error in an Op's C code. "
467 "No Python exception was set.");
468 }
469 goto __label_9;};
470 }
471 cudaThreadSynchronize();
472 }
473 //////// </ code generated by CURAND_Base>
474 __label_9:
475
476 double __DUMMY_9;
477
478 }
479 __label_8:
480
481 if (!__failure) {
482
483 assert(py_V7->ob_refcnt > 1);
484 Py_DECREF(py_V7);
485 py_V7 = V7 ? V7 : Py_None;
486 Py_INCREF(py_V7);
487
488 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
489 {Py_XINCREF(py_V7);}
490 PyList_SET_ITEM(storage_V7, 0, py_V7);
491 {Py_XDECREF(old);}
492 }
493
494 Py_XDECREF(V7);
495
496 {Py_XDECREF(py_V7);}
497
498 double __DUMMY_8;
499
500 }
501 __label_6:
502
503 if (V5) {
504 Py_XDECREF(V5);
505 }
506
507 {Py_XDECREF(py_V5);}
508
509 double __DUMMY_6;
510
511 }
512 __label_4:
513
514 Py_XDECREF(V3);
515
516 {Py_XDECREF(py_V3);}
517
518 double __DUMMY_4;
519
520 }
521 __label_2:
522
523 if (!__failure) {
524
525 //std::cerr << "sync\n";
526 if (NULL == V1) {
527 // failure: sync None to storage
528 Py_XDECREF(py_V1);
529 py_V1 = Py_None;
530 Py_INCREF(py_V1);
531 }
532 else
533 {
534 if (py_V1 != (PyObject*)V1)
535 {
536 Py_XDECREF(py_V1);
537 py_V1 = (PyObject*)V1;
538 Py_INCREF(py_V1);
539 }
540 assert(py_V1->ob_refcnt);
541 }
542
543 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
544 {Py_XINCREF(py_V1);}
545 PyList_SET_ITEM(storage_V1, 0, py_V1);
546 {Py_XDECREF(old);}
547 }
548
549 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
550 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
551 if (V1)
552 {
553 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
554 Py_XDECREF(V1);
555 }
556 //std::cerr << "cleanup done" << py_V1 << "\n";
557
558 {Py_XDECREF(py_V1);}
559
560 double __DUMMY_2;
561
562 }
563
564
565 if (__failure) {
566 // When there is a failure, this code puts the exception
567 // in __ERROR.
568 PyObject* err_type = NULL;
569 PyObject* err_msg = NULL;
570 PyObject* err_traceback = NULL;
571 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
572 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
573 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
574 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
575 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
576 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
577 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
578 PyList_SET_ITEM(__ERROR, 0, err_type);
579 PyList_SET_ITEM(__ERROR, 1, err_msg);
580 PyList_SET_ITEM(__ERROR, 2, err_traceback);
581 {Py_XDECREF(old_err_type);}
582 {Py_XDECREF(old_err_msg);}
583 {Py_XDECREF(old_err_traceback);}
584 }
585 // The failure code is returned to index what code block failed.
586 return __failure;
587
588 }
589 };
590
591
592 int __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor(__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* self) {
593 return self->run();
594 }
595
596 void __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor(void* executor, void* self) {
597 delete ((__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6*)self);
598 }
599
600 //////////////////////
601 //// Functions
602 //////////////////////
603 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
604 assert(PyTuple_Check(argtuple));
605 if (5 != PyTuple_Size(argtuple)){
606 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
607 return NULL;
608 }
609 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* struct_ptr = new __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6();
610 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
611 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor), struct_ptr, __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor);
612 return thunk; }
613
614 //////////////////////
615 //// Module init
616 //////////////////////
617 static PyMethodDef MyMethods[] = {
618 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
619 {NULL, NULL, 0, NULL}
620 };
621 PyMODINIT_FUNC init0a2742cf42fdbba4c958f02e9b7af2f6(void){
622 import_array();
623 (void) Py_InitModule("0a2742cf42fdbba4c958f02e9b7af2f6", MyMethods);
624 }
625
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmpKnseFy/0a2742cf42fdbba4c958f02e9b7af2f6.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 #if PY_MAJOR_VERSION >= 3
14 void free_generator(PyObject *_gen)
15 {
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen);
17 #else
18 void free_generator(void *_gen)
19 {
20 curandGenerator_t * gen = (curandGenerator_t*)_gen;
21 #endif
22
23 curandStatus_t err = curandDestroyGenerator(*gen);
24 if (err != CURAND_STATUS_SUCCESS)
25 {
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n",
27 (int)err);
28 }
29 free(gen);
30 }
31
32
33 struct __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6 {
34 PyObject* __ERROR;
35
36 PyObject* storage_V3;
37 PyObject* storage_V5;
38 PyObject* storage_V7;
39 PyObject* storage_V1;
40
41
42 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6() {}
43 ~__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(void) {
44 cleanup();
45 }
46
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
48 Py_XINCREF(storage_V3);
49 Py_XINCREF(storage_V5);
50 Py_XINCREF(storage_V7);
51 Py_XINCREF(storage_V1);
52 this->storage_V3 = storage_V3;
53 this->storage_V5 = storage_V5;
54 this->storage_V7 = storage_V7;
55 this->storage_V1 = storage_V1;
56 int __failure = 0;
57
58 {
59
60 {
61
62 {
63
64 {
65
66 this->__ERROR = __ERROR;
67 return 0;
68 __label_7:
69
70 double __DUMMY_7;
71
72 }
73 __label_5:
74
75 double __DUMMY_5;
76
77 }
78 __label_3:
79
80 double __DUMMY_3;
81
82 }
83 __label_1:
84
85 double __DUMMY_1;
86
87 }
88
89 Py_XDECREF(this->storage_V3);
90 Py_XDECREF(this->storage_V5);
91 Py_XDECREF(this->storage_V7);
92 Py_XDECREF(this->storage_V1);
93
94 if (__failure) {
95 // When there is a failure, this code puts the exception
96 // in __ERROR.
97 PyObject* err_type = NULL;
98 PyObject* err_msg = NULL;
99 PyObject* err_traceback = NULL;
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
107 PyList_SET_ITEM(__ERROR, 0, err_type);
108 PyList_SET_ITEM(__ERROR, 1, err_msg);
109 PyList_SET_ITEM(__ERROR, 2, err_traceback);
110 {Py_XDECREF(old_err_type);}
111 {Py_XDECREF(old_err_msg);}
112 {Py_XDECREF(old_err_traceback);}
113 }
114 // The failure code is returned to index what code block failed.
115 return __failure;
116
117 }
118 void cleanup(void) {
119 __label_1:
120
121 double __DUMMY_1;
122 __label_3:
123
124 double __DUMMY_3;
125 __label_5:
126
127 double __DUMMY_5;
128 __label_7:
129
130 double __DUMMY_7;
131
132 Py_XDECREF(this->storage_V3);
133 Py_XDECREF(this->storage_V5);
134 Py_XDECREF(this->storage_V7);
135 Py_XDECREF(this->storage_V1);
136 }
137 int run(void) {
138 int __failure = 0;
139
140 PyObject* py_V1;
141 CudaNdarray * V1;
142 PyObject* py_V3;
143
144 PyObject* V3;
145
146 PyObject* py_V5;
147
148 PyArrayObject* V5;
149 int type_num_V5;
150 typedef npy_int32 dtype_V5;
151
152 PyObject* py_V7;
153
154 PyObject* V7;
155
156 {
157
158 py_V1 = PyList_GET_ITEM(storage_V1, 0);
159 {Py_XINCREF(py_V1);}
160
161 if (py_V1 == Py_None)
162 {
163 V1 = NULL;
164 }
165 else
166 {
167
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
169 // and one ref from the local scope.
170
171 if (CudaNdarray_Check(py_V1))
172 {
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
174 V1 = (CudaNdarray*)py_V1;
175 //std::cerr << "c_extract " << V1 << '\n';
176 if (V1->nd != 2)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 Py_INCREF(py_V3);
236 V3 = py_V3;
237
238 {
239
240 py_V5 = PyList_GET_ITEM(storage_V5, 0);
241 {Py_XINCREF(py_V5);}
242
243 V5 = NULL;
244 if (py_V5 == Py_None) {
245 // We can either fail here or set V5 to NULL and rely on Ops
246 // using tensors to handle the NULL case, but if they fail to do so
247 // they'll end up with nasty segfaults, so this is public service.
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
249 {
250 __failure = 6;
251 if (!PyErr_Occurred()) {
252 PyErr_SetString(PyExc_RuntimeError,
253 "Unexpected error in an Op's C code. "
254 "No Python exception was set.");
255 }
256 goto __label_6;}
257 }
258 if (!PyArray_Check(py_V5)) {
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
260 {
261 __failure = 6;
262 if (!PyErr_Occurred()) {
263 PyErr_SetString(PyExc_RuntimeError,
264 "Unexpected error in an Op's C code. "
265 "No Python exception was set.");
266 }
267 goto __label_6;}
268 }
269 // We expect NPY_INT32
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
271 if (!PyArray_ISALIGNED(py_V5)) {
272 PyErr_Format(PyExc_NotImplementedError,
273 "expected an aligned array of type %ld "
274 "(NPY_INT32), got non-aligned array of type %ld"
275 " with %ld dimensions, with 3 last dims "
276 "%ld, %ld, %ld"
277 " and 3 last strides %ld %ld, %ld.",
278 (long int) NPY_INT32,
279 (long int) type_num_V5,
280 (long int) PyArray_NDIM(py_V5),
281 (long int) PyArray_NDIM(py_V5) >= 3 ?
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
283 (long int) PyArray_NDIM(py_V5) >= 2 ?
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
285 (long int) PyArray_NDIM(py_V5) >= 1 ?
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
287 (long int) PyArray_NDIM(py_V5) >= 3 ?
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
289 (long int) PyArray_NDIM(py_V5) >= 2 ?
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
291 (long int) PyArray_NDIM(py_V5) >= 1 ?
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
293 );
294 {
295 __failure = 6;
296 if (!PyErr_Occurred()) {
297 PyErr_SetString(PyExc_RuntimeError,
298 "Unexpected error in an Op's C code. "
299 "No Python exception was set.");
300 }
301 goto __label_6;}
302 }
303 // This is a TypeError to be consistent with DEBUG_MODE
304 // Note: DEBUG_MODE also tells the name of the container
305 if (type_num_V5 != NPY_INT32) {
306 PyErr_Format(PyExc_TypeError,
307 "expected type_num %d (NPY_INT32) got %d",
308 NPY_INT32, type_num_V5);
309 {
310 __failure = 6;
311 if (!PyErr_Occurred()) {
312 PyErr_SetString(PyExc_RuntimeError,
313 "Unexpected error in an Op's C code. "
314 "No Python exception was set.");
315 }
316 goto __label_6;}
317 }
318 V5 = (PyArrayObject*)(py_V5);
319 Py_XINCREF(V5);
320
321 {
322
323 py_V7 = Py_None;
324 {Py_XINCREF(py_V7);}
325
326 V7 = NULL;
327
328 {
329
330 //////// <code generated by CURAND_Base>
331 int odims[2];
332 int n_elements = 1;
333 int must_alloc_sample = ((NULL == V1)
334 || !CudaNdarray_Check(py_V1)
335 || (V1->nd != 2));
336
337 if (V5->nd != 1)
338 {
339 PyErr_SetString(PyExc_ValueError, "size must be vector");
340 {
341 __failure = 9;
342 if (!PyErr_Occurred()) {
343 PyErr_SetString(PyExc_RuntimeError,
344 "Unexpected error in an Op's C code. "
345 "No Python exception was set.");
346 }
347 goto __label_9;}
348 }
349 if (V5->dimensions[0] != 2)
350 {
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
352 2, V5->dimensions[0]);
353 {
354 __failure = 9;
355 if (!PyErr_Occurred()) {
356 PyErr_SetString(PyExc_RuntimeError,
357 "Unexpected error in an Op's C code. "
358 "No Python exception was set.");
359 }
360 goto __label_9;}
361 }
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
363 {
364 PyErr_SetString(PyExc_ValueError, "size must be int32");
365 {
366 __failure = 9;
367 if (!PyErr_Occurred()) {
368 PyErr_SetString(PyExc_RuntimeError,
369 "Unexpected error in an Op's C code. "
370 "No Python exception was set.");
371 }
372 goto __label_9;}
373 }
374 for (int i = 0; i < 2; ++i)
375 {
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
377 n_elements *= odims[i];
378 must_alloc_sample = (must_alloc_sample
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
380 }
381 if (must_alloc_sample)
382 {
383 Py_XDECREF(V1);
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
385 if(!V1)
386 {
387 {
388 __failure = 9;
389 if (!PyErr_Occurred()) {
390 PyErr_SetString(PyExc_RuntimeError,
391 "Unexpected error in an Op's C code. "
392 "No Python exception was set.");
393 }
394 goto __label_9;};
395 }
396 }
397 if (!PyCObject_Check(V3))
398 {
399 // allocate a new generator for o_generator
400 Py_XDECREF(V7);
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
402 assert(gen);
403 if (CURAND_STATUS_SUCCESS !=
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
406 {
407 __failure = 9;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_9;};
414 }
415 if (CURAND_STATUS_SUCCESS !=
416 curandSetPseudoRandomGeneratorSeed(*gen,234))
417 {
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
419 {
420 __failure = 9;
421 if (!PyErr_Occurred()) {
422 PyErr_SetString(PyExc_RuntimeError,
423 "Unexpected error in an Op's C code. "
424 "No Python exception was set.");
425 }
426 goto __label_9;};
427 }
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
429 assert (V3 == Py_False);
430 }
431 else if (1)
432 {
433 // use i_generator for o_generator
434 Py_XDECREF(V7);
435 Py_INCREF(V3);
436 V7 = V3;
437 }
438 else
439 {
440 // copy i_generator for o_generator
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
442 {
443 __failure = 9;
444 if (!PyErr_Occurred()) {
445 PyErr_SetString(PyExc_RuntimeError,
446 "Unexpected error in an Op's C code. "
447 "No Python exception was set.");
448 }
449 goto __label_9;};
450 }
451 {
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
453 curandStatus_t err = curandGenerateNormal(*gen,
454 CudaNdarray_DEV_DATA(V1),
455 n_elements,
456 0.0, 1.0);
457
458
459 if (err != CURAND_STATUS_SUCCESS)
460 {
461 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
462 {
463 __failure = 9;
464 if (!PyErr_Occurred()) {
465 PyErr_SetString(PyExc_RuntimeError,
466 "Unexpected error in an Op's C code. "
467 "No Python exception was set.");
468 }
469 goto __label_9;};
470 }
471 cudaThreadSynchronize();
472 }
473 //////// </ code generated by CURAND_Base>
474 __label_9:
475
476 double __DUMMY_9;
477
478 }
479 __label_8:
480
481 if (!__failure) {
482
483 assert(py_V7->ob_refcnt > 1);
484 Py_DECREF(py_V7);
485 py_V7 = V7 ? V7 : Py_None;
486 Py_INCREF(py_V7);
487
488 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
489 {Py_XINCREF(py_V7);}
490 PyList_SET_ITEM(storage_V7, 0, py_V7);
491 {Py_XDECREF(old);}
492 }
493
494 Py_XDECREF(V7);
495
496 {Py_XDECREF(py_V7);}
497
498 double __DUMMY_8;
499
500 }
501 __label_6:
502
503 if (V5) {
504 Py_XDECREF(V5);
505 }
506
507 {Py_XDECREF(py_V5);}
508
509 double __DUMMY_6;
510
511 }
512 __label_4:
513
514 Py_XDECREF(V3);
515
516 {Py_XDECREF(py_V3);}
517
518 double __DUMMY_4;
519
520 }
521 __label_2:
522
523 if (!__failure) {
524
525 //std::cerr << "sync\n";
526 if (NULL == V1) {
527 // failure: sync None to storage
528 Py_XDECREF(py_V1);
529 py_V1 = Py_None;
530 Py_INCREF(py_V1);
531 }
532 else
533 {
534 if (py_V1 != (PyObject*)V1)
535 {
536 Py_XDECREF(py_V1);
537 py_V1 = (PyObject*)V1;
538 Py_INCREF(py_V1);
539 }
540 assert(py_V1->ob_refcnt);
541 }
542
543 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
544 {Py_XINCREF(py_V1);}
545 PyList_SET_ITEM(storage_V1, 0, py_V1);
546 {Py_XDECREF(old);}
547 }
548
549 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
550 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
551 if (V1)
552 {
553 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
554 Py_XDECREF(V1);
555 }
556 //std::cerr << "cleanup done" << py_V1 << "\n";
557
558 {Py_XDECREF(py_V1);}
559
560 double __DUMMY_2;
561
562 }
563
564
565 if (__failure) {
566 // When there is a failure, this code puts the exception
567 // in __ERROR.
568 PyObject* err_type = NULL;
569 PyObject* err_msg = NULL;
570 PyObject* err_traceback = NULL;
571 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
572 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
573 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
574 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
575 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
576 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
577 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
578 PyList_SET_ITEM(__ERROR, 0, err_type);
579 PyList_SET_ITEM(__ERROR, 1, err_msg);
580 PyList_SET_ITEM(__ERROR, 2, err_traceback);
581 {Py_XDECREF(old_err_type);}
582 {Py_XDECREF(old_err_msg);}
583 {Py_XDECREF(old_err_traceback);}
584 }
585 // The failure code is returned to index what code block failed.
586 return __failure;
587
588 }
589 };
590
591
592 int __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor(__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* self) {
593 return self->run();
594 }
595
596 void __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor(void* executor, void* self) {
597 delete ((__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6*)self);
598 }
599
600 //////////////////////
601 //// Functions
602 //////////////////////
603 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
604 assert(PyTuple_Check(argtuple));
605 if (5 != PyTuple_Size(argtuple)){
606 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
607 return NULL;
608 }
609 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* struct_ptr = new __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6();
610 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
611 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor), struct_ptr, __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor);
612 return thunk; }
613
614 //////////////////////
615 //// Module init
616 //////////////////////
617 static PyMethodDef MyMethods[] = {
618 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
619 {NULL, NULL, 0, NULL}
620 };
621 PyMODINIT_FUNC init0a2742cf42fdbba4c958f02e9b7af2f6(void){
622 import_array();
623 (void) Py_InitModule("0a2742cf42fdbba4c958f02e9b7af2f6", MyMethods);
624 }
625
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp9_KVmB/0a2742cf42fdbba4c958f02e9b7af2f6.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
E1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include "curand.h"
6 #include <numpy/arrayscalars.h>
7 #include "cuda_ndarray.cuh"
8 //////////////////////
9 //// Support Code
10 //////////////////////
11
12
13 #if PY_MAJOR_VERSION >= 3
14 void free_generator(PyObject *_gen)
15 {
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen);
17 #else
18 void free_generator(void *_gen)
19 {
20 curandGenerator_t * gen = (curandGenerator_t*)_gen;
21 #endif
22
23 curandStatus_t err = curandDestroyGenerator(*gen);
24 if (err != CURAND_STATUS_SUCCESS)
25 {
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n",
27 (int)err);
28 }
29 free(gen);
30 }
31
32
33 struct __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6 {
34 PyObject* __ERROR;
35
36 PyObject* storage_V3;
37 PyObject* storage_V5;
38 PyObject* storage_V7;
39 PyObject* storage_V1;
40
41
42 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6() {}
43 ~__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(void) {
44 cleanup();
45 }
46
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
48 Py_XINCREF(storage_V3);
49 Py_XINCREF(storage_V5);
50 Py_XINCREF(storage_V7);
51 Py_XINCREF(storage_V1);
52 this->storage_V3 = storage_V3;
53 this->storage_V5 = storage_V5;
54 this->storage_V7 = storage_V7;
55 this->storage_V1 = storage_V1;
56 int __failure = 0;
57
58 {
59
60 {
61
62 {
63
64 {
65
66 this->__ERROR = __ERROR;
67 return 0;
68 __label_7:
69
70 double __DUMMY_7;
71
72 }
73 __label_5:
74
75 double __DUMMY_5;
76
77 }
78 __label_3:
79
80 double __DUMMY_3;
81
82 }
83 __label_1:
84
85 double __DUMMY_1;
86
87 }
88
89 Py_XDECREF(this->storage_V3);
90 Py_XDECREF(this->storage_V5);
91 Py_XDECREF(this->storage_V7);
92 Py_XDECREF(this->storage_V1);
93
94 if (__failure) {
95 // When there is a failure, this code puts the exception
96 // in __ERROR.
97 PyObject* err_type = NULL;
98 PyObject* err_msg = NULL;
99 PyObject* err_traceback = NULL;
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
107 PyList_SET_ITEM(__ERROR, 0, err_type);
108 PyList_SET_ITEM(__ERROR, 1, err_msg);
109 PyList_SET_ITEM(__ERROR, 2, err_traceback);
110 {Py_XDECREF(old_err_type);}
111 {Py_XDECREF(old_err_msg);}
112 {Py_XDECREF(old_err_traceback);}
113 }
114 // The failure code is returned to index what code block failed.
115 return __failure;
116
117 }
118 void cleanup(void) {
119 __label_1:
120
121 double __DUMMY_1;
122 __label_3:
123
124 double __DUMMY_3;
125 __label_5:
126
127 double __DUMMY_5;
128 __label_7:
129
130 double __DUMMY_7;
131
132 Py_XDECREF(this->storage_V3);
133 Py_XDECREF(this->storage_V5);
134 Py_XDECREF(this->storage_V7);
135 Py_XDECREF(this->storage_V1);
136 }
137 int run(void) {
138 int __failure = 0;
139
140 PyObject* py_V1;
141 CudaNdarray * V1;
142 PyObject* py_V3;
143
144 PyObject* V3;
145
146 PyObject* py_V5;
147
148 PyArrayObject* V5;
149 int type_num_V5;
150 typedef npy_int32 dtype_V5;
151
152 PyObject* py_V7;
153
154 PyObject* V7;
155
156 {
157
158 py_V1 = PyList_GET_ITEM(storage_V1, 0);
159 {Py_XINCREF(py_V1);}
160
161 if (py_V1 == Py_None)
162 {
163 V1 = NULL;
164 }
165 else
166 {
167
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
169 // and one ref from the local scope.
170
171 if (CudaNdarray_Check(py_V1))
172 {
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
174 V1 = (CudaNdarray*)py_V1;
175 //std::cerr << "c_extract " << V1 << '\n';
176 if (V1->nd != 2)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 Py_INCREF(py_V3);
236 V3 = py_V3;
237
238 {
239
240 py_V5 = PyList_GET_ITEM(storage_V5, 0);
241 {Py_XINCREF(py_V5);}
242
243 V5 = NULL;
244 if (py_V5 == Py_None) {
245 // We can either fail here or set V5 to NULL and rely on Ops
246 // using tensors to handle the NULL case, but if they fail to do so
247 // they'll end up with nasty segfaults, so this is public service.
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
249 {
250 __failure = 6;
251 if (!PyErr_Occurred()) {
252 PyErr_SetString(PyExc_RuntimeError,
253 "Unexpected error in an Op's C code. "
254 "No Python exception was set.");
255 }
256 goto __label_6;}
257 }
258 if (!PyArray_Check(py_V5)) {
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
260 {
261 __failure = 6;
262 if (!PyErr_Occurred()) {
263 PyErr_SetString(PyExc_RuntimeError,
264 "Unexpected error in an Op's C code. "
265 "No Python exception was set.");
266 }
267 goto __label_6;}
268 }
269 // We expect NPY_INT32
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num;
271 if (!PyArray_ISALIGNED(py_V5)) {
272 PyErr_Format(PyExc_NotImplementedError,
273 "expected an aligned array of type %ld "
274 "(NPY_INT32), got non-aligned array of type %ld"
275 " with %ld dimensions, with 3 last dims "
276 "%ld, %ld, %ld"
277 " and 3 last strides %ld %ld, %ld.",
278 (long int) NPY_INT32,
279 (long int) type_num_V5,
280 (long int) PyArray_NDIM(py_V5),
281 (long int) PyArray_NDIM(py_V5) >= 3 ?
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
283 (long int) PyArray_NDIM(py_V5) >= 2 ?
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
285 (long int) PyArray_NDIM(py_V5) >= 1 ?
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1,
287 (long int) PyArray_NDIM(py_V5) >= 3 ?
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1,
289 (long int) PyArray_NDIM(py_V5) >= 2 ?
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1,
291 (long int) PyArray_NDIM(py_V5) >= 1 ?
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1
293 );
294 {
295 __failure = 6;
296 if (!PyErr_Occurred()) {
297 PyErr_SetString(PyExc_RuntimeError,
298 "Unexpected error in an Op's C code. "
299 "No Python exception was set.");
300 }
301 goto __label_6;}
302 }
303 // This is a TypeError to be consistent with DEBUG_MODE
304 // Note: DEBUG_MODE also tells the name of the container
305 if (type_num_V5 != NPY_INT32) {
306 PyErr_Format(PyExc_TypeError,
307 "expected type_num %d (NPY_INT32) got %d",
308 NPY_INT32, type_num_V5);
309 {
310 __failure = 6;
311 if (!PyErr_Occurred()) {
312 PyErr_SetString(PyExc_RuntimeError,
313 "Unexpected error in an Op's C code. "
314 "No Python exception was set.");
315 }
316 goto __label_6;}
317 }
318 V5 = (PyArrayObject*)(py_V5);
319 Py_XINCREF(V5);
320
321 {
322
323 py_V7 = Py_None;
324 {Py_XINCREF(py_V7);}
325
326 V7 = NULL;
327
328 {
329
330 //////// <code generated by CURAND_Base>
331 int odims[2];
332 int n_elements = 1;
333 int must_alloc_sample = ((NULL == V1)
334 || !CudaNdarray_Check(py_V1)
335 || (V1->nd != 2));
336
337 if (V5->nd != 1)
338 {
339 PyErr_SetString(PyExc_ValueError, "size must be vector");
340 {
341 __failure = 9;
342 if (!PyErr_Occurred()) {
343 PyErr_SetString(PyExc_RuntimeError,
344 "Unexpected error in an Op's C code. "
345 "No Python exception was set.");
346 }
347 goto __label_9;}
348 }
349 if (V5->dimensions[0] != 2)
350 {
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)",
352 2, V5->dimensions[0]);
353 {
354 __failure = 9;
355 if (!PyErr_Occurred()) {
356 PyErr_SetString(PyExc_RuntimeError,
357 "Unexpected error in an Op's C code. "
358 "No Python exception was set.");
359 }
360 goto __label_9;}
361 }
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32)
363 {
364 PyErr_SetString(PyExc_ValueError, "size must be int32");
365 {
366 __failure = 9;
367 if (!PyErr_Occurred()) {
368 PyErr_SetString(PyExc_RuntimeError,
369 "Unexpected error in an Op's C code. "
370 "No Python exception was set.");
371 }
372 goto __label_9;}
373 }
374 for (int i = 0; i < 2; ++i)
375 {
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0];
377 n_elements *= odims[i];
378 must_alloc_sample = (must_alloc_sample
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]);
380 }
381 if (must_alloc_sample)
382 {
383 Py_XDECREF(V1);
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims);
385 if(!V1)
386 {
387 {
388 __failure = 9;
389 if (!PyErr_Occurred()) {
390 PyErr_SetString(PyExc_RuntimeError,
391 "Unexpected error in an Op's C code. "
392 "No Python exception was set.");
393 }
394 goto __label_9;};
395 }
396 }
397 if (!PyCObject_Check(V3))
398 {
399 // allocate a new generator for o_generator
400 Py_XDECREF(V7);
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
402 assert(gen);
403 if (CURAND_STATUS_SUCCESS !=
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) {
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator");
406 {
407 __failure = 9;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_9;};
414 }
415 if (CURAND_STATUS_SUCCESS !=
416 curandSetPseudoRandomGeneratorSeed(*gen,234))
417 {
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed");
419 {
420 __failure = 9;
421 if (!PyErr_Occurred()) {
422 PyErr_SetString(PyExc_RuntimeError,
423 "Unexpected error in an Op's C code. "
424 "No Python exception was set.");
425 }
426 goto __label_9;};
427 }
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator);
429 assert (V3 == Py_False);
430 }
431 else if (1)
432 {
433 // use i_generator for o_generator
434 Py_XDECREF(V7);
435 Py_INCREF(V3);
436 V7 = V3;
437 }
438 else
439 {
440 // copy i_generator for o_generator
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation");
442 {
443 __failure = 9;
444 if (!PyErr_Occurred()) {
445 PyErr_SetString(PyExc_RuntimeError,
446 "Unexpected error in an Op's C code. "
447 "No Python exception was set.");
448 }
449 goto __label_9;};
450 }
451 {
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7);
453 curandStatus_t err = curandGenerateNormal(*gen,
454 CudaNdarray_DEV_DATA(V1),
455 n_elements,
456 0.0, 1.0);
457
458
459 if (err != CURAND_STATUS_SUCCESS)
460 {
461 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err);
462 {
463 __failure = 9;
464 if (!PyErr_Occurred()) {
465 PyErr_SetString(PyExc_RuntimeError,
466 "Unexpected error in an Op's C code. "
467 "No Python exception was set.");
468 }
469 goto __label_9;};
470 }
471 cudaThreadSynchronize();
472 }
473 //////// </ code generated by CURAND_Base>
474 __label_9:
475
476 double __DUMMY_9;
477
478 }
479 __label_8:
480
481 if (!__failure) {
482
483 assert(py_V7->ob_refcnt > 1);
484 Py_DECREF(py_V7);
485 py_V7 = V7 ? V7 : Py_None;
486 Py_INCREF(py_V7);
487
488 PyObject* old = PyList_GET_ITEM(storage_V7, 0);
489 {Py_XINCREF(py_V7);}
490 PyList_SET_ITEM(storage_V7, 0, py_V7);
491 {Py_XDECREF(old);}
492 }
493
494 Py_XDECREF(V7);
495
496 {Py_XDECREF(py_V7);}
497
498 double __DUMMY_8;
499
500 }
501 __label_6:
502
503 if (V5) {
504 Py_XDECREF(V5);
505 }
506
507 {Py_XDECREF(py_V5);}
508
509 double __DUMMY_6;
510
511 }
512 __label_4:
513
514 Py_XDECREF(V3);
515
516 {Py_XDECREF(py_V3);}
517
518 double __DUMMY_4;
519
520 }
521 __label_2:
522
523 if (!__failure) {
524
525 //std::cerr << "sync\n";
526 if (NULL == V1) {
527 // failure: sync None to storage
528 Py_XDECREF(py_V1);
529 py_V1 = Py_None;
530 Py_INCREF(py_V1);
531 }
532 else
533 {
534 if (py_V1 != (PyObject*)V1)
535 {
536 Py_XDECREF(py_V1);
537 py_V1 = (PyObject*)V1;
538 Py_INCREF(py_V1);
539 }
540 assert(py_V1->ob_refcnt);
541 }
542
543 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
544 {Py_XINCREF(py_V1);}
545 PyList_SET_ITEM(storage_V1, 0, py_V1);
546 {Py_XDECREF(old);}
547 }
548
549 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
550 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
551 if (V1)
552 {
553 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
554 Py_XDECREF(V1);
555 }
556 //std::cerr << "cleanup done" << py_V1 << "\n";
557
558 {Py_XDECREF(py_V1);}
559
560 double __DUMMY_2;
561
562 }
563
564
565 if (__failure) {
566 // When there is a failure, this code puts the exception
567 // in __ERROR.
568 PyObject* err_type = NULL;
569 PyObject* err_msg = NULL;
570 PyObject* err_traceback = NULL;
571 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
572 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
573 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
574 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
575 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
576 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
577 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
578 PyList_SET_ITEM(__ERROR, 0, err_type);
579 PyList_SET_ITEM(__ERROR, 1, err_msg);
580 PyList_SET_ITEM(__ERROR, 2, err_traceback);
581 {Py_XDECREF(old_err_type);}
582 {Py_XDECREF(old_err_msg);}
583 {Py_XDECREF(old_err_traceback);}
584 }
585 // The failure code is returned to index what code block failed.
586 return __failure;
587
588 }
589 };
590
591
592 int __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor(__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* self) {
593 return self->run();
594 }
595
596 void __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor(void* executor, void* self) {
597 delete ((__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6*)self);
598 }
599
600 //////////////////////
601 //// Functions
602 //////////////////////
603 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
604 assert(PyTuple_Check(argtuple));
605 if (5 != PyTuple_Size(argtuple)){
606 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
607 return NULL;
608 }
609 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* struct_ptr = new __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6();
610 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
611 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor), struct_ptr, __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor);
612 return thunk; }
613
614 //////////////////////
615 //// Module init
616 //////////////////////
617 static PyMethodDef MyMethods[] = {
618 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
619 {NULL, NULL, 0, NULL}
620 };
621 PyMODINIT_FUNC init0a2742cf42fdbba4c958f02e9b7af2f6(void){
622 import_array();
623 (void) Py_InitModule("0a2742cf42fdbba4c958f02e9b7af2f6", MyMethods);
624 }
625
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied
compilation terminated.
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp8OAkSL/0a2742cf42fdbba4c958f02e9b7af2f6.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray']
E....DeepCopyOp [@A] '' 0
|<CudaNdarrayType(float32, matrix)> [@B]
DeepCopyOp [@A] '' 0
|<CudaNdarrayType(float32, matrix)> [@B]
.................................................................EE........................................Segmentation fault (core dumped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment