Skip to content

Instantly share code, notes, and snippets.

@shackenberg
Created July 30, 2013 13:01
Show Gist options
  • Save shackenberg/6112689 to your computer and use it in GitHub Desktop.
Save shackenberg/6112689 to your computer and use it in GitHub Desktop.
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,exception_verbosity=high python jointest.py
Using gpu device 0: Quadro FX 580
WARNING (theano.sandbox.cuda): You are probably using an old GPU, that Theano does not support. This means GPU code will most likely be slow AND may crash when we try to use features that your GPU does not support.
1 #include <Python.h>
2 #include <iostream>
3 #include <numpy/arrayobject.h>
4 #include <math.h>
5 #include <numpy/arrayscalars.h>
6 #include "cuda_ndarray.cuh"
7 //////////////////////
8 //// Support Code
9 //////////////////////
10
11
12 struct __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae {
13 PyObject* __ERROR;
14
15 PyObject* storage_V3;
16 PyObject* storage_V5;
17 PyObject* storage_V7;
18 PyObject* storage_V1;
19
20
21 __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae() {}
22 ~__struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae(void) {
23 cleanup();
24 }
25
26 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) {
27 Py_XINCREF(storage_V3);
28 Py_XINCREF(storage_V5);
29 Py_XINCREF(storage_V7);
30 Py_XINCREF(storage_V1);
31 this->storage_V3 = storage_V3;
32 this->storage_V5 = storage_V5;
33 this->storage_V7 = storage_V7;
34 this->storage_V1 = storage_V1;
35 int __failure = 0;
36
37 {
38
39 {
40
41 {
42
43 {
44
45 this->__ERROR = __ERROR;
46 return 0;
47 __label_7:
48
49 double __DUMMY_7;
50
51 }
52 __label_5:
53
54 double __DUMMY_5;
55
56 }
57 __label_3:
58
59 double __DUMMY_3;
60
61 }
62 __label_1:
63
64 double __DUMMY_1;
65
66 }
67
68 Py_XDECREF(this->storage_V3);
69 Py_XDECREF(this->storage_V5);
70 Py_XDECREF(this->storage_V7);
71 Py_XDECREF(this->storage_V1);
72
73 if (__failure) {
74 // When there is a failure, this code puts the exception
75 // in __ERROR.
76 PyObject* err_type = NULL;
77 PyObject* err_msg = NULL;
78 PyObject* err_traceback = NULL;
79 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
80 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
81 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
82 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
83 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
84 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
85 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
86 PyList_SET_ITEM(__ERROR, 0, err_type);
87 PyList_SET_ITEM(__ERROR, 1, err_msg);
88 PyList_SET_ITEM(__ERROR, 2, err_traceback);
89 {Py_XDECREF(old_err_type);}
90 {Py_XDECREF(old_err_msg);}
91 {Py_XDECREF(old_err_traceback);}
92 }
93 // The failure code is returned to index what code block failed.
94 return __failure;
95
96 }
97 void cleanup(void) {
98 __label_1:
99
100 double __DUMMY_1;
101 __label_3:
102
103 double __DUMMY_3;
104 __label_5:
105
106 double __DUMMY_5;
107 __label_7:
108
109 double __DUMMY_7;
110
111 Py_XDECREF(this->storage_V3);
112 Py_XDECREF(this->storage_V5);
113 Py_XDECREF(this->storage_V7);
114 Py_XDECREF(this->storage_V1);
115 }
116 int run(void) {
117 int __failure = 0;
118
119 PyObject* py_V1;
120 CudaNdarray * V1;
121 PyObject* py_V3;
122
123 PyArrayObject* V3;
124 int type_num_V3;
125 typedef npy_int8 dtype_V3;
126
127 PyObject* py_V5;
128 CudaNdarray * V5;
129 PyObject* py_V7;
130 CudaNdarray * V7;
131 {
132
133 py_V1 = PyList_GET_ITEM(storage_V1, 0);
134 {Py_XINCREF(py_V1);}
135
136 if (py_V1 == Py_None)
137 {
138 V1 = NULL;
139 }
140 else
141 {
142
143 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
144 // and one ref from the local scope.
145
146 if (CudaNdarray_Check(py_V1))
147 {
148 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
149 V1 = (CudaNdarray*)py_V1;
150 //std::cerr << "c_extract " << V1 << '\n';
151 if (V1->nd != 1)
152 {
153 PyErr_Format(PyExc_RuntimeError,
154 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 1",
155 V1->nd);
156 V1 = NULL;
157 {
158 __failure = 2;
159 if (!PyErr_Occurred()) {
160 PyErr_SetString(PyExc_RuntimeError,
161 "Unexpected error in an Op's C code. "
162 "No Python exception was set.");
163 }
164 goto __label_2;};
165 }
166 //std::cerr << "c_extract " << V1 << " nd check passed\n";
167
168
169 assert(V1);
170 Py_INCREF(py_V1);
171 }
172 else if (py_V1 == Py_None)
173 {
174 PyErr_SetString(PyExc_TypeError,
175 "expected a CudaNdarray, not None");
176 V1 = NULL;
177 {
178 __failure = 2;
179 if (!PyErr_Occurred()) {
180 PyErr_SetString(PyExc_RuntimeError,
181 "Unexpected error in an Op's C code. "
182 "No Python exception was set.");
183 }
184 goto __label_2;};
185 }
186 else
187 {
188 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
189 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
190 V1 = NULL;
191 {
192 __failure = 2;
193 if (!PyErr_Occurred()) {
194 PyErr_SetString(PyExc_RuntimeError,
195 "Unexpected error in an Op's C code. "
196 "No Python exception was set.");
197 }
198 goto __label_2;};
199 }
200 //std::cerr << "c_extract done " << V1 << '\n';
201
202
203 }
204
205 {
206
207 py_V3 = PyList_GET_ITEM(storage_V3, 0);
208 {Py_XINCREF(py_V3);}
209
210 V3 = NULL;
211 if (py_V3 == Py_None) {
212 // We can either fail here or set V3 to NULL and rely on Ops
213 // using tensors to handle the NULL case, but if they fail to do so
214 // they'll end up with nasty segfaults, so this is public service.
215 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
216 {
217 __failure = 4;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_4;}
224 }
225 if (!PyArray_Check(py_V3)) {
226 PyErr_SetString(PyExc_ValueError, "expected an ndarray");
227 {
228 __failure = 4;
229 if (!PyErr_Occurred()) {
230 PyErr_SetString(PyExc_RuntimeError,
231 "Unexpected error in an Op's C code. "
232 "No Python exception was set.");
233 }
234 goto __label_4;}
235 }
236 // We expect NPY_INT8
237 type_num_V3 = ((PyArrayObject*)py_V3)->descr->type_num;
238 if (!PyArray_ISALIGNED(py_V3)) {
239 PyErr_Format(PyExc_NotImplementedError,
240 "expected an aligned array of type %ld "
241 "(NPY_INT8), got non-aligned array of type %ld"
242 " with %ld dimensions, with 3 last dims "
243 "%ld, %ld, %ld"
244 " and 3 last strides %ld %ld, %ld.",
245 (long int) NPY_INT8,
246 (long int) type_num_V3,
247 (long int) PyArray_NDIM(py_V3),
248 (long int) PyArray_NDIM(py_V3) >= 3 ?
249 PyArray_DIMS(py_V3)[PyArray_NDIM(py_V3)-3] : -1,
250 (long int) PyArray_NDIM(py_V3) >= 2 ?
251 PyArray_DIMS(py_V3)[PyArray_NDIM(py_V3)-2] : -1,
252 (long int) PyArray_NDIM(py_V3) >= 1 ?
253 PyArray_DIMS(py_V3)[PyArray_NDIM(py_V3)-1] : -1,
254 (long int) PyArray_NDIM(py_V3) >= 3 ?
255 PyArray_STRIDES(py_V3)[PyArray_NDIM(py_V3)-3] : -1,
256 (long int) PyArray_NDIM(py_V3) >= 2 ?
257 PyArray_STRIDES(py_V3)[PyArray_NDIM(py_V3)-2] : -1,
258 (long int) PyArray_NDIM(py_V3) >= 1 ?
259 PyArray_STRIDES(py_V3)[PyArray_NDIM(py_V3)-1] : -1
260 );
261 {
262 __failure = 4;
263 if (!PyErr_Occurred()) {
264 PyErr_SetString(PyExc_RuntimeError,
265 "Unexpected error in an Op's C code. "
266 "No Python exception was set.");
267 }
268 goto __label_4;}
269 }
270 // This is a TypeError to be consistent with DEBUG_MODE
271 // Note: DEBUG_MODE also tells the name of the container
272 if (type_num_V3 != NPY_INT8) {
273 PyErr_Format(PyExc_TypeError,
274 "expected type_num %d (NPY_INT8) got %d",
275 NPY_INT8, type_num_V3);
276 {
277 __failure = 4;
278 if (!PyErr_Occurred()) {
279 PyErr_SetString(PyExc_RuntimeError,
280 "Unexpected error in an Op's C code. "
281 "No Python exception was set.");
282 }
283 goto __label_4;}
284 }
285 V3 = (PyArrayObject*)(py_V3);
286 Py_XINCREF(V3);
287
288 {
289
290 py_V5 = PyList_GET_ITEM(storage_V5, 0);
291 {Py_XINCREF(py_V5);}
292
293 assert(py_V5->ob_refcnt >= 2); // There should be at least one ref from the container object,
294 // and one ref from the local scope.
295
296 if (CudaNdarray_Check(py_V5))
297 {
298 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V5, (py_V5->ob_refcnt));
299 V5 = (CudaNdarray*)py_V5;
300 //std::cerr << "c_extract " << V5 << '\n';
301 if (V5->nd != 1)
302 {
303 PyErr_Format(PyExc_RuntimeError,
304 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 1",
305 V5->nd);
306 V5 = NULL;
307 {
308 __failure = 6;
309 if (!PyErr_Occurred()) {
310 PyErr_SetString(PyExc_RuntimeError,
311 "Unexpected error in an Op's C code. "
312 "No Python exception was set.");
313 }
314 goto __label_6;};
315 }
316 //std::cerr << "c_extract " << V5 << " nd check passed\n";
317
318
319 assert(V5);
320 Py_INCREF(py_V5);
321 }
322 else if (py_V5 == Py_None)
323 {
324 PyErr_SetString(PyExc_TypeError,
325 "expected a CudaNdarray, not None");
326 V5 = NULL;
327 {
328 __failure = 6;
329 if (!PyErr_Occurred()) {
330 PyErr_SetString(PyExc_RuntimeError,
331 "Unexpected error in an Op's C code. "
332 "No Python exception was set.");
333 }
334 goto __label_6;};
335 }
336 else
337 {
338 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V5, (py_V5->ob_refcnt));
339 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
340 V5 = NULL;
341 {
342 __failure = 6;
343 if (!PyErr_Occurred()) {
344 PyErr_SetString(PyExc_RuntimeError,
345 "Unexpected error in an Op's C code. "
346 "No Python exception was set.");
347 }
348 goto __label_6;};
349 }
350 //std::cerr << "c_extract done " << V5 << '\n';
351
352
353 {
354
355 py_V7 = PyList_GET_ITEM(storage_V7, 0);
356 {Py_XINCREF(py_V7);}
357
358 assert(py_V7->ob_refcnt >= 2); // There should be at least one ref from the container object,
359 // and one ref from the local scope.
360
361 if (CudaNdarray_Check(py_V7))
362 {
363 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V7, (py_V7->ob_refcnt));
364 V7 = (CudaNdarray*)py_V7;
365 //std::cerr << "c_extract " << V7 << '\n';
366 if (V7->nd != 1)
367 {
368 PyErr_Format(PyExc_RuntimeError,
369 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 1",
370 V7->nd);
371 V7 = NULL;
372 {
373 __failure = 8;
374 if (!PyErr_Occurred()) {
375 PyErr_SetString(PyExc_RuntimeError,
376 "Unexpected error in an Op's C code. "
377 "No Python exception was set.");
378 }
379 goto __label_8;};
380 }
381 //std::cerr << "c_extract " << V7 << " nd check passed\n";
382
383
384 assert(V7);
385 Py_INCREF(py_V7);
386 }
387 else if (py_V7 == Py_None)
388 {
389 PyErr_SetString(PyExc_TypeError,
390 "expected a CudaNdarray, not None");
391 V7 = NULL;
392 {
393 __failure = 8;
394 if (!PyErr_Occurred()) {
395 PyErr_SetString(PyExc_RuntimeError,
396 "Unexpected error in an Op's C code. "
397 "No Python exception was set.");
398 }
399 goto __label_8;};
400 }
401 else
402 {
403 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V7, (py_V7->ob_refcnt));
404 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
405 V7 = NULL;
406 {
407 __failure = 8;
408 if (!PyErr_Occurred()) {
409 PyErr_SetString(PyExc_RuntimeError,
410 "Unexpected error in an Op's C code. "
411 "No Python exception was set.");
412 }
413 goto __label_8;};
414 }
415 //std::cerr << "c_extract done " << V7 << '\n';
416
417
418 {
419
420 int axis = PyInt_AsLong((PyObject*)V3);
421 int nd = CudaNdarray_NDIM(V5);
422 int shape_V5[nd];
423 int shape_out[nd];
424
425 for(int i = 0; i<nd; i+=1)
426 {
427 shape_V5[i] = CudaNdarray_HOST_DIMS(V5)[i];
428 shape_out[i] = shape_V5[i];
429 }
430
431 nd = CudaNdarray_NDIM(V7);
432 int shape_V7[nd];
433 for(int i = 0; i<nd; i+=1)
434 {
435 shape_V7[i] = CudaNdarray_HOST_DIMS(V7)[i];
436 if((i!=axis) && (shape_V7[i]!=shape_out[i]))
437 {
438 {
439 __failure = 9;
440 if (!PyErr_Occurred()) {
441 PyErr_SetString(PyExc_RuntimeError,
442 "Unexpected error in an Op's C code. "
443 "No Python exception was set.");
444 }
445 goto __label_9;}; //deactivated, because this causes segfault
446 }
447 }
448
449 int width_sum = 0;
450 width_sum += CudaNdarray_HOST_DIMS(V5)[axis];
451 width_sum += CudaNdarray_HOST_DIMS(V7)[axis];
452 shape_out[axis] = width_sum;
453
454 if (CudaNdarray_prep_output(&V1, nd, shape_out))
455 {
456 {
457 __failure = 9;
458 if (!PyErr_Occurred()) {
459 PyErr_SetString(PyExc_RuntimeError,
460 "Unexpected error in an Op's C code. "
461 "No Python exception was set.");
462 }
463 goto __label_9;};
464 }
465
466 PyObject *out_sub;
467 PyObject *start, *stop, *step;
468 step = NULL;
469 int errorcode;
470 int sum;
471 sum =0;
472
473 PyObject *slice_tuple;
474 PyObject *full_slice;
475 PyObject *section_slice;
476
477
478 sum += shape_V5[axis];
479 stop = PyInt_FromLong(sum);
480 slice_tuple = PyTuple_New(nd);
481 full_slice = PySlice_New(NULL, NULL, NULL);
482 section_slice = PySlice_New(start, stop, step);
483 for(int i=0; i<nd; i++)
484 {
485 if(i!=axis)
486 {
487 Py_INCREF(full_slice);
488 PyTuple_SetItem(slice_tuple, i, full_slice);
489 }
490 else if(i==axis)
491 {
492 Py_INCREF(section_slice);
493 PyTuple_SetItem(slice_tuple, i, section_slice);
494 }
495 }
496
497 out_sub = CudaNdarray_Subscript((PyObject*)V1, slice_tuple);
498 errorcode = CudaNdarray_CopyFromCudaNdarray((CudaNdarray*)out_sub, V5);
499 if((full_slice == NULL) || (section_slice == NULL) || (out_sub == NULL) || (errorcode != 0))
500 {
501 Py_XDECREF(full_slice);
502 Py_XDECREF(section_slice);
503 Py_XDECREF(slice_tuple);
504 Py_XDECREF(out_sub);
505 Py_XDECREF(V1);
506 {
507 __failure = 9;
508 if (!PyErr_Occurred()) {
509 PyErr_SetString(PyExc_RuntimeError,
510 "Unexpected error in an Op's C code. "
511 "No Python exception was set.");
512 }
513 goto __label_9;};
514 }
515 Py_XDECREF(full_slice);
516 Py_XDECREF(section_slice);
517 Py_XDECREF(out_sub);
518 Py_XDECREF(slice_tuple);
519 start = stop;
520
521 Py_XDECREF(start);
522 Py_XDECREF(stop);
523 Py_XDECREF(step);
524 sum += shape_V7[axis];
525 stop = PyInt_FromLong(sum);
526 slice_tuple = PyTuple_New(nd);
527 full_slice = PySlice_New(NULL, NULL, NULL);
528 section_slice = PySlice_New(start, stop, step);
529 for(int i=0; i<nd; i++)
530 {
531 if(i!=axis)
532 {
533 Py_INCREF(full_slice);
534 PyTuple_SetItem(slice_tuple, i, full_slice);
535 }
536 else if(i==axis)
537 {
538 Py_INCREF(section_slice);
539 PyTuple_SetItem(slice_tuple, i, section_slice);
540 }
541 }
542
543 out_sub = CudaNdarray_Subscript((PyObject*)V1, slice_tuple);
544 errorcode = CudaNdarray_CopyFromCudaNdarray((CudaNdarray*)out_sub, V7);
545 if((full_slice == NULL) || (section_slice == NULL) || (out_sub == NULL) || (errorcode != 0))
546 {
547 Py_XDECREF(full_slice);
548 Py_XDECREF(section_slice);
549 Py_XDECREF(slice_tuple);
550 Py_XDECREF(out_sub);
551 Py_XDECREF(V1);
552 {
553 __failure = 9;
554 if (!PyErr_Occurred()) {
555 PyErr_SetString(PyExc_RuntimeError,
556 "Unexpected error in an Op's C code. "
557 "No Python exception was set.");
558 }
559 goto __label_9;};
560 }
561 Py_XDECREF(full_slice);
562 Py_XDECREF(section_slice);
563 Py_XDECREF(out_sub);
564 Py_XDECREF(slice_tuple);
565 start = stop;
566
567 Py_XDECREF(start);
568 Py_XDECREF(stop);
569 Py_XDECREF(step);__label_9:
570
571 double __DUMMY_9;
572
573 }
574 __label_8:
575
576 //std::cerr << "cleanup " << py_V7 << " " << V7 << "\n";
577 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V7, (py_V7->ob_refcnt));
578 if (V7)
579 {
580 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V7, (V7->ob_refcnt));
581 Py_XDECREF(V7);
582 }
583 //std::cerr << "cleanup done" << py_V7 << "\n";
584
585 {Py_XDECREF(py_V7);}
586
587 double __DUMMY_8;
588
589 }
590 __label_6:
591
592 //std::cerr << "cleanup " << py_V5 << " " << V5 << "\n";
593 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V5, (py_V5->ob_refcnt));
594 if (V5)
595 {
596 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V5, (V5->ob_refcnt));
597 Py_XDECREF(V5);
598 }
599 //std::cerr << "cleanup done" << py_V5 << "\n";
600
601 {Py_XDECREF(py_V5);}
602
603 double __DUMMY_6;
604
605 }
606 __label_4:
607
608 if (V3) {
609 Py_XDECREF(V3);
610 }
611
612 {Py_XDECREF(py_V3);}
613
614 double __DUMMY_4;
615
616 }
617 __label_2:
618
619 if (!__failure) {
620
621 //std::cerr << "sync\n";
622 if (NULL == V1) {
623 // failure: sync None to storage
624 Py_XDECREF(py_V1);
625 py_V1 = Py_None;
626 Py_INCREF(py_V1);
627 }
628 else
629 {
630 if (py_V1 != (PyObject*)V1)
631 {
632 Py_XDECREF(py_V1);
633 py_V1 = (PyObject*)V1;
634 Py_INCREF(py_V1);
635 }
636 assert(py_V1->ob_refcnt);
637 }
638
639 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
640 {Py_XINCREF(py_V1);}
641 PyList_SET_ITEM(storage_V1, 0, py_V1);
642 {Py_XDECREF(old);}
643 }
644
645 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
646 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
647 if (V1)
648 {
649 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
650 Py_XDECREF(V1);
651 }
652 //std::cerr << "cleanup done" << py_V1 << "\n";
653
654 {Py_XDECREF(py_V1);}
655
656 double __DUMMY_2;
657
658 }
659
660
661 if (__failure) {
662 // When there is a failure, this code puts the exception
663 // in __ERROR.
664 PyObject* err_type = NULL;
665 PyObject* err_msg = NULL;
666 PyObject* err_traceback = NULL;
667 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
668 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
669 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
670 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
671 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
672 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
673 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
674 PyList_SET_ITEM(__ERROR, 0, err_type);
675 PyList_SET_ITEM(__ERROR, 1, err_msg);
676 PyList_SET_ITEM(__ERROR, 2, err_traceback);
677 {Py_XDECREF(old_err_type);}
678 {Py_XDECREF(old_err_msg);}
679 {Py_XDECREF(old_err_traceback);}
680 }
681 // The failure code is returned to index what code block failed.
682 return __failure;
683
684 }
685 };
686
687
688 int __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae_executor(__struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae* self) {
689 return self->run();
690 }
691
692 void __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae_destructor(void* executor, void* self) {
693 delete ((__struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae*)self);
694 }
695
696 //////////////////////
697 //// Functions
698 //////////////////////
699 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
700 assert(PyTuple_Check(argtuple));
701 if (5 != PyTuple_Size(argtuple)){
702 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple));
703 return NULL;
704 }
705 __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae* struct_ptr = new __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae();
706 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) );
707 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae_executor), struct_ptr, __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae_destructor);
708 return thunk; }
709
710 //////////////////////
711 //// Module init
712 //////////////////////
713 static PyMethodDef MyMethods[] = {
714 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
715 {NULL, NULL, 0, NULL}
716 };
717 PyMODINIT_FUNC init2d9760fd2850315bf1e2ecf2c0c430ae(void){
718 import_array();
719 (void) Py_InitModule("2d9760fd2850315bf1e2ecf2c0c430ae", MyMethods);
720 }
721
===============================
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu(445): warning: transfer of control bypasses initialization of:
variable "width_sum"
(449): here
In file included from /usr/include/python2.7/Python.h:8:0,
from mod.cu:1:
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default]
/usr/include/features.h:164:0: note: this is the location of the previous definition
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default]
/usr/include/features.h:166:0: note: this is the location of the previous definition
mod.cu(445): warning: transfer of control bypasses initialization of:
variable "width_sum"
(449): here
mod.cu: In member function ‘int __struct_compiled_op_2d9760fd2850315bf1e2ecf2c0c430ae::run()’:
mod.cu:569:231: error: jump to label ‘__label_9’ [-fpermissive]
mod.cu:445:6: error: from here [-fpermissive]
mod.cu:449:5: error: crosses initialization of ‘int width_sum’
['nvcc', '-shared', '-g', '-O3', '-arch=sm_11', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/path/to/cuda/root/include', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Documents/projects/Theano/dev/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp1DsoAE/2d9760fd2850315bf1e2ecf2c0c430ae.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/path/to/cuda/root/lib', '-L/path/to/cuda/root/lib', '-L/path/to/cuda/root/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcuda_ndarray']
Traceback (most recent call last):
File "jointest.py", line 20, in <module>
f1 = theano.function([T_vector1, T_vector2], T_result , profile=True)
File "/home/ludwig/Documents/projects/Theano/dev/theano/compile/function.py", line 222, in function
profile=profile)
File "/home/ludwig/Documents/projects/Theano/dev/theano/compile/pfunc.py", line 512, in pfunc
on_unused_input=on_unused_input)
File "/home/ludwig/Documents/projects/Theano/dev/theano/compile/function_module.py", line 1307, in orig_function
defaults)
File "/home/ludwig/Documents/projects/Theano/dev/theano/compile/function_module.py", line 1176, in create
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/link.py", line 434, in make_thunk
output_storage=output_storage)[:3]
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/vm.py", line 845, in make_all
for node in order]
File "/home/ludwig/Documents/projects/Theano/dev/theano/sandbox/cuda/__init__.py", line 247, in make_thunk
compute_map, no_recycling)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/op.py", line 591, in make_thunk
output_storage=node_output_storage)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/cc.py", line 933, in make_thunk
keep_lock=keep_lock)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/cc.py", line 876, in __compile__
keep_lock=keep_lock)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/cc.py", line 1305, in cthunk_factory
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/cmodule.py", line 1002, in module_from_key
module = next(compile_steps)
File "/home/ludwig/Documents/projects/Theano/dev/theano/gof/cc.py", line 1222, in compile_cmodule_by_step
preargs=preargs)
File "/home/ludwig/Documents/projects/Theano/dev/theano/sandbox/cuda/nvcc_compiler.py", line 407, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_11 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray -I/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray -I/path/to/cuda/root/include -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/home/ludwig/Documents/projects/Theano/dev/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp1DsoAE/2d9760fd2850315bf1e2ecf2c0c430ae.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-49-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray -L/path/to/cuda/root/lib -L/path/to/cuda/root/lib -L/path/to/cuda/root/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcuda_ndarray', '[GpuJoin(TensorConstant{0}, <CudaNdarrayType(float32, vector)>, <CudaNdarrayType(float32, vector)>)]')
Skipping empty Profile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment