提交 43f97ea3 作者: James Bergstra

test_elemwise{0,1,2} pass in debugmode

上级 602e87d8
type_support.so : type_support.cu
nvcc -O3 -shared -I$(HOME)/cvs/lgcm/cuda_ndarray -I$(CUDA_ROOT)/include -I/usr/include/python2.6 -o type_support.so -Xcompiler -fPIC type_support.cu -L$(CUDA_ROOT)/lib $(HOME)/cvs/lgcm/cuda_ndarray/cuda_ndarray.so
type_support.so : type_support.cu $(HOME)/cvs/lgcm/cuda_ndarray/cuda_ndarray.so
nvcc -g -shared -I$(HOME)/cvs/lgcm/cuda_ndarray -I$(CUDA_ROOT)/include -I/usr/include/python2.6 -o type_support.so -Xcompiler -fPIC type_support.cu -L$(CUDA_ROOT)/lib -L$(HOME)/cvs/lgcm/cuda_ndarray -lcuda_ndarray
clean :
rm type_support.so
......@@ -275,7 +275,7 @@ class GpuElemwise(Op):
fail = sub['fail']
opname = str(self.scalar_op)
print >> sio, """
std::cerr << "C_CODE %(opname)s START\\n";
//std::cerr << "C_CODE %(opname)s START\\n";
//standard elemwise size checks
const int * dims = NULL;
""" %locals()
......@@ -310,7 +310,7 @@ class GpuElemwise(Op):
print >> sio, """
if (cnda_%(oname)s){
//TODO: check if we can maybe use existing storage
Py_XDECREF(cnda_%(oname)s);
Py_DECREF(cnda_%(oname)s);
cnda_%(oname)s = NULL;
}
if (NULL == cnda_%(oname)s)
......@@ -324,13 +324,13 @@ class GpuElemwise(Op):
if (CudaNdarray_alloc_contiguous(cnda_%(oname)s, %(nd)s, dims))
{
//error string already set
Py_XDECREF(cnda_%(oname)s);
Py_DECREF(cnda_%(oname)s);
cnda_%(oname)s = NULL;
%(fail)s;
}
}
std::cerr << "ELEMWISE NEW %(oname)s nd" << cnda_%(oname)s->nd << "\\n";
std::cerr << "ELEMWISE NEW %(oname)s data" << cnda_%(oname)s->devdata << "\\n";
//std::cerr << "ELEMWISE NEW %(oname)s nd" << cnda_%(oname)s->nd << "\\n";
//std::cerr << "ELEMWISE NEW %(oname)s data" << cnda_%(oname)s->devdata << "\\n";
""" % locals()
print >> sio, """
{
......@@ -357,14 +357,14 @@ class GpuElemwise(Op):
""" % locals()
for oname in outputs:
print >> sio, """
Py_XDECREF(cnda_%(oname)s);
Py_DECREF(cnda_%(oname)s);
cnda_%(oname)s = NULL;
""" % locals()
print >> sio, """
%(fail)s;
}
}
std::cerr << "C_CODE %(opname)s END\\n";
//std::cerr << "C_CODE %(opname)s END\\n";
""" % locals()
return sio.getvalue()
......@@ -477,14 +477,14 @@ class GpuDimShuffle(Op):
if (CudaNdarray_set_nd(cnda_%(res)s, %(nd_out)s))
{
// err message set
Py_XDECREF(cnda_%(res)s);
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
%(fail)s;
}
if (CudaNdarray_set_device_data(cnda_%(res)s, CudaNdarray_DEV_DATA(cnda_%(input)s)))
if (CudaNdarray_set_device_data(cnda_%(res)s, CudaNdarray_DEV_DATA(cnda_%(input)s), cnda_%(input)s))
{
// err message set
Py_XDECREF(cnda_%(res)s);
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
%(fail)s;
}
......@@ -508,13 +508,13 @@ class GpuDimShuffle(Op):
if (CudaNdarray_copy_structure_to_device(cnda_%(res)s))
{
//err msg set
Py_XDECREF(cnda_%(res)s);
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
%(fail)s;
}
""" %locals()
if 1:
if 0:
print '--------------------------------------'
print 'C_CODE'
print ''
......
import sys
import sys, time
from theano.compile.sandbox.sharedvalue import shared
from theano.compile.sandbox.pfunc import pfunc
from theano import tensor
......@@ -18,8 +18,9 @@ def test_elemwise0():
a0 = a.value * 1.0
print 'BEFORE ADD', a.value
for i, node in enumerate(f.maker.env.toposort()):
print i, node
f(numpy.ones((4,4)))
print f.maker.env.toposort()
print 'AFTER ADD', a.value
assert numpy.all(a0 + 1.0 == a.value)
......@@ -52,9 +53,24 @@ def test_elemwise1():
def test_elemwise2():
""" Several kinds of elemwise expressions with dimension permutations """
rng = numpy.random.RandomState(int(time.time()))
print 'random?', rng.rand(3)
shape = (3,5)
for pattern in [(0,1), (1,0)]:
a = tcn.shared_constructor(rng.rand(*shape), name=None)
b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))])
has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node
has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
assert not has_elemwise
#let debugmode catch errors
print >> sys.stderr, 'pattern', pattern
f(rng.rand(*shape)*.3)
shape = (3,4,5,6)
a = tcn.shared_constructor(numpy.random.rand(*shape), 'a')
a = tcn.shared_constructor(rng.rand(*shape), 'a')
b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) *
tensor.exp(b**a).dimshuffle([2,0,3,1]))])
......@@ -64,7 +80,7 @@ def test_elemwise2():
has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
assert not has_elemwise
#let debugmode catch errors
f(numpy.ones(shape))
f(rng.rand(*shape))
def test_elemwise3():
""" Several kinds of elemwise expressions with dimension permutations and broadcasting"""
......@@ -75,5 +91,5 @@ def test_elemwise3():
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * tensor.exp(1 +
b**a).dimshuffle([2,0,3,1]))])
#let debugmode catch errors
f(numpy.ones(6))
f(numpy.random.rand(6))
......@@ -50,6 +50,10 @@ class CudaNdarrayType(Type):
def filter(self, data, strict=False):
return type_support_filter(data, self.broadcastable, strict)
# Compare two values of this type: both arguments are passed through
# numpy.asarray and the comparison is delegated to
# tensor.TensorType.values_eq_approx, whose result is returned unchanged.
# NOTE(review): presumably numpy.asarray brings CudaNdarray data to the host
# so the tensor comparison can be reused -- confirm against CudaNdarray.
@staticmethod
def values_eq_approx(a, b):
return tensor.TensorType.values_eq_approx(numpy.asarray(a), numpy.asarray(b))
def dtype_specs(self):
"""Return a tuple (python type, c type, numpy typenum) that corresponds to
self.dtype.
......@@ -130,6 +134,7 @@ class CudaNdarrayType(Type):
if (CudaNdarray_Check(py_%(name)s))
{
cnda_%(name)s = (CudaNdarray*)py_%(name)s;
Py_INCREF(py_%(name)s);
}
else
{
......@@ -141,22 +146,29 @@ class CudaNdarrayType(Type):
def c_cleanup(self, name, sub):
return """
std::cerr << "cleanup " << py_%(name)s << "\\n";
//std::cerr << "cleanup " << py_%(name)s << "\\n";
Py_XDECREF(py_%(name)s);
""" % locals()
def c_sync(self, name, sub):
"""Override `CLinkerOp.c_sync` """
return """
std::cerr << "sync\\n";
//std::cerr << "sync\\n";
if (NULL == cnda_%(name)s) {
// failure: sync None to storage
Py_XDECREF(py_%(name)s);
py_%(name)s = Py_None;
Py_XINCREF(py_%(name)s);
Py_INCREF(py_%(name)s);
}
else
{
if (py_%(name)s != (PyObject*)cnda_%(name)s)
{
Py_XDECREF(py_%(name)s);
py_%(name)s = (PyObject*)cnda_%(name)s;
Py_INCREF(py_%(name)s);
}
assert(py_%(name)s->ob_refcnt);
}
""" % locals()
......
......@@ -4,10 +4,8 @@
#include "cuda_ndarray.cuh"
#define DECL(s) static PyObject * s(PyObject * self, PyObject *args)
static PyObject *
filter(PyObject* self, PyObject *args) // args = (data, broadcastable, strict)
filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict)
{
PyObject *py_data=NULL;
PyArrayObject * data = NULL;
......@@ -20,8 +18,8 @@ filter(PyObject* self, PyObject *args) // args = (data, broadcastable, strict)
PyErr_SetString(PyExc_TypeError, "broadcastable arg should be a tuple of int.");
return NULL;
}
Py_XINCREF(py_data);
Py_XINCREF(broadcastable);
Py_INCREF(py_data);
Py_INCREF(broadcastable);
CudaNdarray * cnda = (CudaNdarray*)py_data;
......@@ -30,15 +28,17 @@ filter(PyObject* self, PyObject *args) // args = (data, broadcastable, strict)
//TODO: support non-strict "casting" from a vt to the broadcastable/type/size that we need.
if (!CudaNdarray_Check(py_data))
{
Py_XDECREF(py_data);
Py_XDECREF(broadcastable);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
std::cerr << "strict mode requires CudaNdarray\n";
PyErr_SetString(PyExc_TypeError, "strict mode requires CudaNdarray");
return NULL;
}
if (cnda->nd != PyTuple_Size(broadcastable))
{
Py_XDECREF(py_data);
Py_XDECREF(broadcastable);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
std::cerr << "Wrong rank: "<< cnda->nd << " " << PyTuple_Size(broadcastable) << "\n";
PyErr_Format(PyExc_TypeError, "Wrong rank: %i vs %li", cnda->nd, (long)PyTuple_Size(broadcastable));
return NULL;
}
......@@ -46,13 +46,14 @@ filter(PyObject* self, PyObject *args) // args = (data, broadcastable, strict)
{
if ((cnda->dim[i] > 1) and PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
{
std::cerr << "Non-unit size in bcastable dim:\n";
PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable vt dimension %i", i);
Py_XDECREF(py_data);
Py_XDECREF(broadcastable);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
return NULL;
}
}
Py_XDECREF(broadcastable);
Py_DECREF(broadcastable);
return py_data;
}
else
......@@ -61,8 +62,8 @@ filter(PyObject* self, PyObject *args) // args = (data, broadcastable, strict)
if (!data)
{
//err message already defined
Py_XDECREF(py_data);
Py_XDECREF(broadcastable);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
return NULL;
}
for (int i = 0; i < data->nd; ++i)
......@@ -70,22 +71,21 @@ filter(PyObject* self, PyObject *args) // args = (data, broadcastable, strict)
if ((data->dimensions[i] > 1) and PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
{
PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable dimension %i", i);
Py_XDECREF(data);
Py_XDECREF(py_data);
Py_XDECREF(broadcastable);
Py_DECREF(data);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
return NULL;
}
}
CudaNdarray * rval = (CudaNdarray*) CudaNdarray_new_null();
if (CudaNdarray_CopyFromArray(rval, data))
{
Py_XDECREF(rval);
Py_DECREF(rval);
rval = NULL;
}
Py_XDECREF(data);
Py_XDECREF(py_data);
Py_XDECREF(broadcastable);
Py_DECREF(data);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
return (PyObject*)rval;
}
}
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论