Commit 100d336e authored by Marc-Alexandre Cote

Reuse allocated memory when possible.

Parent 2dddf4ac
...@@ -62,4 +62,4 @@ from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \ ...@@ -62,4 +62,4 @@ from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \
from theano.tensor.sort import sort, argsort from theano.tensor.sort import sort, argsort
from theano.tensor.extra_ops import (DiffOp, bincount, squeeze, from theano.tensor.extra_ops import (DiffOp, bincount, squeeze,
repeat, bartlett, fill_diagonal) repeat, bartlett, fill_diagonal, cumsum)
...@@ -49,7 +49,7 @@ class CumsumOp(theano.Op): ...@@ -49,7 +49,7 @@ class CumsumOp(theano.Op):
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
if self.axis is None: if self.axis is None:
return [(np.prod(shapes[0]),)] # Flatten return [(tensor.prod(shapes[0]),)] # Flatten
return shapes return shapes
...@@ -59,11 +59,14 @@ class CumsumOp(theano.Op): ...@@ -59,11 +59,14 @@ class CumsumOp(theano.Op):
axis = self.axis axis = self.axis
fail = sub['fail'] fail = sub['fail']
if self.axis is None: if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1):
code = """ code = """
npy_intp shape[1] = { PyArray_SIZE(%(x)s) }; npy_intp shape[1] = { PyArray_SIZE(%(x)s) };
if(!(%(z)s && PyArray_DIMS(%(z)s)[0] == shape[0]))
{
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, shape, type_num_%(x)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, shape, type_num_%(x)s);
}
if (!%(z)s) if (!%(z)s)
%(fail)s; %(fail)s;
...@@ -73,8 +76,11 @@ class CumsumOp(theano.Op): ...@@ -73,8 +76,11 @@ class CumsumOp(theano.Op):
""" % locals() """ % locals()
else: else:
code = """ code = """
if(!(%(z)s && PyArray_CompareLists(PyArray_DIMS(%(z)s), PyArray_DIMS(%(x)s), PyArray_NDIM(%(x)s)) ))
{
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_SimpleNew(PyArray_NDIM(%(x)s), PyArray_DIMS(%(x)s), type_num_%(x)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(PyArray_NDIM(%(x)s), PyArray_DIMS(%(x)s), type_num_%(x)s);
}
if (!%(z)s) if (!%(z)s)
%(fail)s; %(fail)s;
...@@ -86,10 +92,10 @@ class CumsumOp(theano.Op): ...@@ -86,10 +92,10 @@ class CumsumOp(theano.Op):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
def __str__(self): def __str__(self):
return self.__class__.__name__ return "%s{%s}" % (self.__class__.__name__, self.axis)
def cumsum(x, axis=None): def cumsum(x, axis=None):
......
...@@ -22,15 +22,26 @@ class TestCumsumOp(utt.InferShapeTester): ...@@ -22,15 +22,26 @@ class TestCumsumOp(utt.InferShapeTester):
def test_cumsumOp(self): def test_cumsumOp(self):
x = T.tensor3('x') x = T.tensor3('x')
a = np.random.random((30, 50, 20)).astype(config.floatX) a = np.random.random((3, 5, 2)).astype(config.floatX)
b = np.random.random((30, 5, 2)).astype(config.floatX)
f = theano.function([x], cumsum(x), mode="DebugMode") f = theano.function([x], cumsum(x), mode="DebugMode")
assert np.allclose(np.cumsum(a), f(a)) # Test axis=None assert np.allclose(np.cumsum(a), f(a)) # Test axis=None
# Test without garbage collector
f = theano.function([x], cumsum(x).sum(), mode=theano.compile.Mode(linker="cvm_nogc", optimizer="fast_run") )
assert np.allclose(np.cumsum(a).sum(), f(a)) # Test axis=None
assert np.allclose(np.cumsum(b).sum(), f(b)) # Would fail without re-allocation
for axis in range(len(a.shape)): for axis in range(len(a.shape)):
f = theano.function([x], cumsum(x, axis=axis), mode="DebugMode") f = theano.function([x], cumsum(x, axis=axis), mode="DebugMode")
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Test without garbage collector
f = theano.function([x], cumsum(x, axis=axis).sum(), mode=theano.compile.Mode(linker="cvm_nogc", optimizer="fast_run"))
assert np.allclose(np.cumsum(a, axis=axis).sum(), f(a))
assert np.allclose(np.cumsum(b, axis=axis).sum(), f(b)) # Would fail without re-allocation
def test_infer_shape(self): def test_infer_shape(self):
x = T.tensor3('x') x = T.tensor3('x')
a = np.random.random((30, 50, 20)).astype(config.floatX) a = np.random.random((30, 50, 20)).astype(config.floatX)
...@@ -53,7 +64,7 @@ class TestCumsumOp(utt.InferShapeTester): ...@@ -53,7 +64,7 @@ class TestCumsumOp(utt.InferShapeTester):
utt.verify_grad(self.op, [a]) # Test axis=None utt.verify_grad(self.op, [a]) # Test axis=None
for axis in range(len(a.shape)): for axis in range(len(a.shape)):
utt.verify_grad(CumsumOp(axis=axis), [a]) utt.verify_grad(self.op_class(axis=axis), [a])
class TestBinCountOp(utt.InferShapeTester): class TestBinCountOp(utt.InferShapeTester):
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment