提交 91935a04 authored 作者: James Bergstra's avatar James Bergstra

Merge pull request #45 from nouiz/mixed_commit

Mixed commit
...@@ -593,6 +593,12 @@ __global__ void k_copy_reshape_rowmajor(unsigned int numEls, ...@@ -593,6 +593,12 @@ __global__ void k_copy_reshape_rowmajor(unsigned int numEls,
z_i[0] = a_i[0]; //copy one lousy float! z_i[0] = a_i[0]; //copy one lousy float!
} }
} }
// Reshape self to the new shape given by the tuple shape.
//
// If self is c contiguous, it returns a view. Otherwise it always does a copy.
// TODO: make it return a view when the strides allow it even if it is not
// c contiguous
PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape) PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape)
{ {
// check shape tuple // check shape tuple
...@@ -717,6 +723,7 @@ PyObject * CudaNdarray_View(CudaNdarray * self) ...@@ -717,6 +723,7 @@ PyObject * CudaNdarray_View(CudaNdarray * self)
} }
return (PyObject*)rval; return (PyObject*)rval;
} }
PyObject * CudaNdarray_SetStride(CudaNdarray * self, PyObject *args) PyObject * CudaNdarray_SetStride(CudaNdarray * self, PyObject *args)
{ {
int pos, stride; int pos, stride;
...@@ -803,6 +810,9 @@ static PyMethodDef CudaNdarray_methods[] = ...@@ -803,6 +810,9 @@ static PyMethodDef CudaNdarray_methods[] =
{"copy", {"copy",
(PyCFunction)CudaNdarray_Copy, METH_NOARGS, (PyCFunction)CudaNdarray_Copy, METH_NOARGS,
"Create a copy of this object"}, "Create a copy of this object"},
{"is_c_contiguous",
(PyCFunction)CudaNdarray_IS_C_Contiguous, METH_NOARGS,
"Return True is the object is c contiguous. False otherwise."},
{"reduce_sum", {"reduce_sum",
(PyCFunction)CudaNdarray_ReduceSum, METH_O, (PyCFunction)CudaNdarray_ReduceSum, METH_O,
"Reduce over the given dimensions by summation"}, "Reduce over the given dimensions by summation"},
......
...@@ -504,6 +504,10 @@ CudaNdarray_ZEROS(int n, int * dims); ...@@ -504,6 +504,10 @@ CudaNdarray_ZEROS(int n, int * dims);
* True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1] * True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1]
*/ */
bool CudaNdarray_is_c_contiguous(const CudaNdarray * self); bool CudaNdarray_is_c_contiguous(const CudaNdarray * self);
// Python-callable wrapper around CudaNdarray_is_c_contiguous():
// converts the C-level contiguity check into a new Python bool object.
// Registered with METH_NOARGS, so it takes only the self argument.
PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self)
{
    const long contiguous = CudaNdarray_is_c_contiguous(self) ? 1L : 0L;
    return PyBool_FromLong(contiguous);
}
int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C); int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C);
int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray* A); int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray* A);
......
...@@ -909,6 +909,11 @@ def test_base(): ...@@ -909,6 +909,11 @@ def test_base():
e = b.reshape((5,2,2,3)) e = b.reshape((5,2,2,3))
assert e.base is a assert e.base is a
def test_is_c_contiguous():
    # A freshly allocated array is C contiguous, and so is a view taken
    # along the leading axis; a strided view (step > 1) is not.
    arr = cuda_ndarray.CudaNdarray.zeros((3, 4, 5))
    assert arr.is_c_contiguous()
    assert arr[1].is_c_contiguous()
    assert not arr[::2].is_c_contiguous()
if __name__ == '__main__': if __name__ == '__main__':
test_zeros_basic_3d_tensor() test_zeros_basic_3d_tensor()
......
...@@ -4880,7 +4880,11 @@ class AdvancedSubtensor1(Op): ...@@ -4880,7 +4880,11 @@ class AdvancedSubtensor1(Op):
x, i = inp x, i = inp
out, = out_ out, = out_
# Copy always implied by numpy advanced indexing semantic. # Copy always implied by numpy advanced indexing semantic.
out[0] = x[i] if out[0] is not None and out[0].shape==(len(i),)+x.shape[1:]:
o = out[0]
else:
o = None
out[0] = x.take(i, axis=0, out=o)
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
......
...@@ -1809,6 +1809,8 @@ class T_subtensor(unittest.TestCase): ...@@ -1809,6 +1809,8 @@ class T_subtensor(unittest.TestCase):
self.inc_sub = inc_sub self.inc_sub = inc_sub
self.adv_sub1 = adv_sub1 self.adv_sub1 = adv_sub1
self.adv_incsub1 = adv_incsub1 self.adv_incsub1 = adv_incsub1
if mode is None:
mode = theano.compile.mode.get_default_mode()
self.mode = mode self.mode = mode
self.dtype = dtype self.dtype = dtype
self.ignore_topo = ignore_topo self.ignore_topo = ignore_topo
...@@ -1885,7 +1887,9 @@ class T_subtensor(unittest.TestCase): ...@@ -1885,7 +1887,9 @@ class T_subtensor(unittest.TestCase):
def test2_ok_range_finite(self): def test2_ok_range_finite(self):
n = self.shared(numpy.ones((3,4), dtype=self.dtype)*5) n = self.shared(numpy.ones((3,4), dtype=self.dtype)*5)
t = n[0:2,3] # Also check negative index
for idx in [(slice(0,2),3),((slice(0,2),-1)),(slice(0,2),-4)]:
t = n[idx]#l]#0:2,3]
self.assertTrue(isinstance(t.owner.op, Subtensor)) self.assertTrue(isinstance(t.owner.op, Subtensor))
f = inplace_func([], t, mode=self.mode) f = inplace_func([], t, mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
...@@ -1894,7 +1898,7 @@ class T_subtensor(unittest.TestCase): ...@@ -1894,7 +1898,7 @@ class T_subtensor(unittest.TestCase):
assert isinstance(topo_[0].op, self.sub) assert isinstance(topo_[0].op, self.sub)
tval = f() tval = f()
self.assertTrue(tval.shape == (2,)) self.assertTrue(tval.shape == (2,))
self.assertTrue(tval[1] == 5.0) self.assertTrue(numpy.allclose(tval, n.get_value()[idx]))
def test1_err_invalid(self): def test1_err_invalid(self):
n = self.shared(numpy.ones(1, dtype=self.dtype)) n = self.shared(numpy.ones(1, dtype=self.dtype))
...@@ -1946,7 +1950,8 @@ class T_subtensor(unittest.TestCase): ...@@ -1946,7 +1950,8 @@ class T_subtensor(unittest.TestCase):
def test2_err_bounds0(self): def test2_err_bounds0(self):
n = self.shared(numpy.ones((2,3), dtype=self.dtype)*5) n = self.shared(numpy.ones((2,3), dtype=self.dtype)*5)
t = n[0,4] for idx in [(0,4),(0,-4)]:
t = n[idx]
self.assertTrue(isinstance(t.owner.op, Subtensor)) self.assertTrue(isinstance(t.owner.op, Subtensor))
# Silence expected warnings # Silence expected warnings
_logger = logging.getLogger('theano.gof.opt') _logger = logging.getLogger('theano.gof.opt')
...@@ -1960,6 +1965,7 @@ class T_subtensor(unittest.TestCase): ...@@ -1960,6 +1965,7 @@ class T_subtensor(unittest.TestCase):
pass pass
finally: finally:
_logger.setLevel(oldlevel) _logger.setLevel(oldlevel)
def test2_err_bounds1(self): def test2_err_bounds1(self):
n = self.shared((numpy.ones((2,3), dtype=self.dtype)*5)) n = self.shared((numpy.ones((2,3), dtype=self.dtype)*5))
t = n[4:5,2] t = n[4:5,2]
...@@ -2075,6 +2081,10 @@ class T_subtensor(unittest.TestCase): ...@@ -2075,6 +2081,10 @@ class T_subtensor(unittest.TestCase):
(numpy.random.rand(4,5), [2,3]), (numpy.random.rand(4,5), [2,3]),
(numpy.random.rand(4,2,3), [0,3]), (numpy.random.rand(4,2,3), [0,3]),
(numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]), (numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]),
(numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0,-1,-2,-3,-4]),
# Test 4 dims as gpu code use another algo in that case
# This new algo is not as much optimized for that case.
(numpy.random.rand(4,4,2,3), [3,3,1,1,2,2,0,0,-1,-2,-3,-4]),
# Test with TensorConstant index. # Test with TensorConstant index.
(numpy.random.rand(4,2,3), constant([3,3,1,1,2,2,0,0])), (numpy.random.rand(4,2,3), constant([3,3,1,1,2,2,0,0])),
]: ]:
...@@ -2093,6 +2103,19 @@ class T_subtensor(unittest.TestCase): ...@@ -2093,6 +2103,19 @@ class T_subtensor(unittest.TestCase):
self.assertTrue(val.ndim == data.ndim) self.assertTrue(val.ndim == data.ndim)
self.assertTrue(numpy.allclose(val, good), (val, good)) self.assertTrue(numpy.allclose(val, good), (val, good))
# Test reuse of output memory
if isinstance(self.adv_sub1,tensor.AdvancedSubtensor1):
op = self.adv_sub1()
# When idx is a TensorConstant.
if hasattr(idx, "data"):
idx = idx.data
test_out = [[None]]
op.perform(None, [data, idx],test_out)
out1 = test_out[0][0]
op.perform(None, [data, idx],test_out)
out2 = test_out[0][0]
assert out1 is out2
def test_err_invalid_list(self): def test_err_invalid_list(self):
n = self.shared(numpy.asarray(5, dtype=self.dtype)) n = self.shared(numpy.asarray(5, dtype=self.dtype))
self.assertRaises(TypeError, n.__getitem__, [0,0]) self.assertRaises(TypeError, n.__getitem__, [0,0])
...@@ -2104,16 +2127,18 @@ class T_subtensor(unittest.TestCase): ...@@ -2104,16 +2127,18 @@ class T_subtensor(unittest.TestCase):
def test_err_bound_list(self): def test_err_bound_list(self):
n = self.shared(numpy.ones((2,3),dtype=self.dtype)*5) n = self.shared(numpy.ones((2,3),dtype=self.dtype)*5)
t = n[[0,4]] l = lvector()
t = n[l]
# We test again AdvancedSubtensor1 as we transfer data to the cpu. # We test again AdvancedSubtensor1 as we transfer data to the cpu.
self.assertTrue(isinstance(t.owner.op, theano.tensor.basic.AdvancedSubtensor1)) self.assertTrue(isinstance(t.owner.op, theano.tensor.basic.AdvancedSubtensor1))
f = function([], t, mode=self.mode) f = function([l], t, mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)] topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)]
assert len(topo_)==1 assert len(topo_)==1
self.assertTrue(isinstance(topo_[0].op, self.adv_sub1)) self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
self.assertRaises(IndexError, f) for shp in [[0,4],[0,-3], [-10]]:
self.assertRaises(IndexError, f, shp)
def test_adv_sub1_broadcast(self): def test_adv_sub1_broadcast(self):
ones = numpy.ones((1,3), dtype=self.dtype) ones = numpy.ones((1,3), dtype=self.dtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论