提交 24e73f67 authored 作者: abergeron's avatar abergeron

Merge pull request #3712 from nouiz/crash_gpu_from_host

Fix compilation crash
...@@ -504,6 +504,24 @@ def test_pdbbreakpoint_op(): ...@@ -504,6 +504,24 @@ def test_pdbbreakpoint_op():
assert topo[-1].op == cuda.host_from_gpu assert topo[-1].op == cuda.host_from_gpu
def test_local_gpu_elemwise_careduce():
    """Check that the elemwise square feeding a GPU sum reduction is
    fused into the reduction as ``pre_scalar_op=sqr``, for both the
    full reduction and the row-wise (axis=1) reduction.
    """
    x = theano.tensor.fmatrix()

    # Full reduction: the graph should compile to 3 nodes, with the
    # square folded into the CAReduce node (topo[1]).
    f = theano.function([x], (x * x).sum(), mode=mode_with_gpu)
    nodes = f.maker.fgraph.toposort()
    assert len(nodes) == 3
    assert nodes[1].op.pre_scalar_op == theano.scalar.sqr
    data = numpy.random.rand(3, 4).astype('float32')
    utt.assert_allclose(f(data), (data * data).sum())

    # Row-wise reduction: same fusion expected along axis 1.
    f = theano.function([x], (x * x).sum(axis=1), mode=mode_with_gpu)
    nodes = f.maker.fgraph.toposort()
    assert len(nodes) == 3
    assert nodes[1].op.pre_scalar_op == theano.scalar.sqr
    utt.assert_allclose(f(data), (data * data).sum(axis=1))
def test_huge_elemwise_fusion(): def test_huge_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly """ Test the the GpuElemwise fusion work correctly
We check that we fuse one node with part of its input We check that we fuse one node with part of its input
......
...@@ -403,6 +403,7 @@ class GpuFromHost(Op): ...@@ -403,6 +403,7 @@ class GpuFromHost(Op):
return """ return """
PyArrayObject *%(name)s_tmp; PyArrayObject *%(name)s_tmp;
%(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s); %(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s);
int err;
if (%(name)s_tmp == NULL) if (%(name)s_tmp == NULL)
%(fail)s %(fail)s
...@@ -411,8 +412,8 @@ class GpuFromHost(Op): ...@@ -411,8 +412,8 @@ class GpuFromHost(Op):
(size_t *)PyArray_DIMS(%(name)s_tmp), (size_t *)PyArray_DIMS(%(name)s_tmp),
get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)))) { get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)))) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
int err = GpuArray_write(&%(out)s->ga, PyArray_DATA(%(name)s_tmp), err = GpuArray_write(&%(out)s->ga, PyArray_DATA(%(name)s_tmp),
PyArray_NBYTES(%(name)s_tmp)); PyArray_NBYTES(%(name)s_tmp));
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
Py_DECREF(%(name)s_tmp); Py_DECREF(%(name)s_tmp);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
......
...@@ -867,12 +867,13 @@ def local_gpu_elemwise_careduce(node): ...@@ -867,12 +867,13 @@ def local_gpu_elemwise_careduce(node):
isinstance(node.inputs[0].owner.op, GpuElemwise) and isinstance(node.inputs[0].owner.op, GpuElemwise) and
# The Op support all scalar with 1 inputs. We don't # The Op support all scalar with 1 inputs. We don't
# automatically add more case, as some like trigonometic # automatically add more case, as some like trigonometic
# operation with some reduction pattern will probably result # operation with some reduction pattern will probably results
# to slow down. # in slow down.
isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)): isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)):
op = node.op op = node.op
inp = node.inputs[0].owner.inputs[0] inp = node.inputs[0].owner.inputs[0]
return [GpuCAReduceCuda(scalar_op=op.scalar_op, return [GpuCAReduceCuda(scalar_op=op.scalar_op,
axis=op.axis,
reduce_mask=op.reduce_mask, reduce_mask=op.reduce_mask,
pre_scalar_op=scalar.basic.sqr)(inp)] pre_scalar_op=scalar.basic.sqr)(inp)]
......
...@@ -212,7 +212,15 @@ def test_local_gpu_elemwise_careduce(): ...@@ -212,7 +212,15 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr assert topo[1].op.pre_scalar_op == theano.scalar.sqr
f(numpy.random.rand(3, 4).astype(theano.config.floatX)) data = numpy.random.rand(3, 4).astype(theano.config.floatX)
utt.assert_allclose(f(data), (data * data).sum())
o = (x * x).sum(axis=1)
f = theano.function([x], o, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr
utt.assert_allclose(f(data), (data * data).sum(axis=1))
def test_local_gpu_subtensor(): def test_local_gpu_subtensor():
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论