Commit 0ac9ec62 authored by Frederic

[ENH] Make opt use the opencl version of the op for opencl device

Parent 58e9b7fb
......@@ -31,7 +31,8 @@ from theano.sandbox.gpuarray.nnet import (
GpuSoftmaxWithBias, GpuSoftmax
)
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduceCuda)
GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY)
from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
......@@ -366,15 +367,25 @@ def local_gpua_advanced_incsubtensor(node):
def local_gpua_careduce(node):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
scalar.Maximum, scalar.Minimum)):
dev = theano.sandbox.gpuarray.init_dev.device
if dev.startswith('opencl'):
op = GpuCAReduceCPY
if node.op.scalar_op not in [scalar.add, scalar.mul]:
# We don't support yet all reduction with cpy code.
return
else:
op = GpuCAReduceCuda
x, = node.inputs
greduce = GpuCAReduceCuda(
greduce = op(
node.op.scalar_op, axis=node.op.axis,
dtype=getattr(node.op, 'dtype', None),
acc_dtype=getattr(node.op, 'acc_dtype', None))
gvar = greduce(x)
# We need to have the make node called, otherwise the mask can
# be None
if gvar.owner.op.supports_c_code([gpu_from_host(x)]):
if (op is GpuCAReduceCPY or
gvar.owner.op.supports_c_code([gpu_from_host(x)])):
return greduce
else:
# Try to make a simpler pattern based on reshaping
......@@ -407,7 +418,7 @@ def local_gpua_careduce(node):
for idx, m in enumerate(new_mask):
if m == 1:
new_axis.append(idx)
greduce = GpuCAReduceCuda(
greduce = op(
node.op.scalar_op,
axis=new_axis, reduce_mask=new_mask,
dtype=getattr(node.op, 'dtype', None),
......
......@@ -7,7 +7,8 @@ import theano.sandbox.gpuarray
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import (
GpuAlloc, GpuReshape, gpu_alloc, gpu_from_host, host_from_gpu)
from theano.sandbox.gpuarray.elemwise import GpuCAReduceCuda, GpuElemwise
from theano.sandbox.gpuarray.elemwise import (
GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise)
from theano.sandbox.gpuarray.tests.test_basic_ops import (
rand_gpuarray, mode_with_gpu, mode_without_gpu
)
......@@ -50,17 +51,26 @@ def test_flatten():
def test_reduce():
    """Check that sum/prod/max/min on a float32 matrix are compiled to a
    GPU reduction op (GpuCAReduceCuda or GpuCAReduceCPY).

    On an OpenCL device, max/min reductions are not yet supported by the
    CPY (OpenCL-compatible) kernel, so for those methods we assert that NO
    GPU reduction op was introduced instead.
    """
    # Device string (e.g. 'cuda0' or 'opencl0:0') selected at init time.
    dev = theano.sandbox.gpuarray.init_dev.device
    # sum/prod need acc_dtype='float32' so the accumulation dtype matches
    # the input and the op can stay on the GPU; max/min take no extra args.
    for method, param in [('sum', dict(acc_dtype='float32')),
                          ('prod', dict(acc_dtype='float32')),
                          ('max', {}), ('min', {})]:
        m = theano.tensor.fmatrix()
        f = theano.function([m], getattr(m, method)(axis=0,
                                                    **param),
                            mode=mode_with_gpu)
        val = numpy.random.rand(10, 11).astype("float32")
        res = f(val)
        # The GPU result must match the NumPy reduction along axis 0.
        utt.assert_allclose(res, getattr(val, method)(axis=0))
        assert res.shape == (11,)
        topo = f.maker.fgraph.toposort()
        ops = [type(node.op) for node in topo]
        if dev.startswith('opencl') and method in ["max", "min"]:
            # OpenCL: max/min reductions are unsupported, so neither GPU
            # reduction op should appear in the compiled graph.
            assert not(GpuCAReduceCuda in ops or GpuCAReduceCPY in ops)
        else:
            assert GpuCAReduceCuda in ops or GpuCAReduceCPY in ops
def test_local_gpualloc_memset_0():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论