Commit c1dfa69c authored by abergeron, committed by GitHub

Merge pull request #4932 from nouiz/no_complex

Don't move complex to the GPU in the new back-end.
......@@ -626,6 +626,8 @@ class GpuFromHost(Op):
def make_node(self, x):
    """Build the Apply node that transfers *x* from host to GPU.

    Rejects anything that is not a plain TensorType, and rejects
    complex dtypes (the new gpuarray back-end does not support them).
    The output is a GpuArrayType mirroring x's broadcast pattern and
    dtype, on this Op's context.
    """
    if not isinstance(x.type, tensor.TensorType):
        raise TypeError(x)
    if "complex" in x.dtype:
        raise TypeError("complex not supported in the new gpuarray back-end.", x)
    # Output variable lives on the GPU but keeps the host tensor's shape info.
    out_type = GpuArrayType(broadcastable=x.broadcastable,
                            context_name=self.context_name,
                            dtype=x.dtype)
    return Apply(self, [x], [out_type()])
......
......@@ -1270,7 +1270,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling window size.
stride : tuple
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple
(padX, padY) or (padX, padY, padZ)
default: (0, 0)
......@@ -1285,6 +1285,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img = gpu_contiguous(img)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
return as_gpuarray_variable(ret * window_elem, context_name)
return GpuDnnPool(mode=mode)(img, ws, stride, pad)
......
......@@ -191,8 +191,9 @@ def op_lifter(OP, cuda_only=False):
# Check if we should replace
if (not replace or
(cuda_only and
get_context(context_name).kind != b'cuda')):
(cuda_only and
get_context(context_name).kind != b'cuda') or
any(["complex" in i.dtype for i in node.inputs])):
return False
# tag the inputs with the context in case
......@@ -298,7 +299,8 @@ class GraphToGPU(Optimizer):
for i in fgraph.inputs:
# Do not move *int* scalar to the GPU.
if (isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)):
(i.ndim > 0 or 'int' not in i.dtype) and
"complex" not in i.dtype):
mapping[i] = i.transfer(getattr(i.tag, 'target', target))
else:
mapping[i] = i
......@@ -344,6 +346,10 @@ class GraphToGPU(Optimizer):
self.local_optimizers_map.get(type(c.op), []))):
move_to_GPU = True
new_ops = None
if move_to_GPU and any(["complex" in getattr(i, 'dtype', "")
for i in node.inputs]):
move_to_GPU = False
# Apply the lifter
if move_to_GPU:
for lopt in (self.local_optimizers_map.get(node.op, []) +
......
......@@ -356,6 +356,18 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
def test_dnn_tag():
"""
......
......@@ -472,3 +472,12 @@ def test_local_assert_no_cpu_op():
theano.function([], out, mode=mode_local_assert)
finally:
theano.config.assert_no_cpu_op = old
def test_no_complex():
    """Compiling a graph that mixes a complex input with float inputs
    under the GPU mode must not error out: the GPU lifters are expected
    to keep the complex computation on the CPU rather than raise."""
    width = tensor.cscalar()
    freq = tensor.fscalar()
    signal = tensor.fscalar()
    # Complex intermediate multiplied by a float signal.
    out = tensor.exp(width * freq) * signal
    theano.function([width, freq, signal], out, mode=mode_with_gpu)
......@@ -1991,7 +1991,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling window size.
stride
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad :
(pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and
......@@ -2009,6 +2009,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img = gpu_contiguous(img)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
return as_cuda_ndarray_variable(ret * window_elem)
return GpuDnnPool(mode=mode)(img, ws, stride, pad)
......
......@@ -545,6 +545,17 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
class test_DnnSoftMax(test_nnet.test_SoftMax):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论