Commit c1dfa69c, authored by abergeron, committed via GitHub.

Merge pull request #4932 from nouiz/no_complex

Don't move complex to the GPU in the new back-end.
...@@ -626,6 +626,8 @@ class GpuFromHost(Op): ...@@ -626,6 +626,8 @@ class GpuFromHost(Op):
def make_node(self, x): def make_node(self, x):
if not isinstance(x.type, tensor.TensorType): if not isinstance(x.type, tensor.TensorType):
raise TypeError(x) raise TypeError(x)
if "complex" in x.dtype:
raise TypeError("complex not supported in the new gpuarray back-end.", x)
return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable, return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
context_name=self.context_name, context_name=self.context_name,
dtype=x.dtype)()]) dtype=x.dtype)()])
......
...@@ -1270,7 +1270,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1270,7 +1270,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling window size. Subsampling window size.
stride : tuple stride : tuple
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad'} mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple pad : tuple
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
default: (0, 0) default: (0, 0)
...@@ -1285,6 +1285,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1285,6 +1285,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
return as_gpuarray_variable(ret * window_elem, context_name)
return GpuDnnPool(mode=mode)(img, ws, stride, pad) return GpuDnnPool(mode=mode)(img, ws, stride, pad)
......
...@@ -191,8 +191,9 @@ def op_lifter(OP, cuda_only=False): ...@@ -191,8 +191,9 @@ def op_lifter(OP, cuda_only=False):
# Check if we should replace # Check if we should replace
if (not replace or if (not replace or
(cuda_only and (cuda_only and
get_context(context_name).kind != b'cuda')): get_context(context_name).kind != b'cuda') or
any(["complex" in i.dtype for i in node.inputs])):
return False return False
# tag the inputs with the context in case # tag the inputs with the context in case
...@@ -298,7 +299,8 @@ class GraphToGPU(Optimizer): ...@@ -298,7 +299,8 @@ class GraphToGPU(Optimizer):
for i in fgraph.inputs: for i in fgraph.inputs:
# Do not move *int* scalar to the GPU. # Do not move *int* scalar to the GPU.
if (isinstance(i.type, tensor.TensorType) and if (isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)): (i.ndim > 0 or 'int' not in i.dtype) and
"complex" not in i.dtype):
mapping[i] = i.transfer(getattr(i.tag, 'target', target)) mapping[i] = i.transfer(getattr(i.tag, 'target', target))
else: else:
mapping[i] = i mapping[i] = i
...@@ -344,6 +346,10 @@ class GraphToGPU(Optimizer): ...@@ -344,6 +346,10 @@ class GraphToGPU(Optimizer):
self.local_optimizers_map.get(type(c.op), []))): self.local_optimizers_map.get(type(c.op), []))):
move_to_GPU = True move_to_GPU = True
new_ops = None new_ops = None
if move_to_GPU and any(["complex" in getattr(i, 'dtype', "")
for i in node.inputs]):
move_to_GPU = False
# Apply the lifter # Apply the lifter
if move_to_GPU: if move_to_GPU:
for lopt in (self.local_optimizers_map.get(node.op, []) + for lopt in (self.local_optimizers_map.get(node.op, []) +
......
...@@ -356,6 +356,18 @@ def test_pooling_opt(): ...@@ -356,6 +356,18 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
def test_dnn_tag(): def test_dnn_tag():
""" """
......
...@@ -472,3 +472,12 @@ def test_local_assert_no_cpu_op(): ...@@ -472,3 +472,12 @@ def test_local_assert_no_cpu_op():
theano.function([], out, mode=mode_local_assert) theano.function([], out, mode=mode_local_assert)
finally: finally:
theano.config.assert_no_cpu_op = old theano.config.assert_no_cpu_op = old
def test_no_complex():
    # Regression test: compiling a graph that contains a complex
    # intermediate must not crash under the GPU mode — complex ops are
    # expected to stay on the CPU in the gpuarray back-end.
    width = tensor.cscalar()
    freq = tensor.fscalar()
    signal = tensor.fscalar()
    out = tensor.exp(width * freq) * signal
    theano.function([width, freq, signal], out,
                    mode=mode_with_gpu)
...@@ -1991,7 +1991,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1991,7 +1991,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling window size. Subsampling window size.
stride stride
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad} mode : {'max', 'average_inc_pad', 'average_exc_pad, 'sum'}
pad : pad :
(pad_h, pad_w) padding information. (pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and pad_h is the number of zero-valued pixels added to each of the top and
...@@ -2009,6 +2009,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -2009,6 +2009,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
return as_cuda_ndarray_variable(ret * window_elem)
return GpuDnnPool(mode=mode)(img, ws, stride, pad) return GpuDnnPool(mode=mode)(img, ws, stride, pad)
......
...@@ -545,6 +545,17 @@ def test_pooling_opt(): ...@@ -545,6 +545,17 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
......
Markdown format
0%
You are attaching 0 files to this discussion. Please proceed with caution.
Please finish editing this comment first!
Sign up or sign in to post a comment.