提交 96a96b6f authored 作者: Cesar Laurent's avatar Cesar Laurent

Adapted for old GPU backend.

上级 90ae0f01
...@@ -2962,14 +2962,11 @@ if True: ...@@ -2962,14 +2962,11 @@ if True:
if isinstance(node.op, Pool): if isinstance(node.op, Pool):
if not node.op.ignore_border: if not node.op.ignore_border:
return return
img, = node.inputs img, ws, stride, pad = node.inputs
ds = node.op.ds
stride = node.op.st
pad = node.op.padding
mode = node.op.mode mode = node.op.mode
if (img.owner and isinstance(img.owner.op, HostFromGpu)): if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]), ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ds, stride=stride, pad=pad, mode=mode) ws, stride=stride, pad=pad, mode=mode)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -2996,10 +2993,7 @@ if True: ...@@ -2996,10 +2993,7 @@ if True:
if isinstance(node.op, MaxPoolGrad): if isinstance(node.op, MaxPoolGrad):
if not node.op.ignore_border: if not node.op.ignore_border:
return return
inp, out, inp_grad = node.inputs inp, out, inp_grad, ws, stride, pad = node.inputs
ds = node.op.ds
st = node.op.st
pad = node.op.padding
mode = node.op.mode mode = node.op.mode
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
...@@ -3010,7 +3004,7 @@ if True: ...@@ -3010,7 +3004,7 @@ if True:
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out), gpu_contiguous(out),
gpu_contiguous(inp_grad), gpu_contiguous(inp_grad),
ds, st, pad) ws, stride, pad)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -3021,10 +3015,7 @@ if True: ...@@ -3021,10 +3015,7 @@ if True:
if isinstance(node.op, AveragePoolGrad): if isinstance(node.op, AveragePoolGrad):
if not node.op.ignore_border: if not node.op.ignore_border:
return return
inp, inp_grad = node.inputs inp, inp_grad, ws, stride, pad = node.inputs
ds = node.op.ds
st = node.op.st
pad = node.op.padding
mode = node.op.mode mode = node.op.mode
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
...@@ -3034,7 +3025,7 @@ if True: ...@@ -3034,7 +3025,7 @@ if True:
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
contiguous_inp_grad, contiguous_inp_grad,
contiguous_inp_grad, contiguous_inp_grad,
ds, st, pad) ws, stride, pad)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
......
...@@ -1891,37 +1891,61 @@ def local_convtransp3d_gemm(node): ...@@ -1891,37 +1891,61 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm) gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
def _check_constant_args_pool(ws, stride, pad, node):
    """Resolve the pooling window size, stride and padding to constants.

    The first two entries of each of ``ws``, ``stride`` and ``pad`` are
    passed through ``tensor.get_scalar_constant_value``.

    Parameters
    ----------
    ws, stride, pad
        Indexable pooling arguments; only indices 0 and 1 are read.
    node
        The node being optimized; only used to format the diagnostic
        message.

    Returns
    -------
    tuple or None
        ``(ws, stride, pad)`` where each item is a 2-tuple of the extracted
        constant values, or ``None`` if any of the six values is not a
        scalar constant.

    Raises
    ------
    AssertionError
        If a value is not constant and ``config.assert_no_cpu_op`` is
        ``"raise"``.  When that flag is ``"warn"`` the message is logged
        instead; for any other value the function silently returns
        ``None``.
    """
    try:
        # Evaluated in the order ws[0], ws[1], stride[0], stride[1],
        # pad[0], pad[1]; the first non-constant entry aborts the lookup.
        constants = tuple(
            tuple(tensor.get_scalar_constant_value(arg[axis])
                  for axis in (0, 1))
            for arg in (ws, stride, pad))
    except tensor.NotScalarConstantError:
        msg = ("Pool with tensor variable for the window size, stride or "
               "padding is only supported in the new GPU backend, so this op "
               "will run on CPU. (op %s)" % node)
        on_cpu_op = config.assert_no_cpu_op
        if on_cpu_op == "warn":
            _logger.warning(msg)
        elif on_cpu_op == "raise":
            raise AssertionError(msg)
        return None
    return constants
@register_opt() @register_opt()
@local_optimizer([pool.Pool]) @local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node): def local_gpu_downsample_factor_max(node):
if (isinstance(node.op, pool.Pool) and if isinstance(node.op, pool.Pool):
node.op.ds == node.op.st): assert node.op.__props__ == ('ignore_border', 'mode')
x, ws, stride, pad = node.inputs
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding', ret = _check_constant_args_pool(ws, stride, pad, node)
'mode') if ret is None:
if node.op.padding != (0, 0) or node.op.mode != 'max': return
ws, stride, pad = ret
if (pad) != (0, 0) or node.op.mode != 'max' or stride != ws:
return return
x, = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds = GpuDownsampleFactorMax(node.op.ds, node.op.ignore_border) gpu_ds = GpuDownsampleFactorMax(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds(x.owner.inputs[0]))] return [host_from_gpu(gpu_ds(x.owner.inputs[0]))]
@register_opt() @register_opt()
@local_optimizer([pool.MaxPoolGrad]) @local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node): def local_gpu_downsample_factor_max_grad(node):
if (isinstance(node.op, pool.MaxPoolGrad) and node.op.ds == node.op.st): if isinstance(node.op, pool.MaxPoolGrad):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding', assert node.op.__props__ == ('ignore_border', 'mode')
'mode') x, z, gz, ws, stride, pad = node.inputs
if (node.op.padding != (0, 0) or ret = _check_constant_args_pool(ws, stride, pad, node)
node.op.mode != 'max' or if ret is None:
node.op.st != node.op.ds): return
ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws:
return return
x, z, gz = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(node.op.ds, gpu_ds_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0], return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(z), as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))] as_cuda_ndarray_variable(gz)))]
...@@ -1931,16 +1955,16 @@ def local_gpu_downsample_factor_max_grad(node): ...@@ -1931,16 +1955,16 @@ def local_gpu_downsample_factor_max_grad(node):
@local_optimizer([pool.DownsampleFactorMaxGradGrad]) @local_optimizer([pool.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(node): def local_gpu_downsample_factor_max_grad_grad(node):
if isinstance(node.op, pool.DownsampleFactorMaxGradGrad): if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', assert node.op.__props__ == ('ignore_border', 'mode')
'padding', 'mode') x, z, gx, ws, stride, pad = node.inputs
if (node.op.padding != (0, 0) or ret = _check_constant_args_pool(ws, stride, pad, node)
node.op.mode != 'max' or if ret is None:
node.op.st != node.op.ds): return
ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws:
return return
x, z, gx = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
op = GpuDownsampleFactorMaxGradGrad(node.op.ds, op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border)
node.op.ignore_border)
return [host_from_gpu(op(x.owner.inputs[0], return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(z), as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))] as_cuda_ndarray_variable(gx)))]
......
...@@ -369,12 +369,12 @@ def test_downsample(): ...@@ -369,12 +369,12 @@ def test_downsample():
continue continue
for ignore_border in (True, False): for ignore_border in (True, False):
# print 'test_downsample', shp, ds, ignore_border # print 'test_downsample', shp, ds, ignore_border
ds_op = Pool(ds, ignore_border=ignore_border) ds_op = Pool(ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a') a = tcn.shared_constructor(my_rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a)), f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds),
mode=mode_with_gpu.excluding('cudnn')) mode=mode_with_gpu.excluding('cudnn'))
f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), f2 = pfunc([], ds_op(tensor.as_tensor_variable(a), ds),
mode=mode_without_gpu) mode=mode_without_gpu)
assert any([isinstance(node.op, assert any([isinstance(node.op,
tcn.blas.GpuDownsampleFactorMax) tcn.blas.GpuDownsampleFactorMax)
...@@ -393,12 +393,12 @@ def test_downsample(): ...@@ -393,12 +393,12 @@ def test_downsample():
g = pfunc( g = pfunc(
[], [],
tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), tensor.grad(ds_op(tensor.as_tensor_variable(a), ds).sum(),
a), a),
mode=mode_with_gpu.excluding('cudnn')) mode=mode_with_gpu.excluding('cudnn'))
g2 = pfunc( g2 = pfunc(
[], [],
tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), tensor.grad(ds_op(tensor.as_tensor_variable(a), ds).sum(),
a), a),
mode=mode_without_gpu) mode=mode_without_gpu)
assert any([isinstance(node.op, assert any([isinstance(node.op,
...@@ -409,7 +409,7 @@ def test_downsample(): ...@@ -409,7 +409,7 @@ def test_downsample():
assert numpy.allclose(g(), g2()), shp assert numpy.allclose(g(), g2()), shp
ggf = gradient.Lop(tensor.grad((ds_op( ggf = gradient.Lop(tensor.grad((ds_op(
tensor.as_tensor_variable(a))**2).sum(), a), a, a) tensor.as_tensor_variable(a), ds)**2).sum(), a), a, a)
ref_mode = copy.copy(mode_without_gpu) ref_mode = copy.copy(mode_without_gpu)
ref_mode.check_py_code = False ref_mode.check_py_code = False
......
...@@ -381,9 +381,10 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, ...@@ -381,9 +381,10 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
(n_kern, logical_hid_shape[0] // 2, logical_hid_shape[1] // 2), (n_kern, logical_hid_shape[0] // 2, logical_hid_shape[1] // 2),
shape_kern1[2:], n_kern1, n_batch, 1, 1, verbose=verbose, version=version) shape_kern1[2:], n_kern1, n_batch, 1, 1, verbose=verbose, version=version)
ds_op = pool.Pool((2, 2), ignore_border=False) ds_op = pool.Pool(ignore_border=False)
if downsample_ops: if downsample_ops:
hid = tensor.tanh(ds_op(conv_op(x, w0) + b0.dimshuffle((0, 'x', 'x')))) hid = tensor.tanh(ds_op(conv_op(x, w0) + b0.dimshuffle((0, 'x', 'x')),
(2, 2)))
else: else:
hid = tensor.tanh( hid = tensor.tanh(
(conv_op(x, w0) + b0.dimshuffle( (conv_op(x, w0) + b0.dimshuffle(
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论