Commit 6f8d49aa authored by Frederic

[CRASH,ENH]: don't have the opt raise an error due to mixing conv2d/3d ops, and make the opt work for conv3d

Parent 755f2218
@@ -33,7 +33,6 @@ from theano.tensor.nnet.abstract_conv2d import (AbstractConv2d,
                                                  AbstractConv2d_gradWeights,
                                                  AbstractConv2d_gradInputs)

 def dnn_available():
     if dnn_available.avail is None:
         if not theano.sandbox.cuda.cuda_available:
@@ -2232,43 +2231,43 @@ if True:
             70.0, 'fast_run', 'inplace', 'gpu', 'cudnn')

 @register_opt('cudnn')
-@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5, nd=4)
+@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5)
 def local_dnn_conv_alpha_merge(node, *inputs):
     if not dnn_available() or version() == -1:
         return None
-    return [GpuDnnConv(algo=node.op.algo)(*inputs)]
+    return [node.op(*inputs)]

 @register_opt('cudnn')
-@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, nd=4)
+@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5)
 def local_dnn_convw_alpha_merge(node, *inputs):
     if not dnn_available() or version() == -1:
         return None
-    return [GpuDnnConvGradW()(*inputs)]
+    return [node.op(*inputs)]

 @register_opt('cudnn')
-@alpha_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, nd=4)
+@alpha_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5)
 def local_dnn_convi_alpha_merge(node, *inputs):
     if not dnn_available() or version() == -1:
         return None
-    return [GpuDnnConvGradI()(*inputs)]
+    return [node.op(*inputs)]

 @register_opt('cudnn')
-@output_merge(GpuDnnConv, alpha_in=4, beta_in=5, out_in=2, nd=4)
+@output_merge(GpuDnnConv, alpha_in=4, beta_in=5, out_in=2)
 def local_dnn_conv_output_merge(node, *inputs):
     inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
-    return [GpuDnnConv(algo=node.op.algo)(*inputs)]
+    return [node.op(*inputs)]

 @register_opt('cudnn')
-@output_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, out_in=2, nd=4)
+@output_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, out_in=2)
 def local_dnn_convw_output_merge(node, *inputs):
     inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
-    return [GpuDnnConvGradW()(*inputs)]
+    return [node.op(*inputs)]

 @register_opt('cudnn')
-@output_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, out_in=2, nd=4)
+@output_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, out_in=2)
 def local_dnn_convi_output_merge(node, *inputs):
     inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
-    return [GpuDnnConvGradI()(*inputs)]
+    return [node.op(*inputs)]

 @register_opt('cudnn')
 @local_optimizer([GpuDownsampleFactorMax])
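Why the returned op changed: in these optimizers the wrapped function receives the matched convolution apply node, so `node.op` is the convolution op actually found in the graph. Rebuilding through it keeps the exact variant (including the 3d subclass and any `algo` setting), where the old code always constructed the base 2d op and then errored out on 5d inputs. A minimal sketch of the pattern in plain Python, with hypothetical `Conv2d`/`Conv3d`/`FakeNode` stand-ins, assuming (as in this file) that the 3d op subclasses the 2d one:

```python
# Hedged sketch, not Theano code: why node.op(*inputs) is safer than
# rebuilding a fresh op of the base class.
class Conv2d(object):
    def __call__(self, *inputs):
        return (type(self).__name__, inputs)

class Conv3d(Conv2d):  # the 3d op subclasses the 2d one
    pass

class FakeNode(object):  # hypothetical stand-in for an apply node
    def __init__(self, op):
        self.op = op

node = FakeNode(Conv3d())
# Old behaviour: always rebuild the base class, so a matched 3d
# convolution silently comes back as a 2d op.
old = Conv2d()('img', 'kern')
# New behaviour: reuse the matched node's own op, whatever its class.
new = node.op('img', 'kern')
assert old[0] == 'Conv2d' and new[0] == 'Conv3d'
```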
@@ -57,7 +57,7 @@ def is_equal(var, val):
     return False

-def alpha_merge(cls, alpha_in, beta_in, nd):
+def alpha_merge(cls, alpha_in, beta_in):
     def wrapper(maker):
         @local_optimizer([GpuElemwise])
         @wraps(maker)
@@ -68,9 +68,13 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
                 targ = find_node(node.inputs[0], cls)
                 if targ is None:
                     targ = find_node(node.inputs[1], cls)
-                    lr = grab_cpu_scalar(node.inputs[0], nd=nd)
+                    if targ is None:
+                        return
+                    lr = grab_cpu_scalar(node.inputs[0],
+                                         nd=targ.outputs[0].ndim)
                 else:
-                    lr = grab_cpu_scalar(node.inputs[1], nd=nd)
+                    lr = grab_cpu_scalar(node.inputs[1],
+                                         nd=targ.outputs[0].ndim)
                 if lr is None or targ is None:
                     return None
                 inputs = list(targ.inputs)
@@ -93,7 +97,7 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
     return wrapper

-def output_merge(cls, alpha_in, beta_in, out_in, nd):
+def output_merge(cls, alpha_in, beta_in, out_in):
     def wrapper(maker):
         @local_optimizer([GpuElemwise])
         @wraps(maker)
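The `nd` parameter drops out of both decorator signatures because the expected rank can be read off the matched node itself: `targ.outputs[0].ndim` is 4 for a conv2d graph and 5 for a conv3d graph, so one decorator now serves both. The added `if targ is None: return` guard also stops the optimizer from proceeding when neither elemwise input is a matching convolution, which is the crash named in the commit title. A minimal sketch of the rank lookup, with `FakeVar`/`FakeApply` as hypothetical stand-ins for Theano's `Variable`/`Apply`:

```python
# Hedged sketch of the nd derivation above; FakeVar and FakeApply are
# made-up stand-ins, not Theano classes.
class FakeVar(object):
    def __init__(self, ndim):
        self.ndim = ndim

class FakeApply(object):
    def __init__(self, ndim):
        self.outputs = [FakeVar(ndim)]

for targ in (FakeApply(4), FakeApply(5)):  # matched conv2d / conv3d node
    nd = targ.outputs[0].ndim              # replaces the hard-coded nd=4
    print('scalar must broadcast against a %dd tensor' % nd)
```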
@@ -1082,6 +1082,80 @@ def test_dnn_conv_alpha_output_merge():
         utt.assert_allclose(v1, v2)


+def test_dnn_conv3d_alpha_output_merge():
+    if not cuda.dnn.dnn_available():
+        raise SkipTest(cuda.dnn.dnn_available.msg)
+
+    t = T.TensorType(broadcastable=(False, False, False, False, False),
+                     dtype='float32')
+    img = t()
+    kern = t()
+    out = t()
+
+    b = 1
+    c = 4
+    f = 3
+    it = 10
+    ih = 5
+    iw = 8
+    kt = 3
+    kh = 2
+    kw = 6
+
+    img_val = numpy.random.random((b, c, it, ih, iw)).astype('float32')
+    kern_val = numpy.random.random((f, c, kt, kh, kw)).astype('float32')
+    out_val = numpy.random.random((b, f, it - kt + 1, ih - kh + 1,
+                                   iw - kw + 1)).astype('float32')
+
+    conv = dnn.dnn_conv3d(img, kern)
+    gw = theano.grad(conv.sum(), kern)
+    gi = theano.grad(conv.sum(), img)
+    lr = numpy.asarray(0.05, dtype='float32')
+
+    if cuda.dnn.version() == -1:
+        # Can't merge alpha with cudnn v1
+        fr = conv + out
+        wr = kern + gw
+        ir = img + gi
+    else:
+        fr = lr * (conv + out)
+        wr = kern + lr * gw
+        ir = img + lr * gi
+
+    f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
+    assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
+                      dnn.GpuDnnConv)
+    assert isinstance(f1.maker.fgraph.outputs[1].owner.inputs[0].owner.op,
+                      dnn.GpuDnnConvGradW)
+    assert isinstance(f1.maker.fgraph.outputs[2].owner.inputs[0].owner.op,
+                      dnn.GpuDnnConvGradI)
+
+    mode = mode_with_gpu
+    mode = mode.excluding('local_dnn_conv_alpha_merge')
+    mode = mode.excluding('local_dnn_convw_alpha_merge')
+    mode = mode.excluding('local_dnn_convi_alpha_merge')
+    mode = mode.excluding('local_dnn_conv_output_merge')
+    mode = mode.excluding('local_dnn_convw_output_merge')
+    mode = mode.excluding('local_dnn_convi_output_merge')
+
+    f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)
+    assert not isinstance(f2.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
+                          dnn.GpuDnnConv3d)
+    assert not isinstance(f2.maker.fgraph.outputs[1].owner.inputs[0].owner.op,
+                          dnn.GpuDnnConv3dGradW)
+    assert not isinstance(f2.maker.fgraph.outputs[2].owner.inputs[0].owner.op,
+                          dnn.GpuDnnConv3dGradI)
+
+    out_f1 = f1(img_val, kern_val, out_val)
+    out_f2 = f2(img_val, kern_val, out_val)
+    assert len(out_f1) == len(out_f2)
+    for v1, v2 in zip(out_f1, out_f2):
+        utt.assert_allclose(v1, v2)
+
+
 def test_dnn_conv_merge_mouts():
     # make sure it doesn't attempt to output/alpha merge a convolution
     # that has multiple clients.
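For context on what the new test exercises: the merge optimizations rely on cuDNN's forward convolution computing `alpha * conv(img, kern) + beta * out`, so an expression like `lr * (conv + out)` can fold into a single `GpuDnnConv` call with `alpha` and `beta` both set to `lr`. A small numpy illustration of the arithmetic identity being exploited (not Theano or cuDNN API):

```python
# Hedged illustration: lr * (conv + out) equals the alpha/beta form
# alpha * conv + beta * out with alpha = beta = lr, up to float rounding.
import numpy

lr = numpy.float32(0.05)
conv_res = numpy.random.random((1, 3, 4, 4)).astype('float32')
out = numpy.random.random((1, 3, 4, 4)).astype('float32')

unmerged = lr * (conv_res + out)
merged = lr * conv_res + lr * out  # alpha * conv + beta * out
numpy.testing.assert_allclose(unmerged, merged, rtol=1e-5)
```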