提交 fbd95029 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix alpha_merge and beta_merge to work correctly.

Modify some tests so that they exercise some previously failing configurations.
上级 c68a0d18
...@@ -1559,47 +1559,41 @@ if True: ...@@ -1559,47 +1559,41 @@ if True:
70.0, 'fast_run', 'inplace', 'gpu', 'cudnn') 70.0, 'fast_run', 'inplace', 'gpu', 'cudnn')
@register_opt('cudnn') @register_opt('cudnn')
@alpha_merge(GpuDnnConv, alpha_in=4, nd=4) @alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5, nd=4)
def local_dnn_conv_alpha_merge(node, *inputs): def local_dnn_conv_alpha_merge(node, *inputs):
if not dnn_available() or version() == -1: if not dnn_available() or version() == -1:
return None return None
return [GpuDnnConv(workmem=node.op.workmem)(*inputs)] return [GpuDnnConv(workmem=node.op.workmem)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@alpha_merge(GpuDnnConvGradW, alpha_in=4, nd=4) @alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, nd=4)
def local_dnn_convw_alpha_merge(node, *inputs): def local_dnn_convw_alpha_merge(node, *inputs):
if not dnn_available() or version() == -1: if not dnn_available() or version() == -1:
return None return None
return [GpuDnnConvGradW()(*inputs)] return [GpuDnnConvGradW()(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@alpha_merge(GpuDnnConvGradI, alpha_in=4, nd=4) @alpha_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, nd=4)
def local_dnn_convi_alpha_merge(node, *inputs): def local_dnn_convi_alpha_merge(node, *inputs):
if not dnn_available() or version() == -1: if not dnn_available() or version() == -1:
return None return None
return [GpuDnnConvGradI()(*inputs)] return [GpuDnnConvGradI()(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@output_merge(GpuDnnConv, alpha_in=4, out_in=2, nd=4) @output_merge(GpuDnnConv, alpha_in=4, beta_in=5, out_in=2, nd=4)
def local_dnn_conv_output_merge(node, *inputs): def local_dnn_conv_output_merge(node, *inputs):
if not dnn_available() or version() == -1:
return None
inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:] inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
return [GpuDnnConv(workmem=node.op.workmem)(*inputs)] return [GpuDnnConv(workmem=node.op.workmem)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@output_merge(GpuDnnConvGradW, alpha_in=4, out_in=2, nd=4) @output_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, out_in=2, nd=4)
def local_dnn_convw_output_merge(node, *inputs): def local_dnn_convw_output_merge(node, *inputs):
if not dnn_available() or version() == -1:
return None
inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:] inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
return [GpuDnnConvGradW()(*inputs)] return [GpuDnnConvGradW()(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@output_merge(GpuDnnConvGradI, alpha_in=4, out_in=2, nd=4) @output_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, out_in=2, nd=4)
def local_dnn_convi_output_merge(node, *inputs): def local_dnn_convi_output_merge(node, *inputs):
if not dnn_available() or version() == -1:
return None
inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:] inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
return [GpuDnnConvGradI()(*inputs)] return [GpuDnnConvGradI()(*inputs)]
......
...@@ -5,11 +5,14 @@ import numpy ...@@ -5,11 +5,14 @@ import numpy
import theano import theano
from theano import scalar as scal, Constant from theano import scalar as scal, Constant
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor import DimShuffle from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise) GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise)
_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
def grab_cpu_scalar(v, nd): def grab_cpu_scalar(v, nd):
if v.owner is not None: if v.owner is not None:
n = v.owner n = v.owner
...@@ -28,6 +31,7 @@ def grab_cpu_scalar(v, nd): ...@@ -28,6 +31,7 @@ def grab_cpu_scalar(v, nd):
v.broadcastable == (True,) * nd): v.broadcastable == (True,) * nd):
return v.dimshuffle(()) return v.dimshuffle(())
def find_node(v, cls): def find_node(v, cls):
# This digs through possibly redundant transfers to look for the node # This digs through possibly redundant transfers to look for the node
# that has the op class specified. # that has the op class specified.
...@@ -42,7 +46,17 @@ def find_node(v, cls): ...@@ -42,7 +46,17 @@ def find_node(v, cls):
return None return None
def alpha_merge(cls, alpha_in, nd): def is_equal(var, val):
# Returns True if var is always equal to val (python value), False
# otherwise (including if var is not constant)
try:
v = get_scalar_constant_value(var)
return v == val
except NotScalarConstantValue:
return False
def alpha_merge(cls, alpha_in, beta_in, nd):
def wrapper(maker): def wrapper(maker):
@local_optimizer([GpuElemwise]) @local_optimizer([GpuElemwise])
@wraps(maker) @wraps(maker)
...@@ -60,19 +74,19 @@ def alpha_merge(cls, alpha_in, nd): ...@@ -60,19 +74,19 @@ def alpha_merge(cls, alpha_in, nd):
return None return None
inputs = list(targ.inputs) inputs = list(targ.inputs)
inputs[alpha_in] = lr * targ.inputs[alpha_in] inputs[alpha_in] = lr * targ.inputs[alpha_in]
inputs[beta_in] = lr * targ.inputs[beta_in]
return maker(targ, *inputs) return maker(targ, *inputs)
return opt return opt
return wrapper return wrapper
def output_merge(cls, alpha_in, out_in, nd): def output_merge(cls, alpha_in, beta_in, out_in, nd):
def wrapper(maker): def wrapper(maker):
@local_optimizer([GpuElemwise]) @local_optimizer([GpuElemwise])
@wraps(maker) @wraps(maker)
def opt(node): def opt(node):
if (isinstance(node.op, GpuElemwise) and if (isinstance(node.op, GpuElemwise) and
(node.op.scalar_op == scal.sub or node.op.scalar_op == scal.add and
node.op.scalar_op == scal.add) and
node.nin == 2): node.nin == 2):
targ = find_node(node.inputs[0], cls) targ = find_node(node.inputs[0], cls)
W = node.inputs[1] W = node.inputs[1]
...@@ -81,15 +95,12 @@ def output_merge(cls, alpha_in, out_in, nd): ...@@ -81,15 +95,12 @@ def output_merge(cls, alpha_in, out_in, nd):
W = node.inputs[0] W = node.inputs[0]
if targ is None: if targ is None:
return None return None
if node.op.scalar_op == scal.sub: if not is_equal(targ.inputs[beta_in], 0.0):
alpha = -targ.inputs[alpha_in] # other cases are too complex for now
W = W - targ.inputs[out_in] return None
else:
alpha = targ.inputs[alpha_in]
W = W + targ.inputs[out_in]
inputs = list(targ.inputs) inputs = list(targ.inputs)
inputs[out_in] = W inputs[out_in] = W
inputs[alpha_in] = alpha inputs[beta_in] = _one.clone()
return maker(targ, *inputs) return maker(targ, *inputs)
return opt return opt
return wrapper return wrapper
...@@ -466,7 +466,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -466,7 +466,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
def test_dnn_conv_merge(): def test_dnn_conv_merge():
if not cuda.dnn.dnn_available() or cuda.dnn.version() == -1: if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
img = T.ftensor4() img = T.ftensor4()
kern = T.ftensor4() kern = T.ftensor4()
...@@ -489,9 +489,15 @@ def test_dnn_conv_merge(): ...@@ -489,9 +489,15 @@ def test_dnn_conv_merge():
lr = numpy.asarray(0.05, dtype='float32') lr = numpy.asarray(0.05, dtype='float32')
fr = out - lr * conv if cuda.dnn.version() == -1:
wr = kern - lr * gw # Can't merge alpha with cudnn v1
ir = img - lr * gi fr = conv + out
wr = kern + gw
ir = img + gi
else:
fr = lr * (conv + out)
wr = kern + lr * gw
ir = img + lr * gi
f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu) f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op, assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
...@@ -545,17 +551,19 @@ def test_dnn_conv_grad(): ...@@ -545,17 +551,19 @@ def test_dnn_conv_grad():
def dconv(img, kern, out): def dconv(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='conv')(img.shape, kern.shape) conv_mode='conv')(img.shape, kern.shape)
return dnn.GpuDnnConv()(img, kern, out, desc) return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75)
def dconvi(img, kern, out): def dconvi(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='conv')(img.shape, kern.shape) conv_mode='conv')(img.shape, kern.shape)
return dnn.GpuDnnConvGradI()(kern, out, img, desc) return dnn.GpuDnnConvGradI()(kern, out, img, desc, alpha=-1.0,
beta=0.0)
def dconvw(img, kern, out): def dconvw(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='conv')(img.shape, kern.shape) conv_mode='conv')(img.shape, kern.shape)
return dnn.GpuDnnConvGradW()(img, out, kern, desc) return dnn.GpuDnnConvGradW()(img, out, kern, desc, alpha=0.75,
beta=-1.0)
utt.verify_grad(dconv, [img_val, kern_val, out_val]) utt.verify_grad(dconv, [img_val, kern_val, out_val])
utt.verify_grad(dconvi, [img_val, kern_val, out_val]) utt.verify_grad(dconvi, [img_val, kern_val, out_val])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论