Commit 275dd0d0 authored by Frédéric Bastien, committed via GitHub

Merge pull request #5434 from nouiz/bn4

mixed
......@@ -547,6 +547,7 @@ class Constant(Variable):
def __init__(self, type, data, name=None):
    """Create a Constant of the given `type` holding `data`.

    The raw `data` is validated/coerced through ``type.filter`` before
    being stored.
    """
    # A Constant is never the output of an Apply node, so owner and
    # index are both None.
    Variable.__init__(self, type, None, None, name)
    # filter() validates and converts `data` into the type's canonical form.
    self.data = type.filter(data)
    # Record a creation traceback on the variable for later error reporting.
    utils.add_tag_trace(self)
def equals(self, other):
# this does what __eq__ should do, but Variable and Apply should always be hashable by id
......
......@@ -346,7 +346,14 @@ class TestAutoName:
r1 = tensor.constant(1.5)
r2 = tensor.constant(1.5)
assert r1.auto_name == "auto_" + str(autoname_id)
assert r2.auto_name == "auto_" + str(autoname_id + 1)
# We reuse the same variable
assert r2.auto_name == "auto_" + str(autoname_id)
assert r1 is r2
r3 = tensor.constant(1.6)
# The cache still creates a new object that we do not return.
# This is why we must increase by 2 and not 1.
assert r3.auto_name == "auto_" + str(autoname_id + 2)
def test_tensorvariable(self):
# Get counter value
......
......@@ -192,7 +192,7 @@ class InputToGpuOptimizer(Optimizer):
# This happen frequently as we do 2 pass of the gpu optimizations
if (len(input.clients) == 1 and
(input.clients[0][0] == 'output' or
input.clients[0][0].op == gpu_from_host)):
isinstance(input.clients[0][0].op, GpuFromHost))):
continue
try:
......@@ -215,7 +215,7 @@ gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(),
'merge') # TODO: how to make it mandatory for gpu_seqopt?
@local_optimizer([gpu_from_host, host_from_gpu])
@local_optimizer([GpuFromHost, HostFromGpu])
def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
return [node.inputs[0].owner.inputs[0]]
......@@ -336,7 +336,7 @@ def local_gpu_elemwise_0(node):
@register_opt()
@local_optimizer([gpu_from_host])
@local_optimizer([GpuFromHost])
def local_gpu_elemwise_1(node):
"""
gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))
......@@ -346,6 +346,7 @@ def local_gpu_elemwise_1(node):
host_i, = node.inputs
if (host_i.owner and
isinstance(host_i.owner.op, tensor.Elemwise) and
len(host_i.owner.outputs) == 1 and
len(host_i.clients) == 1 and
dtype_in_elemwise_supported(node.op)):
......@@ -392,7 +393,7 @@ def local_gpu_split(node):
@register_opt()
@local_optimizer([tensor.DimShuffle, gpu_from_host])
@local_optimizer([tensor.DimShuffle, GpuFromHost])
def local_gpu_dimshuffle_0(node):
"""
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
......@@ -421,7 +422,7 @@ def local_gpu_dimshuffle_0(node):
@register_opt()
@local_optimizer([tensor.SpecifyShape, gpu_from_host])
@local_optimizer([tensor.SpecifyShape, GpuFromHost])
def local_gpu_specifyShape_0(node):
"""
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
......@@ -445,7 +446,7 @@ def local_gpu_specifyShape_0(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.basic.Dot])
@local_optimizer([GpuFromHost, tensor.basic.Dot])
def local_gpu_dot_to_dot22(node):
"""
gpu_from_host(dot) -> gpudot(gpu_from_host)
......@@ -537,7 +538,7 @@ optdb.register('gpu_assert_no_cpu_op', assert_no_cpu_op, 49.2,
@register_opt()
@local_optimizer([theano.ifelse.IfElse, gpu_from_host])
@local_optimizer([theano.ifelse.IfElse, GpuFromHost])
def local_gpu_lazy_ifelse(node):
"""
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
......@@ -606,7 +607,7 @@ def local_gpu_lazy_ifelse(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.blas.Dot22])
@local_optimizer([GpuFromHost, tensor.blas.Dot22])
def local_gpu_dot22(node):
"""
gpu_from_host(dot22) -> gpudot(gpu_from_host)
......@@ -631,7 +632,7 @@ def local_gpu_dot22(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.blas.BatchedDot])
@local_optimizer([GpuFromHost, tensor.blas.BatchedDot])
def local_gpu_batched_dot(node):
"""
gpu_from_host(batched_dot) -> gpu_batched_dot(gpu_from_host)
......@@ -670,7 +671,7 @@ def local_gpu_batched_dot(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.blas.Dot22Scalar])
@local_optimizer([GpuFromHost, tensor.blas.Dot22Scalar])
def local_gpu_dot22scalar(node):
"""
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
......@@ -699,7 +700,7 @@ def local_gpu_dot22scalar(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.blas_c.CGemv, tensor.blas.Gemv])
@local_optimizer([GpuFromHost, tensor.blas_c.CGemv, tensor.blas.Gemv])
def local_gpu_gemv(node):
"""
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
......@@ -737,7 +738,7 @@ def local_gpu_gemv(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.blas_c.CGer, tensor.blas.Ger,
@local_optimizer([GpuFromHost, tensor.blas_c.CGer, tensor.blas.Ger,
tensor.blas_scipy.ScipyGer])
def local_gpu_ger(node):
"""
......@@ -777,7 +778,7 @@ def local_gpu_ger(node):
@register_opt()
@local_optimizer([tensor.blas.Gemm, gpu_from_host])
@local_optimizer([tensor.blas.Gemm, GpuFromHost])
def local_gpu_gemm(node):
"""
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
......@@ -966,7 +967,7 @@ def local_gpu_elemwise_careduce(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.Reshape])
@local_optimizer([GpuFromHost, tensor.Reshape])
def local_gpu_reshape(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -999,7 +1000,7 @@ def local_gpu_reshape(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.Flatten])
@local_optimizer([GpuFromHost, tensor.Flatten])
def local_gpu_flatten(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -1019,7 +1020,7 @@ def local_gpu_flatten(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.Subtensor])
@local_optimizer([GpuFromHost, tensor.Subtensor])
def local_gpu_subtensor(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -1062,7 +1063,7 @@ def local_gpu_subtensor(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.AdvancedSubtensor1])
@local_optimizer([GpuFromHost, tensor.AdvancedSubtensor1])
def local_gpu_advanced_subtensor1(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -1083,7 +1084,7 @@ def local_gpu_advanced_subtensor1(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.AdvancedIncSubtensor1])
@local_optimizer([GpuFromHost, tensor.AdvancedIncSubtensor1])
def local_gpu_advanced_incsubtensor1(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -1153,7 +1154,7 @@ def local_gpu_advanced_incsubtensor1(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.IncSubtensor])
@local_optimizer([GpuFromHost, tensor.IncSubtensor])
def local_gpu_incsubtensor(node):
if isinstance(node.op, GpuFromHost):
host_output = node.inputs[0]
......@@ -1463,7 +1464,7 @@ def values_eq_approx_high_tol(a, b):
return CudaNdarrayType.values_eq_approx(a, b, atol=atol)
@local_optimizer([gpu_from_host, conv.ConvOp])
@local_optimizer([GpuFromHost, conv.ConvOp])
def local_gpu_conv(node):
"""
gpu_from_host(conv) -> gpu_conv(gpu_from_host)
......@@ -2309,7 +2310,7 @@ def local_gpu_contiguous(node):
@register_opt()
@local_optimizer([gpu_from_host, tensor.Eye])
@local_optimizer([GpuFromHost, tensor.Eye])
def local_gpu_eye(node):
"""
gpu_from_host(eye) -> gpueye(gpu_from_host)
......@@ -2438,7 +2439,7 @@ def typeConstructor(broadcastable, dtype):
@register_opt('scan')
@local_optimizer([gpu_from_host, scan_op.Scan])
@local_optimizer([GpuFromHost, scan_op.Scan])
def gpuScanOptimization(node):
"""
scan(host_from_gpu) -> host_from_gpu(GPUscan)
......@@ -2560,7 +2561,7 @@ def gpuScanOptimization(node):
@register_opt()
@local_optimizer([tensor.AllocEmpty, gpu_from_host])
@local_optimizer([tensor.AllocEmpty, GpuFromHost])
def local_gpu_allocempty(node):
if (isinstance(node.op, tensor.AllocEmpty) and
node.op.dtype == "float32"):
......@@ -2727,7 +2728,7 @@ optdb.register('local_inplace_gpu_sparse_block_outer',
# Move to Gpu optimization
@local_optimizer([gpu_from_host,
@local_optimizer([GpuFromHost,
AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs,
......
......@@ -277,10 +277,10 @@ def convert(x, dtype=None):
return x_
def constant(x, name=None, dtype=None):
    """Return a ``ScalarConstant`` wrapping `x`.

    Parameters
    ----------
    x : scalar value
        Converted via ``convert`` before being wrapped.
    name : str, optional
        Name to attach to the resulting constant.
    dtype : str, optional
        Explicit dtype; when None, ``convert`` infers it from `x`.
    """
    # NOTE(review): the scraped diff left both the old and the new
    # definition interleaved here (duplicate `def` and an unreachable
    # `return`); this is the clean post-change version.
    x = convert(x, dtype=dtype)
    # This constructor is for scalars only; convert() must not have
    # produced an array.
    assert x.ndim == 0
    return ScalarConstant(get_scalar_type(str(x.dtype)), x, name=name)
class Scalar(Type):
......
......@@ -488,5 +488,14 @@ def test_grad_abs():
# in test_fusion, TestCompositeCodegen
def test_constant():
    """Check that scalar ``constant`` forwards `name` and `dtype`."""
    # `name` is forwarded to the constant; dtype is inferred (int8 for 2).
    named = constant(2, name='a')
    assert named.name == 'a'
    assert named.dtype == 'int8'
    # An explicit dtype overrides inference; name defaults to None.
    typed = constant(2, dtype='float32')
    assert typed.name is None
    assert typed.dtype == 'float32'
# Allow running this test module directly from the command line.
if __name__ == '__main__':
    unittest.main()
......@@ -276,7 +276,9 @@ def constant(x, name=None, ndim=None, dtype=None):
if (sig not in constant_cache and ret.data.size == 1 and
(-10) <= ret.data <= 10 and
(ret.dtype in int_dtypes or ret.dtype in uint_dtypes or
(ret.dtype in float_dtypes and int(ret.data) == ret.data))):
(ret.dtype in float_dtypes and
# Limit the size of the cache.
len(constant_cache) < 10000))):
constant_cache[sig] = ret
# This is needed to raise a good error to the user.
ret.cached = True
......
......@@ -1045,8 +1045,8 @@ second dimension
Py_XINCREF(%(oname)s);
""" % locals()
# We alias the scalar variables
defines += "#define %(oname)s_i %(iname)s_i" % locals()
undefs += "#undef %(oname)s_i" % locals()
defines += "#define %(oname)s_i %(iname)s_i\n" % locals()
undefs += "#undef %(oname)s_i\n" % locals()
# Note: here, olv_index is either the index of the last output
# which is allocated, OR, if there are any aliased outputs,
......
......@@ -23,10 +23,12 @@ class BNComposite(Composite):
def grad(self, inps, grads):
    """Gradient of the batch-normalization expression
    ``gamma * (x - mean) / std + beta`` with respect to each input.

    Parameters
    ----------
    inps : sequence of 5 variables
        ``(x, mean, std, gamma, beta)``.
    grads : sequence of 1 variable
        The gradient flowing into the single output.

    Returns
    -------
    list
        Gradients w.r.t. ``x``, ``mean``, ``std``, ``gamma``, ``beta``.
    """
    # The scraped merge left both the pre-change and post-change
    # assignment groups in this body; the first four were dead stores.
    # Only the factored (post-change) version is kept.
    x, mean, std, gamma, beta = inps
    top, = grads
    # Factor out the shared subexpressions to keep the graph small.
    top_gamma = top * gamma
    x_mean = x - mean
    dx = top_gamma / std
    # d(out)/d(mean) = -d(out)/d(x), so reuse dx.
    dmean = -dx
    dstd = -(top_gamma * x_mean) / (std * std)
    dgamma = top * x_mean / std
    # d(out)/d(beta) is 1, so the beta gradient is `top` itself.
    return [dx, dmean, dstd, dgamma, top]
......
......@@ -1041,11 +1041,14 @@ class ShapeFeature(object):
rval.append(None)
return rval
def unpack(self, s_i):
def unpack(self, s_i, var):
"""Return a symbolic integer scalar for the shape element s_i.
The s_i argument was produced by the infer_shape() of an Op subclass.
var: the variable that correspond to s_i. This is just for
error reporting.
"""
# unpack the s_i that the Op returned
assert s_i is not None
......@@ -1059,7 +1062,10 @@ class ShapeFeature(object):
isinstance(s_i, numpy.integer) or
(isinstance(s_i, numpy.ndarray) and s_i.ndim == 0)):
# this shape is a constant
assert s_i >= 0
if s_i < 0:
msg = "There is a negative shape in the graph!"
msg += gof.utils.get_variable_trace_string(var)
raise ValueError(msg)
return T.constant(s_i, dtype='int64')
if type(s_i) in (tuple, list):
# this dimension is the same as many of the inputs
......@@ -1137,7 +1143,7 @@ class ShapeFeature(object):
r.type.broadcastable[i]):
shape_vars.append(self.lscalar_one)
else:
shape_vars.append(self.unpack(s[i]))
shape_vars.append(self.unpack(s[i], r))
assert all([not hasattr(r.type, "broadcastable") or
not r.type.broadcastable[i] or
# The two following comparison are a speed optimization
......@@ -1238,7 +1244,7 @@ class ShapeFeature(object):
new_shape = []
for j, s_j in enumerate(prev_shape):
if j == i:
new_shape.append(self.unpack(s_i))
new_shape.append(self.unpack(s_i, r))
else:
new_shape.append(s_j)
assert all([not hasattr(r.type, "broadcastable") or
......@@ -7001,6 +7007,10 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
if type(node.op) is not OP:
return False
if len(node.outputs) > 1:
# We don't support the fusion for nodes with multiple outputs.
return
inputs = [] # inputs of the new Elemwise op.
s_inputs = [] # inputs of the new scalar op used by the Composite.
# Inputs of the new scalar op that represents the current node.
......@@ -7331,6 +7341,26 @@ else:
'FusionOptimizer')
@register_canonicalize
@gof.local_optimizer([Elemwise])
def local_useless_composite(node):
    """Prune unused outputs from an Elemwise Composite node.

    When an Elemwise whose scalar_op is a Composite has outputs with no
    clients, rebuild it with only the outputs that are actually used and
    map the kept old outputs to the new ones.
    """
    op = node.op
    # Short-circuit keeps us from touching scalar_op on non-Elemwise ops.
    if not (isinstance(op, Elemwise) and
            isinstance(op.scalar_op, scalar.Composite)):
        return
    comp = op.scalar_op
    used = [i for i, out in enumerate(node.outputs) if out.clients]
    # Nothing to prune: every output has at least one client.
    if len(used) == len(node.outputs):
        return
    trimmed = scalar.Composite(inputs=comp.inputs,
                               outputs=[comp.outputs[i] for i in used])
    replacements = Elemwise(scalar_op=trimmed)(*node.inputs,
                                               return_list=True)
    return dict(zip([node.outputs[i] for i in used], replacements))
# ############################
# # Remove consider_constant #
# ############################
......
......@@ -1526,6 +1526,26 @@ class TestCompositeCodegen(unittest.TestCase):
fval = numpy.asarray(f([1, 2, 3]))
assert numpy.all(fval == [6, 12, 18]), fval
def test_local_useless_composite(self):
    """Each compiled function keeps only the Composite output it uses."""
    s = theano.scalar.float32()
    comp = theano.scalar.Composite([s], [s + 1, s - 1])
    X = theano.tensor.matrix()
    outs = theano.tensor.Elemwise(scalar_op=comp)(X)
    mode = theano.compile.mode.get_default_mode().including(
        'local_useless_composite')
    # Compile against each output in turn; the other one must be pruned.
    for out, expected in ((outs[0], [[2.]]), (outs[1], [[0.]])):
        f = theano.function([X], out, mode=mode)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert len(topo[0].outputs) == 1
        utt.assert_allclose(f([[1.]]), expected)
def test_log1p():
m = theano.config.mode
......
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment