提交 14c219b2 authored 作者: Dustin Webb's avatar Dustin Webb

Merge branch 'master' of https://github.com/Theano/Theano into mastery

Conflicts: theano/tensor/tests/test_basic.py
...@@ -188,7 +188,7 @@ import theano and print the config variable, as in: ...@@ -188,7 +188,7 @@ import theano and print the config variable, as in:
String value: either 'ignore', 'warn', 'raise' or 'pdb' String value: either 'ignore', 'warn', 'raise' or 'pdb'
Default: 'float64' Default: 'ignore'
When creating a TensorVariable with dtype float64, what should be done? When creating a TensorVariable with dtype float64, what should be done?
This is useful to help find upcast to float64 in user code. This is useful to help find upcast to float64 in user code.
......
...@@ -28,7 +28,7 @@ def test_profiling(): ...@@ -28,7 +28,7 @@ def test_profiling():
p = theano.ProfileStats(False) p = theano.ProfileStats(False)
if theano.config.mode in ["DebugMode", "DEBUG_MODE"]: if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
m = "FAST_RUN" m = "FAST_RUN"
else: else:
m = None m = None
......
...@@ -1466,7 +1466,7 @@ class _CThunk(object): ...@@ -1466,7 +1466,7 @@ class _CThunk(object):
# note that the failure code is distributed in two lists # note that the failure code is distributed in two lists
if failure_code < 2 * n: if failure_code < 2 * n:
return [self.init_tasks, self.tasks][ return [self.init_tasks, self.tasks][
failure_code % 2][failure_code / 2] failure_code % 2][failure_code // 2]
else: else:
return self.tasks[failure_code - n] return self.tasks[failure_code - n]
......
...@@ -663,7 +663,10 @@ class Op(utils.object2, PureOp, CLinkerOp): ...@@ -663,7 +663,10 @@ class Op(utils.object2, PureOp, CLinkerOp):
if len(self.__props__) == 0: if len(self.__props__) == 0:
return "%s" % (self.__class__.__name__,) return "%s" % (self.__class__.__name__,)
else: else:
return "%s{%s}" % (self.__class__.__name__, ", ".join("%s=%r" % (p, getattr(self, p)) for p in self.__props__)) return "%s{%s}" % (
self.__class__.__name__,
", ".join("%s=%r" % (p, getattr(self, p))
for p in self.__props__))
else: else:
return super(Op, self).__str__() return super(Op, self).__str__()
......
...@@ -132,17 +132,21 @@ class TestOp: ...@@ -132,17 +132,21 @@ class TestOp:
def test_op_struct(self): def test_op_struct(self):
sop = StructOp() sop = StructOp()
c = sop(theano.tensor.constant(0)) c = sop(theano.tensor.constant(0))
f = theano.function([], c) mode = None
if theano.config.mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
f = theano.function([], c, mode=mode)
rval = f() rval = f()
assert rval == 0 assert rval == 0
rval = f() rval = f()
assert rval == 1 assert rval == 1
c2 = sop(theano.tensor.constant(1)) c2 = sop(theano.tensor.constant(1))
f2 = theano.function([], [c, c2]) f2 = theano.function([], [c, c2], mode=mode)
rval = f2() rval = f2()
assert rval == [0, 0] assert rval == [0, 0]
class TestMakeThunk(unittest.TestCase): class TestMakeThunk(unittest.TestCase):
def test_no_c_code(self): def test_no_c_code(self):
class IncOnePython(Op): class IncOnePython(Op):
......
...@@ -2888,7 +2888,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2888,7 +2888,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
returns a C code expression to copy source into view, and returns a C code expression to copy source into view, and
return 0 on success return 0 on success
""" """
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals() # On the CPU it unbroadcast based on the run time shapes. We
# need the same behavior on the GPU.
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s, 1)""" % locals()
def add_to_zview(self, name, x, fail): def add_to_zview(self, name, x, fail):
...@@ -2910,7 +2912,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2910,7 +2912,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version() parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if parent_version: if parent_version:
return parent_version + (0,) return parent_version + (1,)
return () return ()
...@@ -3343,6 +3345,13 @@ class GpuContiguous(GpuOp): ...@@ -3343,6 +3345,13 @@ class GpuContiguous(GpuOp):
input = as_cuda_ndarray_variable(input) input = as_cuda_ndarray_variable(input)
return Apply(self, [input], [input.type()]) return Apply(self, [input], [input.type()])
def perform(self, node, inp, out):
    """Store a C-contiguous version of the first input in the output.

    The first input is forwarded as is when it already reports being
    C-contiguous; otherwise a copy of it is stored instead.

    :param node: the Apply node being executed (unused here).
    :param inp: list of input values; only ``inp[0]`` is read.
    :param out: list of output storage cells; ``out[0][0]`` is written.
    """
    result = inp[0]
    needs_copy = not result.is_c_contiguous()
    if needs_copy:
        result = result.copy()
    # Guard the expectation that the value stored is C-contiguous.
    assert result.is_c_contiguous()
    out[0][0] = result
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
input, = inp input, = inp
z, = out z, = out
......
...@@ -852,8 +852,11 @@ class GpuCorrMM(BaseGpuCorrMM): ...@@ -852,8 +852,11 @@ class GpuCorrMM(BaseGpuCorrMM):
class GpuCorrMM_gradWeights(BaseGpuCorrMM): class GpuCorrMM_gradWeights(BaseGpuCorrMM):
"""Gradient wrt. filters for `GpuCorrMM`. """Gradient wrt. filters for `GpuCorrMM`.
:note: You will not want to use this directly, but rely on Theano's :note: You will not want to use this directly, but rely on
automatic differentiation or graph optimization to use it as needed.""" Theano's automatic differentiation or graph optimization to
use it as needed.
"""
def __init__(self, border_mode="valid", def __init__(self, border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
...@@ -906,8 +909,11 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -906,8 +909,11 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
class GpuCorrMM_gradInputs(BaseGpuCorrMM): class GpuCorrMM_gradInputs(BaseGpuCorrMM):
"""Gradient wrt. inputs for `GpuCorrMM`. """Gradient wrt. inputs for `GpuCorrMM`.
:note: You will not want to use this directly, but rely on Theano's :note: You will not want to use this directly, but rely on
automatic differentiation or graph optimization to use it as needed.""" Theano's automatic differentiation or graph optimization to
use it as needed.
"""
def __init__(self, border_mode="valid", def __init__(self, border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
......
...@@ -1002,7 +1002,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -1002,7 +1002,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
return NULL; return NULL;
indices = (CudaNdarray*) CudaNdarray_New(); indices = (CudaNdarray*) CudaNdarray_New();
if (verbose) printf("ndarray after new\n"); if (verbose) printf("\nndarray after new\n");
if (! indices){ if (! indices){
Py_DECREF(indices_float32); Py_DECREF(indices_float32);
return NULL; return NULL;
...@@ -1140,6 +1140,13 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -1140,6 +1140,13 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
} }
dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(out)[0],65535),1,1); dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(out)[0],65535),1,1);
if(CudaNdarray_HOST_DIMS(out)[0] == 0){
// We take 0 elements, so no need for the rest of the code.
// This speeds up that case AND fixes a crash that would otherwise occur.
free(dims);
Py_DECREF(indices);
return (PyObject *)out;
}
switch (self->nd) { switch (self->nd) {
case 1: case 1:
...@@ -1149,7 +1156,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -1149,7 +1156,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
printf("cudaGetLastError=%d, nd=%d" printf("cudaGetLastError=%d, nd=%d"
" kernel config: (n_blocks.x=%d, n_blocks.y=%d," " kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
" n_threads.x=%i, n_threads.y=%i)\n", " n_threads.x=%i, n_threads.y=%i)\n",
self->nd, cudaGetLastError(), cudaGetLastError(), self->nd,
n_blocks.x, n_blocks.y, n_threads.x, n_threads.y); n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);
k3<<<n_blocks, n_threads>>>( k3<<<n_blocks, n_threads>>>(
dims[0], dims[0],
...@@ -1205,7 +1212,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -1205,7 +1212,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
printf("cudaGetLastError=%d, nd=%d" printf("cudaGetLastError=%d, nd=%d"
" kernel config: (n_blocks.x=%d, n_blocks.y=%d," " kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
" n_threads.x=%i, n_threads.y=%i)\n", " n_threads.x=%i, n_threads.y=%i)\n",
self->nd, cudaGetLastError(), cudaGetLastError(), self->nd,
n_blocks.x, n_blocks.y, n_threads.x, n_threads.y); n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);
k3<<<n_blocks, n_threads>>>( k3<<<n_blocks, n_threads>>>(
dims[0], //dimensions dims[0], //dimensions
......
...@@ -1680,6 +1680,16 @@ def local_gpualloc(node): ...@@ -1680,6 +1680,16 @@ def local_gpualloc(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([theano.tensor.opt.Assert])
def local_assert(node):
    """Lift an ``Assert`` above the ``HostFromGpu`` that feeds it.

    Rewrites ``Assert(HostFromGpu(x), *conds)`` into
    ``HostFromGpu(Assert(x, *conds))``.

    :param node: the Apply node being examined.
    :return: a one-element list holding the replacement output, or
        ``None`` when the pattern does not match.
    """
    if (isinstance(node.op, theano.tensor.opt.Assert) and
            node.inputs[0].owner and
            isinstance(node.inputs[0].owner.op, HostFromGpu)):
        gpu_value = node.inputs[0].owner.inputs[0]
        # Forward the asserted conditions too: rebuilding the op with
        # only the value would silently drop every check it carried.
        return [host_from_gpu(node.op(gpu_value, *node.inputs[1:]))]
@register_opt() @register_opt()
@local_optimizer([GpuAlloc]) @local_optimizer([GpuAlloc])
def local_gpualloc_memset_0(node): def local_gpualloc_memset_0(node):
......
...@@ -967,6 +967,8 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor): ...@@ -967,6 +967,8 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):
# version when we should. Users should not use it. # version when we should. Users should not use it.
for shape, idx, fast in [((70000,), range(70000), True), for shape, idx, fast in [((70000,), range(70000), True),
((70000, 5), range(70000), True), ((70000, 5), range(70000), True),
((70000, 5), numpy.zeros((0,), 'int64'),
True),
((70000, 2, 3), range(70000), True), ((70000, 2, 3), range(70000), True),
((1025, 1025), [5, 10], True), ((1025, 1025), [5, 10], True),
((3, 1025, 1026), [1, 2], True), ((3, 1025, 1026), [1, 2], True),
......
...@@ -641,7 +641,8 @@ def test_valid(conv_gemm=False): ...@@ -641,7 +641,8 @@ def test_valid(conv_gemm=False):
shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2)) shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2)) shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
else: else:
mode = cls = None mode = theano_mode
cls = None
exec_conv(version, shapes, verbose, random, 'valid', exec_conv(version, shapes, verbose, random, 'valid',
print_=print_, ones=ones, rtol=1.1e-5, print_=print_, ones=ones, rtol=1.1e-5,
theano_mode=mode, cls=cls) theano_mode=mode, cls=cls)
...@@ -717,7 +718,8 @@ def test_full(conv_gemm=False): ...@@ -717,7 +718,8 @@ def test_full(conv_gemm=False):
# dummy version; not used by GpuCorrMM so one version is enough # dummy version; not used by GpuCorrMM so one version is enough
version = [-1] version = [-1]
else: else:
mode = cls = None mode = theano_mode
cls = None
exec_conv(version, shapes, verbose, random, 'full', exec_conv(version, shapes, verbose, random, 'full',
theano_mode=mode, cls=cls) theano_mode=mode, cls=cls)
...@@ -757,7 +759,8 @@ def test_subsample(conv_gemm=False): ...@@ -757,7 +759,8 @@ def test_subsample(conv_gemm=False):
# dummy version; not used by GpuCorrMM so one version is enough # dummy version; not used by GpuCorrMM so one version is enough
version_valid = version_full = [-1] version_valid = version_full = [-1]
else: else:
mode = cls = None mode = theano_mode
cls = None
exec_conv(version_valid, shapes, verbose, random, 'valid', exec_conv(version_valid, shapes, verbose, random, 'valid',
print_=print_, ones=ones, print_=print_, ones=ones,
......
...@@ -41,6 +41,17 @@ def test_no_shared_var_graph(): ...@@ -41,6 +41,17 @@ def test_no_shared_var_graph():
assert numpy.any(isinstance(x.op,cuda.GpuFromHost) for x in l) assert numpy.any(isinstance(x.op,cuda.GpuFromHost) for x in l)
assert numpy.any(isinstance(x.op,cuda.HostFromGpu) for x in l) assert numpy.any(isinstance(x.op,cuda.HostFromGpu) for x in l)
def test_local_assert():
    """Check the Assert node of a function compiled with the GPU mode.

    Exactly one Assert node must remain in the compiled graph, and its
    first input must have a ``CudaNdarrayType`` type.
    """
    inp = theano.tensor.fmatrix()
    condition = theano.tensor.eq(inp, 0).any()
    checked = theano.tensor.opt.assert_op(inp, condition)
    fn = theano.function([inp], checked, mode=mode_with_gpu)

    assert_nodes = []
    for apply_node in fn.maker.fgraph.toposort():
        if isinstance(apply_node.op, theano.tensor.opt.Assert):
            assert_nodes.append(apply_node)

    assert len(assert_nodes) == 1
    assert isinstance(assert_nodes[0].inputs[0].type, CudaNdarrayType)
def test_int_pow(): def test_int_pow():
a = CudaNdarrayType([False])() a = CudaNdarrayType([False])()
......
...@@ -496,6 +496,16 @@ def local_gpua_softmaxwithbias(node): ...@@ -496,6 +496,16 @@ def local_gpua_softmaxwithbias(node):
return GpuSoftmaxWithBias() return GpuSoftmaxWithBias()
@register_opt()
@local_optimizer([theano.tensor.opt.Assert])
def local_assert(node):
    """Lift an ``Assert`` above the ``HostFromGpu`` that feeds it.

    Rewrites ``Assert(HostFromGpu(x), *conds)`` into
    ``HostFromGpu(Assert(x, *conds))``.

    :param node: the Apply node being examined.
    :return: a one-element list holding the replacement output, or
        ``None`` when the pattern does not match.
    """
    if (isinstance(node.op, theano.tensor.opt.Assert) and
            node.inputs[0].owner and
            isinstance(node.inputs[0].owner.op, HostFromGpu)):
        gpu_value = node.inputs[0].owner.inputs[0]
        # Forward the asserted conditions too: rebuilding the op with
        # only the value would silently drop every check it carried.
        return [host_from_gpu(node.op(gpu_value, *node.inputs[1:]))]
@register_opt() @register_opt()
@op_lifter([gpu_from_host, ConvOp]) @op_lifter([gpu_from_host, ConvOp])
def local_gpu_conv(node): def local_gpu_conv(node):
......
...@@ -16,6 +16,16 @@ from theano.tests.unittest_tools import SkipTest ...@@ -16,6 +16,16 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests.test_basic import TestSpecifyShape from theano.tensor.tests.test_basic import TestSpecifyShape
def test_local_assert():
    """Check the Assert node of a function compiled with the GPU mode.

    Exactly one Assert node must remain in the compiled graph, and its
    first input must have a ``GpuArrayType`` type.
    """
    inp = theano.tensor.fmatrix()
    condition = theano.tensor.eq(inp, 0).any()
    checked = theano.tensor.opt.assert_op(inp, condition)
    fn = theano.function([inp], checked, mode=mode_with_gpu)

    assert_nodes = []
    for apply_node in fn.maker.fgraph.toposort():
        if isinstance(apply_node.op, theano.tensor.opt.Assert):
            assert_nodes.append(apply_node)

    assert len(assert_nodes) == 1
    assert isinstance(assert_nodes[0].inputs[0].type, GpuArrayType)
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
...@@ -164,10 +164,15 @@ def as_tensor_variable(x, name=None, ndim=None): ...@@ -164,10 +164,15 @@ def as_tensor_variable(x, name=None, ndim=None):
return x return x
else: else:
if (x.type.ndim > ndim): if (x.type.ndim > ndim):
# TODO: strip off leading broadcastable dimensions # strip off leading broadcastable dimensions
first_non_broadcastable = [idx for idx in range(x.ndim)
if x.broadcastable[idx] == False][0]
x = x.dimshuffle(range(x.ndim)[first_non_broadcastable:])
if x.ndim > ndim:
raise ValueError( raise ValueError(
'TensorType could not be cast to have %i dimensions' % 'TensorType could not be cast to have %i dimensions' % ndim, x.type
ndim, x.type) )
return x
elif (x.type.ndim < ndim): elif (x.type.ndim < ndim):
return shape_padleft(x, n_ones=(ndim - x.type.ndim)) return shape_padleft(x, n_ones=(ndim - x.type.ndim))
else: else:
......
...@@ -811,8 +811,8 @@ class ConvOp(OpenMPOp): ...@@ -811,8 +811,8 @@ class ConvOp(OpenMPOp):
shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name
tmp_node = theano.tensor.nnet.conv3D( tmp_node = theano.tensor.nnet.conv3D(
V = shuffled_inputs, V=shuffled_inputs,
W= shuffled_kerns, W=shuffled_kerns,
b=theano.tensor.alloc(numpy.asarray(0, dtype=kerns.dtype), b=theano.tensor.alloc(numpy.asarray(0, dtype=kerns.dtype),
kerns.shape[0]), kerns.shape[0]),
d=(self.dx, self.dy, 1)) d=(self.dx, self.dy, 1))
......
...@@ -799,7 +799,21 @@ class ShapeFeature(object): ...@@ -799,7 +799,21 @@ class ShapeFeature(object):
# #
# worst case, we loop over shape_of and replace things # worst case, we loop over shape_of and replace things
raise NotImplementedError(s_i) raise NotImplementedError(s_i)
elif s_i.type.dtype[:3] in ('int', 'uint'):
# s_i is x.shape[i], we change it to Shape_i.
if (s_i.owner and
isinstance(s_i.owner.op, Subtensor) and
s_i.owner.inputs[0].owner and
isinstance(s_i.owner.inputs[0].owner.op, T.Shape)):
assert s_i.ndim == 0
assert len(s_i.owner.inputs) == 2
try:
i = get_scalar_constant_value(s_i.owner.inputs[1])
s_i = Shape_i(i)(s_i.owner.inputs[0].owner.inputs[0])
except NotScalarConstantError:
pass
if s_i.type.dtype[:3] in ('int', 'uint'):
if getattr(s_i.type, 'ndim', 0): if getattr(s_i.type, 'ndim', 0):
raise TypeError('Shape element must be scalar', s_i) raise TypeError('Shape element must be scalar', s_i)
return s_i return s_i
...@@ -1131,6 +1145,40 @@ class ShapeFeature(object): ...@@ -1131,6 +1145,40 @@ class ShapeFeature(object):
self.set_shape_i(v, ii, new_r) self.set_shape_i(v, ii, new_r)
self.shape_of_reverse_index[r] = set() self.shape_of_reverse_index[r] = set()
def same_shape(self, x, y):
    """Return True if we are able to assert that x and y have the
    same shape.

    Every dimension is compared in turn.  Two dimensions are accepted
    as equal when they are the very same variable, or when both are
    produced by a ``Shape_i`` op with the same index ``i`` applied to
    equal inputs (or to graphs that ``equal_computations`` reports as
    equal).  As soon as one dimension cannot be shown equal the answer
    is False; True is returned only after all dimensions passed.

    :param x: variable looked up in ``self.shape_of``.
    :param y: variable looked up in ``self.shape_of``.
    :return: True when all dimensions could be shown equal, else False
        (including when either shape is unknown, i.e. ``None``).
    """
    sx = self.shape_of[x]
    sy = self.shape_of[y]
    if sx is None or sy is None:
        return False
    assert len(sx) == len(sy)

    for dx, dy in zip(sx, sy):
        if dx is dy:
            continue
        # Need to try to find that they are the same shape. We
        # need to compare the full graph. It could be slow. So only
        # the Shape_i case is implemented for now.
        if not dx.owner or not dy.owner:
            return False
        if (not isinstance(dx.owner.op, Shape_i) or
                not isinstance(dy.owner.op, Shape_i)):
            return False
        opx = dx.owner.op
        opy = dy.owner.op
        if not (opx.i == opy.i):
            return False
        # FB: I'm not sure whether this handles constants correctly.
        if dx.owner.inputs[0] == dy.owner.inputs[0]:
            # This dimension matches; the remaining ones still have to
            # be checked, so do not return yet.
            continue
        # To be sure to cover all cases, call equal_computations.
        # Can't use theano.gof.graph.is_same_graph(dx, dy)
        # as it currently expects that dx and dy aren't in a
        # FunctionGraph.
        from theano.scan_module.scan_utils import equal_computations
        if not equal_computations([dx], [dy]):
            return False
    return True
class ShapeOptimizer(Optimizer): class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an fgraph feature. """Optimizer that serves to add ShapeFeature as an fgraph feature.
...@@ -1640,6 +1688,54 @@ def local_upcast_elemwise_constant_inputs(node): ...@@ -1640,6 +1688,54 @@ def local_upcast_elemwise_constant_inputs(node):
################## ##################
@register_canonicalize
@register_specialize
@gof.local_optimizer([IncSubtensor])
def local_useless_inc_subtensor(node):
    """Remove an IncSubtensor that overwrites its whole first input.

    The rewrite applies to a set-subtensor, or to an inc-subtensor
    whose first input is a constant zero, when every index is a full
    slice (step ``None`` or ``-1``) and the shape feature can assert
    that both inputs have the same shape.

    :param node: the Apply node being examined.
    :return: ``None`` when the rewrite does not apply; otherwise a
        one-element list with the replacement output.
    """
    op = node.op
    if not isinstance(op, IncSubtensor):
        return

    if op.set_instead_of_inc is False:
        # A real increment only equals a plain set when the buffer
        # being incremented is a constant zero.
        try:
            init_value = get_scalar_constant_value(node.inputs[0])
        except NotScalarConstantError:
            return
        if init_value != 0:
            return

    # FB: I didn't check if a mismatch can happen here, but this
    # optimization does not support it.
    if node.inputs[0].ndim != node.inputs[1].ndim:
        return
    if node.inputs[0].broadcastable != node.inputs[1].broadcastable:
        return

    # From here on we have a SetSubtensor or an IncSubtensor on zeros.
    # Is it useful?  It is not when no original data is kept.
    # Put the constant inputs in the slices before inspecting them.
    idx_cst = theano.tensor.subtensor.get_idx_list(node.inputs[1:],
                                                   op.idx_list)

    def _covers_whole_axis(e):
        return (isinstance(e, slice) and e.start is None and
                e.stop is None and
                (e.step is None or T.extract_constant(e.step) == -1))

    if not all(_covers_whole_axis(e) for e in idx_cst):
        return

    # IncSubtensor broadcasts node.inputs[1] on node.inputs[0] based on
    # run time shapes, so we must check they are the same.
    if not hasattr(node.fgraph, 'shape_feature'):
        return
    if not node.fgraph.shape_feature.same_shape(node.inputs[0],
                                                node.inputs[1]):
        return

    if all(e.step is None for e in op.idx_list):
        # No axis is reversed and the shapes agree, so the new value
        # itself replaces the IncSubtensor.
        return [node.inputs[1]]
    # Some axis is reversed: apply the same indexing to the new value.
    return [Subtensor(op.idx_list)(*node.inputs[1:])]
@register_canonicalize @register_canonicalize
@register_specialize @register_specialize
@gof.local_optimizer([Subtensor]) @gof.local_optimizer([Subtensor])
...@@ -3366,11 +3462,17 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any, ...@@ -3366,11 +3462,17 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
T.elemwise.Sum, T.elemwise.Prod, T.elemwise.Sum, T.elemwise.Prod,
T.elemwise.ProdWithoutZeros] T.elemwise.ProdWithoutZeros]
@register_canonicalize @register_canonicalize
@register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce @register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce
@gof.local_optimizer(ALL_REDUCE) @gof.local_optimizer(ALL_REDUCE)
def local_reduce_join(node): def local_reduce_join(node):
"""Max(Join(a,b), axis=0) -> Maximum(a,b) """ """Reduce{scalar.op}(Join(a, b), axis=0) -> Elemwise{scalar.op}(a, b)
:note: supported scalar.op are Maximum, Minimum in some cases and
Add and Mul in all cases.
"""
if (isinstance(node.op, T.CAReduce) and if (isinstance(node.op, T.CAReduce) and
node.inputs[0].owner and node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, T.Join)): isinstance(node.inputs[0].owner.op, T.Join)):
...@@ -3385,6 +3487,9 @@ def local_reduce_join(node): ...@@ -3385,6 +3487,9 @@ def local_reduce_join(node):
return return
elif not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul)): elif not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul)):
return return
elif len(join.inputs) <= 2:
# This is a useless join, that will get removed by another opt.
return
new_inp = [] new_inp = []
for inp in join.inputs[1:]: for inp in join.inputs[1:]:
......
...@@ -155,7 +155,7 @@ class Solve(Op): ...@@ -155,7 +155,7 @@ class Solve(Op):
self.overwrite_b = overwrite_b self.overwrite_b = overwrite_b
def __repr__(self): def __repr__(self):
return 'Solve{%s}' % str(self.props()) return 'Solve{%s}' % str(self._props())
def make_node(self, A, b): def make_node(self, A, b):
assert imported_scipy, ( assert imported_scipy, (
......
...@@ -1930,7 +1930,8 @@ class ApplyDefaultTestOp(theano.Op): ...@@ -1930,7 +1930,8 @@ class ApplyDefaultTestOp(theano.Op):
class TestAsTensorVariable(unittest.TestCase): class TestAsTensorVariable(unittest.TestCase):
""" """
Unit test for ensuring that as_tensor_variable handles Apply objects correctly. Unit test for ensuring that as_tensor_variable handles Apply objects
correctly and removes leading broadcastable dimensions when possible.
""" """
def setUp(self): def setUp(self):
self.x = tensor.scalar('x') self.x = tensor.scalar('x')
...@@ -1951,6 +1952,18 @@ class TestAsTensorVariable(unittest.TestCase): ...@@ -1951,6 +1952,18 @@ class TestAsTensorVariable(unittest.TestCase):
bad_apply_var = ApplyDefaultTestOp([0, 1]).make_node(self.x) bad_apply_var = ApplyDefaultTestOp([0, 1]).make_node(self.x)
self.assertRaises(AttributeError, as_tensor_variable, bad_apply_var) self.assertRaises(AttributeError, as_tensor_variable, bad_apply_var)
def test_strip_leading_broadcastable(self):
    """Check how ``as_tensor_variable`` reduces the number of dimensions.

    A variable with broadcastable pattern ``(True, False)`` must come
    back with one dimension when ``ndim=1`` is requested, while asking
    the same of a matrix must raise ``ValueError``.
    """
    x = tensor.TensorType(config.floatX, (True, False))('x')
    x = as_tensor_variable(x, ndim=1)
    # unittest assertions are used instead of bare ``assert`` so the
    # checks are not stripped when Python runs with ``-O``.
    self.assertEqual(x.ndim, 1)

    x = tensor.matrix('x', dtype=config.floatX)
    self.assertRaises(ValueError, as_tensor_variable, x, ndim=1)
class TestAlloc(unittest.TestCase): class TestAlloc(unittest.TestCase):
dtype = config.floatX dtype = config.floatX
......
...@@ -1571,6 +1571,53 @@ def test_log_add(): ...@@ -1571,6 +1571,53 @@ def test_log_add():
#TODO: (write and) test that the optimization works with Sum in addition to working with Add. #TODO: (write and) test that the optimization works with Sum in addition to working with Add.
def test_local_useless_inc_subtensor():
    """Exercise the removal of useless IncSubtensor nodes.

    For a full slice and a reversed full slice on the second axis, the
    IncSubtensor must disappear when the shapes are specified equal and
    must stay otherwise.  A strided slice must never be optimized away.
    """
    x = tensor.matrix('x')
    y = tensor.matrix('y')
    for sub in [slice(None), slice(None, None, -1)]:
        o = tensor.set_subtensor(x[::, sub], y)
        f = theano.function([x, y], o)
        o_shape = tensor.set_subtensor(x[::, sub],
                                       tensor.specify_shape(y, x.shape))
        f_shape = theano.function([x, y], o_shape)

        # Test with shape info
        topo = f_shape.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
        out = f_shape([[2, 3]], [[3, 4]])
        assert (out == numpy.asarray([[3, 4]])[::, sub]).all()

        # Test that without shape info, we don't apply the opt.
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op, tensor.IncSubtensor)
        out = f([[2, 3]], [[3, 4]])
        assert (out == numpy.asarray([[3, 4]])[::, sub]).all()

        # Test that we don't remove shape error.  The failure is raised
        # from the ``else`` clause: an ``assert False`` inside the
        # ``try`` would be caught by the AssertionError handler and the
        # check could never fail.
        try:
            f([[2, 3]], [[3, 4], [4, 5]])
        except (ValueError, AssertionError):
            pass
        else:
            raise AssertionError(
                "set_subtensor with mismatched shapes did not raise")

        # Test that we don't remove broadcastability
        out = f([[2, 3], [3, 4]], [[5, 6]])
        assert (out == numpy.asarray([[5, 6], [5, 6]])[::, sub]).all()

    # Test that we do not optimize other strides even when sub and y
    # have the same shapes
    sub = x[::, ::2]
    o_shape = tensor.set_subtensor(sub,
                                   tensor.specify_shape(y, sub.shape))
    f_shape = theano.function([x, y], o_shape)
    topo = f_shape.maker.fgraph.toposort()
    assert any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
    out = f_shape([[2, 3, 6, 7]], [[8, 9]])
    assert (out == numpy.asarray([[8, 3, 9, 7]])).all()
def test_local_useless_subtensor(): def test_local_useless_subtensor():
x = tensor.matrix('x') x = tensor.matrix('x')
...@@ -2887,10 +2934,13 @@ class T_Tile(unittest.TestCase): ...@@ -2887,10 +2934,13 @@ class T_Tile(unittest.TestCase):
def test_local_useless_tile(self): def test_local_useless_tile(self):
v = T.vector() v = T.vector()
m = T.matrix() m = T.matrix()
mode = None
if theano.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]: for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]:
# Currently, only a repeat patter == ndim is supported. # Currently, only a repeat patter == ndim is supported.
for ndim in [var.ndim]: # range(1, var.ndim): for ndim in [var.ndim]: # range(1, var.ndim):
f = theano.function([var], T.tile(var, (1,)*ndim)) f = theano.function([var], T.tile(var, (1,)*ndim), mode=mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, compile.DeepCopyOp) assert isinstance(topo[0].op, compile.DeepCopyOp)
......
...@@ -863,7 +863,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -863,7 +863,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
inc_slice(2, 1), inc_slice(2, 1),
(numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),)) (numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
def test_advanced_inc_and_set(self): def test_inc_and_set_subtensor(self):
"""
Test increment and set with broadcast
"""
X = tensor.matrix(dtype=self.dtype)
y = set_subtensor(X[1::, 1::], 0)
f = self.function([X], [y],
op=self.inc_sub,
N=1)
x_ = numpy.ones((9, 9))
out = f(x_.astype('float32'))
res = x_.copy()
res[1::, 1::] = 0
assert numpy.allclose(out, res)
def test_advanced1_inc_and_set(self):
""" """
Test advanced increment and set. Test advanced increment and set.
""" """
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论