提交 14c219b2 authored 作者: Dustin Webb's avatar Dustin Webb

Merge branch 'master' of https://github.com/Theano/Theano into mastery

Conflicts: theano/tensor/tests/test_basic.py
......@@ -188,7 +188,7 @@ import theano and print the config variable, as in:
String value: either 'ignore', 'warn', 'raise' or 'pdb'
Default: 'float64'
Default: 'ignore'
When creating a TensorVariable with dtype float64, what should be done?
This is useful to help find upcast to float64 in user code.
......
......@@ -28,7 +28,7 @@ def test_profiling():
p = theano.ProfileStats(False)
if theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
m = "FAST_RUN"
else:
m = None
......
......@@ -1466,7 +1466,7 @@ class _CThunk(object):
# note that the failure code is distributed in two lists
if failure_code < 2 * n:
return [self.init_tasks, self.tasks][
failure_code % 2][failure_code / 2]
failure_code % 2][failure_code // 2]
else:
return self.tasks[failure_code - n]
......
......@@ -663,7 +663,10 @@ class Op(utils.object2, PureOp, CLinkerOp):
if len(self.__props__) == 0:
return "%s" % (self.__class__.__name__,)
else:
return "%s{%s}" % (self.__class__.__name__, ", ".join("%s=%r" % (p, getattr(self, p)) for p in self.__props__))
return "%s{%s}" % (
self.__class__.__name__,
", ".join("%s=%r" % (p, getattr(self, p))
for p in self.__props__))
else:
return super(Op, self).__str__()
......
......@@ -132,17 +132,21 @@ class TestOp:
def test_op_struct(self):
sop = StructOp()
c = sop(theano.tensor.constant(0))
f = theano.function([], c)
mode = None
if theano.config.mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
f = theano.function([], c, mode=mode)
rval = f()
assert rval == 0
rval = f()
assert rval == 1
c2 = sop(theano.tensor.constant(1))
f2 = theano.function([], [c, c2])
f2 = theano.function([], [c, c2], mode=mode)
rval = f2()
assert rval == [0, 0]
class TestMakeThunk(unittest.TestCase):
def test_no_c_code(self):
class IncOnePython(Op):
......
......@@ -2888,7 +2888,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
returns a C code expression to copy source into view, and
return 0 on success
"""
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals()
# On the CPU it unbroadcasts based on the run-time shapes. We
# need the same behavior on the GPU.
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s, 1)""" % locals()
def add_to_zview(self, name, x, fail):
......@@ -2910,7 +2912,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if parent_version:
return parent_version + (0,)
return parent_version + (1,)
return ()
......@@ -3343,6 +3345,13 @@ class GpuContiguous(GpuOp):
input = as_cuda_ndarray_variable(input)
return Apply(self, [input], [input.type()])
def perform(self, node, inp, out):
i = inp[0]
if not i.is_c_contiguous():
i = i.copy()
assert i.is_c_contiguous()
out[0][0] = i
def c_code(self, node, name, inp, out, sub):
input, = inp
z, = out
......
......@@ -852,8 +852,11 @@ class GpuCorrMM(BaseGpuCorrMM):
class GpuCorrMM_gradWeights(BaseGpuCorrMM):
"""Gradient wrt. filters for `GpuCorrMM`.
:note: You will not want to use this directly, but rely on Theano's
automatic differentiation or graph optimization to use it as needed."""
:note: You will not want to use this directly, but rely on
Theano's automatic differentiation or graph optimization to
use it as needed.
"""
def __init__(self, border_mode="valid",
subsample=(1, 1),
......@@ -906,8 +909,11 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
class GpuCorrMM_gradInputs(BaseGpuCorrMM):
"""Gradient wrt. inputs for `GpuCorrMM`.
:note: You will not want to use this directly, but rely on Theano's
automatic differentiation or graph optimization to use it as needed."""
:note: You will not want to use this directly, but rely on
Theano's automatic differentiation or graph optimization to
use it as needed.
"""
def __init__(self, border_mode="valid",
subsample=(1, 1),
......
......@@ -1002,7 +1002,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
return NULL;
indices = (CudaNdarray*) CudaNdarray_New();
if (verbose) printf("ndarray after new\n");
if (verbose) printf("\nndarray after new\n");
if (! indices){
Py_DECREF(indices_float32);
return NULL;
......@@ -1140,6 +1140,13 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
}
dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(out)[0],65535),1,1);
if(CudaNdarray_HOST_DIMS(out)[0] == 0){
// We take 0 elements, so no need for the rest of the code.
// This speeds up that case AND fixes a crash otherwise.
free(dims);
Py_DECREF(indices);
return (PyObject *)out;
}
switch (self->nd) {
case 1:
......@@ -1149,7 +1156,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
printf("cudaGetLastError=%d, nd=%d"
" kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
" n_threads.x=%i, n_threads.y=%i)\n",
self->nd, cudaGetLastError(),
cudaGetLastError(), self->nd,
n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);
k3<<<n_blocks, n_threads>>>(
dims[0],
......@@ -1205,7 +1212,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
printf("cudaGetLastError=%d, nd=%d"
" kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
" n_threads.x=%i, n_threads.y=%i)\n",
self->nd, cudaGetLastError(),
cudaGetLastError(), self->nd,
n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);
k3<<<n_blocks, n_threads>>>(
dims[0], //dimensions
......
......@@ -1680,6 +1680,16 @@ def local_gpualloc(node):
return [new_out]
@register_opt()
@local_optimizer([theano.tensor.opt.Assert])
def local_assert(node):
if (isinstance(node.op, theano.tensor.opt.Assert) and
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op,
HostFromGpu)):
return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
@register_opt()
@local_optimizer([GpuAlloc])
def local_gpualloc_memset_0(node):
......
......@@ -967,6 +967,8 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):
# version when we should. Users should not use it.
for shape, idx, fast in [((70000,), range(70000), True),
((70000, 5), range(70000), True),
((70000, 5), numpy.zeros((0,), 'int64'),
True),
((70000, 2, 3), range(70000), True),
((1025, 1025), [5, 10], True),
((3, 1025, 1026), [1, 2], True),
......
......@@ -641,7 +641,8 @@ def test_valid(conv_gemm=False):
shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
else:
mode = cls = None
mode = theano_mode
cls = None
exec_conv(version, shapes, verbose, random, 'valid',
print_=print_, ones=ones, rtol=1.1e-5,
theano_mode=mode, cls=cls)
......@@ -717,7 +718,8 @@ def test_full(conv_gemm=False):
# dummy version; not used by GpuCorrMM so one version is enough
version = [-1]
else:
mode = cls = None
mode = theano_mode
cls = None
exec_conv(version, shapes, verbose, random, 'full',
theano_mode=mode, cls=cls)
......@@ -757,7 +759,8 @@ def test_subsample(conv_gemm=False):
# dummy version; not used by GpuCorrMM so one version is enough
version_valid = version_full = [-1]
else:
mode = cls = None
mode = theano_mode
cls = None
exec_conv(version_valid, shapes, verbose, random, 'valid',
print_=print_, ones=ones,
......
......@@ -41,6 +41,17 @@ def test_no_shared_var_graph():
assert numpy.any(isinstance(x.op,cuda.GpuFromHost) for x in l)
assert numpy.any(isinstance(x.op,cuda.HostFromGpu) for x in l)
def test_local_assert():
x = theano.tensor.fmatrix()
a = theano.tensor.opt.assert_op(x, theano.tensor.eq(x, 0).any())
f = theano.function([x], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
a_op = [n for n in topo if isinstance(n.op, theano.tensor.opt.Assert)]
assert len(a_op) == 1
assert isinstance(a_op[0].inputs[0].type, CudaNdarrayType)
def test_int_pow():
a = CudaNdarrayType([False])()
......
......@@ -496,6 +496,16 @@ def local_gpua_softmaxwithbias(node):
return GpuSoftmaxWithBias()
@register_opt()
@local_optimizer([theano.tensor.opt.Assert])
def local_assert(node):
if (isinstance(node.op, theano.tensor.opt.Assert) and
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op,
HostFromGpu)):
return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
@register_opt()
@op_lifter([gpu_from_host, ConvOp])
def local_gpu_conv(node):
......
......@@ -16,6 +16,16 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests.test_basic import TestSpecifyShape
def test_local_assert():
x = theano.tensor.fmatrix()
a = theano.tensor.opt.assert_op(x, theano.tensor.eq(x, 0).any())
f = theano.function([x], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
a_op = [n for n in topo if isinstance(n.op, theano.tensor.opt.Assert)]
assert len(a_op) == 1
assert isinstance(a_op[0].inputs[0].type, GpuArrayType)
def test_flatten():
m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
......@@ -164,10 +164,15 @@ def as_tensor_variable(x, name=None, ndim=None):
return x
else:
if (x.type.ndim > ndim):
# TODO: strip off leading broadcastable dimensions
raise ValueError(
'TensorType could not be cast to have %i dimensions' %
ndim, x.type)
# strip off leading broadcastable dimensions
first_non_broadcastable = [idx for idx in range(x.ndim)
if x.broadcastable[idx] == False][0]
x = x.dimshuffle(range(x.ndim)[first_non_broadcastable:])
if x.ndim > ndim:
raise ValueError(
'TensorType could not be cast to have %i dimensions' % ndim, x.type
)
return x
elif (x.type.ndim < ndim):
return shape_padleft(x, n_ones=(ndim - x.type.ndim))
else:
......
......@@ -811,8 +811,8 @@ class ConvOp(OpenMPOp):
shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name
tmp_node = theano.tensor.nnet.conv3D(
V = shuffled_inputs,
W= shuffled_kerns,
V=shuffled_inputs,
W=shuffled_kerns,
b=theano.tensor.alloc(numpy.asarray(0, dtype=kerns.dtype),
kerns.shape[0]),
d=(self.dx, self.dy, 1))
......
......@@ -799,7 +799,21 @@ class ShapeFeature(object):
#
# worst case, we loop over shape_of and replace things
raise NotImplementedError(s_i)
elif s_i.type.dtype[:3] in ('int', 'uint'):
# s_i is x.shape[i], we change it to Shape_i.
if (s_i.owner and
isinstance(s_i.owner.op, Subtensor) and
s_i.owner.inputs[0].owner and
isinstance(s_i.owner.inputs[0].owner.op, T.Shape)):
assert s_i.ndim == 0
assert len(s_i.owner.inputs) == 2
try:
i = get_scalar_constant_value(s_i.owner.inputs[1])
s_i = Shape_i(i)(s_i.owner.inputs[0].owner.inputs[0])
except NotScalarConstantError:
pass
if s_i.type.dtype[:3] in ('int', 'uint'):
if getattr(s_i.type, 'ndim', 0):
raise TypeError('Shape element must be scalar', s_i)
return s_i
......@@ -1131,6 +1145,40 @@ class ShapeFeature(object):
self.set_shape_i(v, ii, new_r)
self.shape_of_reverse_index[r] = set()
def same_shape(self, x, y):
"""Return True if we are able to assert that x and y have the
same shape
"""
sx = self.shape_of[x]
sy = self.shape_of[y]
if sx is None or sy is None:
return False
assert len(sx) == len(sy)
for dx, dy in zip(sx, sy):
if dx is dy:
continue
# Need to try to find that they are the same shape. We
# need to compare the full graph. It could be slow. So I
# just implement for now the case of Shape_i.
if not dx.owner or not dy.owner:
return False
if (not isinstance(dx.owner.op, Shape_i) or
not isinstance(dy.owner.op, Shape_i)):
return False
opx = dx.owner.op
opy = dy.owner.op
if not (opx.i == opy.i):
return False
# FB: I'm not sure this handles constants correctly.
if dx.owner.inputs[0] == dy.owner.inputs[0]:
return True
# To be sure to cover all case, call equal_computation.
# Can't use theano.gof.graph.is_same_graph(dx, dy)
# as it currently expects that dx and dy aren't in a FunctionGraph
from theano.scan_module.scan_utils import equal_computations
return equal_computations([dx], [dy])
class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an fgraph feature.
......@@ -1640,6 +1688,54 @@ def local_upcast_elemwise_constant_inputs(node):
##################
@register_canonicalize
@register_specialize
@gof.local_optimizer([IncSubtensor])
def local_useless_inc_subtensor(node):
"""Remove IncSubtensor, when we overwrite the full inputs with the
new value.
"""
if not isinstance(node.op, IncSubtensor):
return
if node.op.set_instead_of_inc is False:
# This is an IncSubtensor, so the init value must be zeros
try:
c = get_scalar_constant_value(node.inputs[0])
if c != 0:
return
except NotScalarConstantError:
return
if (node.inputs[0].ndim != node.inputs[1].ndim or
node.inputs[0].broadcastable != node.inputs[1].broadcastable):
# FB: I didn't check if this case can happen, but this opt
# doesn't support it.
return
# We have a SetSubtensor or an IncSubtensor on zeros
# Is this IncSubtensor useful?
# Check that we keep all the original data.
# Put the constant inputs in the slice.
idx_cst = theano.tensor.subtensor.get_idx_list(node.inputs[1:],
node.op.idx_list)
if all(isinstance(e, slice) and e.start is None and
e.stop is None and (e.step is None or T.extract_constant(e.step) == -1)
for e in idx_cst):
# IncSubtensor broadcasts node.inputs[1] on node.inputs[0]
# based on run time shapes, so we must check they are the same.
if not hasattr(node.fgraph, 'shape_feature'):
return
if not node.fgraph.shape_feature.same_shape(node.inputs[0],
node.inputs[1]):
return
# There is no reverse, so we don't need a replacement.
if all(e.step is None
for e in node.op.idx_list):
# They are the same shape, so we can remove this IncSubtensor
return [node.inputs[1]]
return [Subtensor(node.op.idx_list)(*node.inputs[1:])]
@register_canonicalize
@register_specialize
@gof.local_optimizer([Subtensor])
......@@ -3366,11 +3462,17 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
T.elemwise.Sum, T.elemwise.Prod,
T.elemwise.ProdWithoutZeros]
@register_canonicalize
@register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce
@gof.local_optimizer(ALL_REDUCE)
def local_reduce_join(node):
"""Max(Join(a,b), axis=0) -> Maximum(a,b) """
"""Reduce{scalar.op}(Join(a, b), axis=0) -> Elemwise{scalar.op}(a, b)
:note: supported scalar.op are Maximum, Minimum in some cases and
Add and Mul in all cases.
"""
if (isinstance(node.op, T.CAReduce) and
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, T.Join)):
......@@ -3385,6 +3487,9 @@ def local_reduce_join(node):
return
elif not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul)):
return
elif len(join.inputs) <= 2:
# This is a useless join, that will get removed by another opt.
return
new_inp = []
for inp in join.inputs[1:]:
......
......@@ -155,7 +155,7 @@ class Solve(Op):
self.overwrite_b = overwrite_b
def __repr__(self):
return 'Solve{%s}' % str(self.props())
return 'Solve{%s}' % str(self._props())
def make_node(self, A, b):
assert imported_scipy, (
......
......@@ -1930,7 +1930,8 @@ class ApplyDefaultTestOp(theano.Op):
class TestAsTensorVariable(unittest.TestCase):
"""
Unit test for ensuring that as_tensor_variable handles Apply objects correctly.
Unit test for ensuring that as_tensor_variable handles Apply objects
correctly and removes leading broadcastable dimensions when possible.
"""
def setUp(self):
self.x = tensor.scalar('x')
......@@ -1951,6 +1952,18 @@ class TestAsTensorVariable(unittest.TestCase):
bad_apply_var = ApplyDefaultTestOp([0, 1]).make_node(self.x)
self.assertRaises(AttributeError, as_tensor_variable, bad_apply_var)
def test_strip_leading_broadcastable(self):
x = tensor.TensorType(config.floatX, (True, False))('x')
x = as_tensor_variable(x, ndim=1)
assert(x.ndim == 1)
x = tensor.matrix('x', dtype=config.floatX)
try:
x = as_tensor_variable(x, ndim=1)
assert(False) # The call above should have failed
except ValueError:
pass
class TestAlloc(unittest.TestCase):
dtype = config.floatX
......
......@@ -1571,6 +1571,53 @@ def test_log_add():
#TODO: (write and) test that the optimization works with Sum in addition to working with Add.
def test_local_useless_inc_subtensor():
x = tensor.matrix('x')
y = tensor.matrix('y')
for sub in [slice(None), slice(None, None, -1)]:
o = tensor.set_subtensor(x[::, sub], y)
f = theano.function([x, y], o)
o_shape = tensor.set_subtensor(x[::, sub],
tensor.specify_shape(y, x.shape))
f_shape = theano.function([x, y], o_shape)
# Test with shape info
topo = f_shape.maker.fgraph.toposort()
assert not any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
out = f_shape([[2, 3]], [[3, 4]])
assert (out == numpy.asarray([[3, 4]])[::, sub]).all()
# Test that without shape info, we don't apply the opt.
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, tensor.IncSubtensor)
out = f([[2, 3]], [[3, 4]])
assert (out == numpy.asarray([[3, 4]])[::, sub]).all()
# Test that we don't remove shape error
try:
f([[2, 3]], [[3, 4], [4, 5]])
assert False
except (ValueError, AssertionError):
pass
# Test that we don't remove broadcastability
out = f([[2, 3], [3, 4]], [[5, 6]])
assert (out == numpy.asarray([[5, 6], [5, 6]])[::, sub]).all()
# Test that we do not optimize other strides even when sub and y
# have same shapes
sub = x[::, ::2]
o_shape = tensor.set_subtensor(sub,
tensor.specify_shape(y, sub.shape))
f_shape = theano.function([x, y], o_shape)
topo = f_shape.maker.fgraph.toposort()
theano.printing.debugprint(f_shape)
assert any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
out = f_shape([[2, 3, 6, 7]], [[8, 9]])
assert (out == numpy.asarray([[8, 3, 9, 7]])).all()
def test_local_useless_subtensor():
x = tensor.matrix('x')
......@@ -2887,10 +2934,13 @@ class T_Tile(unittest.TestCase):
def test_local_useless_tile(self):
v = T.vector()
m = T.matrix()
mode = None
if theano.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]:
# Currently, only a repeat pattern == ndim is supported.
for ndim in [var.ndim]: # range(1, var.ndim):
f = theano.function([var], T.tile(var, (1,)*ndim))
f = theano.function([var], T.tile(var, (1,)*ndim), mode=mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, compile.DeepCopyOp)
......
......@@ -863,7 +863,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
inc_slice(2, 1),
(numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
def test_advanced_inc_and_set(self):
def test_inc_and_set_subtensor(self):
"""
Test increment and set with broadcast
"""
X = tensor.matrix(dtype=self.dtype)
y = set_subtensor(X[1::, 1::], 0)
f = self.function([X], [y],
op=self.inc_sub,
N=1)
x_ = numpy.ones((9, 9))
out = f(x_.astype('float32'))
res = x_.copy()
res[1::, 1::] = 0
assert numpy.allclose(out, res)
def test_advanced1_inc_and_set(self):
"""
Test advanced increment and set.
"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论