提交 ab206dc1 authored 作者: abergeron's avatar abergeron

Merge pull request #1952 from nouiz/gpu_sum

Move sum to the GPU more frequently, and fix crashes
...@@ -571,7 +571,10 @@ the elements of the shape). ...@@ -571,7 +571,10 @@ the elements of the shape).
.. code-block:: python .. code-block:: python
theano.compile.ops.register_shape_c_code(YOUR_TYPE_CLASS, THE_C_CODE, version=()) theano.compile.ops.register_shape_c_code(YOUR_TYPE_CLASS, THE_C_CODE, version=())
theano.compile.ops.register_shape_i_c_code(YOUR_TYPE_CLASS, THE_C_CODE, version=()) theano.compile.ops.register_shape_i_c_code(YOUR_TYPE_CLASS, THE_C_CODE, CHECK_INPUT, version=())
The C code works as the ViewOp. Shape_i has the additional ``i`` parameter The C code works as the ViewOp. Shape_i has the additional ``i`` parameter
that you can use with ``%(i)s``. that you can use with ``%(i)s``.
In your CHECK_INPUT, you must check that the input has enough dimensions
(ndim) to be able to get the ith shape.
...@@ -349,13 +349,13 @@ class Shape_i(gof.Op): ...@@ -349,13 +349,13 @@ class Shape_i(gof.Op):
version = [] version = []
# If any of the c code is unversionned, we have to return () # If any of the c code is unversionned, we have to return ()
# Else, we will return a list of (type name, version) pairs. # Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(), for t, (c, ci, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])): key=lambda pair: str(pair[0])):
if not v: if not v:
warnings.warn("Type %s has C code for Shape_i, but it has " warnings.warn("Type %s has C code for Shape_i, but it has "
"no version. You should add a 'version' keyword arg " "no version. You should add a 'version' keyword "
"when calling register_shape_i_c_code." % t, "arg when calling register_shape_i_c_code." % t,
stacklevel=2) stacklevel=2)
return () return ()
version.append((str(t), v)) version.append((str(t), v))
...@@ -372,14 +372,8 @@ class Shape_i(gof.Op): ...@@ -372,14 +372,8 @@ class Shape_i(gof.Op):
itype = node.inputs[0].type.__class__ itype = node.inputs[0].type.__class__
if itype in self.c_code_and_version: if itype in self.c_code_and_version:
sc = """ code, check_input, version = self.c_code_and_version[itype]
if (%(i)s>=PyArray_NDIM(%(iname)s)){ return (check_input + code) % locals()
PyErr_SetString(PyExc_TypeError, "Number of dimensions lower than expected");
%(fail)s
}
""" % locals()
code, version = self.c_code_and_version[itype]
return sc + code % locals()
# Else, no C code # Else, no C code
return super(Shape_i, self).c_code(node, name, inames, onames, sub) return super(Shape_i, self).c_code(node, name, inames, onames, sub)
...@@ -391,7 +385,7 @@ class Shape_i(gof.Op): ...@@ -391,7 +385,7 @@ class Shape_i(gof.Op):
return [None] return [None]
def register_shape_i_c_code(typ, code, version=()): def register_shape_i_c_code(typ, code, check_input, version=()):
""" Tell Shape_i how to generate C code for a Theano Type """ Tell Shape_i how to generate C code for a Theano Type
:param typ: A Theano type. It must be the Theano class itself and not an :param typ: A Theano type. It must be the Theano class itself and not an
...@@ -401,13 +395,14 @@ def register_shape_i_c_code(typ, code, version=()): ...@@ -401,13 +395,14 @@ def register_shape_i_c_code(typ, code, version=()):
variable names respectively. variable names respectively.
:param version: A number indicating the version of the code, for cache. :param version: A number indicating the version of the code, for cache.
""" """
Shape_i.c_code_and_version[typ] = (code, version) Shape_i.c_code_and_version[typ] = (code, check_input, version)
# List of Theano Types that one can add an extra dimension and for which # List of Theano Types that one can add an extra dimension and for which
# Scan can deal with. # Scan can deal with.
expandable_types = () expandable_types = ()
class FromFunctionOp(gof.Op): class FromFunctionOp(gof.Op):
""" """
Build a basic Theano Op around a function. Build a basic Theano Op around a function.
......
...@@ -342,10 +342,10 @@ class ProfileStats(object): ...@@ -342,10 +342,10 @@ class ProfileStats(object):
es += [' %2s '] es += [' %2s ']
hs += ['<#call>'] hs += ['<#call>']
es += [' %5d '] es += ['%6d ']
hs += ['<#apply>'] hs += ['<#apply>']
es += [' %4d '] es += [' %4d ']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs) upto_length = numpy.sum([len(x) for x in hs]) + len(hs)
maxlen = self.line_width - upto_length maxlen = self.line_width - upto_length
...@@ -587,6 +587,7 @@ class ProfileStats(object): ...@@ -587,6 +587,7 @@ class ProfileStats(object):
print >> file, ' Time in thunks: %es (%.3f%%)' % ( print >> file, ' Time in thunks: %es (%.3f%%)' % (
local_time, 100*local_time / self.fct_call_time) local_time, 100*local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Number of Apply nodes: %s' % len(self.apply_time)
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
print >> file, ' Theano validate time: %es' % self.validate_time print >> file, ' Theano validate time: %es' % self.validate_time
print >> file, (' Theano Linker time (includes C,' print >> file, (' Theano Linker time (includes C,'
......
...@@ -783,7 +783,7 @@ def pydotprint(fct, outfile=None, ...@@ -783,7 +783,7 @@ def pydotprint(fct, outfile=None,
elif var.name or not compact: elif var.name or not compact:
g.add_edge(pd.Edge(astr, varstr, label=label)) g.add_edge(pd.Edge(astr, varstr, label=label))
# else: # else:
#don't add egde here as it is already added from the inputs. # don't add egde here as it is already added from the inputs.
if cond_highlight: if cond_highlight:
g.add_subgraph(c1) g.add_subgraph(c1)
...@@ -863,8 +863,8 @@ def pydotprint_variables(vars, ...@@ -863,8 +863,8 @@ def pydotprint_variables(vars,
dstr = dstr[:dstr.index('\n')] dstr = dstr[:dstr.index('\n')]
varstr = '%s %s' % (dstr, str(var.type)) varstr = '%s %s' % (dstr, str(var.type))
else: else:
#a var id is needed as otherwise var with the same type will be # a var id is needed as otherwise var with the same type will be
#merged in the graph. # merged in the graph.
varstr = str(var.type) varstr = str(var.type)
varstr += ' ' + str(len(var_str)) varstr += ' ' + str(len(var_str))
...@@ -1090,8 +1090,6 @@ def min_informative_str(obj, indent_level=0, ...@@ -1090,8 +1090,6 @@ def min_informative_str(obj, indent_level=0,
return rval return rval
def var_descriptor(obj, _prev_obs=None, _tag_generator=None): def var_descriptor(obj, _prev_obs=None, _tag_generator=None):
""" """
Returns a string, with no endlines, fully specifying Returns a string, with no endlines, fully specifying
...@@ -1154,6 +1152,7 @@ def var_descriptor(obj, _prev_obs=None, _tag_generator=None): ...@@ -1154,6 +1152,7 @@ def var_descriptor(obj, _prev_obs=None, _tag_generator=None):
return rval return rval
def position_independent_str(obj): def position_independent_str(obj):
if isinstance(obj, theano.gof.graph.Variable): if isinstance(obj, theano.gof.graph.Variable):
rval = 'theano_var' rval = 'theano_var'
......
...@@ -86,6 +86,29 @@ register_opt()(theano.tensor.opt.local_track_shape_i) ...@@ -86,6 +86,29 @@ register_opt()(theano.tensor.opt.local_track_shape_i)
register_opt(name='gpu_constant_folding')( register_opt(name='gpu_constant_folding')(
tensor.opt.constant_folding) tensor.opt.constant_folding)
# This is a partial list of CPU ops that can, in some circumstances, be
# moved to the GPU. This list is used by an optimization.
# Hopefully, we can keep this list up to date.
import theano.tensor.signal.downsample
import theano.sandbox.neighbours
cpu_ops_moved_to_gpu = [
tensor.blas.Dot22, tensor.blas.Dot22Scalar, tensor.blas.Gemm,
tensor.blas.Gemv, tensor.blas.Ger, tensor.nnet.conv.ConvOp,
tensor.signal.downsample.DownsampleFactorMax,
tensor.signal.downsample.DownsampleFactorMaxGrad,
theano.sandbox.neighbours.Images2Neibs,
tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias,
tensor.nnet.CrossentropySoftmax1HotWithBiasDx,
tensor.nnet.Softmax, tensor.nnet.SoftmaxWithBias,
tensor.Elemwise, tensor.DimShuffle, tensor.CAReduce,
tensor.elemwise.All, tensor.elemwise.Any,
tensor.elemwise.CAReduceDtype, tensor.elemwise.Sum,
tensor.elemwise.Prod, tensor.elemwise.ProdWithoutZeros,
tensor.Reshape, tensor.Flatten, tensor.Subtensor,
tensor.AdvancedSubtensor1, tensor.AdvancedIncSubtensor1,
tensor.IncSubtensor, tensor.Shape, tensor.Join,
tensor.Alloc, tensor.Eye]
class InputToGpuOptimizer(Optimizer): class InputToGpuOptimizer(Optimizer):
""" """
...@@ -617,7 +640,33 @@ def local_gpu_careduce(node): ...@@ -617,7 +640,33 @@ def local_gpu_careduce(node):
if isinstance(node.op.scalar_op, (scal.Add, scal.Mul, if isinstance(node.op.scalar_op, (scal.Add, scal.Mul,
scal.Maximum, scal.Minimum)): scal.Maximum, scal.Minimum)):
x, = node.inputs x, = node.inputs
replace = False
if x.owner and isinstance(x.owner.op, HostFromGpu): if x.owner and isinstance(x.owner.op, HostFromGpu):
replace = True
elif (all([c != "output" and isinstance(c.op, GpuFromHost)
for c, i in node.outputs[0].clients])
and x.owner and x.owner.op.__class__ in
cpu_ops_moved_to_gpu):
# It is not always good to transfer the reduction to
# the GPU when the clients are on the GPU but the
# reduction input is not. It means we would transfer the
# (bigger) input to the GPU instead of the (smaller)
# output if we stopped optimizing there. Most
# of the time, we will also move to the GPU whatever
# created the input of the reduction. In that case, we
# don't introduce a bigger transfer. It is hard to
# know whether, after all optimizations, we will do the
# bigger transfer or not, so we use a heuristic: we
# suppose that if the input of the reduction is
# generated by an op that we can in some cases move to
# the GPU, then we will move it. If some CPU ops are
# supported only in some cases on the GPU, this will
# move the reduction to the GPU when it wasn't a good
# idea.
replace = True
if replace:
if node.op.axis is None: if node.op.axis is None:
reduce_mask = [1] * x.type.ndim reduce_mask = [1] * x.type.ndim
else: else:
......
...@@ -454,12 +454,22 @@ theano.compile.register_view_op_c_code( ...@@ -454,12 +454,22 @@ theano.compile.register_view_op_c_code(
""", """,
version=1) version=1)
theano.compile.register_shape_i_c_code(CudaNdarrayType, """ theano.compile.register_shape_i_c_code(
CudaNdarrayType,
"""
if(!%(oname)s) if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0); %(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] = ((npy_int64*)PyArray_DATA(%(oname)s))[0] =
CudaNdarray_HOST_DIMS(%(iname)s)[%(i)s]; CudaNdarray_HOST_DIMS(%(iname)s)[%(i)s];
""", version=(0,)) """,
"""
if (%(i)s>=CudaNdarray_NDIM(%(iname)s)){
PyErr_SetString(PyExc_TypeError,
"Number of dimensions lower than expected");
%(fail)s
}
""",
version=(1,))
# Register CudaNdarrayType to the DeepCopyOp list of types with c code. # Register CudaNdarrayType to the DeepCopyOp list of types with c code.
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
......
...@@ -20,7 +20,7 @@ from theano.gof.python25 import all, any ...@@ -20,7 +20,7 @@ from theano.gof.python25 import all, any
from theano.tensor.nnet.conv import ConvOp from theano.tensor.nnet.conv import ConvOp
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import ( from theano.sandbox.gpuarray.basic_ops import (
host_from_gpu, gpu_from_host, HostFromGpu, host_from_gpu, gpu_from_host, HostFromGpu, GpuSplit,
gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuJoin, gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuJoin,
) )
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
...@@ -316,6 +316,7 @@ def local_gpuajoin_1(node): ...@@ -316,6 +316,7 @@ def local_gpuajoin_1(node):
len(node.inputs) == 2): len(node.inputs) == 2):
return [node.inputs[1]] return [node.inputs[1]]
@register_opt() @register_opt()
@op_lifter([tensor.Split]) @op_lifter([tensor.Split])
def local_gpua_split(node): def local_gpua_split(node):
...@@ -334,7 +335,7 @@ def local_gpua_incsubtensor(node): ...@@ -334,7 +335,7 @@ def local_gpua_incsubtensor(node):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace, return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc, node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased) node.op.destroyhandler_tolerate_aliased)
@register_opt() @register_opt()
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
...@@ -371,8 +372,8 @@ def local_gpua_careduce(node): ...@@ -371,8 +372,8 @@ def local_gpua_careduce(node):
dtype=getattr(node.op, 'dtype', None), dtype=getattr(node.op, 'dtype', None),
acc_dtype=getattr(node.op, 'acc_dtype', None)) acc_dtype=getattr(node.op, 'acc_dtype', None))
gvar = greduce(x) gvar = greduce(x)
#We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
#be None # be None
if gvar.owner.op.supports_c_code([gpu_from_host(x)]): if gvar.owner.op.supports_c_code([gpu_from_host(x)]):
return greduce return greduce
else: else:
...@@ -406,7 +407,7 @@ def local_gpua_careduce(node): ...@@ -406,7 +407,7 @@ def local_gpua_careduce(node):
for idx, m in enumerate(new_mask): for idx, m in enumerate(new_mask):
if m == 1: if m == 1:
new_axis.append(idx) new_axis.append(idx)
new_greduce = GpuCAReduceCuda( greduce = GpuCAReduceCuda(
node.op.scalar_op, node.op.scalar_op,
axis=new_axis, reduce_mask=new_mask, axis=new_axis, reduce_mask=new_mask,
dtype=getattr(node.op, 'dtype', None), dtype=getattr(node.op, 'dtype', None),
...@@ -415,12 +416,12 @@ def local_gpua_careduce(node): ...@@ -415,12 +416,12 @@ def local_gpua_careduce(node):
reshaped_x = x.reshape(tensor.stack(*new_in_shp)) reshaped_x = x.reshape(tensor.stack(*new_in_shp))
gpu_reshaped_x = gpu_from_host(reshaped_x) gpu_reshaped_x = gpu_from_host(reshaped_x)
gvar = greduce(gpu_reshaped_x) gvar = greduce(gpu_reshaped_x)
#We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
#be None # be None
reshaped_gpu_inputs = [gpu_reshaped_x] reshaped_gpu_inputs = [gpu_reshaped_x]
if new_greduce.supports_c_code(reshaped_gpu_inputs): if greduce.supports_c_code(reshaped_gpu_inputs):
reduce_reshaped_x = host_from_gpu( reduce_reshaped_x = host_from_gpu(
new_greduce(gpu_reshaped_x)) greduce(gpu_reshaped_x))
if reduce_reshaped_x.ndim != node.outputs[0].ndim: if reduce_reshaped_x.ndim != node.outputs[0].ndim:
unreshaped_reduce = reduce_reshaped_x.reshape( unreshaped_reduce = reduce_reshaped_x.reshape(
...@@ -497,8 +498,8 @@ def local_gpu_conv(node): ...@@ -497,8 +498,8 @@ def local_gpu_conv(node):
if op.kshp_logical is not None and op.kshp_logical != op.kshp: if op.kshp_logical is not None and op.kshp_logical != op.kshp:
return None return None
#print op.kshp, op.imshp[1:3] # print op.kshp, op.imshp[1:3]
#print op.kshp_logical, logical_img_hw # print op.kshp_logical, logical_img_hw
ret = GpuConv(border_mode=op.out_mode, ret = GpuConv(border_mode=op.out_mode,
subsample=(op.dx, op.dy), subsample=(op.dx, op.dy),
logical_img_hw=logical_img_hw, logical_img_hw=logical_img_hw,
...@@ -508,12 +509,12 @@ def local_gpu_conv(node): ...@@ -508,12 +509,12 @@ def local_gpu_conv(node):
version=op.version, version=op.version,
verbose=op.verbose, verbose=op.verbose,
imshp=op.imshp, imshp=op.imshp,
) )
if op.imshp_logical is not None: if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3] logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]: if logical_img_hw != op.imshp[1:3]:
# this case is not implemented # this case is not implemented
#return None # return None
rstride = int(numpy.ceil(op.imshp_logical[1] / rstride = int(numpy.ceil(op.imshp_logical[1] /
float(op.imshp[1]))) float(op.imshp[1])))
cstride = int(numpy.ceil(op.imshp_logical[2] / cstride = int(numpy.ceil(op.imshp_logical[2] /
...@@ -542,7 +543,7 @@ def local_gpu_conv(node): ...@@ -542,7 +543,7 @@ def local_gpu_conv(node):
assert a.ndim == 4 assert a.ndim == 4
atol = None atol = None
if a.shape[-1] * a.shape[-2] > 100: if a.shape[-1] * a.shape[-2] > 100:
#For float32 the default atol is 1e-5 # For float32 the default atol is 1e-5
atol = 3e-5 atol = 3e-5
return GpuArrayType.values_eq_approx(a, b, atol=atol) return GpuArrayType.values_eq_approx(a, b, atol=atol)
...@@ -557,7 +558,7 @@ def local_gpu_conv(node): ...@@ -557,7 +558,7 @@ def local_gpu_conv(node):
out = tensor.patternbroadcast( out = tensor.patternbroadcast(
host_from_gpu(out), host_from_gpu(out),
node.outputs[0].broadcastable) node.outputs[0].broadcastable)
#op_lifter want the output on the GPU. # op_lifter want the output on the GPU.
out = gpu_from_host(out) out = gpu_from_host(out)
out.values_eq_approx = values_eq_approx out.values_eq_approx = values_eq_approx
return [out] return [out]
......
...@@ -356,6 +356,17 @@ class G_Join_and_Split(T_Join_and_Split): ...@@ -356,6 +356,17 @@ class G_Join_and_Split(T_Join_and_Split):
self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE'] self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE']
self.shared = gpuarray_shared_constructor self.shared = gpuarray_shared_constructor
def test_gpusplit_opt(self):
    """Check that splitting a shared matrix compiles to ``self.split_op``.

    A 4x6 random matrix is stored through ``self.shared`` and split along
    axis 0 into two pieces of 2 rows each.  The compiled graph must contain
    at least one node whose op is an instance of ``self.split_op``, and the
    two outputs must match the top and bottom halves of the stored value.
    """
    rng = numpy.random.RandomState(seed=utt.fetch_seed())
    shared_mat = self.shared(rng.rand(4, 6).astype(self.floatX))
    pieces = T.Split(2)(shared_mat, 0, [2, 2])
    fn = theano.function([], pieces, mode=self.mode)

    # Scan the optimized graph for the split op under test.
    found_split = False
    for apply_node in fn.maker.fgraph.toposort():
        if isinstance(apply_node.op, self.split_op):
            found_split = True
            break
    assert found_split

    top, bottom = fn()
    expected = shared_mat.get_value(borrow=True)
    assert numpy.allclose(top, expected[:2])
    assert numpy.allclose(bottom, expected[2:])
def test_gpujoin_gpualloc(): def test_gpujoin_gpualloc():
a = T.fmatrix('a') a = T.fmatrix('a')
......
...@@ -315,12 +315,22 @@ theano.compile.register_shape_c_code( ...@@ -315,12 +315,22 @@ theano.compile.register_shape_c_code(
""", """,
version=1) version=1)
theano.compile.register_shape_i_c_code(GpuArrayType, """ theano.compile.register_shape_i_c_code(
GpuArrayType,
"""
if(!%(oname)s) if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0); %(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] = ((npy_int64*)PyArray_DATA(%(oname)s))[0] =
%(iname)s->ga.dimensions[%(i)s]; %(iname)s->ga.dimensions[%(i)s];
""", version=(0,)) """,
"""
if (%(i)s>=%(iname)s->ga.nd){
PyErr_SetString(PyExc_TypeError,
"Number of dimensions lower than expected");
%(fail)s
}
""",
version=(1,))
theano.compile.register_deep_copy_op_c_code(GpuArrayType, """ theano.compile.register_deep_copy_op_c_code(GpuArrayType, """
Py_XDECREF(%(oname)s); Py_XDECREF(%(oname)s);
...@@ -331,11 +341,11 @@ theano.compile.register_deep_copy_op_c_code(GpuArrayType, """ ...@@ -331,11 +341,11 @@ theano.compile.register_deep_copy_op_c_code(GpuArrayType, """
theano.compile.register_rebroadcast_c_code( theano.compile.register_rebroadcast_c_code(
GpuArrayType, GpuArrayType,
""" """
if(PyGpuArray_DIMS(%(iname)s)[%(axis)s] != 1){ if(%(iname)s->ga.dimensions[%(axis)s] != 1){
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"Dimension %(axis)s in Rebroadcast's input was" "Dimension %(axis)s in Rebroadcast's input was"
" supposed to be 1 (got %%d instead)", " supposed to be 1 (got %%d instead)",
PyGpuArray_DIMS(%(iname)s)[%(axis)s]); %(iname)s->ga.dimensions[%(axis)s]);
%(fail)s %(fail)s
} }
""", """,
......
...@@ -754,6 +754,9 @@ class ShapeFeature(object): ...@@ -754,6 +754,9 @@ class ShapeFeature(object):
def shape_tuple(self, r): def shape_tuple(self, r):
"""Return a tuple of symbolic shape vars for tensor variable r""" """Return a tuple of symbolic shape vars for tensor variable r"""
if not hasattr(r, 'ndim'):
# This happens for NoneConst.
return None
return tuple([self.shape_ir(i, r) for i in xrange(r.ndim)]) return tuple([self.shape_ir(i, r) for i in xrange(r.ndim)])
def default_infer_shape(self, node, i_shapes): def default_infer_shape(self, node, i_shapes):
...@@ -782,7 +785,9 @@ class ShapeFeature(object): ...@@ -782,7 +785,9 @@ class ShapeFeature(object):
# don't make the optimizer merge a zillion ones together # don't make the optimizer merge a zillion ones together
# by always returning the same object to represent 1 # by always returning the same object to represent 1
return self.lscalar_one return self.lscalar_one
if type(s_i) in (int, long) or isinstance(s_i, numpy.integer): if (type(s_i) in (int, long) or
isinstance(s_i, numpy.integer) or
(isinstance(s_i, numpy.ndarray) and s_i.ndim == 0)):
# this shape is a constant # this shape is a constant
assert s_i >= 0 assert s_i >= 0
return T.constant(s_i, dtype='int64') return T.constant(s_i, dtype='int64')
......
...@@ -3246,7 +3246,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3246,7 +3246,7 @@ class T_Join_and_Split(unittest.TestCase):
# assert tensor.grad(join(1,a,b), a # assert tensor.grad(join(1,a,b), a
utt.verify_grad(lambda a, b: join(1, a, b), [av, bv], utt.verify_grad(lambda a, b: join(1, a, b), [av, bv],
eps=1.0e-4, rel_tol=1.0e-3) eps=1.0e-4, rel_tol=1.0e-3, mode=self.mode)
def test_join_matrix1_using_vertical_stack(self): def test_join_matrix1_using_vertical_stack(self):
a = self.shared(numpy.array([[1, 2, 3], [4, 5, 6]], dtype=self.floatX)) a = self.shared(numpy.array([[1, 2, 3], [4, 5, 6]], dtype=self.floatX))
...@@ -3272,7 +3272,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3272,7 +3272,7 @@ class T_Join_and_Split(unittest.TestCase):
self.assertTrue((out == want).all()) self.assertTrue((out == want).all())
utt.verify_grad(lambda a, b: join(1, a, b), [av, bv], utt.verify_grad(lambda a, b: join(1, a, b), [av, bv],
eps=1.0e-4, rel_tol=1.0e-3) eps=1.0e-4, rel_tol=1.0e-3, mode=self.mode)
def test_join_matrixV(self): def test_join_matrixV(self):
"""variable join axis""" """variable join axis"""
...@@ -3294,8 +3294,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3294,8 +3294,8 @@ class T_Join_and_Split(unittest.TestCase):
got = f(1) got = f(1)
self.assertTrue((got == want).all(), (got, want)) self.assertTrue((got == want).all(), (got, want))
utt.verify_grad(lambda a, b: join(0, a, b), [v, 2 * v]) utt.verify_grad(lambda a, b: join(0, a, b), [v, 2 * v], mode=self.mode)
utt.verify_grad(lambda a, b: join(1, a, b), [v, 2 * v]) utt.verify_grad(lambda a, b: join(1, a, b), [v, 2 * v], mode=self.mode)
def test_vector_len(self): def test_vector_len(self):
x = lscalar('x') x = lscalar('x')
...@@ -3344,7 +3344,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3344,7 +3344,8 @@ class T_Join_and_Split(unittest.TestCase):
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo if isinstance(node.op, self.join_op)]
f() f()
utt.verify_grad((lambda a, b: join(1, a, b)), [a_val, b_val], rng=rng) utt.verify_grad((lambda a, b: join(1, a, b)), [a_val, b_val], rng=rng,
mode=self.mode)
# Should raise an error if dimension 0 does not match # Should raise an error if dimension 0 does not match
a.set_value(rng.rand(2, 4, 1).astype(self.floatX)) a.set_value(rng.rand(2, 4, 1).astype(self.floatX))
...@@ -3370,7 +3371,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3370,7 +3371,8 @@ class T_Join_and_Split(unittest.TestCase):
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo if isinstance(node.op, self.join_op)]
f() f()
utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng) utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng,
mode=self.mode)
# Should raise an error if b_val.shape[0] is not 1 # Should raise an error if b_val.shape[0] is not 1
# We can't set the value| # We can't set the value|
self.assertRaises(TypeError, b.set_value, self.assertRaises(TypeError, b.set_value,
...@@ -3402,7 +3404,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3402,7 +3404,8 @@ class T_Join_and_Split(unittest.TestCase):
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo if isinstance(node.op, self.join_op)]
f() f()
utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng) utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng,
mode=self.mode)
def test_broadcastable_single_input_broadcastable_dimension(self): def test_broadcastable_single_input_broadcastable_dimension(self):
# Test that all broadcastable flags are preserved by a # Test that all broadcastable flags are preserved by a
...@@ -3422,7 +3425,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3422,7 +3425,8 @@ class T_Join_and_Split(unittest.TestCase):
node.op, self.join_op)] node.op, self.join_op)]
f() f()
utt.verify_grad((lambda a: join(0, a)), [a_val], rng=rng) utt.verify_grad((lambda a: join(0, a)), [a_val], rng=rng,
mode=self.mode)
# Should raise an error if length of dimension 0 is not 1 # Should raise an error if length of dimension 0 is not 1
self.assertRaises(TypeError, a.set_value, self.assertRaises(TypeError, a.set_value,
rng.rand(2, 4, 1).astype(self.floatX)) rng.rand(2, 4, 1).astype(self.floatX))
...@@ -3458,7 +3462,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3458,7 +3462,8 @@ class T_Join_and_Split(unittest.TestCase):
e_val = rng.rand(1, 1, 1, 1, 2, 1).astype(self.floatX) e_val = rng.rand(1, 1, 1, 1, 2, 1).astype(self.floatX)
f(a_val, b_val, c_val, d_val, e_val) f(a_val, b_val, c_val, d_val, e_val)
utt.verify_grad((lambda a, b, c, d, e: join(0, a, b, c, d, e)), utt.verify_grad((lambda a, b, c, d, e: join(0, a, b, c, d, e)),
[a_val, b_val, c_val, d_val, e_val], rng=rng) [a_val, b_val, c_val, d_val, e_val], rng=rng,
mode=self.mode)
# Should raise an error if length of dimension 0 is not 1 # Should raise an error if length of dimension 0 is not 1
bad_val = rng.rand(2, 1, 1, 1, 2, 1).astype(self.floatX) bad_val = rng.rand(2, 1, 1, 1, 2, 1).astype(self.floatX)
self.assertRaises(TypeError, f, bad_val, b_val, c_val, d_val, e_val) self.assertRaises(TypeError, f, bad_val, b_val, c_val, d_val, e_val)
......
...@@ -646,7 +646,14 @@ theano.compile.register_shape_i_c_code( ...@@ -646,7 +646,14 @@ theano.compile.register_shape_i_c_code(
%(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0); %(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s]; ((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""", """,
version=2) """
if (%(i)s>=PyArray_NDIM(%(iname)s)){
PyErr_SetString(PyExc_TypeError,
"Number of dimensions lower than expected");
%(fail)s
}
""",
version=3)
# Register TensorType C code for DeepCopyOp # Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论