提交 7af47dd8 authored 作者: abergeron's avatar abergeron

Merge pull request #1925 from nouiz/gpuarray

GpuContiguous, tests, better opencl support
......@@ -5,6 +5,7 @@ import numpy
import theano
from theano import Op, Apply
from theano import tensor, scalar, config
from theano.gradient import grad_undefined
from theano.scalar import Scalar
from theano.tensor.basic import Alloc, Join, Split
......@@ -516,7 +517,7 @@ class CudaFromGpu(Op):
return [gpu_from_cuda(gz)]
def R_op(self, inputs, eval_points):
from theano.sandbox.cuda import CudaNdArrayType
from theano.sandbox.cuda import CudaNdarrayType
ev, = eval_points
if (isinstance(ev, CudaNdarrayType)):
return [gpu_from_cuda(ev)]
......@@ -750,6 +751,73 @@ class GpuAlloc(HideC, Alloc):
gpu_alloc = GpuAlloc()
class GpuContiguous(Op):
    """
    Always return a c contiguous output. Copy the input only if it is
    not already c contiguous.
    """
    # When the input is already contiguous, the generated C code returns
    # the input itself, so output 0 may be a view of input 0.
    view_map = {0: [0]}

    def __eq__(self, other):
        # The op has no parameters: all instances compare equal by type.
        return type(self) == type(other)

    def __hash__(self):
        # Must stay consistent with __eq__ (type-only equality).
        return hash(type(self))

    def grad(self, inputs, dout):
        """Gradient is the identity: forward the output gradient,
        moved onto the gpuarray type."""
        x, = inputs
        dout, = dout
        dout = as_gpuarray_variable(dout)
        return [dout]

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, input):
        # Output has exactly the same type (dtype, broadcastable pattern)
        # as the (gpuarray-converted) input.
        input = as_gpuarray_variable(input)
        return Apply(self, [input], [input.type()])

    def c_headers(self):
        return ['<numpy_compat.h>']

    def c_code_cache_version(self):
        return (3,)

    def c_code(self, node, name, inp, out, sub):
        """Generate C code that:
        - returns the input unchanged (as a view) when it is already
          C contiguous;
        - otherwise, if the pre-existing output is missing, has wrong
          dimensions, or is itself non-contiguous, allocates a fresh
          C-ordered copy with pygpu_copy;
        - otherwise reuses the existing output buffer via pygpu_move.
        """
        input, = inp
        z, = out
        fail = sub['fail']
        # NOTE: `str` shadows the builtin; kept as-is to preserve the
        # original code verbatim.
        str = """
        {
            if (GpuArray_IS_C_CONTIGUOUS(&(%(input)s->ga))){
                Py_XDECREF(%(z)s);
                %(z)s = %(input)s;
                Py_INCREF(%(z)s);

            } else if ((NULL == %(z)s)""" % locals()
        # One dimension-mismatch test per input dimension: any mismatch
        # forces reallocation of the output below.
        for i in xrange(len(node.inputs[0].type.broadcastable)):
            str += "\n|| (PyGpuArray_DIMS(%(input)s)[%(i)s] != PyGpuArray_DIMS(%(z)s)[%(i)s])" % locals()
        str += """
                || !GpuArray_IS_C_CONTIGUOUS(&(%(z)s->ga)))
            {
                Py_XDECREF(%(z)s);
                %(z)s = pygpu_copy(%(input)s, GA_C_ORDER);
                if (!%(z)s)
                {
                    %(fail)s;
                }
            }else if(pygpu_move(%(z)s, %(input)s) == -1) {
                %(fail)s;
            }
        }
        """ % locals()
        return str


gpu_contiguous = GpuContiguous()
class GpuReshape(HideC, tensor.Reshape):
"""
Implement Reshape on the gpu.
......@@ -769,7 +837,6 @@ class GpuReshape(HideC, tensor.Reshape):
raise ValueError('shape argument to GpuReshape.perform'
' has incorrect length %i'
', should be %i' % (len(shp), self.ndim), shp)
s = shp.prod()
if shp.prod() != x.size:
# We need to do check here to raise the same error as NumPy.
......@@ -872,7 +939,8 @@ class GpuEye(GpuKernelBase, Op):
return [out_shape]
def grad(self, inp, grads):
return [grad_undefined(self, i, inp[i]) for i in xrange(3)]
return [grad_undefined(self, i, inp[i])
for i in xrange(3)]
def __eq__(self, other):
return type(self) == type(other) and self.dtype == other.dtype
......
......@@ -31,7 +31,8 @@ from theano.sandbox.gpuarray.nnet import (
GpuSoftmaxWithBias, GpuSoftmax
)
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduceCuda)
GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY)
from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
......@@ -366,15 +367,25 @@ def local_gpua_advanced_incsubtensor(node):
def local_gpua_careduce(node):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
scalar.Maximum, scalar.Minimum)):
dev = theano.sandbox.gpuarray.init_dev.device
if dev.startswith('opencl'):
op = GpuCAReduceCPY
if node.op.scalar_op not in [scalar.add, scalar.mul]:
# We don't support yet all reduction with cpy code.
return
else:
op = GpuCAReduceCuda
x, = node.inputs
greduce = GpuCAReduceCuda(
greduce = op(
node.op.scalar_op, axis=node.op.axis,
dtype=getattr(node.op, 'dtype', None),
acc_dtype=getattr(node.op, 'acc_dtype', None))
gvar = greduce(x)
# We need to have the make node called, otherwise the mask can
# be None
if gvar.owner.op.supports_c_code([gpu_from_host(x)]):
if (op is GpuCAReduceCPY or
gvar.owner.op.supports_c_code([gpu_from_host(x)])):
return greduce
else:
# Try to make a simpler pattern based on reshaping
......@@ -407,7 +418,7 @@ def local_gpua_careduce(node):
for idx, m in enumerate(new_mask):
if m == 1:
new_axis.append(idx)
greduce = GpuCAReduceCuda(
greduce = op(
node.op.scalar_op,
axis=new_axis, reduce_mask=new_mask,
dtype=getattr(node.op, 'dtype', None),
......
......@@ -42,7 +42,8 @@ from theano.sandbox.gpuarray.basic_ops import (
gpu_from_cuda,
cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuReshape,
gpu_join, GpuJoin, GpuSplit, GpuEye)
gpu_join, GpuJoin, GpuSplit, GpuEye, gpu_contiguous)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.tests import unittest_tools as utt
utt.seed_rng()
......@@ -73,6 +74,7 @@ def may_fail(msg, EClass):
return wrapper
return test_decorator
def inplace_func(inputs, outputs, mode=None, allow_input_downcast=False,
on_unused_input='raise', name=None):
if mode is None:
......@@ -93,6 +95,7 @@ def fake_shared(value, name=None, strict=False, allow_downcast=None, **kwargs):
except TypeError:
continue
def rand_gpuarray(*shape, **kwargs):
r = rng.rand(*shape) * 2 - 1
dtype = kwargs.pop('dtype', theano.config.floatX)
......@@ -208,10 +211,10 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
def test_transfer_cpu_gpu():
a = T.fmatrix('a')
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
av = numpy.asarray(rng.rand(5, 4), dtype='float32')
gv = gpuarray.array(av)
f = theano.function([a], gpu_from_host(a))
fv = f(av)
assert GpuArrayType.values_eq(fv, gv)
......@@ -231,8 +234,8 @@ def test_transfer_strided():
av = numpy.asarray(rng.rand(5, 8), dtype='float32')
gv = gpuarray.array(av)
av = av[:,::2]
gv = gv[:,::2]
av = av[:, ::2]
gv = gv[:, ::2]
f = theano.function([a], gpu_from_host(a))
fv = f(av)
......@@ -247,7 +250,7 @@ def test_transfer_strided():
"that the tests will be run this way", ValueError)
def test_transfer_cuda_gpu():
import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False:
if cuda_ndarray.cuda_available is False:
raise SkipTest("Can't test interaction with cuda if cuda not present")
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
c = cuda_ndarray.CudaNdarrayType((False, False))('c')
......@@ -255,8 +258,8 @@ def test_transfer_cuda_gpu():
av = theano._asarray(rng.rand(5, 4), dtype='float32')
gv = gpuarray.array(av)
cv = cuda_ndarray.CudaNdarray(av)
gvs = gv[:,::-2]
cvs = cv[:,::-2]
gvs = gv[:, ::-2]
cvs = cv[:, ::-2]
f = theano.function([c], gpu_from_cuda(c))
fv = f(cv)
......@@ -324,6 +327,19 @@ def test_shape():
assert isinstance(topo[0].op, T.Shape)
def test_gpu_contiguous():
    """gpu_contiguous must yield a C-contiguous result even when fed a
    strided (non-contiguous) subtensor, and the slicing must happen on
    the GPU."""
    a = T.fmatrix('a')
    i = T.iscalar('i')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    f = theano.function([a, i], gpu_contiguous(a[::i]),
                        mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    # The slice must be computed by the GPU subtensor op, otherwise this
    # test exercises nothing GPU-related.
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])
    # i=1: input already contiguous (view path).
    assert f(a_val, 1).flags.c_contiguous
    # i=2: strided input, a contiguous copy must be made.
    # (The original repeated this assertion twice; the exact duplicate
    # line was removed.)
    assert f(a_val, 2).flags.c_contiguous
class G_reshape(T_reshape):
def shortDescription(self):
return None
......@@ -335,11 +351,11 @@ class G_reshape(T_reshape):
mode=mode_with_gpu,
# avoid errors with limited devices
# dtype='float32',
ignore_topo=(HostFromGpu, GpuFromHost,
theano.compile.DeepCopyOp,
theano.sandbox.gpuarray.elemwise.GpuElemwise,
theano.tensor.opt.Shape_i,
theano.tensor.opt.MakeVector))
ignore_topo=(HostFromGpu, GpuFromHost,
theano.compile.DeepCopyOp,
theano.sandbox.gpuarray.elemwise.GpuElemwise,
theano.tensor.opt.Shape_i,
theano.tensor.opt.MakeVector))
assert self.op == GpuReshape
......@@ -429,7 +445,8 @@ def test_hostfromgpu_shape_i():
"""
m = mode_with_gpu.including('local_dot_to_dot22',
'local_dot22_to_dot22scalar','specialize')
'local_dot22_to_dot22scalar',
'specialize')
a = T.fmatrix('a')
ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))()
av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
......
import theano
from theano import scalar, gof
from theano.gof.python25 import all, any
from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests.test_elemwise import (test_Broadcast, test_DimShuffle,
test_CAReduce, T_reduce_dtype)
......@@ -19,17 +21,32 @@ class test_gpu_Broadcast(test_Broadcast):
type = GpuArrayType
cop = GpuElemwise
ctype = GpuArrayType
# The order is important
linkers = [gof.PerformLinker, gof.CLinker]
def setUp(self):
dev = theano.sandbox.gpuarray.init_dev.device
if not dev.startswith('cuda'):
self.linkers = [gof.PerformLinker]
def rand_val(self, shp):
return rand_gpuarray(*shp, **dict(cls=gpuarray))
# no c_code() yet
#cop = GpuElemwise
#ctype = GpuArrayType
def rand_cval(self, shp):
return rand_gpuarray(*shp, **dict(cls=gpuarray))
def test_c(self):
dev = theano.sandbox.gpuarray.init_dev.device
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c()
def test_c_inplace(self):
dev = theano.sandbox.gpuarray.init_dev.device
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c_inplace()
class test_GpuDimShuffle(test_DimShuffle):
op = GpuDimShuffle
......@@ -149,7 +166,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
# ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
# ((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111
# ((5,4,3,10,11),[1,2]),
]
]
op = GpuCAReduceCuda
reds = [scalar.add, scalar.mul,
scalar.maximum, scalar.minimum]
......@@ -161,6 +178,12 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
def test_perform_nan(self):
return
def setUp(self):
super(test_GpuCAReduceCuda, self).setUp()
dev = theano.sandbox.gpuarray.init_dev.device
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests")
class T_gpureduce_dtype(T_reduce_dtype):
mode = mode_with_gpu.excluding('local_cut_useless_reduce')
......@@ -172,6 +195,11 @@ class T_gpureduce_dtype(T_reduce_dtype):
'uint8', 'uint16', 'uint32', 'uint64',
'float32', 'float64']
def setUp(self):
dev = theano.sandbox.gpuarray.init_dev.device
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests")
def speed_reduce10():
import numpy
......
......@@ -7,7 +7,8 @@ import theano.sandbox.gpuarray
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import (
GpuAlloc, GpuReshape, gpu_alloc, gpu_from_host, host_from_gpu)
from theano.sandbox.gpuarray.elemwise import GpuCAReduceCuda, GpuElemwise
from theano.sandbox.gpuarray.elemwise import (
GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise)
from theano.sandbox.gpuarray.tests.test_basic_ops import (
rand_gpuarray, mode_with_gpu, mode_without_gpu
)
......@@ -50,17 +51,26 @@ def test_flatten():
def test_reduce():
for method in ['sum', 'prod', 'max', 'min']:
dev = theano.sandbox.gpuarray.init_dev.device
for method, param in [('sum', dict(acc_dtype='float32')),
('prod', dict(acc_dtype='float32')),
('max', {}), ('min', {})]:
m = theano.tensor.fmatrix()
f = theano.function([m], getattr(m, method)(axis=0),
f = theano.function([m], getattr(m, method)(axis=0,
**param),
mode=mode_with_gpu)
val = numpy.random.rand(10, 11).astype("float32")
res = f(val)
utt.assert_allclose(res, getattr(val, method)(axis=0))
assert res.shape == (11,)
topo = f.maker.fgraph.toposort()
assert GpuCAReduceCuda in [type(node.op)
for node in topo], topo
ops = [type(node.op) for node in topo]
if dev.startswith('opencl') and method in ["max", "min"]:
assert not(GpuCAReduceCuda in ops or GpuCAReduceCPY in ops)
else:
assert GpuCAReduceCuda in ops or GpuCAReduceCPY in ops
def test_local_gpualloc_memset_0():
......
......@@ -33,3 +33,10 @@ def test_values_eq_approx():
b = a.copy()
b[0] = -numpy.asarray(b[0])
assert not GpuArrayType.values_eq_approx(a, b)
def test_specify_shape():
    """specify_shape must accept a GpuArray input whose runtime shape
    matches the declared shape, without raising."""
    vec = rand_gpuarray(20, dtype='float32')
    gpu_var = GpuArrayType(dtype='float32', broadcastable=(False,))('g')
    checked = theano.tensor.specify_shape(gpu_var, [20])
    fn = theano.function([gpu_var], checked)
    fn(vec)
......@@ -19,6 +19,7 @@ from theano.tensor.elemwise import (CAReduce, Elemwise, DimShuffle,
from theano.tests import unittest_tools
import math
def FunctionGraph(i, o):
e = gof.FunctionGraph(i, o)
return e
......@@ -46,8 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
#test that DimShuffle.infer_shape work correctly
x = TensorType('float64', ib)('x')
e = self.op(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], [e.
shape])).make_function()
f = copy(linker).accept(FunctionGraph([x],
[e.shape])).make_function()
assert all(f(numpy.ones(xsh))) == all(zsh)
# Test when we drop a axis that is not broadcastable
......@@ -100,44 +101,52 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1))
self.assertRaises(ValueError, y.eval, {x: 0})
class test_reduce_axes(unittest.TestCase):
def test_sum_axes(self):
axes = [None, 0, 1, [0, 1], numpy.array(1), [numpy.array(0), numpy.array(1)]]
axes = [None, 0, 1, [0, 1], numpy.array(1),
[numpy.array(0), numpy.array(1)]]
for a in axes:
x = tensor.matrix()
m = x.sum(a)
def test_mean_axes(self):
axes = [None, 0, 1, [0, 1], numpy.array(1), [numpy.array(0), numpy.array(1)]]
axes = [None, 0, 1, [0, 1], numpy.array(1),
[numpy.array(0), numpy.array(1)]]
for a in axes:
x = tensor.matrix()
m = x.mean(a)
def test_max_axes(self):
axes = [None, 0, 1, [0, 1], numpy.array(1), [numpy.array(0), numpy.array(1)]]
axes = [None, 0, 1, [0, 1], numpy.array(1),
[numpy.array(0), numpy.array(1)]]
for a in axes:
x = tensor.matrix()
m = x.max(a)
def test_min_axes(self):
axes = [None, 0, 1, [0, 1], numpy.array(1), [numpy.array(0), numpy.array(1)]]
axes = [None, 0, 1, [0, 1], numpy.array(1),
[numpy.array(0), numpy.array(1)]]
for a in axes:
x = tensor.matrix()
m = x.min(a)
def test_argmax_axes(self):
axes = [None, 0, 1, [0, 1], numpy.array(1), [numpy.array(0), numpy.array(1)]]
axes = [None, 0, 1, [0, 1], numpy.array(1),
[numpy.array(0), numpy.array(1)]]
for a in axes:
x = tensor.matrix()
m = x.argmax(a)
def test_var_axes(self):
axes = [None, 0, 1, [0, 1], numpy.array(1), [numpy.array(0), numpy.array(1)]]
axes = [None, 0, 1, [0, 1], numpy.array(1),
[numpy.array(0), numpy.array(1)]]
for a in axes:
x = tensor.matrix()
m = x.var(a)
class test_Broadcast(unittest.TestCase):
# this is to allow other types to reuse this class to test their ops
type = TensorType
......@@ -149,6 +158,9 @@ class test_Broadcast(unittest.TestCase):
openmp_minsize = 2*config.openmp_elemwise_minsize
openmp_minsize_sqrt = math.ceil(math.sqrt(openmp_minsize))
# The order is important if you change them.
linkers = [gof.PerformLinker, gof.CLinker]
def rand_val(self, shp):
return numpy.asarray(numpy.random.rand(*shp))
......@@ -165,7 +177,10 @@ class test_Broadcast(unittest.TestCase):
((1, 5), (5, 1)),
((1, 1), (1, 1)),
((self.openmp_minsize,), (self.openmp_minsize,)),
((self.openmp_minsize_sqrt, self.openmp_minsize_sqrt), (self.openmp_minsize_sqrt, self.openmp_minsize_sqrt)),
((self.openmp_minsize_sqrt,
self.openmp_minsize_sqrt),
(self.openmp_minsize_sqrt,
self.openmp_minsize_sqrt)),
((2, 3, 4, 5), (2, 3, 4, 5)),
((2, 3, 4, 5), (1, 3, 1, 5)),
((2, 3, 4, 5), (1, 1, 1, 1)),
......@@ -186,8 +201,8 @@ class test_Broadcast(unittest.TestCase):
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
e = op(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph([x,
y], [e.shape])).make_function()
f = copy(linker).accept(FunctionGraph(
[x, y], [e.shape])).make_function()
assert tuple(f(xv, yv)) == tuple(zv.shape)
def with_linker_inplace(self, linker, op, type, rand_val):
......@@ -216,8 +231,8 @@ class test_Broadcast(unittest.TestCase):
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph([x,
y], [e.shape])).make_function()
f = copy(linker).accept(FunctionGraph(
[x, y], [e.shape])).make_function()
xv = rand_val(xsh)
yv = rand_val(ysh)
zv = xv + yv
......@@ -250,12 +265,13 @@ class test_Broadcast(unittest.TestCase):
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0])('x')
y = self.ctype('float64', [1, 1])('y')
e = self.cop(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
f = gof.CLinker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((5, 5))
yv = self.rand_cval((1, 1))
f(xv, yv)
assert (xv == yv).all()
for linker, op in zip(self.linkers, [self.op, self.cop]):
e = op(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
f = linker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((5, 5))
yv = self.rand_cval((1, 1))
f(xv, yv)
assert (xv == yv).all()
def test_fill_var(self):
x = tensor.matrix()
......@@ -274,22 +290,24 @@ class test_Broadcast(unittest.TestCase):
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0, 0, 0, 0])('x')
y = self.ctype('float64', [0, 0, 0, 0, 0])('y')
e = self.cop(scalar.add)(x, y)
f = gof.CLinker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((2, 2, 2, 2, 2))
yv = self.rand_cval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2)
zv = xv + yv
assert (f(xv, yv) == zv).all()
for linker, op in zip(self.linkers, [self.op, self.cop]):
e = op(scalar.add)(x, y)
f = linker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((2, 2, 2, 2, 2))
yv = self.rand_cval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2)
zv = xv + yv
assert (f(xv, yv) == zv).all()
def test_same_inputs(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0])('x')
e = self.cop(scalar.add)(x, x)
f = gof.CLinker().accept(FunctionGraph([x], [e])).make_function()
xv = self.rand_cval((2, 2))
zv = xv + xv
assert (f(xv) == zv).all()
for linker, op in zip(self.linkers, [self.op, self.cop]):
e = op(scalar.add)(x, x)
f = linker().accept(FunctionGraph([x], [e])).make_function()
xv = self.rand_cval((2, 2))
zv = xv + xv
assert (f(xv) == zv).all()
class test_CAReduce(unittest_tools.InferShapeTester):
......@@ -309,7 +327,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
((5, 0), ()),
((), None),
((), ())
]
]
def with_linker(self, linker, scalar_op=scalar.add, dtype="floatX",
pre_scalar_op=None,
......@@ -429,7 +447,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
try:
f_xv = f(xv)
self.assertTrue((f_xv.shape == zv.shape), (f_xv, zv))
self.assertTrue(numpy.allclose(f_xv, zv), (f_xv, zv, xsh, tosum))
self.assertTrue(numpy.allclose(f_xv, zv),
(f_xv, zv, xsh, tosum))
except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
......@@ -553,7 +572,7 @@ class test_Prod(unittest.TestCase):
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
dtype='float32')
dtype='float32')
# now with verify_grad
unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
......@@ -568,7 +587,7 @@ class test_Prod(unittest.TestCase):
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val = numpy.asarray([[1., 2., 3.], [0., 5., 6.], [0., 0., 9.]],
dtype='float32')
dtype='float32')
x = theano.tensor.dmatrix()
# sanity check
......@@ -760,7 +779,8 @@ class T_reduce_dtype(unittest.TestCase):
).get(dtype, dtype)
f = theano.function([x], s, mode=self.mode)
topo = f.maker.fgraph.toposort()
assert [n for n in topo if isinstance(n.op, self.op)], (topo, dtype)
assert [n for n in topo if isinstance(n.op, self.op)], (topo,
dtype)
data = numpy.random.rand(3, 4) * 10
data = data.astype(dtype)
f(data)
......@@ -785,7 +805,8 @@ class T_reduce_dtype(unittest.TestCase):
).get(dtype, dtype)
f = theano.function([x], s, mode=self.mode)
topo = f.maker.fgraph.toposort()
assert [n for n in topo if isinstance(n.op, self.op)], (topo, dtype)
assert [n for n in topo if isinstance(n.op, self.op)], (topo,
dtype)
data = numpy.random.rand(3, 4) * 10
data = data.astype(dtype)
f(data)
......@@ -814,7 +835,8 @@ class T_reduce_dtype(unittest.TestCase):
f = theano.function([x], var, mode=self.mode)
topo = f.maker.fgraph.toposort()
assert [n for n in topo if isinstance(n.op, self.op)], (topo, dtype)
assert [n for n in topo if isinstance(n.op, self.op)], (topo,
dtype)
data = numpy.random.rand(3, 4) * 10
data = data.astype(input_dtype)
f(data)
......@@ -850,7 +872,8 @@ class T_reduce_dtype(unittest.TestCase):
(input_dtype in tensor.discrete_dtypes and
acc_dtype in tensor.continuous_dtypes)
):
var = getattr(x, method)(acc_dtype=acc_dtype, axis=axis)
var = getattr(x, method)(acc_dtype=acc_dtype,
axis=axis)
assert var.owner.op.acc_dtype == acc_dtype
if "complex" in input_dtype:
......@@ -873,10 +896,12 @@ class T_reduce_dtype(unittest.TestCase):
s = getattr(x, method)()
f = theano.function([], s, mode=self.mode)
topo = f.maker.fgraph.toposort()
assert [n for n in topo if isinstance(n.op, self.op)], (topo, dtype)
assert [n for n in topo if isinstance(n.op, self.op)], (topo,
dtype)
s_val = f()
# Use extra precision in NumPy to compute the good answer.
ret = getattr(numpy.asarray([1e8, 1, -1e8], dtype='float64'), method)()
ret = getattr(numpy.asarray([1e8, 1, -1e8], dtype='float64'),
method)()
assert numpy.allclose(s_val, ret), (s_val, ret)
......@@ -922,10 +947,10 @@ class T_mean_dtype(unittest.TestCase):
# Executed if no TypeError was raised
if sum_dtype in tensor.discrete_dtypes and axis != []:
assert mean_var.dtype == 'float64', (
(mean_var.dtype, sum_dtype))
(mean_var.dtype, sum_dtype))
else:
assert mean_var.dtype == sum_dtype, (
(mean_var.dtype, sum_dtype))
(mean_var.dtype, sum_dtype))
if (('complex' in input_dtype or
'complex' in sum_dtype) and
input_dtype != sum_dtype):
......@@ -970,13 +995,13 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
axis = axes[idx % len(axes)]
x = ProdWithoutZeros(axis=axis)(tensor.matrix(dtype=dtype))
assert x.dtype == dict(
int8='int64',
int16='int64',
int32='int64',
uint8='uint64',
uint16='uint64',
uint32='uint64',
).get(dtype, dtype)
int8='int64',
int16='int64',
int32='int64',
uint8='uint64',
uint16='uint64',
uint32='uint64',
).get(dtype, dtype)
def test_prod_without_zeros_default_acc_dtype(self):
"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论