提交 2284a814 authored 作者: Frederic's avatar Frederic 提交者: Arnaud Bergeron

Add GpuReshape in the new gpu back-end.

上级 3aedd9be
...@@ -518,3 +518,44 @@ class GpuAlloc(HideC, Alloc): ...@@ -518,3 +518,44 @@ class GpuAlloc(HideC, Alloc):
return (1,) return (1,)
gpu_alloc = GpuAlloc() gpu_alloc = GpuAlloc()
class GpuReshape(HideC, tensor.Reshape):
    """
    Implement Reshape on the gpu.
    """
    # __hash__, __eq__, __str__ come from tensor.Reshape
    def make_node(self, x, shp):
        """Build an Apply node reshaping GPU variable `x` to shape `shp`.

        The output type (dtype/broadcastable pattern) is inferred by
        reshaping on the host side, then mirrored into a GpuArrayType.
        """
        x = as_gpuarray_variable(x)
        res = host_from_gpu(x).reshape(shp, ndim=self.ndim)
        otype = GpuArrayType(dtype=res.dtype,
                             broadcastable=res.broadcastable)
        return Apply(self, [x, shp], [otype()])

    def perform(self, node, inp, out_):
        """Reshape the gpuarray `x` to `shp`, validating the shape first.

        Raises ValueError if `shp` has the wrong length, contains more
        than one -1, or is incompatible with the total size of `x`.
        """
        x, shp = inp
        out, = out_
        if len(shp) != self.ndim:
            raise ValueError('shape argument to GpuReshape.perform'
                             ' has incorrect length %i'
                             ', should be %i' % (len(shp), self.ndim), shp)
        if shp.prod() != x.size:
            # We need to do a check here to raise the same error as NumPy.
            # We should make pygpu do the same.
            ss = 1
            nb_m1 = 0
            for i in shp:
                if i == -1:
                    nb_m1 += 1
                else:
                    ss *= i
            if nb_m1 > 1:
                raise ValueError("Only one -1 is accepted in the new shape")
            elif nb_m1 == 1:
                if (x.size % ss) != 0:
                    raise ValueError("When using -1 in new shape, the computed new shape must be a multiple of the original shape.")
            else:
                raise ValueError("total size of new array must be unchanged")
        out[0] = x.reshape(tuple(shp))
...@@ -11,7 +11,7 @@ from theano.gof.python25 import all, any ...@@ -11,7 +11,7 @@ from theano.gof.python25 import all, any
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc) gpu_alloc, GpuReshape)
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar, from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduce) GpuDimShuffle, GpuCAReduce)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor from theano.sandbox.gpuarray.subtensor import GpuSubtensor
...@@ -120,6 +120,20 @@ def local_gpualloc(node): ...@@ -120,6 +120,20 @@ def local_gpualloc(node):
return gpu_alloc return gpu_alloc
@register_opt()
@op_lifter(tensor.Reshape)
def local_gpureshape(node):
    """Optimization: lift a host Reshape apply to GpuReshape.

    Returns the replacement op (the op_lifter wrapper applies it to the
    lifted inputs), or None when the node is a Reshape subclass we must
    not touch.
    """
    op = node.op
    if type(op) is not tensor.Reshape:
        return None
    name = op.name
    if name:
        # Keep the user-provided name, tagged so the GPU version is
        # identifiable in debug/profile output.
        name = 'Gpu' + name
    return GpuReshape(op.ndim, name)
@register_opt() @register_opt()
@op_lifter(tensor.Elemwise) @op_lifter(tensor.Elemwise)
def local_gpu_elemwise(node): def local_gpu_elemwise(node):
......
...@@ -7,7 +7,7 @@ import theano ...@@ -7,7 +7,7 @@ import theano
import theano.tensor as T import theano.tensor as T
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.tensor.basic import alloc from theano.tensor.basic import alloc
from theano.tensor.tests.test_basic import rand, safe_make_node from theano.tensor.tests.test_basic import rand, safe_make_node, T_reshape
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
...@@ -35,7 +35,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType, ...@@ -35,7 +35,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc, gpu_from_cuda, gpu_alloc, gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost) GpuFromHost, GpuReshape)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
utt.seed_rng() utt.seed_rng()
...@@ -44,11 +44,10 @@ rng = numpy.random.RandomState(seed=utt.fetch_seed()) ...@@ -44,11 +44,10 @@ rng = numpy.random.RandomState(seed=utt.fetch_seed())
from pygpu import gpuarray from pygpu import gpuarray
if theano.config.mode == 'FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpuarray') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpuarray'\ mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpuarray')
)
else: else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray')
...@@ -288,3 +287,22 @@ GpuAllocTester = makeTester( ...@@ -288,3 +287,22 @@ GpuAllocTester = makeTester(
bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)), bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
) )
) )
class G_reshape(T_reshape):
    # Run the generic reshape test suite against the gpuarray back-end
    # by parameterizing T_reshape with GPU shared vars, GpuReshape and
    # the GPU compilation mode.
    def shortDescription(self):
        # Return None so nose prints the test name instead of a docstring.
        return None

    def __init__(self, name):
        T_reshape.__init__(self, name,
                           shared=gpuarray_shared_constructor,
                           op=GpuReshape,
                           mode=mode_with_gpu,
                           # avoid errors with limited devices
                           # dtype='float32',
                           # ops that may legitimately appear in the
                           # compiled graph besides the reshape itself
                           ignore_topo=(HostFromGpu, GpuFromHost,
                                        theano.compile.DeepCopyOp,
                                        theano.sandbox.gpuarray.elemwise.GpuElemwise,
                                        theano.tensor.opt.Shape_i,
                                        theano.tensor.opt.MakeVector))
        assert self.op == GpuReshape
...@@ -44,7 +44,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as, ...@@ -44,7 +44,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
dtensor3, SpecifyShape, Mean, dtensor3, SpecifyShape, Mean,
itensor3, Tile, switch, Diagonal, Diag, itensor3, Tile, switch, Diagonal, Diag,
nonzero, flatnonzero, nonzero_values, nonzero, flatnonzero, nonzero_values,
stacklists) stacklists, DimShuffle)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -4204,9 +4204,30 @@ class T_op_cache(unittest.TestCase): ...@@ -4204,9 +4204,30 @@ class T_op_cache(unittest.TestCase):
self.assertTrue(numpy.all(fn_py(a) == fn_c_or_py(a))) self.assertTrue(numpy.all(fn_py(a) == fn_c_or_py(a)))
class T_reshape(unittest.TestCase): class T_reshape(utt.InferShapeTester, utt.TestOptimizationMixin):
def setUp(self): def __init__(self, name, shared=tensor._shared, op=Reshape, mode=None,
utt.seed_rng() ignore_topo=(DeepCopyOp, opt.MakeVector,
opt.Shape_i, DimShuffle, theano.tensor.Elemwise)):
self.shared = shared
self.op = op
#The tag canonicalize is needed for the shape test in FAST_COMPILE
self.mode = mode
self.ignore_topo = ignore_topo
return super(T_reshape, self).__init__(name)
    def function(self, inputs, outputs):
        # Compile with the configured mode and, unless we are in the
        # un-optimized FAST_COMPILE default, check that exactly one
        # "interesting" node (the reshape under test) remains in the
        # graph after ignoring the ops listed in self.ignore_topo.
        f = function(inputs, outputs, mode=self.mode)
        if self.mode is not None or theano.config.mode != "FAST_COMPILE":
            topo = f.maker.fgraph.toposort()
            topo_ = [node for node in topo if not isinstance(node.op,
                                                             self.ignore_topo)]
            assert len(topo_) == 1, topo_
        return f
def eval_output_and_check(self, t):
f = self.function([], t)
tval = f()
return tval
def test_reshape(self): def test_reshape(self):
a = dvector() a = dvector()
...@@ -4215,7 +4236,7 @@ class T_reshape(unittest.TestCase): ...@@ -4215,7 +4236,7 @@ class T_reshape(unittest.TestCase):
#basic to 1 dim(without list) #basic to 1 dim(without list)
c = reshape(b, as_tensor_variable(6), ndim=1) c = reshape(b, as_tensor_variable(6), ndim=1)
f = inplace_func([b], c) f = self.function([b], c)
b_val1 = numpy.asarray([[0, 1, 2], [3, 4, 5]]) b_val1 = numpy.asarray([[0, 1, 2], [3, 4, 5]])
c_val1 = numpy.asarray([0, 1, 2, 3, 4, 5]) c_val1 = numpy.asarray([0, 1, 2, 3, 4, 5])
...@@ -4231,7 +4252,7 @@ class T_reshape(unittest.TestCase): ...@@ -4231,7 +4252,7 @@ class T_reshape(unittest.TestCase):
#basic to 1 dim(with list) #basic to 1 dim(with list)
c = reshape(b, (as_tensor_variable(6),), ndim=1) c = reshape(b, (as_tensor_variable(6),), ndim=1)
f = inplace_func([b], c) f = self.function([b], c)
assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) == assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) ==
numpy.asarray([0, 1, 2, 3, 4, 5])) numpy.asarray([0, 1, 2, 3, 4, 5]))
#print f.maker.fgraph.toposort() #print f.maker.fgraph.toposort()
...@@ -4239,14 +4260,14 @@ class T_reshape(unittest.TestCase): ...@@ -4239,14 +4260,14 @@ class T_reshape(unittest.TestCase):
#basic to shape object of same ndim #basic to shape object of same ndim
c = reshape(b, d.shape) c = reshape(b, d.shape)
f = inplace_func([b, d], c) f = self.function([b, d], c)
assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]]), assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]]),
[[0, 1], [2, 3], [4, 5]]) == [[0, 1], [2, 3], [4, 5]]) ==
numpy.asarray([[0, 1], [2, 3], [4, 5]])) numpy.asarray([[0, 1], [2, 3], [4, 5]]))
#basic to 2 dims #basic to 2 dims
c = reshape(a, [2, 3]) c = reshape(a, [2, 3])
f = inplace_func([a], c) f = self.function([a], c)
assert numpy.all(f(numpy.asarray([0, 1, 2, 3, 4, 5])) == assert numpy.all(f(numpy.asarray([0, 1, 2, 3, 4, 5])) ==
numpy.asarray([[0, 1, 2], [3, 4, 5]])) numpy.asarray([[0, 1, 2], [3, 4, 5]]))
...@@ -4255,7 +4276,7 @@ class T_reshape(unittest.TestCase): ...@@ -4255,7 +4276,7 @@ class T_reshape(unittest.TestCase):
a_val_copy = numpy.asarray([0, 1, 2, 3, 4, 5]) a_val_copy = numpy.asarray([0, 1, 2, 3, 4, 5])
b_val = numpy.asarray([[0, 1, 2], [3, 4, 5]]) b_val = numpy.asarray([[0, 1, 2], [3, 4, 5]])
f_sub = inplace_func([a, b], c - b) f_sub = self.function([a, b], c - b)
assert numpy.all(f_sub(a_val, b_val) == 0.0) assert numpy.all(f_sub(a_val, b_val) == 0.0)
assert numpy.all(a_val == a_val_copy) assert numpy.all(a_val == a_val_copy)
...@@ -4264,35 +4285,33 @@ class T_reshape(unittest.TestCase): ...@@ -4264,35 +4285,33 @@ class T_reshape(unittest.TestCase):
a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64') a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float64')
b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64') b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float64')
f_sub = inplace_func([a, b], c - b) f_sub = self.function([a, b], c - b)
assert numpy.all(f_sub(a_val, b_val) == 0.0) assert numpy.all(f_sub(a_val, b_val) == 0.0)
assert numpy.all(a_val == a_val_copy) assert numpy.all(a_val == a_val_copy)
# verify gradient # verify gradient
def just_vals(v): def just_vals(v):
return Reshape(2)(v, theano._asarray([2, 3], dtype='int32')) return Reshape(2)(v, theano._asarray([2, 3], dtype='int32'))
utt.verify_grad(just_vals, [a_val]) utt.verify_grad(just_vals, [a_val], mode=self.mode)
#test infer_shape #test infer_shape
f_sub = function([a, b], (c - b).shape) self._compile_and_check([a], [c], (a_val,), self.op)
if config.mode == "FAST_COMPILE":
assert len(f_sub.maker.fgraph.toposort()) == 3
else:
topo = f_sub.maker.fgraph.toposort()
assert len(topo) == 1
topo[0].op == theano.compile.function_module.deep_copy_op
#assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5]]))==[2,3])#work in FAST_RUN, but fail on other!
#assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5],[6,7]]))==[2,3])#work in FAST_RUN, but fail on other!
# test broadcast flag for constant value of 1 # test broadcast flag for constant value of 1
c = reshape(b, (b.shape[0], b.shape[1], 1)) c = reshape(b, (b.shape[0], b.shape[1], 1))
f = inplace_func([b], c) f = self.function([b], c)
assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) == assert numpy.all(f(numpy.asarray([[0, 1, 2], [3, 4, 5]])) ==
numpy.asarray([[[0], [1], [2]], [[3], [4], [5]]])) numpy.asarray([[[0], [1], [2]], [[3], [4], [5]]]))
assert (f.maker.fgraph.toposort()[-2].outputs[0].type.broadcastable == assert (f.maker.fgraph.toposort()[-2].outputs[0].type.broadcastable ==
(False, False, True)) (False, False, True))
assert numpy.all(f_sub(a_val, b_val) == [2, 3]) def test_m1(self):
t = tensor3()
rng = numpy.random.RandomState(seed=utt.fetch_seed())
val = rng.uniform(size=(3, 4, 5)).astype(config.floatX)
for out in [t.reshape([-1]), t.reshape([-1, 5]),
t.reshape([5, -1]), t.reshape([5, -1, 3])]:
self._compile_and_check([t], [out], [val], self.op)
def test_reshape_long_in_shape(self): def test_reshape_long_in_shape(self):
v = dvector('v') v = dvector('v')
...@@ -4311,14 +4330,14 @@ class T_reshape(unittest.TestCase): ...@@ -4311,14 +4330,14 @@ class T_reshape(unittest.TestCase):
r = a.reshape(shapes, ndim=1) r = a.reshape(shapes, ndim=1)
z = zeros_like(r) z = zeros_like(r)
f = function([a, shapes], z.shape) f = self.function([a, shapes], z.shape)
self.assertRaises(ValueError, f, a_val, [13]) self.assertRaises(ValueError, f, a_val, [13])
#Test reshape to 2 dim #Test reshape to 2 dim
r = a.reshape(shapes, ndim=2) r = a.reshape(shapes, ndim=2)
z = zeros_like(r) z = zeros_like(r)
f = function([a, shapes], z.shape) f = self.function([a, shapes], z.shape)
self.assertRaises(ValueError, f, a_val, [-1, 5]) self.assertRaises(ValueError, f, a_val, [-1, 5])
self.assertRaises(ValueError, f, a_val, [7, -1]) self.assertRaises(ValueError, f, a_val, [7, -1])
......
...@@ -182,7 +182,10 @@ class InferShapeTester(unittest.TestCase): ...@@ -182,7 +182,10 @@ class InferShapeTester(unittest.TestCase):
def setUp(self): def setUp(self):
seed_rng() seed_rng()
# Take into account any mode that may be defined in a child class # Take into account any mode that may be defined in a child class
mode = getattr(self, 'mode', theano.compile.get_default_mode()) # and it can be None
mode = getattr(self, 'mode', None)
if mode is None:
mode = theano.compile.get_default_mode()
# This mode seems to be the minimal one including the shape_i # This mode seems to be the minimal one including the shape_i
# optimizations, if we don't want to enumerate them explicitly. # optimizations, if we don't want to enumerate them explicitly.
self.mode = mode.including("canonicalize") self.mode = mode.including("canonicalize")
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论