提交 c500bafa authored 作者: Frederic's avatar Frederic

finish gpu ger tests:

-renamed TestGer_local_gemm_to_ger to TestGer -replaced the Ger.make_thunk method with Ger.perform; this makes testing the CPU and GPU Ger ops easier and follows what is done most of the time -moved the tests from TestGer_make_thunk into TestGer and used theano.function; this was needed to make them work on both CPU and GPU -made TestGer always call the compiled function
上级 6174405c
...@@ -17,7 +17,7 @@ import theano.sandbox.cuda as tcn ...@@ -17,7 +17,7 @@ import theano.sandbox.cuda as tcn
from theano.tensor.signal.downsample import DownsampleFactorMax, DownsampleFactorMaxGrad from theano.tensor.signal.downsample import DownsampleFactorMax, DownsampleFactorMaxGrad
import theano.compile.mode import theano.compile.mode
from theano.tensor.tests.test_blas import BaseGemv, TestGer_local_gemm_to_ger from theano.tensor.tests.test_blas import BaseGemv, TestGer
from theano.sandbox.cuda.blas import gpu_gemv_no_inplace, gpu_gemv_inplace from theano.sandbox.cuda.blas import gpu_gemv_no_inplace, gpu_gemv_inplace
from theano.sandbox.cuda.blas import gpu_ger_inplace, gpu_ger_no_inplace from theano.sandbox.cuda.blas import gpu_ger_inplace, gpu_ger_no_inplace
...@@ -261,10 +261,9 @@ class TestGpuGemv(TestCase, BaseGemv, ...@@ -261,10 +261,9 @@ class TestGpuGemv(TestCase, BaseGemv,
gemv_inplace = gpu_gemv_inplace gemv_inplace = gpu_gemv_inplace
class TestGpuGer(TestGer_local_gemm_to_ger): class TestGpuGer(TestGer):
def setUp(self): def setUp(self):
self.mode = theano.compile.get_default_mode().including( self.mode = mode_with_gpu
'fast_run', 'gpu')
self.mode = self.mode.excluding('c_blas') self.mode = self.mode.excluding('c_blas')
dtype = self.dtype = 'float32' # optimization isn't dtype-dependent dtype = self.dtype = 'float32' # optimization isn't dtype-dependent
self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False)) self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))
......
...@@ -282,31 +282,19 @@ class Ger(Op): ...@@ -282,31 +282,19 @@ class Ger(Op):
raise TypeError('only float and complex types supported', x.dtype) raise TypeError('only float and complex types supported', x.dtype)
return Apply(self, [A, alpha, x, y], [A.type()]) return Apply(self, [A, alpha, x, y], [A.type()])
def make_thunk(self, node, storage_map, compute_map, no_recycling): def perform(self, node, inp, out):
node_input_storage = [storage_map[r] for r in node.inputs] cA, calpha, cx, cy = inp
node_output_storage = [storage_map[r] for r in node.outputs] cZ, = out
if self.destructive:
# get vars for containers A = cA
cA, calpha, cx, cy = node_input_storage else:
cZ, = node_output_storage A = cA.copy()
if calpha != 1:
A += calpha * numpy.outer(cx, cy)
else:
A += numpy.outer(cx, cy)
cZ[0] = A
def rval():
if self.destructive:
A = cA[0]
else:
A = cA[0].copy()
if calpha[0] != 1:
A += calpha[0] * numpy.outer(cx[0], cy[0])
else:
A += numpy.outer(cx[0], cy[0])
cZ[0] = A
#TODO: If this is currently an unofficial part of the thunk API,
# then maybe it should be documented and made official?
rval.inputs = node_input_storage
rval.outputs = node_output_storage
rval.lazy = False
return rval
ger = Ger(destructive=False) ger = Ger(destructive=False)
ger_destructive = Ger(destructive=True) ger_destructive = Ger(destructive=True)
......
...@@ -200,9 +200,6 @@ class CGer(BaseBLAS, Ger): ...@@ -200,9 +200,6 @@ class CGer(BaseBLAS, Ger):
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (2,)
def make_thunk(*args, **kwargs):
# skip over Ger.make_thunk
return Op.make_thunk(*args, **kwargs)
@local_optimizer([ger, ger_destructive]) @local_optimizer([ger, ger_destructive])
def use_c_ger(node): def use_c_ger(node):
......
...@@ -1280,74 +1280,7 @@ class TestGer_OpContract(TestCase, unittest_tools.T_OpContractMixin): ...@@ -1280,74 +1280,7 @@ class TestGer_OpContract(TestCase, unittest_tools.T_OpContractMixin):
return Ger(op.destructive) return Ger(op.destructive)
class TestGer_make_thunk(TestCase): class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
def setUp(self):
self.rng = numpy.random.RandomState(unittest_tools.fetch_seed())
def given_dtype(self, dtype, M, N):
sA = T.tensor(dtype=dtype, broadcastable=(False, False))
sa = T.tensor(dtype=dtype, broadcastable=())
sx = T.tensor(dtype=dtype, broadcastable=(False,))
sy = T.tensor(dtype=dtype, broadcastable=(False,))
sZ = ger(sA, sa, sx, sy)
node = sZ.owner
storage_map = {sA:[None], sa:[None], sx:[None], sy:[None], sZ:[None]}
thunk = ger.make_thunk(node, storage_map,
compute_map={}, no_recycling=[])
# non-standard for make_thunk to receive node.op != self,
# but works for now.
thunk_d = ger_destructive.make_thunk(node, storage_map,
compute_map={}, no_recycling=[])
def rand(*shape):
return numpy.asarray(1 + self.rng.rand(*shape), dtype=dtype)
storage_map[sA][0] = rand(M, N)
storage_map[sa][0] = rand()
storage_map[sx][0] = rand(M)
storage_map[sy][0] = rand(N)
storage_map_copy = dict([(k,[deepcopy(v[0])]) for k,v in storage_map.items()])
# TODO: do some DebugMode-type verifications here
# if this can be refactored into a Mixin that does the DebugMode
# stuff on just one thunk at a time. Do it in the style of
# TestOpContractMixin?
# - Compare with Elemwise testers
thunk()
assert numpy.all(storage_map[sZ][0] ==
storage_map[sA][0] + storage_map[sa][0] *
numpy.outer(storage_map[sx][0], storage_map[sy][0]))
assert storage_map[sZ][0].dtype == dtype
assert storage_map[sZ][0].shape == (M, N)
thunk_d()
assert numpy.all(storage_map[sZ][0] !=
storage_map[sA][0] + storage_map[sa][0] *
numpy.outer(storage_map[sx][0], storage_map[sy][0]))
assert numpy.all(storage_map[sZ][0] ==
storage_map_copy[sA][0] + storage_map[sa][0] *
numpy.outer(storage_map[sx][0], storage_map[sy][0]))
assert storage_map[sZ][0].dtype == dtype
assert storage_map[sZ][0].shape == (M, N)
def test_f32_0_0(self): return self.given_dtype('float32', 0, 0)
def test_f32_1_0(self): return self.given_dtype('float32', 1, 0)
def test_f32_0_1(self): return self.given_dtype('float32', 0, 1)
def test_f32_1_1(self): return self.given_dtype('float32', 1, 1)
def test_f32_4_4(self): return self.given_dtype('float32', 4, 4)
def test_f64_4_5(self): return self.given_dtype('float64', 4, 5)
def test_c64_7_1(self): return self.given_dtype('complex64', 7, 1)
def test_c128_1_9(self): return self.given_dtype('complex128', 1, 9)
class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
def setUp(self): def setUp(self):
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
...@@ -1366,23 +1299,26 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1366,23 +1299,26 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
def tearDown(self): def tearDown(self):
theano.tensor.blas_scipy.optimizations_enabled = self.origval theano.tensor.blas_scipy.optimizations_enabled = self.origval
def function(self, inputs, outputs): def function(self, inputs, outputs, updates={}):
return theano.function(inputs, outputs, self.mode) return theano.function(inputs, outputs, self.mode, updates=updates)
def b(self, bval): def b(self, bval):
return T.as_tensor_variable(numpy.asarray(bval, dtype=self.dtype)) return T.as_tensor_variable(numpy.asarray(bval, dtype=self.dtype))
def test_b_0_triggers_ger(self): def test_b_0_triggers_ger(self):
""" test local_gemm_to_ger opt"""
assert T.blas.local_gemm_to_ger.transform( assert T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0,'x'),
self.y.dimshuffle('x', 0), self.b(0)).owner) self.y.dimshuffle('x', 0), self.b(0)).owner)
def test_b_1_triggers_ger(self): def test_b_1_triggers_ger(self):
""" test local_gemm_to_ger opt"""
assert T.blas.local_gemm_to_ger.transform( assert T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0,'x'),
self.y.dimshuffle('x', 0), self.b(1)).owner) self.y.dimshuffle('x', 0), self.b(1)).owner)
def test_b_other_does_not_triggers_ger(self): def test_b_other_does_not_triggers_ger(self):
""" test local_gemm_to_ger opt"""
assert not T.blas.local_gemm_to_ger.transform( assert not T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0,'x'),
...@@ -1391,16 +1327,24 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1391,16 +1327,24 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
def test_outer(self): def test_outer(self):
f = self.function([self.x, self.y], T.outer(self.x, self.y)) f = self.function([self.x, self.y], T.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger_destructive) self.assertFunctionContains(f, self.ger_destructive)
f(numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def test_A_plus_outer(self): def test_A_plus_outer(self):
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + T.outer(self.x, self.y)) self.A + T.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f(numpy.random.rand(5, 4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def test_A_plus_scaled_outer(self): def test_A_plus_scaled_outer(self):
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + 0.1 * T.outer(self.x, self.y)) self.A + 0.1 * T.outer(self.x, self.y))
self.assertFunctionContains(f, self.ger) self.assertFunctionContains(f, self.ger)
f(numpy.random.rand(5, 4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def test_scaled_A_plus_scaled_outer(self): def test_scaled_A_plus_scaled_outer(self):
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
...@@ -1410,3 +1354,49 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1410,3 +1354,49 @@ class TestGer_local_gemm_to_ger(TestCase, unittest_tools.TestOptimizationMixin):
# Why gemm? This make the graph simpler did we test that it # Why gemm? This make the graph simpler did we test that it
# make it faster? # make it faster?
self.assertFunctionContains(f, self.gemm) self.assertFunctionContains(f, self.gemm)
f(numpy.random.rand(5, 4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype),
numpy.random.rand(4).astype(self.dtype))
def given_dtype(self, dtype, M, N):
    """Compile ``A + 0.1 * outer(x, y)``, assert it optimizes to the
    Ger op, then execute it on random data of shapes (M, N), (M,), (N,).

    NOTE(review): the ``dtype`` argument is not used here -- the random
    data is generated with ``self.dtype`` and the graph is built from
    ``self.A/x/y``.  Confirm whether the per-dtype test names
    (test_f64_4_5, ...) are meant to rebuild the graph with ``dtype``.
    """
    expr = self.A + 0.1 * T.outer(self.x, self.y)
    f = self.function([self.A, self.x, self.y], expr)
    self.assertFunctionContains(f, self.ger)
    a_val = numpy.random.rand(M, N).astype(self.dtype)
    x_val = numpy.random.rand(M).astype(self.dtype)
    y_val = numpy.random.rand(N).astype(self.dtype)
    f(a_val, x_val, y_val)
def test_f32_0_0(self):
    """Corner case: float32 with an empty 0x0 result."""
    return self.given_dtype(dtype='float32', M=0, N=0)
def test_f32_1_0(self):
    """Corner case: float32 with a 1x0 result."""
    return self.given_dtype(dtype='float32', M=1, N=0)
def test_f32_0_1(self):
    """Corner case: float32 with a 0x1 result."""
    return self.given_dtype(dtype='float32', M=0, N=1)
def test_f32_1_1(self):
    """Corner case: float32 with a single-element 1x1 result."""
    return self.given_dtype(dtype='float32', M=1, N=1)
def test_f32_4_4(self):
    """float32 with a square 4x4 result."""
    return self.given_dtype(dtype='float32', M=4, N=4)
def test_f64_4_5(self):
    """float64 with a rectangular 4x5 result."""
    return self.given_dtype(dtype='float64', M=4, N=5)
def test_c64_7_1(self):
    """complex64 with a 7x1 result."""
    return self.given_dtype(dtype='complex64', M=7, N=1)
def test_c128_1_9(self):
    """complex128 with a 1x9 result."""
    return self.given_dtype(dtype='complex128', M=1, N=9)
def test_inplace(self):
    """Updating a shared variable with ``A + 0.1 * outer(x, y)`` should
    compile down to the destructive (in-place) Ger op, and the compiled
    function should run without error on random data.
    """
    shared_A = theano.shared(
        numpy.random.rand(4, 5).astype(self.dtype))
    alpha = T.constant(0.1, dtype=self.dtype)
    new_A = shared_A + alpha * T.outer(self.x, self.y)
    f = self.function([self.x, self.y], [], updates={shared_A: new_A})
    self.assertFunctionContains(f, self.ger_destructive)
    x_val = numpy.random.rand(4).astype(self.dtype)
    y_val = numpy.random.rand(5).astype(self.dtype)
    f(x_val, y_val)
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论