提交 6c23f17d authored 作者: Dustin Webb's avatar Dustin Webb

Generalized the test code to work for both the CPU and GPU implementations.

There is still one problem in the tests to work out, though, so this is not ready to merge.
上级 5b6dd257
...@@ -256,9 +256,23 @@ class GpuElemwise(GpuOp): ...@@ -256,9 +256,23 @@ class GpuElemwise(GpuOp):
_inputs = [as_cuda_ndarray_variable(i) for i in inputs] _inputs = [as_cuda_ndarray_variable(i) for i in inputs]
if self.nin > 0 and len(_inputs) != self.nin: if self.nin > 0 and len(_inputs) != self.nin:
raise TypeError('Wrong argument count', (self.nin, len(_inputs))) raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
for i in _inputs[1:]:
if i.type.ndim != inputs[0].type.ndim: target_length = max([input.type.ndim for input in _inputs])
raise TypeError('different ranks among inputs')
args = []
for input in _inputs:
length = input.type.ndim
difference = target_length - length
if not difference:
args.append(input)
else:
# TODO: use LComplete instead
args.append(GpuDimShuffle(
input.type.broadcastable,
['x'] * difference + range(length)
)(input))
_inputs = args
# output is broadcastable only along dimensions where all # output is broadcastable only along dimensions where all
# inputs are broadcastable # inputs are broadcastable
...@@ -303,7 +317,7 @@ class GpuDimShuffle(GpuOp): ...@@ -303,7 +317,7 @@ class GpuDimShuffle(GpuOp):
def __init__(self, input_broadcastable, new_order): def __init__(self, input_broadcastable, new_order):
input_broadcastable = tuple(input_broadcastable) input_broadcastable = tuple(input_broadcastable)
self.input_broadcastable = input_broadcastable self.input_broadcastable = input_broadcastable
self.new_order = new_order self.new_order = tuple(new_order)
for i, b in enumerate(input_broadcastable): for i, b in enumerate(input_broadcastable):
if i not in new_order: if i not in new_order:
...@@ -351,8 +365,7 @@ class GpuDimShuffle(GpuOp): ...@@ -351,8 +365,7 @@ class GpuDimShuffle(GpuOp):
# Both case are good. # Both case are good.
ob = [] ob = []
if not isinstance(input.type, CudaNdarrayType): if not isinstance(input.type, CudaNdarrayType):
raise TypeError("The input of a GpuDimshuffle must" input = as_cuda_ndarray_variable(input)
" be a CudaNdarray")
for value in self.new_order: for value in self.new_order:
if value == 'x': if value == 'x':
ob.append(True) ob.append(True)
...@@ -3246,9 +3259,7 @@ class GpuAlloc(GpuOp): ...@@ -3246,9 +3259,7 @@ class GpuAlloc(GpuOp):
v = as_cuda_ndarray_variable(value) v = as_cuda_ndarray_variable(value)
sh = [tensor.as_tensor_variable(s) for s in shape] sh = [tensor.as_tensor_variable(s) for s in shape]
if v.ndim != len(shape): if v.ndim != len(shape):
raise TypeError( value = tensor.shape_padleft(value, len(shape) - v.ndim)
'GpuAlloc requires value of same dimensions as shape',
value, len(shape))
bcast = [] bcast = []
for s in sh: for s in sh:
......
...@@ -1814,7 +1814,7 @@ gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op( ...@@ -1814,7 +1814,7 @@ gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op(
optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75, optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75,
'fast_run', 'inplace', 'gpu_inplace') 'fast_run', 'inplace', 'gpu_inplace')
tensor.opt.register_specialize_device(tensor.opt.local_shape_to_shape_i) register_opt()(tensor.opt.local_shape_to_shape_i)
gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])( gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])(
tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle) tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle)
) )
...@@ -1847,8 +1847,8 @@ def local_gpualloc(node): ...@@ -1847,8 +1847,8 @@ def local_gpualloc(node):
val = node.inputs[0] val = node.inputs[0]
shp = node.inputs[1:] shp = node.inputs[1:]
old_out = node.outputs[0] old_out = node.outputs[0]
val2 = tensor.shape_padleft(val, len(shp) - val.ndim) new_out = host_from_gpu(gpu_alloc(val, *shp))
new_out = host_from_gpu(gpu_alloc(val2, *shp))
# Sigh. it's an annoying thing about theano # Sigh. it's an annoying thing about theano
# that you can't add information to the graph. # that you can't add information to the graph.
# If for some reason it has come to light that # If for some reason it has come to light that
......
...@@ -10,6 +10,7 @@ import theano ...@@ -10,6 +10,7 @@ import theano
from theano.compile.pfunc import pfunc from theano.compile.pfunc import pfunc
from theano import config, tensor from theano import config, tensor
import theano.tensor.tests.test_nlinalg import theano.tensor.tests.test_nlinalg
import theano.tensor.tests.test_opt as test_opt
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -87,16 +88,34 @@ def test_gpualloc(): ...@@ -87,16 +88,34 @@ def test_gpualloc():
assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l]) assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l])
class Test_local_elemwise_alloc(unittest.TestCase): class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
dtype = config.floatX dtype = 'float32'
def setUp(self): def setUp(self):
self.vec = tensor.vector('vec', dtype=theano.config.floatX) super(Test_local_elemwise_alloc, self).setUp()
self.mat = tensor.matrix('mat', dtype=theano.config.floatX) self.fast_run_mode = mode_with_gpu
self.tens = tensor.tensor3('tens', dtype=theano.config.floatX)
#self.vec = tensor.vector('vec', dtype=dtype)
self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2) #self.mat = tensor.matrix('mat', dtype=dtype)
self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.vec.shape) #self.tens = tensor.tensor3('tens', dtype=dtype)
#self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
#self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)
self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)
self.alloc_w_dep_tens = basic_ops.gpu_alloc(
self.vec,
self.tens.shape[0],
self.tens.shape[1]
)
self.tv_wo_dep = basic_ops.gpu_alloc(self.vec, 5, 5)
self.tm_wo_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
self.s = tensor.iscalar('s')
self.tv_w_dep = basic_ops.gpu_alloc(self.vec, self.s, self.s)
self.tm_w_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
self.row = tensor.row(dtype=self.dtype)
self.o = basic_ops.gpu_alloc(self.row, 5, 5)
def _verify_alloc_count(self, f, count): def _verify_alloc_count(self, f, count):
assert( assert(
...@@ -112,150 +131,6 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -112,150 +131,6 @@ class Test_local_elemwise_alloc(unittest.TestCase):
if elem.op is not None]) == count if elem.op is not None]) == count
) )
def test_remove_alloc_wo_dimshuffle(self):
# No optimization on alloc
from theano.printing import debugprint as dp
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on alloc with assert
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep + self.mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
# No optimization on alloc without assert
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on alloc without assert
temp_val = theano.config.experimental.local_alloc_elemwise_assert
theano.config.experimental.local_alloc_elemwise_assert = False
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep + self. mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0)
theano.config.experimental.local_alloc_elemwise_assert = temp_val
def test_remove_alloc_w_dimshuffle(self):
# No optimization on dimshuffle with assert
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep.dimshuffle(0, 'x') + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle with assert
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep.dimshuffle(0, 'x') + self.mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
# No optimization on dimshuffle without assert
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep.dimshuffle(0, 'x') + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle without assert
temp_val = theano.config.experimental.local_alloc_elemwise_assert
theano.config.experimental.local_alloc_elemwise_assert = False
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep + self. mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0)
theano.config.experimental.local_alloc_elemwise_assert = temp_val
def test_multi_input_single_alloc(self):
# No optimization on dimshuffle with assert
tv = basic_ops.gpu_alloc(self.vec, 5)
tm = basic_ops.gpu_alloc(self.mat, 5, 5)
func = theano.function(
[self.vec, self.mat],
tv + tm,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle with assert
func = theano.function(
[self.vec, self.mat],
tv + tm,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1)
# No optimization on dimshuffle without assert
s = tensor.iscalar('s')
#tv = tensor.alloc(self.vec, s, s)
#tm = tensor.alloc(self.mat, 5, 5, 5)
tv = basic_ops.gpu_alloc(self.vec, s)
tm = basic_ops.gpu_alloc(self.mat, 5, 5)
func = theano.function(
[self.vec, self.mat, s],
tv + tm,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle without assert
temp_val = theano.config.experimental.local_alloc_elemwise_assert
theano.config.experimental.local_alloc_elemwise_assert = False
func = theano.function(
[self.vec, self.mat, s],
tv + tm,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
theano.config.experimental.local_alloc_elemwise_assert = temp_val
def test_error(self):
t3fft = theano.tensor.tensor(dtype=self.dtype,
broadcastable=(False, False, True))
row = theano.tensor.row(dtype=self.dtype)
o = basic_ops.gpu_alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
func = theano.function(
[t3fft, row],
o,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
d = numpy.random.rand(5, 5, 1).astype(self.dtype)
r = numpy.random.rand(1, 5).astype(self.dtype)
func(d, r)
def test_alloc_memset_0(): def test_alloc_memset_0():
i = tensor.iscalar() i = tensor.iscalar()
......
...@@ -2767,12 +2767,27 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2767,12 +2767,27 @@ class Test_local_elemwise_alloc(unittest.TestCase):
dtype = config.floatX dtype = config.floatX
def setUp(self): def setUp(self):
self.vec = T.vector('vec', dtype=theano.config.floatX) self.fast_compile_mode = 'FAST_COMPILE'
self.mat = T.matrix('mat', dtype=theano.config.floatX) self.fast_run_mode = 'FAST_RUN'
self.tens = T.tensor3('tens', dtype=theano.config.floatX)
self.vec = T.vector('vec', dtype=self.dtype)
self.mat = T.matrix('mat', dtype=self.dtype)
self.tens = T.tensor3('tens', dtype=self.dtype)
self.alloc_wo_dep = T.alloc(self.vec, 2, 2) self.alloc_wo_dep = T.alloc(self.vec, 2, 2)
self.alloc_w_dep = T.alloc(self.vec, *self.mat.shape) self.alloc_w_dep = T.alloc(self.vec, *self.mat.shape)
self.alloc_w_dep_tens = T.alloc(
self.vec,
self.tens.shape[0],
self.tens.shape[1]
)
self.tv_wo_dep = T.alloc(self.vec, 5, 5)
self.tm_wo_dep = T.alloc(self.mat, 5, 5, 5)
self.s = T.iscalar('s')
self.tv_w_dep = T.alloc(self.vec, self.s, self.s)
self.tm_w_dep = T.alloc(self.mat, 5, 5, 5)
self.row = theano.tensor.row(dtype=self.dtype)
self.o = T.alloc(self.row, 5, 5)
def _verify_alloc_count(self, f, count): def _verify_alloc_count(self, f, count):
assert( assert(
...@@ -2793,7 +2808,7 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2793,7 +2808,7 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function( func = function(
[self.vec, self.mat], [self.vec, self.mat],
self.alloc_wo_dep + self.mat, self.alloc_wo_dep + self.mat,
mode='FAST_COMPILE' mode=self.fast_compile_mode
) )
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
...@@ -2802,8 +2817,9 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2802,8 +2817,9 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function( func = function(
[self.vec, self.mat], [self.vec, self.mat],
self.alloc_wo_dep + self.mat, self.alloc_wo_dep + self.mat,
mode='FAST_RUN' mode=self.fast_run_mode
) )
from theano.printing import debugprint as dp
self._verify_alloc_count(func, 0) self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1) self._verify_assert_count(func, 1)
...@@ -2811,7 +2827,7 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2811,7 +2827,7 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function( func = function(
[self.vec, self.mat], [self.vec, self.mat],
self.alloc_w_dep + self.mat, self.alloc_w_dep + self.mat,
mode='FAST_COMPILE' mode=self.fast_compile_mode
) )
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
...@@ -2820,7 +2836,7 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2820,7 +2836,7 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function( func = function(
[self.vec, self.mat], [self.vec, self.mat],
self.alloc_w_dep + self. mat, self.alloc_w_dep + self. mat,
mode='FAST_RUN' mode=self.fast_run_mode
) )
self._verify_alloc_count(func, 0) self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
...@@ -2829,8 +2845,9 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2829,8 +2845,9 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# No optimization on dimshuffle with assert # No optimization on dimshuffle with assert
func = function( func = function(
[self.vec, self.tens], [self.vec, self.tens],
T.alloc(self.vec, 2, 2).dimshuffle(0, 1, 'x') + self.tens, self.alloc_wo_dep.dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_COMPILE' #T.alloc(self.vec, 2, 2).dimshuffle(0, 1, 'x') + self.tens,
mode=self.fast_compile_mode
) )
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
...@@ -2838,8 +2855,9 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2838,8 +2855,9 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# Optimization on dimshuffle with assert # Optimization on dimshuffle with assert
func = function( func = function(
[self.vec, self.tens], [self.vec, self.tens],
T.alloc(self.vec, 2, 2).dimshuffle(0, 1, 'x') + self.tens, #T.alloc(self.vec, 2, 2).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_RUN' self.alloc_wo_dep.dimshuffle(0, 1, 'x') + self.tens,
mode=self.fast_run_mode
) )
self._verify_alloc_count(func, 0) self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1) self._verify_assert_count(func, 1)
...@@ -2847,12 +2865,8 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2847,12 +2865,8 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# No optimization on dimshuffle without assert # No optimization on dimshuffle without assert
func = function( func = function(
[self.vec, self.tens], [self.vec, self.tens],
T.alloc( self.alloc_w_dep_tens.dimshuffle(0, 1, 'x') + self.tens,
self.vec, mode=self.fast_compile_mode
self.tens.shape[0],
self.tens.shape[1]
).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_COMPILE'
) )
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
...@@ -2860,52 +2874,51 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2860,52 +2874,51 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# Optimization on dimshuffle without assert # Optimization on dimshuffle without assert
func = function( func = function(
[self.vec, self.tens], [self.vec, self.tens],
T.alloc( self.alloc_w_dep_tens.dimshuffle(0, 1, 'x') + self.tens,
self.vec, mode=self.fast_run_mode
self.tens.shape[0],
self.tens.shape[1]
).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_RUN'
) )
self._verify_alloc_count(func, 0) self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
def test_multi_input_single_alloc(self): def test_multi_input_single_alloc(self):
tv = T.alloc(self.vec, 5, 5) # No optimization on dimshuffle with assert
tm = T.alloc(self.mat, 5, 5, 5)
func = function( func = function(
[self.vec, self.mat], [self.vec, self.mat],
tv + tm, self.tv_wo_dep + self.tm_wo_dep,
mode='FAST_COMPILE' mode=self.fast_compile_mode
) )
self._verify_alloc_count(func, 2) self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
# Optimization on dimshuffle with assert
temp = self.tv_wo_dep + self.tm_wo_dep,
from theano.printing import debugprint as dp
import ipdb; ipdb.set_trace()
func = function( func = function(
[self.vec, self.mat], [self.vec, self.mat],
tv + tm, temp,
mode='FAST_RUN' mode=self.fast_run_mode
) )
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
s = T.iscalar('s') # No optimization on dimshuffle without assert
tv = T.alloc(self.vec, s, s) #s = T.iscalar('s')
tm = T.alloc(self.mat, 5, 5, 5) #tv = T.alloc(self.vec, s, s)
#tm = T.alloc(self.mat, 5, 5, 5)
func = function( func = function(
[self.vec, self.mat, s], [self.vec, self.mat, self.s],
tv + tm, self.tv_w_dep + self.tm_w_dep,
mode='FAST_COMPILE' mode=self.fast_compile_mode
) )
self._verify_alloc_count(func, 2) self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0) self._verify_assert_count(func, 0)
# Optimization on dimshuffle without assert
func = function( func = function(
[self.vec, self.mat, s], [self.vec, self.mat, self.s],
tv + tm, self.tv_w_dep + self.tm_w_dep,
mode='FAST_RUN' mode=self.fast_run_mode
) )
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1) self._verify_assert_count(func, 1)
...@@ -2913,12 +2926,13 @@ class Test_local_elemwise_alloc(unittest.TestCase): ...@@ -2913,12 +2926,13 @@ class Test_local_elemwise_alloc(unittest.TestCase):
def test_error(self): def test_error(self):
t3fft = theano.tensor.tensor(dtype=self.dtype, t3fft = theano.tensor.tensor(dtype=self.dtype,
broadcastable=(False, False, True)) broadcastable=(False, False, True))
row = theano.tensor.row(dtype=self.dtype) #row = theano.tensor.row(dtype=self.dtype)
o = T.alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft #o = T.alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
o = self.o.dimshuffle(0, 1, 'x') + t3fft
func = function( func = function(
[t3fft, row], [t3fft, self.row],
o, o,
mode='FAST_RUN' mode=self.fast_run_mode
) )
self._verify_alloc_count(func, 0) self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1) self._verify_assert_count(func, 1)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论