Commit 6c23f17d authored by Dustin Webb

Generalized the test code to work for both the CPU and GPU implementations.

There is still one problem to work out in the tests, so this is not ready to merge.
Parent 5b6dd257
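The test generalization follows a simple pattern: the GPU test case inherits from the CPU test case (theano.tensor.tests.test_opt.Test_local_elemwise_alloc) and overrides only the fixtures and the compilation mode, so each test method is written once and runs on both backends. A minimal sketch of that pattern, with illustrative class names rather than the actual ones in the diff:

    import unittest
    import theano
    from theano import tensor

    class CpuCase(unittest.TestCase):
        def setUp(self):
            # Fixtures and mode are attributes so a subclass can swap them out.
            self.vec = tensor.vector('vec')
            self.alloc = tensor.alloc(self.vec, 2, 2)
            self.mode = theano.compile.mode.get_default_mode()

        def test_sum(self):
            f = theano.function([self.vec], self.alloc.sum(), mode=self.mode)
            assert f([1.0, 2.0]) == 6.0

    class GpuCase(CpuCase):
        # Inherits every test method; only the fixtures/mode change here.
        def setUp(self):
            super(GpuCase, self).setUp()
            self.mode = self.mode.including('gpu')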
@@ -256,9 +256,23 @@ class GpuElemwise(GpuOp):
_inputs = [as_cuda_ndarray_variable(i) for i in inputs]
if self.nin > 0 and len(_inputs) != self.nin:
raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
for i in _inputs[1:]:
if i.type.ndim != inputs[0].type.ndim:
raise TypeError('different ranks among inputs')
target_length = max([input.type.ndim for input in _inputs])
args = []
for input in _inputs:
length = input.type.ndim
difference = target_length - length
if not difference:
args.append(input)
else:
# TODO: use LComplete instead
args.append(GpuDimShuffle(
input.type.broadcastable,
['x'] * difference + range(length)
)(input))
_inputs = args
# output is broadcastable only along dimensions where all
# inputs are broadcastable
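The new make_node logic above no longer requires all inputs to have the same rank; lower-rank inputs are left-padded with broadcastable dimensions (the 'x' entries in the dimshuffle pattern) until they match the largest ndim, which is the same alignment rule NumPy uses for broadcasting. A rough NumPy rendering of that padding step, where pad_to_ndim is an illustrative helper and not part of the diff:

    import numpy

    def pad_to_ndim(a, target_ndim):
        # Prepend length-1 (broadcastable) axes, like dimshuffle(('x',) * diff + axes).
        return a.reshape((1,) * (target_ndim - a.ndim) + a.shape)

    m = numpy.arange(6.).reshape(2, 3)   # ndim 2
    v = numpy.arange(3.)                 # ndim 1
    assert (pad_to_ndim(v, 2) + m).shape == (2, 3)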
@@ -303,7 +317,7 @@ class GpuDimShuffle(GpuOp):
def __init__(self, input_broadcastable, new_order):
input_broadcastable = tuple(input_broadcastable)
self.input_broadcastable = input_broadcastable
self.new_order = new_order
self.new_order = tuple(new_order)
for i, b in enumerate(input_broadcastable):
if i not in new_order:
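Converting new_order to a tuple is presumably about hashability: the order takes part in comparing and caching the op (dicts, sets, graph merging), and a list cannot be hashed while a tuple can:

    order = [0, 'x', 1]
    # hash(order)        # TypeError: unhashable type: 'list'
    hash(tuple(order))    # works, so the op's properties stay hashable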
@@ -351,8 +365,7 @@ class GpuDimShuffle(GpuOp):
# Both cases are good.
ob = []
if not isinstance(input.type, CudaNdarrayType):
raise TypeError("The input of a GpuDimshuffle must"
" be a CudaNdarray")
input = as_cuda_ndarray_variable(input)
for value in self.new_order:
if value == 'x':
ob.append(True)
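The loop above derives the output broadcastable pattern from new_order: an 'x' entry adds a new dimension that is always broadcastable, and an integer entry copies the flag of the corresponding input dimension. As a small standalone function (illustrative only):

    def dimshuffle_broadcastable(input_broadcastable, new_order):
        ob = []
        for value in new_order:
            if value == 'x':
                ob.append(True)                        # new axis: always broadcastable
            else:
                ob.append(input_broadcastable[value])  # keep the input's flag
        return tuple(ob)

    assert dimshuffle_broadcastable((False, True), ('x', 0, 1)) == (True, False, True)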
@@ -3246,9 +3259,7 @@ class GpuAlloc(GpuOp):
v = as_cuda_ndarray_variable(value)
sh = [tensor.as_tensor_variable(s) for s in shape]
if v.ndim != len(shape):
raise TypeError(
'GpuAlloc requires value of same dimensions as shape',
value, len(shape))
value = tensor.shape_padleft(value, len(shape) - v.ndim)
bcast = []
for s in sh:
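With the change above, GpuAlloc.make_node accepts a value that has fewer dimensions than the requested shape, padding it on the left with broadcastable dimensions the same way tensor.alloc does on the CPU. shape_padleft only adds leading broadcastable axes; for example:

    from theano import tensor

    v = tensor.vector('v', dtype='float32')   # ndim 1
    padded = tensor.shape_padleft(v, 1)       # ndim 2, leading axis broadcastable
    assert padded.broadcastable == (True, False)
    # gpu_alloc(padded, 4, 3) can then broadcast the row to a (4, 3) output.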
@@ -1814,7 +1814,7 @@ gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op(
optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75,
'fast_run', 'inplace', 'gpu_inplace')
tensor.opt.register_specialize_device(tensor.opt.local_shape_to_shape_i)
register_opt()(tensor.opt.local_shape_to_shape_i)
gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])(
tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle)
)
@@ -1847,8 +1847,8 @@ def local_gpualloc(node):
val = node.inputs[0]
shp = node.inputs[1:]
old_out = node.outputs[0]
val2 = tensor.shape_padleft(val, len(shp) - val.ndim)
new_out = host_from_gpu(gpu_alloc(val2, *shp))
new_out = host_from_gpu(gpu_alloc(val, *shp))
# Sigh. it's an annoying thing about theano
# that you can't add information to the graph.
# If for some reason it has come to light that
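Because the padding now happens inside GpuAlloc.make_node, the local_gpualloc optimizer above can hand the value straight to gpu_alloc; the padded and unpadded forms should build equivalent graphs. A quick equivalence check one might run on a CUDA-enabled build (variable names are illustrative):

    import numpy
    import theano
    from theano import tensor
    from theano.sandbox.cuda import basic_ops

    v = tensor.vector('v', dtype='float32')
    a = basic_ops.gpu_alloc(v, 4, 3)
    b = basic_ops.gpu_alloc(tensor.shape_padleft(v, 1), 4, 3)
    f = theano.function([v], [basic_ops.host_from_gpu(a),
                              basic_ops.host_from_gpu(b)])
    x, y = f(numpy.arange(3, dtype='float32'))
    assert numpy.allclose(x, y)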
@@ -10,6 +10,7 @@ import theano
from theano.compile.pfunc import pfunc
from theano import config, tensor
import theano.tensor.tests.test_nlinalg
import theano.tensor.tests.test_opt as test_opt
from theano.tests import unittest_tools as utt
@@ -87,16 +88,34 @@ def test_gpualloc():
assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l])
class Test_local_elemwise_alloc(unittest.TestCase):
dtype = config.floatX
class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
dtype = 'float32'
def setUp(self):
self.vec = tensor.vector('vec', dtype=theano.config.floatX)
self.mat = tensor.matrix('mat', dtype=theano.config.floatX)
self.tens = tensor.tensor3('tens', dtype=theano.config.floatX)
self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2)
self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.vec.shape)
super(Test_local_elemwise_alloc, self).setUp()
self.fast_run_mode = mode_with_gpu
self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)
self.alloc_w_dep_tens = basic_ops.gpu_alloc(
self.vec,
self.tens.shape[0],
self.tens.shape[1]
)
self.tv_wo_dep = basic_ops.gpu_alloc(self.vec, 5, 5)
self.tm_wo_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
self.s = tensor.iscalar('s')
self.tv_w_dep = basic_ops.gpu_alloc(self.vec, self.s, self.s)
self.tm_w_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
self.row = tensor.row(dtype=self.dtype)
self.o = basic_ops.gpu_alloc(self.row, 5, 5)
def _verify_alloc_count(self, f, count):
assert(
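The _verify_alloc_count / _verify_assert_count helpers simply count how many nodes of a given op type remain in the compiled graph. The general recipe looks roughly like this (count_ops is an illustrative name):

    def count_ops(f, op_class):
        # Walk the optimized graph of a compiled theano function.
        return sum(1 for node in f.maker.fgraph.toposort()
                   if isinstance(node.op, op_class))

    # After compiling func with the GPU optimizations enabled, e.g.:
    #   assert count_ops(func, basic_ops.GpuAlloc) == 0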
@@ -112,150 +131,6 @@ class Test_local_elemwise_alloc(unittest.TestCase):
if elem.op is not None]) == count
)
def test_remove_alloc_wo_dimshuffle(self):
# No optimization on alloc
from theano.printing import debugprint as dp
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on alloc with assert
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep + self.mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
# No optimization on alloc without assert
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on alloc without assert
temp_val = theano.config.experimental.local_alloc_elemwise_assert
theano.config.experimental.local_alloc_elemwise_assert = False
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep + self.mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0)
theano.config.experimental.local_alloc_elemwise_assert = temp_val
def test_remove_alloc_w_dimshuffle(self):
# No optimization on dimshuffle with assert
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep.dimshuffle(0, 'x') + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle with assert
func = theano.function(
[self.vec, self.mat],
self.alloc_wo_dep.dimshuffle(0, 'x') + self.mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
# No optimization on dimshuffle without assert
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep.dimshuffle(0, 'x') + self.mat,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle without assert
temp_val = theano.config.experimental.local_alloc_elemwise_assert
theano.config.experimental.local_alloc_elemwise_assert = False
func = theano.function(
[self.vec, self.mat],
self.alloc_w_dep + self.mat,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0)
theano.config.experimental.local_alloc_elemwise_assert = temp_val
def test_multi_input_single_alloc(self):
# No optimization on dimshuffle with assert
tv = basic_ops.gpu_alloc(self.vec, 5)
tm = basic_ops.gpu_alloc(self.mat, 5, 5)
func = theano.function(
[self.vec, self.mat],
tv + tm,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle with assert
func = theano.function(
[self.vec, self.mat],
tv + tm,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1)
# No optimization on dimshuffle without assert
s = tensor.iscalar('s')
#tv = tensor.alloc(self.vec, s, s)
#tm = tensor.alloc(self.mat, 5, 5, 5)
tv = basic_ops.gpu_alloc(self.vec, s)
tm = basic_ops.gpu_alloc(self.mat, 5, 5)
func = theano.function(
[self.vec, self.mat, s],
tv + tm,
mode='FAST_COMPILE'
)
self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle without assert
temp_val = theano.config.experimental.local_alloc_elemwise_assert
theano.config.experimental.local_alloc_elemwise_assert = False
func = theano.function(
[self.vec, self.mat, s],
tv + tm,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
theano.config.experimental.local_alloc_elemwise_assert = temp_val
def test_error(self):
t3fft = theano.tensor.tensor(dtype=self.dtype,
broadcastable=(False, False, True))
row = theano.tensor.row(dtype=self.dtype)
o = basic_ops.gpu_alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
func = theano.function(
[t3fft, row],
o,
mode=mode_with_gpu
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
d = numpy.random.rand(5, 5, 1).astype(self.dtype)
r = numpy.random.rand(1, 5).astype(self.dtype)
func(d, r)
def test_alloc_memset_0():
i = tensor.iscalar()
@@ -2767,12 +2767,27 @@ class Test_local_elemwise_alloc(unittest.TestCase):
dtype = config.floatX
def setUp(self):
self.vec = T.vector('vec', dtype=theano.config.floatX)
self.mat = T.matrix('mat', dtype=theano.config.floatX)
self.tens = T.tensor3('tens', dtype=theano.config.floatX)
self.fast_compile_mode = 'FAST_COMPILE'
self.fast_run_mode = 'FAST_RUN'
self.vec = T.vector('vec', dtype=self.dtype)
self.mat = T.matrix('mat', dtype=self.dtype)
self.tens = T.tensor3('tens', dtype=self.dtype)
self.alloc_wo_dep = T.alloc(self.vec, 2, 2)
self.alloc_w_dep = T.alloc(self.vec, *self.mat.shape)
self.alloc_w_dep_tens = T.alloc(
self.vec,
self.tens.shape[0],
self.tens.shape[1]
)
self.tv_wo_dep = T.alloc(self.vec, 5, 5)
self.tm_wo_dep = T.alloc(self.mat, 5, 5, 5)
self.s = T.iscalar('s')
self.tv_w_dep = T.alloc(self.vec, self.s, self.s)
self.tm_w_dep = T.alloc(self.mat, 5, 5, 5)
self.row = theano.tensor.row(dtype=self.dtype)
self.o = T.alloc(self.row, 5, 5)
def _verify_alloc_count(self, f, count):
assert(
@@ -2793,7 +2808,7 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function(
[self.vec, self.mat],
self.alloc_wo_dep + self.mat,
mode='FAST_COMPILE'
mode=self.fast_compile_mode
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
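Concretely, these counts say: under the fast-compile mode the Alloc stays in the graph (count 1, no Assert), while under the fast-run mode the local_elemwise_alloc optimization removes it and may insert an Assert to guard the shape assumption it made. A minimal CPU-only illustration of the same effect (exact counts depend on the installed Theano's optimizer set, so treat them as indicative):

    import theano
    from theano import tensor

    vec = tensor.vector('vec')
    mat = tensor.matrix('mat')
    out = tensor.alloc(vec, 2, 2) + mat

    slow = theano.function([vec, mat], out, mode='FAST_COMPILE')
    fast = theano.function([vec, mat], out, mode='FAST_RUN')

    def n_allocs(f):
        return sum(isinstance(node.op, tensor.basic.Alloc)
                   for node in f.maker.fgraph.toposort())

    print(n_allocs(slow), n_allocs(fast))   # typically 1 and 0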
@@ -2802,8 +2817,9 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function(
[self.vec, self.mat],
self.alloc_wo_dep + self.mat,
mode='FAST_RUN'
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
@@ -2811,7 +2827,7 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function(
[self.vec, self.mat],
self.alloc_w_dep + self.mat,
mode='FAST_COMPILE'
mode=self.fast_compile_mode
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
@@ -2820,7 +2836,7 @@ class Test_local_elemwise_alloc(unittest.TestCase):
func = function(
[self.vec, self.mat],
self.alloc_w_dep + self.mat,
mode='FAST_RUN'
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0)
@@ -2829,8 +2845,9 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# No optimization on dimshuffle with assert
func = function(
[self.vec, self.tens],
T.alloc(self.vec, 2, 2).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_COMPILE'
self.alloc_wo_dep.dimshuffle(0, 1, 'x') + self.tens,
mode=self.fast_compile_mode
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
@@ -2838,8 +2855,9 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# Optimization on dimshuffle with assert
func = function(
[self.vec, self.tens],
T.alloc(self.vec, 2, 2).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_RUN'
self.alloc_wo_dep.dimshuffle(0, 1, 'x') + self.tens,
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
@@ -2847,12 +2865,8 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# No optimization on dimshuffle without assert
func = function(
[self.vec, self.tens],
T.alloc(
self.vec,
self.tens.shape[0],
self.tens.shape[1]
).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_COMPILE'
self.alloc_w_dep_tens.dimshuffle(0, 1, 'x') + self.tens,
mode=self.fast_compile_mode
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
@@ -2860,52 +2874,51 @@ class Test_local_elemwise_alloc(unittest.TestCase):
# Optimization on dimshuffle without assert
func = function(
[self.vec, self.tens],
T.alloc(
self.vec,
self.tens.shape[0],
self.tens.shape[1]
).dimshuffle(0, 1, 'x') + self.tens,
mode='FAST_RUN'
self.alloc_w_dep_tens.dimshuffle(0, 1, 'x') + self.tens,
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 0)
def test_multi_input_single_alloc(self):
tv = T.alloc(self.vec, 5, 5)
tm = T.alloc(self.mat, 5, 5, 5)
# No optimization on dimshuffle with assert
func = function(
[self.vec, self.mat],
tv + tm,
mode='FAST_COMPILE'
self.tv_wo_dep + self.tm_wo_dep,
mode=self.fast_compile_mode
)
self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle with assert
temp = self.tv_wo_dep + self.tm_wo_dep
func = function(
[self.vec, self.mat],
tv + tm,
mode='FAST_RUN'
temp,
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 0)
s = T.iscalar('s')
tv = T.alloc(self.vec, s, s)
tm = T.alloc(self.mat, 5, 5, 5)
# No optimization on dimshuffle without assert
func = function(
[self.vec, self.mat, s],
tv + tm,
mode='FAST_COMPILE'
[self.vec, self.mat, self.s],
self.tv_w_dep + self.tm_w_dep,
mode=self.fast_compile_mode
)
self._verify_alloc_count(func, 2)
self._verify_assert_count(func, 0)
# Optimization on dimshuffle without assert
func = function(
[self.vec, self.mat, s],
tv + tm,
mode='FAST_RUN'
[self.vec, self.mat, self.s],
self.tv_w_dep + self.tm_w_dep,
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1)
@@ -2913,12 +2926,13 @@ class Test_local_elemwise_alloc(unittest.TestCase):
def test_error(self):
t3fft = theano.tensor.tensor(dtype=self.dtype,
broadcastable=(False, False, True))
row = theano.tensor.row(dtype=self.dtype)
o = T.alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
o = self.o.dimshuffle(0, 1, 'x') + t3fft
func = function(
[t3fft, row],
[t3fft, self.row],
o,
mode='FAST_RUN'
mode=self.fast_run_mode
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)