提交 923a23b1 authored 作者: Frederic's avatar Frederic

Fix crash when we generated a GPU function with too many parameters.

This is a strange case, as we respect the 256-byte limit!
上级 a62957f5
......@@ -943,9 +943,17 @@ def get_device_type_sizes():
def max_inputs_to_GpuElemwise(node):
"""
return the maximum number of input this Apply node to an GpuElemwise can accept.
This is needed as currently their is a limit of 256 bytes of paramter for the gpu function.
This mesure the number of paramter we put in our gpu function and compute the maximum number of inputs that respect the 256 bytes limits.
return the maximum number of inputs this GpuElemwise Apply node can
accept.
This is needed as currently there is a limit of 256 bytes of
parameters for the gpu function on devices of compute capability
1.x. There is a 4k-byte limit on devices of compute capability
2.x (not used).
This measures the number of parameters we put in our gpu function
and computes the maximum number of inputs that respects the 256-byte
limit.
"""
type_sizes = get_device_type_sizes()
int_size = type_sizes['int_size']
......@@ -961,6 +969,11 @@ def max_inputs_to_GpuElemwise(node):
nb_bytes_avail = argument_limit - size_param_mandatory
nb_bytes_per_inputs = (ndim*int_size) + gpu_ptr_size
max_nb_inputs = nb_bytes_avail // nb_bytes_per_inputs
# There is a case where this algorithm doesn't work. Is this related to
# the order of parameters to the gpu function?
if node.inputs[0].type.ndim==1 and max_nb_inputs>14:
return 14
return max_nb_inputs
def split_huge_add_or_mul(node):
......
......@@ -193,6 +193,41 @@ def test_huge_elemwise_fusion():
gen = lambda : theano._asarray(numpy.random.rand(*shape), dtype='float32')
f(gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen())
def gen(shape):
    """Return a float32 ndarray of the given shape, filled with uniform randoms in [0, 1)."""
    values = numpy.random.rand(*shape)
    return theano._asarray(values, dtype='float32')
max_var = 16 #excluded
for shape in [(2,),
(2,2),
(2,2,2),
(2,2,2,2),
(2,2,2,2,2), # 5d
(2,2,2,2,2,2),
# (2,2,2,2,2,2,2),
# (2,2,2,2,2,2,2,2),
# (2,2,2,1,1,1,1,2,2), # 9d
]:
vals = [cuda.shared_constructor(gen(shape)) for x in range(max_var)]
for use_tan in [True, False]:
if use_tan:
vars = [tensor.tanh(x) for x in vals]
else:
vars = vals
for nb_var in range(1, max_var):
out = reduce(lambda x, y: x+y, vars[:nb_var])
if not isinstance(out.type, CudaNdarrayType):
out = cuda.gpu_from_host(out)
f = pfunc([], [out], mode=mode_with_gpu)
topo = f.maker.env.toposort()
#print shape, nb_var, use_tan, len(topo)
assert (sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == len(topo) or
(nb_var == 1 and use_tan == False))
assert sum([isinstance(node.op, tensor.Elemwise) for node in topo]) == 0
#let debugmode catch errors
f()
def test_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论