提交 0bd88f12 authored 作者: David Warde-Farley's avatar David Warde-Farley

STY: minor pep8 issues on previous commit

上级 16265ccb
...@@ -967,13 +967,14 @@ def max_inputs_to_GpuElemwise(node): ...@@ -967,13 +967,14 @@ def max_inputs_to_GpuElemwise(node):
for i in node.outputs) for i in node.outputs)
nb_bytes_avail = argument_limit - size_param_mandatory nb_bytes_avail = argument_limit - size_param_mandatory
nb_bytes_per_inputs = (ndim*int_size) + gpu_ptr_size nb_bytes_per_inputs = (ndim * int_size) + gpu_ptr_size
max_nb_inputs = nb_bytes_avail // nb_bytes_per_inputs max_nb_inputs = nb_bytes_avail // nb_bytes_per_inputs
# There is a case where this algorithm doesn't work. Is this related to # There is a case where this algorithm doesn't work. Is this related to
# the order of the parameters to the gpu function? # the order of the parameters to the gpu function?
if node.inputs[0].type.ndim==1 and max_nb_inputs>14: if node.inputs[0].type.ndim == 1 and max_nb_inputs > 14:
return 14 return 14
return max_nb_inputs return max_nb_inputs
def split_huge_add_or_mul(node): def split_huge_add_or_mul(node):
......
...@@ -186,27 +186,26 @@ def test_huge_elemwise_fusion(): ...@@ -186,27 +186,26 @@ def test_huge_elemwise_fusion():
f = pfunc(vars, [vars[0]-vars[1]-vars[2]-vars[3]-vars[4]-vars[5]-vars[6]], mode=mode_with_gpu) f = pfunc(vars, [vars[0]-vars[1]-vars[2]-vars[3]-vars[4]-vars[5]-vars[6]], mode=mode_with_gpu)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
assert len(topo)==1 assert len(topo) == 1
assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo])==0 assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == 0
assert sum([isinstance(node.op, tensor.Elemwise) for node in topo])==1 assert sum([isinstance(node.op, tensor.Elemwise) for node in topo]) == 1
#let debugmode catch errors #let debugmode catch errors
gen = lambda : theano._asarray(numpy.random.rand(*shape), dtype='float32') gen = lambda: theano._asarray(numpy.random.rand(*shape), dtype='float32')
f(gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen()) f(gen(), gen(), gen(), gen(), gen(), gen(), gen(), gen(), gen(), gen())
def gen(shape): def gen(shape):
return theano._asarray(numpy.random.rand(*shape), dtype='float32') return theano._asarray(numpy.random.rand(*shape), dtype='float32')
max_var = 16 # excluded
max_var = 16 #excluded
for shape in [(2,), for shape in [(2,),
(2,2), (2, 2),
(2,2,2), (2, 2, 2),
(2,2,2,2), (2, 2, 2, 2),
(2,2,2,2,2), # 5d (2, 2, 2, 2, 2), # 5d
(2,2,2,2,2,2), (2, 2, 2, 2, 2, 2),
# (2,2,2,2,2,2,2), # (2, 2, 2, 2, 2, 2, 2),
# (2,2,2,2,2,2,2,2), # (2, 2, 2, 2, 2, 2, 2, 2),
# (2,2,2,1,1,1,1,2,2), # 9d # (2, 2, 2, 1, 1, 1, 1, 2, 2), # 9d
]: ]:
vals = [cuda.shared_constructor(gen(shape)) for x in range(max_var)] vals = [cuda.shared_constructor(gen(shape)) for x in range(max_var)]
for use_tan in [True, False]: for use_tan in [True, False]:
...@@ -215,20 +214,22 @@ def test_huge_elemwise_fusion(): ...@@ -215,20 +214,22 @@ def test_huge_elemwise_fusion():
else: else:
vars = vals vars = vals
for nb_var in range(1, max_var): for nb_var in range(1, max_var):
out = reduce(lambda x, y: x+y, vars[:nb_var]) out = reduce(lambda x, y: x + y, vars[:nb_var])
if not isinstance(out.type, CudaNdarrayType): if not isinstance(out.type, CudaNdarrayType):
out = cuda.gpu_from_host(out) out = cuda.gpu_from_host(out)
f = pfunc([], [out], mode=mode_with_gpu) f = pfunc([], [out], mode=mode_with_gpu)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
#print shape, nb_var, use_tan, len(topo) #print shape, nb_var, use_tan, len(topo)
assert (sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == len(topo) or assert (sum([isinstance(node.op, cuda.GpuElemwise)
for node in topo]) == len(topo) or
(nb_var == 1 and use_tan == False)) (nb_var == 1 and use_tan == False))
assert sum([isinstance(node.op, tensor.Elemwise) for node in topo]) == 0 assert sum([isinstance(node.op, tensor.Elemwise)
for node in topo]) == 0
#let debugmode catch errors #let debugmode catch errors
f() f()
def test_elemwise_fusion(): def test_elemwise_fusion():
""" Test that the GpuElemwise fusion works correctly""" """ Test that the GpuElemwise fusion works correctly"""
shape = (3,4) shape = (3,4)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论