提交 082fa2c2 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #1679 from lamblin/ufunc_max_nb_inputs

Do not use python code for elemwise with >= 32 inputs
...@@ -765,6 +765,13 @@ class Elemwise(Op): ...@@ -765,6 +765,13 @@ class Elemwise(Op):
return ret return ret
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage):
if len(node.inputs) >= 32:
# Some versions of NumPy will segfault, others will raise a
# ValueError, if the number of inputs to a ufunc is 32 or more.
# In that case, the C version should be used, or Elemwise fusion
# should be disabled.
super(Elemwise, self).perform(node, inputs, output_storage)
maxsize = max(len(input.shape) for input in inputs) maxsize = max(len(input.shape) for input in inputs)
for dims in izip(*[([(1, True)] * (maxsize - len(input.shape)) for dims in izip(*[([(1, True)] * (maxsize - len(input.shape))
+ zip(input.shape, sinput.type.broadcastable)) + zip(input.shape, sinput.type.broadcastable))
......
...@@ -1181,6 +1181,20 @@ class test_fusion(unittest.TestCase): ...@@ -1181,6 +1181,20 @@ class test_fusion(unittest.TestCase):
shp = (5, 5, 5) shp = (5, 5, 5)
self.do(mode, cuda.float32_shared_constructor, shp, gpu=True) self.do(mode, cuda.float32_shared_constructor, shp, gpu=True)
def test_fusion_35inputs(self):
    # Regression check: compiling and calling a fused elemwise graph
    # built from 35 input vectors must neither segfault nor raise.
    n_inputs = 35
    inpts = vectors(['i%i' % i for i in range(n_inputs)])

    # Build the nested chain
    #   sin(i34 + sin(i33 + sin(... i1 + sin(i0) ...)))
    # by wrapping the running expression once per remaining input.
    out = tensor.sin(inpts[0])
    for inp in inpts[1:]:
        out = tensor.sin(inp + out)

    f = function(inpts, out)

    # Feed dummy data: input number k receives the four consecutive
    # integers starting at k. Only a clean run matters; the result is
    # deliberately not inspected.
    dummy_values = [range(start, 4 + start) for start in range(n_inputs)]
    f(*dummy_values)
def speed_fusion(self, shared_fn=shared, gpu=False, s=None): def speed_fusion(self, shared_fn=shared, gpu=False, s=None):
""" """
param type s: a slice object param type s: a slice object
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论