提交 b0350903 authored 作者: Frederic's avatar Frederic

Fix the local_gpu_multinomial optimization's handling of broadcastable dimensions.

Reported by Caglar directly to me.
上级 1923c879
...@@ -45,7 +45,8 @@ class MultinomialFromUniform(Op): ...@@ -45,7 +45,8 @@ class MultinomialFromUniform(Op):
odtype = pvals.dtype odtype = pvals.dtype
else: else:
odtype = self.odtype odtype = self.odtype
return Apply(self, [pvals, unis], [T.matrix(dtype=odtype)]) out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
return Apply(self, [pvals, unis], [out])
def grad(self, ins, outgrads): def grad(self, ins, outgrads):
pvals, unis = ins pvals, unis = ins
...@@ -180,7 +181,9 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -180,7 +181,9 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
raise NotImplementedError( raise NotImplementedError(
'GpuMultinomialFromUniform works only if ' 'GpuMultinomialFromUniform works only if '
'self.odtype == pvals.dtype', odtype, pvals.dtype) 'self.odtype == pvals.dtype', odtype, pvals.dtype)
return Apply(self, [pvals, unis], [pvals.type()]) br = (pvals.broadcastable[1], pvals.broadcastable[0])
out = CudaNdarrayType(broadcastable=br)()
return Apply(self, [pvals, unis], [out])
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
#The perform from parent don't work with CudaNdarray. We #The perform from parent don't work with CudaNdarray. We
......
...@@ -138,3 +138,17 @@ def test_gpu_opt(): ...@@ -138,3 +138,17 @@ def test_gpu_opt():
pval = pval / pval.sum(axis=1)[:, None] pval = pval / pval.sum(axis=1)[:, None]
uval = numpy.ones_like(pval[:, 0]) * 0.5 uval = numpy.ones_like(pval[:, 0]) * 0.5
mval = f(pval, uval) mval = f(pval, uval)
# Test with a row, it was failing in the past.
r = tensor.frow()
m = multinomial.MultinomialFromUniform('auto')(r, u)
assert m.dtype == 'float32', m.dtype
m_gpu = cuda.gpu_from_host(m)
f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
for node in f.maker.fgraph.toposort()])
pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4))+0.1
pval = pval / pval.sum(axis=1)[:, None]
uval = numpy.ones_like(pval[:, 0]) * 0.5
mval2 = f(pval, uval)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论