提交 868ca5fd，作者：Pascal Lamblin

Merge pull request #1624 from nouiz/conv3d2dinplace

Conv3d2dinplace
...@@ -627,7 +627,7 @@ class GpuConv(GpuOp): ...@@ -627,7 +627,7 @@ class GpuConv(GpuOp):
out, = outputs out, = outputs
assert images[1] == kerns[1] assert images[1] == kerns[1]
flops = 0 flops = 0
if self.out_mode == "valid": if self.border_mode == "valid":
# nb mul and add by output pixel # nb mul and add by output pixel
flops = kerns[2] * kerns[3] * 2 flops = kerns[2] * kerns[3] * 2
# nb flops by output image # nb flops by output image
......
import theano import theano
from theano.compat import any from theano.compat import any
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
from theano.gof import Op, Apply from theano.gof import Op, Apply, TopoOptimizer
from theano import tensor from theano import tensor
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -300,3 +300,20 @@ def make_gpu_optimizer(op, to_gpu): ...@@ -300,3 +300,20 @@ def make_gpu_optimizer(op, to_gpu):
if cuda.cuda_available: if cuda.cuda_available:
make_gpu_optimizer(DiagonalSubtensor, [0]) make_gpu_optimizer(DiagonalSubtensor, [0])
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3]) make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
@theano.gof.local_optimizer([None])
def local_inplace_DiagonalSubtensor(node):
    """Swap a non-inplace diagonal-subtensor op for its inplace variant.

    Applies to both ``DiagonalSubtensor`` and ``IncDiagonalSubtensor``.

    :param node: graph node being visited; its ``op`` and ``inputs``
        attributes are read.
    :return: a one-element list holding the result of calling the inplace
        op on the node's inputs, or ``False`` when the node is left
        untouched (a different op type, or an op that is already inplace).
    """
    op = node.op
    if not isinstance(op, (DiagonalSubtensor, IncDiagonalSubtensor)):
        return False
    if op.inplace:
        return False
    # Rebuild from the op's own class so each op kind maps to its own
    # inplace counterpart.
    inplace_op = op.__class__(inplace=True)
    return [inplace_op(*node.inputs)]
# Register the inplace rewrite in theano.compile.optdb under the name
# 'local_inplace_DiagonalSubtensor', wrapped in a TopoOptimizer.
# TopoOptimizer.warn_inplace is supplied as the failure callback.
# NOTE(review): 60 is presumably the optdb ordering position, and
# 'fast_run' / 'inplace' the tags that select this entry -- confirm against
# the optdb.register signature.
theano.compile.optdb.register(
'local_inplace_DiagonalSubtensor',
TopoOptimizer(
local_inplace_DiagonalSubtensor,
failure_callback=TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace')
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或 登录 后发表评论