提交 ea8153b2 authored 作者: Frederic's avatar Frederic

Opt to use GpuConvMM in valid mode.

上级 57415669
...@@ -25,7 +25,7 @@ from theano.sandbox.cuda.basic_ops import ( ...@@ -25,7 +25,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape) GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape)
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar, from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar,
gpu_gemm_inplace, gpu_gemm_no_inplace, GpuConv) gpu_gemm_inplace, gpu_gemm_no_inplace, GpuConv, GpuConvMM)
from theano.sandbox.cuda.blas import gpu_gemv_inplace from theano.sandbox.cuda.blas import gpu_gemv_inplace
from theano.sandbox.cuda.blas import gpu_gemv_no_inplace from theano.sandbox.cuda.blas import gpu_gemv_no_inplace
from theano.sandbox.cuda.blas import gpu_ger_inplace from theano.sandbox.cuda.blas import gpu_ger_inplace
...@@ -1282,6 +1282,15 @@ def local_gpu_downsample_factor_max_grad(node): ...@@ -1282,6 +1282,15 @@ def local_gpu_downsample_factor_max_grad(node):
gpu_from_host(gz)))] gpu_from_host(gz)))]
@local_optimizer([GpuConv])
def local_conv_gemm(node):
    """Swap a ``GpuConv`` node for ``GpuConvMM`` when the node qualifies.

    The replacement is applied only when all of the following hold:

    * ``node.op`` is a ``GpuConv`` instance,
    * its ``border_mode`` is ``'valid'``,
    * its ``subsample`` equals ``(1, 1)``.

    :param node: the apply node examined by the optimizer.
    :return: a one-element list holding the result of applying
        ``GpuConvMM(border_mode)`` to the node's inputs, or ``None``
        when the node does not qualify.

    NOTE(review): presumably ``GpuConvMM`` is the GEMM-based convolution
    (per the "conv_gemm" registration name) -- confirm against its
    definition in ``theano.sandbox.cuda.blas``.
    """
    conv_op = node.op

    # Guard clauses, checked in the same order as the combined condition.
    if not isinstance(conv_op, GpuConv):
        return None
    if conv_op.border_mode != 'valid':
        return None
    if conv_op.subsample != (1, 1):
        return None

    replacement = GpuConvMM(conv_op.border_mode)
    return [replacement(*node.inputs)]


gpu_optimizer.register("conv_gemm", local_conv_gemm)
from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论