提交 136153f4 · 作者: Nicolas Ballas · 提交者: Pascal Lamblin

update optim

上级 24b77b44
......@@ -2451,8 +2451,7 @@ if True:
### AbstractConv Optimizations
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
def local_conv2d_cudnn(node):
def local_abstractconv_cudnn(node):
inp1 = node.inputs[0]
inp2 = node.inputs[1]
......@@ -2487,5 +2486,4 @@ def local_conv2d_cudnn(node):
subsample=node.op.subsample,
conv_mode = conv_mode)
return [rval]
register_specialize_device(local_conv2d_cudnn, 'cudnn')
......@@ -2686,8 +2686,9 @@ register_opt()(local_conv2d_gpu_conv)
### Corrmm opt
@local_optimizer([AbstractConv2d])
def local_conv2d_corrmm(node):
def local_abstractconv_gemm(node):
if not isinstance(node.op, AbstractConv2d):
return None
img, kern = node.inputs
if (not isinstance(img.type, CudaNdarrayType) or
not isinstance(kern.type, CudaNdarrayType)):
......@@ -2743,16 +2744,15 @@ def local_conv2d_corrmm(node):
).dimshuffle(1, 0, 2, 3))
return [rval]
register_specialize_device(local_conv2d_corrmm, 'conv_gemm')
@local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_corrmm(node):
def local_abstractconv_gradweight_gemm(node):
if not isinstance(node.op, AbstractConv2d_gradWeights):
return None
img, topgrad, shape = node.inputs
if not isinstance(img.type, CudaNdarrayType) or \
not isinstance(topgrad.type, CudaNdarrayType):
return None
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape)
......@@ -2761,12 +2761,12 @@ def local_conv2d_gradweight_corrmm(node):
rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
rval = as_cuda_ndarray_variable(rval)
return [rval]
register_specialize_device(local_conv2d_gradweight_corrmm, 'conv_gemm')
@local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_corrmm(node):
def local_abstractconv_gradinputs_gemm(node):
if not isinstance(node.op, AbstractConv2d_gradInputs):
return None
kern, topgrad, shape = node.inputs
if not isinstance(kern.type, CudaNdarrayType) or \
not isinstance(topgrad.type, CudaNdarrayType):
return None
......@@ -2778,4 +2778,28 @@ def local_conv2d_gradinputs_corrmm(node):
subsample=node.op.subsample)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval]
register_specialize_device(local_conv2d_gradinputs_corrmm, 'conv_gemm')
# Register the GPU implementations of the abstract convolution ops.
# They are tried in a specific order so we can control
# which ones take precedence over others.
#
# A dedicated LocalGroupDB is created, given a name, and handed to
# register_opt() so that it takes part in the GPU optimization pipeline.
abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
abstractconv_groupopt.__name__ = "gpu_abstractconv_opts"
register_opt()(abstractconv_groupopt)
# NOTE(review): every registration below targets `conv_groupopt`, not the
# `abstractconv_groupopt` created just above. `conv_groupopt` is not defined
# in this excerpt -- confirm that registering into it (and leaving
# `abstractconv_groupopt` empty) is intentional.
#
# cuDNN is first, but only registered if cuDNN is available.
# NOTE(review): no availability check is visible around this call in the
# excerpt; presumably the guard lives elsewhere -- TODO confirm.
# The numeric argument (20 here, 30 below) is presumably the position used
# to order the optimizers inside the group, lower being tried first.
conv_groupopt.register('local_abstractconv_dnn', dnn.local_abstractconv_cudnn, 20,
'conv_dnn',
'fast_compile', 'fast_run', 'cudnn')
# The GEMM-based convolution comes last to catch all remaining cases.
# It can be disabled by excluding 'conv_gemm'.
conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30,
'conv_gemm',
'fast_compile', 'fast_run')
# Gradient w.r.t. the weights, GEMM-based implementation.
# NOTE(review): the 'conv_gemm' tag is commented out here, so excluding
# 'conv_gemm' presumably does not disable this optimizer, unlike the forward
# one above -- confirm whether that asymmetry is wanted.
conv_groupopt.register('local_abstractconv_gradweight_gemm',
local_abstractconv_gradweight_gemm, 30,
#'conv_gemm',
'fast_compile', 'fast_run')
# Gradient w.r.t. the inputs, GEMM-based implementation.
# NOTE(review): same commented-out 'conv_gemm' tag as for the weight
# gradient above -- confirm.
conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30,
#'conv_gemm',
'fast_compile', 'fast_run')
......@@ -14,10 +14,7 @@ from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
from theano.tensor import TensorType
from theano.gof import Apply, Op
from theano.gof import local_optimizer
from theano.tensor.opt import register_specialize_device
from theano.sandbox.cuda.type import CudaNdarrayType
## Cpu implementation
from theano.tensor.nnet import conv2d as cpu_conv2d, ConvOp
......@@ -327,8 +324,8 @@ def local_conv2d_cpu(node):
return None
img, kern = node.inputs
if isinstance(img.type, CudaNdarrayType) or \
isinstance(kern.type, CudaNdarrayType):
if (not isinstance(img.type, TensorType) or
not isinstance(kern.type, TensorType)):
return None
if node.op.border_mode not in ['full', 'valid']:
return None
......@@ -349,8 +346,8 @@ def local_conv2d_gradweight_cpu(node):
img, topgrad, shape = node.inputs
if isinstance(img.type, CudaNdarrayType) or \
isinstance(topgrad.type, CudaNdarrayType):
if (not isinstance(img.type, TensorType) or
not isinstance(topgrad.type, TensorType)):
return None
if node.op.border_mode not in ['full', 'valid']:
return None
......@@ -458,8 +455,8 @@ register_specialize_device(local_conv2d_gradweight_cpu)
def local_conv2d_gradinputs_cpu(node):
kern, topgrad, shape = node.inputs
if isinstance(kern.type, CudaNdarrayType) or \
isinstance(topgrad.type, CudaNdarrayType):
if (not isinstance(kern.type, TensorType) or
not isinstance(topgrad.type, TensorType)):
return None
if node.op.border_mode not in ['full', 'valid']:
return None
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论