提交 136153f4 authored 作者: Nicolas Ballas's avatar Nicolas Ballas 提交者: Pascal Lamblin

update optim

上级 24b77b44
...@@ -2451,8 +2451,7 @@ if True: ...@@ -2451,8 +2451,7 @@ if True:
### AbstractConv Optimizations ### AbstractConv Optimizations
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
def local_conv2d_cudnn(node): def local_abstractconv_cudnn(node):
inp1 = node.inputs[0] inp1 = node.inputs[0]
inp2 = node.inputs[1] inp2 = node.inputs[1]
...@@ -2487,5 +2486,4 @@ def local_conv2d_cudnn(node): ...@@ -2487,5 +2486,4 @@ def local_conv2d_cudnn(node):
subsample=node.op.subsample, subsample=node.op.subsample,
conv_mode = conv_mode) conv_mode = conv_mode)
return [rval] return [rval]
register_specialize_device(local_conv2d_cudnn, 'cudnn')
...@@ -2686,8 +2686,9 @@ register_opt()(local_conv2d_gpu_conv) ...@@ -2686,8 +2686,9 @@ register_opt()(local_conv2d_gpu_conv)
### Corrmm opt ### Corrmm opt
@local_optimizer([AbstractConv2d]) @local_optimizer([AbstractConv2d])
def local_conv2d_corrmm(node): def local_abstractconv_gemm(node):
if not isinstance(node.op, AbstractConv2d):
return None
img, kern = node.inputs img, kern = node.inputs
if (not isinstance(img.type, CudaNdarrayType) or if (not isinstance(img.type, CudaNdarrayType) or
not isinstance(kern.type, CudaNdarrayType)): not isinstance(kern.type, CudaNdarrayType)):
...@@ -2743,16 +2744,15 @@ def local_conv2d_corrmm(node): ...@@ -2743,16 +2744,15 @@ def local_conv2d_corrmm(node):
).dimshuffle(1, 0, 2, 3)) ).dimshuffle(1, 0, 2, 3))
return [rval] return [rval]
register_specialize_device(local_conv2d_corrmm, 'conv_gemm')
@local_optimizer([AbstractConv2d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_corrmm(node): def local_abstractconv_gradweight_gemm(node):
if not isinstance(node.op, AbstractConv2d_gradWeights):
return None
img, topgrad, shape = node.inputs img, topgrad, shape = node.inputs
if not isinstance(img.type, CudaNdarrayType) or \ if not isinstance(img.type, CudaNdarrayType) or \
not isinstance(topgrad.type, CudaNdarrayType): not isinstance(topgrad.type, CudaNdarrayType):
return None return None
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode, rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)( subsample=node.op.subsample)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape) gpu_contiguous(img), gpu_contiguous(topgrad), shape)
...@@ -2761,12 +2761,12 @@ def local_conv2d_gradweight_corrmm(node): ...@@ -2761,12 +2761,12 @@ def local_conv2d_gradweight_corrmm(node):
rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable) rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
rval = as_cuda_ndarray_variable(rval) rval = as_cuda_ndarray_variable(rval)
return [rval] return [rval]
register_specialize_device(local_conv2d_gradweight_corrmm, 'conv_gemm')
@local_optimizer([AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_corrmm(node): def local_abstractconv_gradinputs_gemm(node):
if not isinstance(node.op, AbstractConv2d_gradInputs):
return None
kern, topgrad, shape = node.inputs kern, topgrad, shape = node.inputs
if not isinstance(kern.type, CudaNdarrayType) or \ if not isinstance(kern.type, CudaNdarrayType) or \
not isinstance(topgrad.type, CudaNdarrayType): not isinstance(topgrad.type, CudaNdarrayType):
return None return None
...@@ -2778,4 +2778,28 @@ def local_conv2d_gradinputs_corrmm(node): ...@@ -2778,4 +2778,28 @@ def local_conv2d_gradinputs_corrmm(node):
subsample=node.op.subsample)( subsample=node.op.subsample)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape) gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval] return [rval]
register_specialize_device(local_conv2d_gradinputs_corrmm, 'conv_gemm')
# Register GPU convolution implementation
# They are tried in a specific order so we can control
# which ones take precedence over others.
#
# A LocalGroupDB is created, given the name "gpu_abstractconv_opts", and
# passed to register_opt().
abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
abstractconv_groupopt.__name__ = "gpu_abstractconv_opts"
register_opt()(abstractconv_groupopt)
# NOTE(review): every register() call below targets `conv_groupopt`, not the
# `abstractconv_groupopt` created above, so nothing in this section is added
# to the new group. Confirm which group these optimizers are meant to join.
#
# The integer argument (20 / 30) is presumably the ordering position within
# the group (lower runs first, matching the "first"/"last" wording below);
# verify against the LocalGroupDB.register signature.
#
# cuDNN is first, but only registered if cuDNN is available.
# NOTE(review): no availability check is visible at this call site; it is
# presumably handled elsewhere. Confirm. Also note the registered name is
# 'local_abstractconv_dnn' while the function is `local_abstractconv_cudnn`.
conv_groupopt.register('local_abstractconv_dnn', dnn.local_abstractconv_cudnn, 20,
'conv_dnn',
'fast_compile', 'fast_run', 'cudnn')
# The GEMM-based convolution comes last to catch all remaining cases.
# It can be disabled by excluding 'conv_gemm'.
conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30,
'conv_gemm',
'fast_compile', 'fast_run')
# NOTE(review): the two gradient optimizers below are registered WITHOUT the
# 'conv_gemm' tag (it is commented out), so excluding 'conv_gemm' would
# presumably not disable them, unlike the forward optimizer above. Confirm
# whether this is intentional.
conv_groupopt.register('local_abstractconv_gradweight_gemm',
local_abstractconv_gradweight_gemm, 30,
#'conv_gemm',
'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30,
#'conv_gemm',
'fast_compile', 'fast_run')
...@@ -14,10 +14,7 @@ from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value, ...@@ -14,10 +14,7 @@ from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.gof import Apply, Op from theano.gof import Apply, Op
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor.opt import register_specialize_device from theano.tensor.opt import register_specialize_device
from theano.sandbox.cuda.type import CudaNdarrayType
## Cpu implementation ## Cpu implementation
from theano.tensor.nnet import conv2d as cpu_conv2d, ConvOp from theano.tensor.nnet import conv2d as cpu_conv2d, ConvOp
...@@ -327,8 +324,8 @@ def local_conv2d_cpu(node): ...@@ -327,8 +324,8 @@ def local_conv2d_cpu(node):
return None return None
img, kern = node.inputs img, kern = node.inputs
if isinstance(img.type, CudaNdarrayType) or \ if (not isinstance(img.type, TensorType) or
isinstance(kern.type, CudaNdarrayType): not isinstance(kern.type, TensorType)):
return None return None
if node.op.border_mode not in ['full', 'valid']: if node.op.border_mode not in ['full', 'valid']:
return None return None
...@@ -349,8 +346,8 @@ def local_conv2d_gradweight_cpu(node): ...@@ -349,8 +346,8 @@ def local_conv2d_gradweight_cpu(node):
img, topgrad, shape = node.inputs img, topgrad, shape = node.inputs
if isinstance(img.type, CudaNdarrayType) or \ if (not isinstance(img.type, TensorType) or
isinstance(topgrad.type, CudaNdarrayType): not isinstance(topgrad.type, TensorType)):
return None return None
if node.op.border_mode not in ['full', 'valid']: if node.op.border_mode not in ['full', 'valid']:
return None return None
...@@ -458,8 +455,8 @@ register_specialize_device(local_conv2d_gradweight_cpu) ...@@ -458,8 +455,8 @@ register_specialize_device(local_conv2d_gradweight_cpu)
def local_conv2d_gradinputs_cpu(node): def local_conv2d_gradinputs_cpu(node):
kern, topgrad, shape = node.inputs kern, topgrad, shape = node.inputs
if isinstance(kern.type, CudaNdarrayType) or \ if (not isinstance(kern.type, TensorType) or
isinstance(topgrad.type, CudaNdarrayType): not isinstance(topgrad.type, TensorType)):
return None return None
if node.op.border_mode not in ['full', 'valid']: if node.op.border_mode not in ['full', 'valid']:
return None return None
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论