提交 01219a35 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron 提交者: Pascal Lamblin

Remove GpuConv and everything that depends on it.

上级 6f0af857
差异被折叠。
差异被折叠。
差异被折叠。
...@@ -26,7 +26,6 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -26,7 +26,6 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
gpu_contiguous, HostFromGpu, gpu_contiguous, HostFromGpu,
GpuAllocEmpty, empty_like) GpuAllocEmpty, empty_like)
from .elemwise import GpuElemwise from .elemwise import GpuElemwise
from .conv import GpuConv
# These don't exist in gpuarray # These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
...@@ -1216,59 +1215,6 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1216,59 +1215,6 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
return Apply(self, [dy, sm], [sm.type()]) return Apply(self, [dy, sm], [sm.type()])
# @register_opt('cudnn') # this optimizer is registered in opt.py instead.
@local_optimizer([GpuConv])
def local_conv_dnn(node):
    """Replace a GpuConv node with an equivalent dnn_conv() call.

    The replacement reuses the border mode, subsample and direction hint
    stored on the GpuConv op.

    Returns a one-element list holding the dnn_conv() result, or None (no
    replacement) when any of the following holds:

    * the node's op is not a GpuConv;
    * dnn_available() is false for the context of the node's first output;
    * the op's border mode is neither 'full' nor 'valid'.
    """
    conv_op = node.op
    if not isinstance(conv_op, GpuConv):
        return None
    if not dnn_available(node.outputs[0].type.context_name):
        return None

    mode = conv_op.border_mode
    if mode not in ['full', 'valid']:
        return None

    img, kern = node.inputs
    replacement = dnn_conv(img, kern,
                           border_mode=mode,
                           subsample=conv_op.subsample,
                           direction_hint=conv_op.direction_hint)
    return [replacement]
# This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses,
# because for some input/kernel shape configurations, this is faster.
@local_optimizer([GpuConv])
def local_conv_dnn_alternative(node):
    """Replace a GpuConv node with dnn_conv() using a swapped direction hint.

    Only applies when the border mode is 'full' or 'valid' and the
    subsample is (1, 1). The direction hint handed to dnn_conv() is chosen
    as follows:

    * 'full' border mode: always 'forward!';
    * 'valid' border mode: 'forward' if the op's hint was 'bprop weights',
      otherwise 'bprop weights'.

    Returns a one-element list holding the dnn_conv() result, or None (no
    replacement) when the op is not a GpuConv, dnn_available() is false for
    the output's context, or the mode/subsample restriction is not met.
    """
    conv_op = node.op
    if not isinstance(conv_op, GpuConv):
        return None
    if not dnn_available(node.outputs[0].type.context_name):
        return None

    mode = conv_op.border_mode
    stride = conv_op.subsample
    if mode not in ['full', 'valid']:
        return None
    if stride != (1, 1):
        return None

    img, kern = node.inputs
    hint = conv_op.direction_hint
    if mode == 'full':
        # Full convolution: try the forward pass instead of the backward
        # pass with respect to the inputs.
        hint = 'forward!'
    elif mode == 'valid':
        # Valid convolution: flip between the forward pass and the
        # backward pass with respect to the weights.
        hint = 'forward' if hint == 'bprop weights' else 'bprop weights'

    replacement = dnn_conv(img, kern,
                           border_mode=mode,
                           subsample=stride,
                           direction_hint=hint)
    return [replacement]
# Register local_conv_dnn in conv_groupopt under the name 'local_conv_dnn'.
# NOTE(review): 20 is presumably the position of this optimizer within the
# group and the trailing strings are tags -- confirm against the
# registration API.
conv_groupopt.register('local_conv_dnn', local_conv_dnn, 20,
'conv_dnn', 'fast_compile', 'fast_run', 'cudnn')
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
def local_abstractconv_cudnn(node): def local_abstractconv_cudnn(node):
......
...@@ -32,7 +32,6 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -32,7 +32,6 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin)
from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer, from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer,
gpugemm_no_inplace) gpugemm_no_inplace)
from .conv import GpuConv
from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias, from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmaxWithBias, GpuSoftmax) GpuSoftmaxWithBias, GpuSoftmax)
...@@ -780,82 +779,6 @@ def local_assert(node, context_name): ...@@ -780,82 +779,6 @@ def local_assert(node, context_name):
*node.inputs[1:]))] *node.inputs[1:]))]
@register_opt('fast_compile')
@op_lifter([ConvOp])
def local_gpu_conv(node, context_name):
    """Lift a ConvOp node to a GpuConv-based graph in `context_name`.

    Both inputs of the node are transferred with GpuFromHost(context_name)
    and fed to a GPU convolution built from the ConvOp's attributes.

    Returns a one-element list with the replacement output, or None (no
    replacement) when the ConvOp has a logical kernel shape that differs
    from its actual kernel shape.
    """

    def values_eq_approx(a, b):
        """Compare two 4-d results with a relaxed absolute tolerance.

        This keeps DebugMode from raising spurious errors caused by
        rounding: the convolution reduces over the last two dimensions,
        so the absolute error grows when many elements are reduced.
        """
        assert a.ndim == 4
        # For float32 the default atol is 1e-5; loosen it only when more
        # than 100 elements are reduced.
        atol = 3e-5 if a.shape[-1] * a.shape[-2] > 100 else None
        return GpuArrayType.values_eq_approx(a, b, atol=atol)

    def _build_gpu_conv(op):
        """Return a callable (img, kern) -> output for `op`, or None."""
        if op.kshp_logical is not None and op.kshp_logical != op.kshp:
            return None

        # The op is always constructed with logical_img_hw=None; a logical
        # image shape that differs from the real one is handled by
        # make_graph() below.
        gpu_op = GpuConv(border_mode=op.out_mode,
                         subsample=(op.dx, op.dy),
                         logical_img_hw=None,
                         logical_kern_hw=op.kshp_logical,
                         logical_kern_align_top=op.kshp_logical_top_aligned,
                         kshp=op.kshp,
                         version=op.version,
                         direction_hint=op.direction_hint,
                         verbose=op.verbose,
                         imshp=op.imshp,
                         nkern=op.nkern,
                         bsize=op.bsize,
                         fft_opt=op.fft_opt)

        if op.imshp_logical is None:
            return gpu_op

        logical_hw = op.imshp_logical[1:3]
        if logical_hw != op.imshp[1:3]:
            row_step = int(numpy.ceil(op.imshp_logical[1] /
                                      float(op.imshp[1])))
            col_step = int(numpy.ceil(op.imshp_logical[2] /
                                      float(op.imshp[2])))

            def make_graph(img, kern):
                # Scatter the image into a zero buffer of the logical
                # shape, then move it to the GPU and apply the op.
                zero = numpy.asarray(0, dtype=img.dtype)
                padded = tensor.alloc(zero, img.shape[0], *op.imshp_logical)
                spread = tensor.set_subtensor(
                    padded[:, :, ::row_step, ::col_step], img)
                return gpu_op(GpuFromHost(context_name)(spread), kern)

            return make_graph

        return gpu_op

    img, kern = node.inputs
    gpu_conv = _build_gpu_conv(node.op)
    if gpu_conv is None:
        return None

    gpu_img = GpuFromHost(context_name)(img)
    gpu_kern = GpuFromHost(context_name)(kern)
    out = gpu_conv(gpu_img, gpu_kern)
    assert isinstance(out.type, GpuArrayType)

    # Keep the broadcastable pattern of the original convolution even if
    # we might gain or lose some due to different information at the node
    # level.
    out = tensor.patternbroadcast(out, node.outputs[0].broadcastable)
    out.values_eq_approx = values_eq_approx
    return [out]
# Register this here so that it goes after 'local_gpu_conv'.
# register_opt() is called without arguments and its result is applied to
# conv_groupopt, so the whole group is registered at this point in the module.
register_opt()(conv_groupopt)
# These two deal with any abstract convs that have a transfer somewhere # These two deal with any abstract convs that have a transfer somewhere
@register_opt() @register_opt()
@op_lifter([AbstractConv2d]) @op_lifter([AbstractConv2d])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论