提交 dc6c058c authored 作者: Nicolas Ballas's avatar Nicolas Ballas 提交者: Pascal Lamblin

update optim

上级 5ae763de
...@@ -14,31 +14,25 @@ from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value, ...@@ -14,31 +14,25 @@ from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
from theano.gof import Apply, Op from theano.gof import Apply, Op
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda import register_opt as register_gpu
from theano.tensor.opt import register_specialize_device
### Gpu related optimization (to be moved in sandbox/cuda)
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
as_cuda_ndarray_variable, as_cuda_ndarray_variable,
gpu_contiguous, gpu_from_host, host_from_gpu, gpu_contiguous, gpu_from_host, host_from_gpu,
GpuFromHost, HostFromGpu GpuFromHost, HostFromGpu
) )
from theano.sandbox.cuda import gpu_optimizer, register_opt
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.dnn import dnn_available, dnn_conv from theano.sandbox.cuda.dnn import dnn_available, dnn_conv
from theano.sandbox.cuda.blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs from theano.sandbox.cuda.blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from theano.sandbox.cuda.opt import values_eq_approx_high_tol from theano.sandbox.cuda.opt import values_eq_approx_high_tol
## Cpu implementation
from theano.tensor.nnet import conv2d as cpu_conv2d from theano.tensor.nnet import conv2d as cpu_conv2d
_logger = logging.getLogger("theano.tensor.nnet.conv2d")
imported_scipy_signal = False
try:
# TODO: move these back out to global scope when they no longer
# cause an atexit error
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d
imported_scipy_signal = True
except ImportError:
pass
_logger = logging.getLogger("theano.tensor.nnet.conv")
def conv2d(img, def conv2d(img,
...@@ -115,7 +109,7 @@ def conv2d(img, ...@@ -115,7 +109,7 @@ def conv2d(img,
class BaseConv2d(Op): class BaseAbstractConv2d(Op):
"""Base class for ConvInferace """Base class for ConvInferace
FIXME FIXME
...@@ -178,7 +172,7 @@ class BaseConv2d(Op): ...@@ -178,7 +172,7 @@ class BaseConv2d(Op):
class Conv2d(BaseConv2d): class AbstractConv2d(BaseAbstractConv2d):
""" """
FIXME FIXME
""" """
...@@ -188,7 +182,7 @@ class Conv2d(BaseConv2d): ...@@ -188,7 +182,7 @@ class Conv2d(BaseConv2d):
bsize=None, bsize=None,
border_mode="valid", border_mode="valid",
subsample=(1, 1)): subsample=(1, 1)):
super(Conv2d, self).__init__(imshp, kshp, bsize, super(AbstractConv2d, self).__init__(imshp, kshp, bsize,
border_mode, subsample) border_mode, subsample)
def make_node(self, img, kern): def make_node(self, img, kern):
...@@ -200,29 +194,31 @@ class Conv2d(BaseConv2d): ...@@ -200,29 +194,31 @@ class Conv2d(BaseConv2d):
broadcastable=[img.broadcastable[0], broadcastable=[img.broadcastable[0],
kern.broadcastable[0], kern.broadcastable[0],
False, False] False, False]
img = as_tensor_variable(img) output = img.type.__class__(dtype=img.type.dtype,
kern = as_tensor_variable(kern)
output = theano.tensor.tensor(dtype=img.type.dtype,
broadcastable=broadcastable) broadcastable=broadcastable)
return Apply(self, [img, kern], [output]) return Apply(self, [img, kern], [output])
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
raise NotImplementedError('Conv2d theano optimization failed') raise NotImplementedError('AbstractConv2d theano optimization failed')
def grad(self, inp, grads): def grad(self, inp, grads):
bottom, weights = inp bottom, weights = inp
top, = grads top, = grads
d_bottom = Conv2d_gradInputs(self.imshp, self.kshp, self.bsize, d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
self.border_mode, self.subsample)( self.bsize,
self.border_mode,
self.subsample)(
weights, top, bottom.shape[-2:]) weights, top, bottom.shape[-2:])
d_weights = Conv2d_gradWeights(self.imshp, self.kshp, self.bsize, d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
self.border_mode, self.subsample)( self.bsize,
self.border_mode,
self.subsample)(
bottom, top, weights.shape[-2:]) bottom, top, weights.shape[-2:])
return d_bottom, d_weights return d_bottom, d_weights
class Conv2d_gradWeights(BaseConv2d): class AbstractConv2d_gradWeights(BaseAbstractConv2d):
"""Gradient wrt. filters for `Conv2d`. """Gradient wrt. filters for `AbstractConv2d`.
:note: You will not want to use this directly, but rely on :note: You will not want to use this directly, but rely on
Theano's automatic differentiation or graph optimization to Theano's automatic differentiation or graph optimization to
...@@ -236,7 +232,7 @@ class Conv2d_gradWeights(BaseConv2d): ...@@ -236,7 +232,7 @@ class Conv2d_gradWeights(BaseConv2d):
bsize=None, bsize=None,
border_mode="valid", border_mode="valid",
subsample=(1, 1)): subsample=(1, 1)):
super(Conv2d_gradWeights, self).__init__(imshp, kshp, bsize, super(AbstractConv2d_gradWeights, self).__init__(imshp, kshp, bsize,
border_mode, subsample) border_mode, subsample)
def make_node(self, img, topgrad, shape=None): def make_node(self, img, topgrad, shape=None):
...@@ -255,23 +251,27 @@ class Conv2d_gradWeights(BaseConv2d): ...@@ -255,23 +251,27 @@ class Conv2d_gradWeights(BaseConv2d):
broadcastable=[topgrad.broadcastable[0], broadcastable=[topgrad.broadcastable[0],
img.broadcastable[0], img.broadcastable[0],
False, False] False, False]
img = as_tensor_variable(img) output = img.type.__class__(dtype=img.type.dtype,
topgrad = as_tensor_variable(topgrad)
output = theano.tensor.tensor(dtype=img.type.dtype,
broadcastable=broadcastable) broadcastable=broadcastable)
return Apply(self, [img, topgrad] + height_width, [output]) return Apply(self, [img, topgrad] + height_width, [output])
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
raise NotImplementedError('Conv2d_gradWeight theano optimization failed') raise NotImplementedError('AbstractConv2d_gradWeight theano optimization failed')
def grad(self, inp, grads): def grad(self, inp, grads):
bottom, top = inp[:2] bottom, top = inp[:2]
weights, = grads weights, = grads
d_bottom = Conv2d_gradInputs(self.imshp, self.kshp, self.bsize, d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
self.border_mode, self.subsample)( self.bsize,
self.border_mode,
self.subsample)(
weights, top, bottom.shape[-2:]) weights, top, bottom.shape[-2:])
d_top = Conv2d(self.imshp, self.kshp, self.bsize, d_top = AbstractConv2d(self.imshp,
self.border_mode, self.subsample)(bottom, weights) self.kshp,
self.bsize,
self.border_mode,
self.subsample)(
bottom, weights)
d_height_width = (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () d_height_width = (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else ()
return (d_bottom, d_top) + d_height_width return (d_bottom, d_top) + d_height_width
...@@ -282,8 +282,8 @@ class Conv2d_gradWeights(BaseConv2d): ...@@ -282,8 +282,8 @@ class Conv2d_gradWeights(BaseConv2d):
return [[1], [1], [0], [0]] # no connection to height, width return [[1], [1], [0], [0]] # no connection to height, width
class Conv2d_gradInputs(Conv2d): class AbstractConv2d_gradInputs(Conv2d):
"""Gradient wrt. inputs for `Conv2d`. """Gradient wrt. inputs for `AbstractConv2d`.
:note: You will not want to use this directly, but rely on :note: You will not want to use this directly, but rely on
Theano's automatic differentiation or graph optimization to Theano's automatic differentiation or graph optimization to
...@@ -297,7 +297,7 @@ class Conv2d_gradInputs(Conv2d): ...@@ -297,7 +297,7 @@ class Conv2d_gradInputs(Conv2d):
bsize=None, bsize=None,
border_mode="valid", border_mode="valid",
subsample=(1, 1)): subsample=(1, 1)):
super(Conv2d_gradInputs, self).__init__(imshp, kshp, bsize, super(AbstractConv2d_gradInputs, self).__init__(imshp, kshp, bsize,
border_mode, subsample) border_mode, subsample)
def make_node(self, kern, topgrad, shape=None): def make_node(self, kern, topgrad, shape=None):
...@@ -312,24 +312,25 @@ class Conv2d_gradInputs(Conv2d): ...@@ -312,24 +312,25 @@ class Conv2d_gradInputs(Conv2d):
broadcastable = [topgrad.type.broadcastable[0], broadcastable = [topgrad.type.broadcastable[0],
kern.type.broadcastable[1], kern.type.broadcastable[1],
False, False] False, False]
kern = as_tensor_variable(kern) output = kern.type.__class__(dtype=kern.type.dtype,
topgrad = as_tensor_variable(topgrad)
output = theano.tensor.tensor(dtype=kern.type.dtype,
broadcastable=broadcastable) broadcastable=broadcastable)
return Apply(self, [kern, topgrad] + height_width, [output]) return Apply(self, [kern, topgrad] + height_width, [output])
def perform(self, node, nodename, inp, out_, sub): def perform(self, node, nodename, inp, out_, sub):
raise NotImplementedError('Conv2d_gradWeight theano optimization failed') raise NotImplementedError('AbstractConv2d_gradWeight theano optimization failed')
def grad(self, inp, grads): def grad(self, inp, grads):
weights, top = inp[:2] weights, top = inp[:2]
bottom, = grads bottom, = grads
d_weights = Conv2d_gradWeights(self.imshp, self.kshp, self.bsize, d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
self.border_mode, self.subsample)( self.bsize,
self.border_mode,
self.subsample)(
bottom, top, weights.shape[-2:]) bottom, top, weights.shape[-2:])
d_top = Conv2d(self.imshp, self.filter_shape, self.bsize, d_top = AbstractConv2d(self.imshp, self.filter_shape, self.bsize,
self.border_mode, self.subsample)(bottom, weights) self.border_mode, self.subsample)(
bottom, weights)
d_height_width = (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () d_height_width = (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else ()
return (d_weights, d_top) + d_height_width return (d_weights, d_top) + d_height_width
...@@ -340,38 +341,126 @@ class Conv2d_gradInputs(Conv2d): ...@@ -340,38 +341,126 @@ class Conv2d_gradInputs(Conv2d):
return [[1], [1], [0], [0]] # no connection to height, width return [[1], [1], [0], [0]] # no connection to height, width
### Optimizations should be moved into their appropriate files
### Move to GPU optimization
### Do not replace the abstract Op here; only move its inputs to the GPU
### The abstract Op is replaced later by the device-specialized optimizations
@local_optimizer([gpu_from_host, AbstractConv2d,
                  AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_conv2d_gpu_conv(node):
    """
    gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host)

    AbstractConv(host_from_gpu) -> host_from_gpu(AbstractConv)

    Only the inputs of the abstract convolution are moved to the GPU; the
    abstract Op itself is re-applied unchanged, since its ``make_node``
    builds the output type from the type of its first input.

    :param node: the Apply node being visited, either a ``GpuFromHost`` node
        or one of the abstract convolution nodes.
    :return: a one-element list holding the replacement output, or None
        when the node does not match either pattern.
    """
    abstract_conv_ops = (AbstractConv2d,
                         AbstractConv2d_gradWeights,
                         AbstractConv2d_gradInputs)

    def rebuild_on_gpu(conv_node):
        # Re-apply the same Op on GPU copies of its two tensor inputs.
        conv_inputs = list(conv_node.inputs)
        gpu_inp1 = gpu_from_host(conv_inputs[0])
        gpu_inp2 = gpu_from_host(conv_inputs[1])
        extra = conv_inputs[2:]
        if not extra:
            # AbstractConv2d.make_node only accepts (img, kern); passing a
            # third positional argument would raise a TypeError.
            return conv_node.op(gpu_inp1, gpu_inp2)
        # The gradient Ops append the optional height and width as separate
        # inputs, so a node carries either two or four inputs.
        # NOTE(review): assumes make_node accepts the (height, width) pair
        # as its `shape` argument -- confirm against make_node.
        shape = extra[0] if len(extra) == 1 else extra
        return conv_node.op(gpu_inp1, gpu_inp2, shape)

    if isinstance(node.op, GpuFromHost):
        # gpu_from_host(conv) -> conv(gpu_from_host)
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op,
                                           abstract_conv_ops):
            out = rebuild_on_gpu(host_input.owner)
            # The rebuilt output takes its type from the GPU input, so it
            # must not be wrapped in gpu_from_host a second time.
            out = theano.tensor.patternbroadcast(
                out,
                node.outputs[0].broadcastable)
            out.values_eq_approx = values_eq_approx_high_tol
            return [out]

    if isinstance(node.op, abstract_conv_ops):
        # conv(host_from_gpu) -> host_from_gpu(conv)
        inp1, inp2 = node.inputs[:2]
        inp1_on_gpu = (inp1.owner and isinstance(inp1.owner.op, HostFromGpu))
        inp2_on_gpu = (inp2.owner and isinstance(inp2.owner.op, HostFromGpu))
        if inp1_on_gpu or inp2_on_gpu:
            out = rebuild_on_gpu(node)
            out = theano.tensor.patternbroadcast(
                out,
                node.outputs[0].broadcastable)
            out.values_eq_approx = values_eq_approx_high_tol
            # The node being replaced has a host output, so hand back a
            # host-side variable.
            return [as_tensor_variable(out)]
# We register the optimizer that moves convolutions to the GPU.
register_gpu()(local_conv2d_gpu_conv)
@local_optimizer([AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
def local_conv2d_cudnn(node):
def replace_conv_with_cudnn(convop, inputs): if len(node.inputs) == 3:
inp1, inp2, shape = node.inputs
else:
inp1, inp2 = node.inputs
shape = None
if not isinstance(inp1, CudaNdarrayType) or \
isinstance(inp2, CudaNdarrayType):
return None
if not dnn_available(): if not dnn_available():
return None return None
if (isinstance(node.op, AbstractConv2d)):
inp1, inp2, shape = inputs
if (isinstance(convop, Conv2d)):
rval = dnn_conv(inp1, inp2, rval = dnn_conv(inp1, inp2,
border_mode=convop.border_mode, border_mode=node.op.border_mode,
subsample=convop.subsample, subsample=node.op.subsample,
direction_hint='forward') direction_hint='forward')
return rval return rval
if (isinstance(convop, Conv2d_gradWeights)): if (isinstance(node.op, AbstractConv2d_gradWeights)):
rval = dnn_conv(inp1.dimshuffle(1, 0, 2, 3), inp2, rval = dnn_conv(inp1.dimshuffle(1, 0, 2, 3), inp2,
border_mode=convop.border_mode, border_mode=node.op.border_mode,
subsample=convop.subsample, subsample=node.op.subsample,
direction_hint='bprop weights') direction_hint='bprop weights')
return rval return rval
if (isinstance(convop, Conv2d_gradInputs)): if (isinstance(node.op, AbstractConv2d_gradInputs)):
rval = dnn_conv(inp1, inp2, rval = dnn_conv(inp1, inp2,
border_mode=convop.border_mode, border_mode=node.op.border_mode,
subsample=convop.subsample, subsample=node.op.subsample,
direction_hint='bprop inputs') direction_hint='bprop inputs')
return rval return rval
register_specialize_device()(local_conv2d_cudnn)
def replace_convforward_with_corrmm(convop, inputs):
img, kern, shape = inputs
if convop.border_mode in ['full', 'valid']: @local_optimizer(AbstractConv2d)
border_mode = convop.border_mode def local_conv2d_corrmm(convop, inputs):
subsample = convop.subsample
img, kern = node.inputs
if not isinstance(img, CudaNdarrayType) or \
isinstance(kern, CudaNdarrayType):
return None
if node.op.border_mode in ['full', 'valid']:
border_mode = node.op.border_mode
subsample = node.op.subsample
if (border_mode == 'valid') or (subsample != (1,1)): if (border_mode == 'valid') or (subsample != (1,1)):
# need to flip the kernel for valid convolution # need to flip the kernel for valid convolution
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
...@@ -385,20 +474,20 @@ def replace_convforward_with_corrmm(convop, inputs): ...@@ -385,20 +474,20 @@ def replace_convforward_with_corrmm(convop, inputs):
# GpuConv does not always store information on the batchsize and # GpuConv does not always store information on the batchsize and
# channels, though, so we only use what information we have.) # channels, though, so we only use what information we have.)
if ((subsample == (1,1)) and if ((subsample == (1,1)) and
(convop.imshp is not None) and (node.op.imshp is not None) and
(None not in convop.imshp[-2:]) and (None not in node.op.imshp[-2:]) and
(convop.kshp is not None) and (node.op.kshp is not None) and
(None not in convop.kshp)): (None not in node.op.kshp)):
# we know the kernel and output size # we know the kernel and output size
prod1 = convop.kshp[0] * convop.kshp[1] prod1 = node.op.kshp[0] * node.op.kshp[1]
prod2 = ((convop.imshp[-2] - convop.kshp[0] + 1) * prod2 = ((node.op.imshp[-2] - node.op.kshp[0] + 1) *
(convop.imshp[-1] - convop.kshp[1] + 1)) (node.op.imshp[-1] - node.op.kshp[1] + 1))
if ((convop.bsize is not None) and if ((node.op.bsize is not None) and
(len(convop.imshp) == 3) and (len(node.op.imshp) == 3) and
(convop.imshp[0] is not None)): (node.op.imshp[0] is not None)):
# we also know batchsize and input channels # we also know batchsize and input channels
prod1 *= convop.bsize prod1 *= node.op.bsize
prod2 *= convop.imshp[0] prod2 *= node.op.imshp[0]
# compare to decide # compare to decide
if prod1 > prod2: if prod1 > prod2:
# (we need to wrap the result in as_cuda_ndarray_variable, # (we need to wrap the result in as_cuda_ndarray_variable,
...@@ -416,108 +505,41 @@ def replace_convforward_with_corrmm(convop, inputs): ...@@ -416,108 +505,41 @@ def replace_convforward_with_corrmm(convop, inputs):
rval = GpuCorrMM_gradInputs('valid', subsample)( rval = GpuCorrMM_gradInputs('valid', subsample)(
gpu_contiguous(kern), gpu_contiguous(img)) gpu_contiguous(kern), gpu_contiguous(img))
return rval return rval
register_specialize_device()(local_conv2d_corrmm)
@local_optimizer(AbstractConv2d_gradWeights)
def local_conv2d_gradweight_corrmm(node):
def replace_convgradweight_with_corrmm(convop, inputs): img, topgrad, shape = node.inputs
img, topgrad, shape = inputs if not isinstance(img, CudaNdarrayType) or \
rval = GpuCorrMM_gradWeights(border_mode=convop.border_mode, isinstance(topgrad, CudaNdarrayType):
subsample=convop.subsample)( return None
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape) gpu_contiguous(img), gpu_contiguous(topgrad), shape)
return rval return rval
register_specialize_device()(local_conv2d_gradweight_corrmm)
@local_optimizer(AbstractConv2d_gradInputs)
def local_conv2d_gradinputs_corrmm(node):
def replace_convgradinputs_withcorrmm(convop, inputs): kern, topgrad, shape = node.inputs
kern, topgrad, shape = inputs if not isinstance(img, CudaNdarrayType) or \
rval = GpuCorrMM_gradInputs(border_mode=convop.border_mode, isinstance(topgrad, CudaNdarrayType):
subsample=convop.subsample)( return None
rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape) gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return rval return rval
register_specialize_device()(local_conv2d_gradinputs_corrmm)
def replace_convop(convop, inputs):
    """
    Dispatch based on the convop.optim values

    cuDNN is tried first when "cudnn" is listed in ``convop.optim``. If it
    is not listed, or its helper returns None, the GpuCorrMM helper matching
    the Op class is used when "corrmm" is listed.

    :param convop: the convolution Op to replace (Conv2d,
        Conv2d_gradWeights or Conv2d_gradInputs).
    :param inputs: inputs handed unchanged to the selected helper.
    :return: the value returned by the selected helper, or None when no
        implementation applies.
    """
    gpu_conv = None
    if "cudnn" in convop.optim:
        gpu_conv = replace_conv_with_cudnn(convop, inputs)
    if gpu_conv is None and "corrmm" in convop.optim:
        # Conv2d_gradInputs derives from Conv2d, so it has to be tested
        # before Conv2d; otherwise the forward replacement would be picked
        # for the gradient-wrt-inputs Op and this branch would never run.
        if isinstance(convop, Conv2d_gradInputs):
            gpu_conv = replace_convgradinputs_withcorrmm(convop, inputs)
        elif isinstance(convop, Conv2d_gradWeights):
            gpu_conv = replace_convgradweight_with_corrmm(convop, inputs)
        elif isinstance(convop, Conv2d):
            gpu_conv = replace_convforward_with_corrmm(convop, inputs)
    ### FIXME add fft code
    return gpu_conv
### move to Gpu optimization
@local_optimizer([gpu_from_host, Conv2d, Conv2d_gradWeights, Conv2d_gradInputs])
def local_conv2d_gpu_conv(node):
    """
    gpu_from_host(Conv) -> (gpu)_Conv(gpu_from_host)

    Conv(host_from_gpu) -> host_from_gpu((gpu)_Conv)

    The GPU implementation is chosen by ``replace_convop``; when it returns
    None the node is left untouched.
    """
    conv_ops = (Conv2d, Conv2d_gradWeights, Conv2d_gradInputs)

    if isinstance(node.op, GpuFromHost):
        # gpu_from_host(conv) -> gpu_conv(gpu_from_host)
        host_input = node.inputs[0]
        producer = host_input.owner
        if producer and isinstance(producer.op, conv_ops):
            # NOTE(review): the gradient Ops appear to carry height and
            # width as two separate inputs, which this unpacking does not
            # cover -- confirm.
            if len(producer.inputs) == 3:
                inp1, inp2, shape = producer.inputs
            else:
                inp1, inp2 = producer.inputs
                shape = None
            gpu_inputs = [gpu_from_host(inp1), gpu_from_host(inp2), shape]
            out = replace_convop(producer.op, gpu_inputs)
            if out is None:
                return
            # NOTE(review): `out` presumably already lives on the GPU at
            # this point -- confirm that wrapping it again is intended.
            on_gpu = gpu_from_host(out)
            out = theano.tensor.patternbroadcast(
                on_gpu, node.outputs[0].broadcastable)
            out.values_eq_approx = values_eq_approx_high_tol
            return [out]

    if isinstance(node.op, conv_ops):
        # conv(host_from_gpu) -> host_from_gpu(gpu_conv)
        if len(node.inputs) == 3:
            inp1, inp2, shape = node.inputs
        else:
            inp1, inp2 = node.inputs
            shape = None
        inp1_on_gpu = (inp1.owner and isinstance(inp1.owner.op, HostFromGpu))
        inp2_on_gpu = (inp2.owner and isinstance(inp2.owner.op, HostFromGpu))
        if inp1_on_gpu or inp2_on_gpu:
            gpu_inputs = [gpu_from_host(inp1), gpu_from_host(inp2), shape]
            out = replace_convop(node.op, gpu_inputs)
            if out is None:
                return
            out = theano.tensor.patternbroadcast(
                out, node.outputs[0].broadcastable)
            out.values_eq_approx = values_eq_approx_high_tol
            return [as_tensor_variable(out)]
# We register the optimizer that moves convolutions to the GPU.
register_opt()(local_conv2d_gpu_conv)
### CPU optimization ### CPU optimization
### Deactivated: focus on GPU optimization first ### Deactivated: focus on GPU optimization first
# @local_optimizer([Conv2d]) # @local_optimizer([AbstractConv2d])
# def local_conv2d(node): # def local_conv2d(node):
# if isinstance(node.op, Conv2d) and not node.on_gpu: # if isinstance(node.op, AbstractConv2d) and not node.on_gpu:
# img, kern = node.inputs # img, kern = node.inputs
# rval = cpu_conv2d(img, kern, # rval = cpu_conv2d(img, kern,
# node.op.imshp, node.op.filter_shape, # node.op.imshp, node.op.filter_shape,
...@@ -526,10 +548,10 @@ register_opt()(local_conv2d_gpu_conv) ...@@ -526,10 +548,10 @@ register_opt()(local_conv2d_gpu_conv)
# return [rval] # return [rval]
# @local_optimizer([Conv2d_gradWeights]) # @local_optimizer([AbstractConv2d_gradWeights])
# def local_conv2d_gradweight_cpu(node): # def local_conv2d_gradweight_cpu(node):
# if not isinstance(node.op, Conv2d_gradWeights) or not node.on_gpu: # if not isinstance(node.op, AbstractConv2d_gradWeights) or not node.on_gpu:
# return # return
# img, topgrad = node.inputs # img, topgrad = node.inputs
...@@ -555,7 +577,7 @@ register_opt()(local_conv2d_gpu_conv) ...@@ -555,7 +577,7 @@ register_opt()(local_conv2d_gpu_conv)
# "stride y are different from 1 and 2, as there is a bug in it.") # "stride y are different from 1 and 2, as there is a bug in it.")
# if op.imshp is None or op.kshp is None: # if op.imshp is None or op.kshp is None:
# raise Exception("Conv2d grad when stride x!=1 or stride y!=1 we must have" # raise Exception("AbstractConv2d grad when stride x!=1 or stride y!=1 we must have"
# " all the optional shape information") # " all the optional shape information")
# ####### Determine gradient on kernels ######## # ####### Determine gradient on kernels ########
...@@ -604,9 +626,9 @@ register_opt()(local_conv2d_gpu_conv) ...@@ -604,9 +626,9 @@ register_opt()(local_conv2d_gpu_conv)
# return [dw(img, filters)] # return [dw(img, filters)]
# @local_optimizer([Conv2d_gradInputs]) # @local_optimizer([AbstractConv2d_gradInputs])
# def local_conv2d_gradinputs_cpu(node): # def local_conv2d_gradinputs_cpu(node):
# if not isinstance(node.op, Conv2d_gradInputs) or not node.on_gpu: # if not isinstance(node.op, AbstractConv2d_gradInputs) or not node.on_gpu:
# return # return
# # ####### Determine gradient on inputs ######## # # ####### Determine gradient on inputs ########
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论