提交 65262ef7 作者: Nicolas Ballas 提交者: Pascal Lamblin

enable cpu version

上级 28e99288
...@@ -456,8 +456,6 @@ def local_conv2d_corrmm(node): ...@@ -456,8 +456,6 @@ def local_conv2d_corrmm(node):
img, kern = node.inputs img, kern = node.inputs
if not isinstance(img.type, CudaNdarrayType) or \ if not isinstance(img.type, CudaNdarrayType) or \
not isinstance(kern.type, CudaNdarrayType): not isinstance(kern.type, CudaNdarrayType):
print 'here', img.type, kern.type
print isinstance(img, CudaNdarrayType), isinstance(kern, CudaNdarrayType)
return None return None
if node.op.border_mode in ['full', 'valid']: if node.op.border_mode in ['full', 'valid']:
...@@ -546,140 +544,157 @@ register_specialize_device(local_conv2d_gradinputs_corrmm) ...@@ -546,140 +544,157 @@ register_specialize_device(local_conv2d_gradinputs_corrmm)
### Cpu Optmization ### Cpu Optmization
### Desactived focus on GPU optimization first ### Desactived focus on GPU optimization first
# @local_optimizer([AbstractConv2d]) @local_optimizer([AbstractConv2d])
# def local_conv2d(node): def local_conv2d_cpu(node):
# if isinstance(node.op, AbstractConv2d) and not node.on_gpu:
# img, kern = node.inputs img, kern = node.inputs
# rval = cpu_conv2d(img, kern, if isinstance(img.type, CudaNdarrayType) or \
# node.op.imshp, node.op.filter_shape, isinstance(kern.type, CudaNdarrayType):
# border_mode=node.op.border_mode, return None
# subsample=node.op.subsample) rval = cpu_conv2d(img, kern,
# return [rval] node.op.imshp, node.op.filter_shape,
border_mode=node.op.border_mode,
subsample=node.op.subsample)
# @local_optimizer([AbstractConv2d_gradWeights]) return [rval]
# def local_conv2d_gradweight_cpu(node): register_specialize_device(local_conv2d_cpu)
# if not isinstance(node.op, AbstractConv2d_gradWeights) or not node.on_gpu:
# return @local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_cpu(node):
# img, topgrad = node.inputs
# if op.border_mode == 'valid' and op.subsample != (1, 1): if len(node.inputs) == 3:
# # Use the gradient as defined in conv3D, because the implementation img, topgrad, shape = node.inputs
# # by Conv is slow (about 3x slower than conv3D, and probably 10x else:
# # slower than it could be), nad incorrect when dx or dy > 2. img, topgrad = node.inputs
# # build a "node", that should be equivalent to the one given by shape = None
# # self.make_node, but using convGrad3D instead. if isinstance(img.type, CudaNdarrayType) or \
# shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1) isinstance(topgrad.type, CudaNdarrayType):
# shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1) return None
# rval = ConvGrad3D(V=shuffled_img,
# d=(op.subsample[0], op.subsample[1], 1), if op.border_mode == 'valid' and op.subsample != (1, 1):
# WShape=(self.kshp[0], self.kshp[1], 1), # Use the gradient as defined in conv3D, because the implementation
# dCdH_=shuffled_topgrad) # by Conv is slow (about 3x slower than conv3D, and probably 10x
# slower than it could be), nad incorrect when dx or dy > 2.
# return [rval.dimshuffle(0, 4, 1, 2)] # build a "node", that should be equivalent to the one given by
# self.make_node, but using convGrad3D instead.
shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
# if op.subsample[0] not in (1, 2) or op.subsample[1] not in (1, 2): shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
# raise NotImplementedError( rval = ConvGrad3D(V=shuffled_img,
# "ERROR: We disable conv2d grad now when stride x or " d=(op.subsample[0], op.subsample[1], 1),
# "stride y are different from 1 and 2, as there is a bug in it.") WShape=(self.kshp[0], self.kshp[1], 1),
dCdH_=shuffled_topgrad)
# if op.imshp is None or op.kshp is None:
# raise Exception("AbstractConv2d grad when stride x!=1 or stride y!=1 we must have" return [rval.dimshuffle(0, 4, 1, 2)]
# " all the optional shape information")
# ####### Determine gradient on kernels ######## if op.subsample[0] not in (1, 2) or op.subsample[1] not in (1, 2):
# assert len(op.imshp) == 4 and len(op.kshp) == 4 raise NotImplementedError(
"ERROR: We disable conv2d grad now when stride x or "
# #newin = inputs.dimshuffle((1, 0, 2, 3)) "stride y are different from 1 and 2, as there is a bug in it.")
# #newgz = gz.dimshuffle((1, 0, 2, 3))
if op.imshp is None or op.kshp is None:
# outshp = op.getOutputShape(op.imshp[1:], raise Exception("AbstractConv2d grad when stride x!=1 or stride y!=1 we must have"
# op.kshp, op.subsample, " all the optional shape information")
# op.border_mode)
# fulloutshp = op.getOutputShape(op.imshp[1:], ####### Determine gradient on kernels ########
# op.kshp, (1, 1), assert len(op.imshp) == 4 and len(op.kshp) == 4
# op.border_mode)
#newin = inputs.dimshuffle((1, 0, 2, 3))
#newgz = gz.dimshuffle((1, 0, 2, 3))
# if op.border_mode == 'valid':
# (img, filters) = (img, topgrad) outshp = op.getOutputShape(op.imshp[1:],
# kshp_logical = fulloutshp ## FIXME op.kshp, op.subsample,
# kshp_logical_top_aligned = False op.border_mode)
# imshp_logical = None fulloutshp = op.getOutputShape(op.imshp[1:],
# (bsize, nkern) = (op.imshp[0], op.kshp[0]) op.kshp, (1, 1),
# imshp = (bsize, op.imshp[1], op.imshp[2]) op.border_mode)
# kshp = outshp ## FIXME
# elif op.border_mode == 'full':
# (img, filters) = (topgrad, imag) if op.border_mode == 'valid':
# kshp_logical = None (img, filters) = (img, topgrad)
# kshp_logical_top_aligned = True kshp_logical = fulloutshp ## FIXME
# imshp_logical = (op.imshp[0], kshp_logical_top_aligned = False
# fulloutshp[0], imshp_logical = None
# fulloutshp[1]) ## FIXME (bsize, nkern) = (op.imshp[0], op.kshp[0])
# (bsize, nkern) = (op.kshp[0], op.imshp[0]) imshp = (bsize, op.imshp[1], op.imshp[2])
# imshp = (op.imshp[0], outshp[0], outshp[1]) ## FIXME kshp = outshp ## FIXME
# kshp = op.imshp[1:] ## FIXME elif op.border_mode == 'full':
# else: (img, filters) = (topgrad, imag)
# raise NotImplementedError( kshp_logical = None
# 'Only [full,valid] modes are currently supported.') kshp_logical_top_aligned = True
imshp_logical = (op.imshp[0],
# filters = filters[:, :, ::-1, ::-1] # flip them fulloutshp[0],
# dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid', fulloutshp[1]) ## FIXME
# unroll_batch=None, unroll_kern=None, unroll_patch=None, (bsize, nkern) = (op.kshp[0], op.imshp[0])
# imshp_logical=imshp_logical, imshp = (op.imshp[0], outshp[0], outshp[1]) ## FIXME
# kshp_logical=kshp_logical, kshp = op.imshp[1:] ## FIXME
# kshp_logical_top_aligned=kshp_logical_top_aligned, else:
# direction_hint='bprop weights') raise NotImplementedError(
# return [dw(img, filters)] 'Only [full,valid] modes are currently supported.')
filters = filters[:, :, ::-1, ::-1] # flip them
# @local_optimizer([AbstractConv2d_gradInputs]) dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
# def local_conv2d_gradinputs_cpu(node): unroll_batch=None, unroll_kern=None, unroll_patch=None,
# if not isinstance(node.op, AbstractConv2d_gradInputs) or not node.on_gpu: imshp_logical=imshp_logical,
# return kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned,
# # ####### Determine gradient on inputs ######## direction_hint='bprop weights')
# # mode = 'valid' return [dw(img, filters)]
# # if not self.out_mode == 'full': register_specialize_device(local_conv2d_gradweight_cpu)
# # mode = 'full'
# # filters = kerns.dimshuffle((1, 0, 2, 3))
# # filters = filters[:, :, ::-1, ::-1] @local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_cpu(node):
# # nkern = self.imshp[0]
# # imshp = (self.nkern, self.outshp[0], self.outshp[1]) if len(node.inputs) == 3:
# # imshp_logical = (self.nkern, self.fulloutshp[0], kern, topgrad, shape = node.inputs
# # self.fulloutshp[1]) else:
kern, topgrad = node.inputs
# # if 0: # hard-code c generation parameters shape = None
# # din = ConvOp(imshp, self.kshp, nkern, self.bsize, if isinstance(kern.type, CudaNdarrayType) or \
# # 1, 1, output_mode=mode, isinstance(topgrad.type, CudaNdarrayType):
# # unroll_batch=un_b, unroll_kern=un_k, return None
# # unroll_patch=un_p,
# # imshp_logical=imshp_logical, ####### Determine gradient on inputs ########
# # kshp_logical=None, mode = 'valid'
# # version=-1, # we we change the mode, we don't forward the version. if not self.out_mode == 'full':
# # direction_hint='bprop inputs', mode = 'full'
# # verbose=self.verbose) filters = kern.dimshuffle((1, 0, 2, 3))
# # else: # let __init__ figure out the unrolling / patch sizes filters = filters[:, :, ::-1, ::-1]
# # din = ConvOp(imshp, self.kshp, nkern, self.bsize,
# # 1, 1, output_mode=mode, nkern = self.imshp[0]
# # unroll_batch=None, unroll_kern=None, imshp = (self.nkern, self.outshp[0], self.outshp[1])
# # unroll_patch=None, imshp_logical = (self.nkern, self.fulloutshp[0],
# # imshp_logical=imshp_logical, self.fulloutshp[1])
# # kshp_logical=None,
# # version=-1, # we we change the mode, we don't forward the version. if 0: # hard-code c generation parameters
# # direction_hint='bprop inputs', din = ConvOp(imshp, self.kshp, nkern, self.bsize,
# # verbose=self.verbose) 1, 1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k,
# # din = din(gz, filters) unroll_patch=un_p,
imshp_logical=imshp_logical,
# # assert all(o is None or o == i kshp_logical=None,
# # for o, i in zip(din.owner.op.outshp, self.imshp[1:])) version=-1, # we we change the mode, we don't forward the version.
direction_hint='bprop inputs',
# # # din and dw should have the same broadcasting pattern as the verbose=self.verbose)
# # # parameters they are the gradient of (resp. inputs and kerns). else: # let __init__ figure out the unrolling / patch sizes
# # din = patternbroadcast(din, inputs.broadcastable) din = ConvOp(imshp, self.kshp, nkern, self.bsize,
# # dw = patternbroadcast(dw, kerns.broadcastable) 1, 1, output_mode=mode,
# # return [din, dw] unroll_batch=None, unroll_kern=None,
unroll_patch=None,
imshp_logical=imshp_logical,
kshp_logical=None,
version=-1, # we we change the mode, we don't forward the version.
direction_hint='bprop inputs',
verbose=self.verbose)
din = din(gz, filters)
assert all(o is None or o == i
for o, i in zip(din.owner.op.outshp, self.imshp[1:]))
# din and dw should have the same broadcasting pattern as the
# parameters they are the gradient of (resp. inputs and kerns).
din = patternbroadcast(din, inputs.broadcastable)
dw = patternbroadcast(dw, kerns.broadcastable)
return [din, dw]
register_specialize_device(local_conv2d_gradinputs_cpu)
...@@ -11,14 +11,16 @@ import theano.tensor.nnet.abstract_conv2d as conv ...@@ -11,14 +11,16 @@ import theano.tensor.nnet.abstract_conv2d as conv
from theano.sandbox.cuda import float32_shared_constructor as shared from theano.sandbox.cuda import float32_shared_constructor as shared
from theano.sandbox.cuda.tests.test_conv_cuda_ndarray import py_conv from theano.sandbox.cuda.tests.test_conv_cuda_ndarray import py_conv
from theano.sandbox.cuda.dnn import dnn_available
if theano.config.mode == 'FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
else: else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
from theano.sandbox.cuda.dnn import dnn_available
class TestConv2d(unittest.TestCase): class TestConv2d(unittest.TestCase):
...@@ -111,21 +113,16 @@ class TestConv2d(unittest.TestCase): ...@@ -111,21 +113,16 @@ class TestConv2d(unittest.TestCase):
def test_valid(self): def test_valid(self):
mode = mode_with_gpu mode = mode_with_gpu
# if dnn_available(): if dnn_available():
# self.run_conv(inputs_shape=(16, 1, 2, 2), self.run_conv(inputs_shape=(16, 1, 2, 2),
# filters_shape=(10, 1, 2, 2), filters_shape=(10, 1, 2, 2),
# verify_grad=False) verify_grad=False, mode=mode)
# # self.run_conv(inputs_shape=(16, 1, 8, 8), self.run_gradweight(inputs_shape=(16, 1, 2, 2),
# # filters_shape=(10, 1, 2, 2), filters_shape=(10, 1, 2, 2),
# # subsample=(2, 2), verify_grad=False, mode=mode)
# # verify_grad=False) self.run_gradinput(inputs_shape=(1, 1, 2, 2),
# self.run_conv(inputs_shape=(16, 1, 2, 2), filters_shape=(10, 1, 2, 2),
# filters_shape=(10, 1, 2, 2), verify_grad=False, mode=mode)
# verify_grad=True)
# # self.run_conv(inputs_shape=(16, 1, 8, 8),
# # filters_shape=(10, 1, 2, 2),
# # subsample=(2, 2),
# # verify_grad=True)
mode = mode.excluding('cudnn') mode = mode.excluding('cudnn')
self.run_conv(inputs_shape=(16, 1, 2, 2), self.run_conv(inputs_shape=(16, 1, 2, 2),
...@@ -138,6 +135,17 @@ class TestConv2d(unittest.TestCase): ...@@ -138,6 +135,17 @@ class TestConv2d(unittest.TestCase):
filters_shape=(10, 1, 2, 2), filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode) verify_grad=False, mode=mode)
mode = mode_without_gpu
self.run_conv(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode)
self.run_gradweight(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode)
self.run_gradinput(inputs_shape=(1, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode)
# self.run_conv(inputs_shape=(16, 1, 8, 8), # self.run_conv(inputs_shape=(16, 1, 8, 8),
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论