Commit 75cd8839, authored by abergeron

Merge pull request #2483 from nouiz/mixed

Mixed
...@@ -4,6 +4,7 @@ and Ops building class (:class:`FromFunctionOp`) and decorator ...@@ -4,6 +4,7 @@ and Ops building class (:class:`FromFunctionOp`) and decorator
""" """
import copy import copy
import cPickle
import warnings import warnings
import theano import theano
...@@ -387,17 +388,34 @@ class Shape_i(gof.Op): ...@@ -387,17 +388,34 @@ class Shape_i(gof.Op):
return [None] return [None]
def shape_i(var, i): def shape_i(var, i, fgraph=None):
"""This is useful in optimization that need to get the shape. This """Equivalent of var.shape[i], but apply if possible the shape
feature optimization
This is useful in optimization that need to get the shape. This
remove the need of the following shape_feature optimization that remove the need of the following shape_feature optimization that
convert it. So this speed up optimization and remove Equilibrium convert it. So this speed up optimization and remove Equilibrium
max iteration problems. max iteration problems.
:param var: the variable we want to take the shape of
:param i: The shape dimensions we want
:param fgraph: optional. If var.fgraph do not exist, the fgraph that
have the shape_feature to introduce var in to get the optimized shape.
""" """
if (hasattr(var, 'fgraph') and if fgraph is None and hasattr(var, 'fgraph'):
hasattr(node.outputs[0].fgraph, 'shape_feature')): fgraph = var.fgraph
return node.outputs[0].fgraph.shape_feature.shape_of[var][i] if fgraph and hasattr(fgraph, 'shape_feature'):
return Shape_i(i)(var) if var not in fgraph.shape_feature.shape_of:
# If var isn't in the ShapeFeature, add it.
fgraph.shape_feature.on_import(fgraph, var.owner,
'gof.ops.shape_i')
return fgraph.shape_feature.shape_of[var][i]
# If we are not able to use the shape feature, we should not put
# Shape_i in the graph. Otherwise, the shape feature optimization
# won't get applied.
return var.shape[i]
def register_shape_i_c_code(typ, code, check_input, version=()): def register_shape_i_c_code(typ, code, check_input, version=()):
...@@ -474,17 +492,20 @@ class FromFunctionOp(gof.Op): ...@@ -474,17 +492,20 @@ class FromFunctionOp(gof.Op):
try: try:
obj = load_back(mod, name) obj = load_back(mod, name)
except (ImportError, KeyError, AttributeError): except (ImportError, KeyError, AttributeError):
raise PicklingError("Can't pickle as_op(), not found as %s.%s" % raise cPickle.PicklingError(
(mod, name)) "Can't pickle as_op(), not found as %s.%s" %
(mod, name))
else: else:
if obj is not self: if obj is not self:
raise PicklingError("Can't pickle as_op(), not the object " raise cPickle.PicklingError(
"at %s.%s" % (mod, name)) "Can't pickle as_op(), not the object "
"at %s.%s" % (mod, name))
return load_back, (mod, name) return load_back, (mod, name)
def _infer_shape(self, node, input_shapes):
    # Delegate to the wrapped infer_shape callable stored (name-mangled)
    # on the instance; presumably supplied via as_op(..., infer_shape=...)
    # — confirm against the class constructor, which is not visible here.
    return self.__infer_shape(node, input_shapes)
def as_op(itypes, otypes, infer_shape=None): def as_op(itypes, otypes, infer_shape=None):
""" """
Decorator that converts a function into a basic Theano op that Decorator that converts a function into a basic Theano op that
......
...@@ -146,6 +146,14 @@ cudnnConvolutionBackwardData_v2( ...@@ -146,6 +146,14 @@ cudnnConvolutionBackwardData_v2(
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2 #define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
//Needed for R2 rc2
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE
#else
// r2 rc1 and rc2 do not have the same macro defined
// I haven't checked whether this is the right combination, but since we do not wrap the padding interface, it is fine for now.
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING ((cudnnPoolingMode_t)1)
#endif #endif
#endif #endif
...@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -576,6 +576,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
work with this Op. work with this Op.
""" """
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
...@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -602,13 +603,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape = theano.tensor.stack(shape_i(img, 0), shape_i(kerns, 1), shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape_i(img, 2) + shape_i(kerns, 2) - 1, shape = theano.tensor.stack(shape_i(img, 0, fgraph),
shape_i(img, 3) + shape_i(kerns, 3)- 1) shape_i(kerns, 1, fgraph),
shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(shape, kerns.shape) conv_mode=conv_mode)(shape, kerns.shape)
return GpuDnnConvGradI()(kerns, img, desc, shape[2], shape[3]) return GpuDnnConvGradI()(kerns, img, desc, shape2, shape3)
# Standard case: We use GpuDnnConv with suitable padding. # Standard case: We use GpuDnnConv with suitable padding.
img = gpu_contiguous(img) img = gpu_contiguous(img)
...@@ -662,7 +664,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -662,7 +664,7 @@ class GpuDnnPoolDesc(GpuOp):
if self.mode == 'max': if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX' mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average": elif self.mode == "average":
mode_flag = 'CUDNN_POOLING_AVERAGE' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
else: else:
raise NotImplementedError("Unsupported pooling model.") raise NotImplementedError("Unsupported pooling model.")
......
...@@ -964,7 +964,7 @@ class ConvOp(OpenMPOp): ...@@ -964,7 +964,7 @@ class ConvOp(OpenMPOp):
return ['<numpy/noprefix.h>', '<iostream>', '<sstream>'] return ['<numpy/noprefix.h>', '<iostream>', '<sstream>']
def c_code_cache_version(self):
    # Cache key for the generated C code.  Bumped to 13 in this change
    # (a stack-size check was added to the C code elsewhere in this
    # commit); including the BLAS header version invalidates cached
    # modules when the BLAS headers change.
    return (13, self.openmp, blas.blas_header_version())
def c_support_code(self): def c_support_code(self):
return """ return """
...@@ -1194,7 +1194,15 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){ ...@@ -1194,7 +1194,15 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(%(self_dy)s)); dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(%(self_dy)s));
} }
""" % d """ % d
d["assert_size"] = "" d["assert_size"] = """
// Check the stack size of the filter and images are equals
if(kerns_dim[1] != img2d_dim[1]){
PyErr_Format(PyExc_ValueError,
"the filter stack size (%%ld) and image stack size (%%ld) differ",
(long)kerns_dim[1], (long)img2d_dim[1]);
%(fail)s;
}
""" % sub
if self.kshp_logical_top_aligned: if self.kshp_logical_top_aligned:
d["self_kshp_logical_offset_r"] = 0 d["self_kshp_logical_offset_r"] = 0
......
Markdown supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Sign in or register to post a comment