提交 27c8da22 authored 作者: Chiheb Trabelsi's avatar Chiheb Trabelsi

blas.py has been modified in order to respect the flake8 style.

blas.py does not contain long lines.
上级 ed7759fb
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import copy
import os import os
import logging import logging
_logger = logging.getLogger(__name__)
from six import integer_types from six import integer_types
from six.moves import StringIO, reduce from six.moves import StringIO, reduce
import theano import theano
from theano import Apply from theano import Apply
from theano import tensor from theano import tensor
...@@ -15,6 +11,7 @@ from theano.sandbox.cuda import GpuOp ...@@ -15,6 +11,7 @@ from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous) gpu_contiguous)
from theano.tensor import as_tensor_variable from theano.tensor import as_tensor_variable
_logger = logging.getLogger(__name__)
class GpuBatchedDot(GpuOp): class GpuBatchedDot(GpuOp):
...@@ -29,11 +26,11 @@ class GpuBatchedDot(GpuOp): ...@@ -29,11 +26,11 @@ class GpuBatchedDot(GpuOp):
assert inp1.dtype == "float32" assert inp1.dtype == "float32"
assert inp2.dtype == "float32" assert inp2.dtype == "float32"
assert inp1.ndim == 3 # (batch, a, b) assert inp1.ndim == 3 # (batch, a, b)
assert inp2.ndim == 3 assert inp2.ndim == 3
return theano.Apply(self, [inp1, inp2], return theano.Apply(self, [inp1, inp2],
[self.output_type(inp1, inp2)()]) [self.output_type(inp1, inp2)()])
def output_type(self, inp1, inp2): def output_type(self, inp1, inp2):
return CudaNdarrayType( return CudaNdarrayType(
...@@ -183,8 +180,7 @@ class GpuBatchedDot(GpuOp): ...@@ -183,8 +180,7 @@ class GpuBatchedDot(GpuOp):
} }
} else { } else {
// copy inputs if not contiguous // copy inputs if not contiguous
""" + """ + ("\n".join("""
("\n".join("""
if (( CudaNdarray_HOST_DIMS(%(var)s)[0] > 1 && CudaNdarray_HOST_STRIDES(%(var)s)[0] != 1 if (( CudaNdarray_HOST_DIMS(%(var)s)[0] > 1 && CudaNdarray_HOST_STRIDES(%(var)s)[0] != 1
&& CudaNdarray_HOST_DIMS(%(var)s)[1] > 1 && CudaNdarray_HOST_STRIDES(%(var)s)[1] != 1 && CudaNdarray_HOST_DIMS(%(var)s)[1] > 1 && CudaNdarray_HOST_STRIDES(%(var)s)[1] != 1
&& CudaNdarray_HOST_DIMS(%(var)s)[2] > 1 && CudaNdarray_HOST_STRIDES(%(var)s)[2] != 1) && CudaNdarray_HOST_DIMS(%(var)s)[2] > 1 && CudaNdarray_HOST_STRIDES(%(var)s)[2] != 1)
...@@ -198,8 +194,7 @@ class GpuBatchedDot(GpuOp): ...@@ -198,8 +194,7 @@ class GpuBatchedDot(GpuOp):
Py_XDECREF(%(var)s); Py_XDECREF(%(var)s);
%(var)s = _copy; %(var)s = _copy;
} }
""" % dict(var=var, fail=fail) for var in (bx, by))) """ % dict(var=var, fail=fail) for var in (bx, by))) + """
+ """
// fail if the output is not contiguous; we can't copy it because we // fail if the output is not contiguous; we can't copy it because we
// need to write to the original memory // need to write to the original memory
...@@ -363,7 +358,7 @@ class GpuDot22(GpuOp): ...@@ -363,7 +358,7 @@ class GpuDot22(GpuOp):
if y.type.ndim != 2: if y.type.ndim != 2:
raise TypeError(y) raise TypeError(y)
otype = CudaNdarrayType( otype = CudaNdarrayType(
(x.type.broadcastable[0], y.type.broadcastable[1])) (x.type.broadcastable[0], y.type.broadcastable[1]))
return Apply(self, [x, y], [otype()]) return Apply(self, [x, y], [otype()])
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -451,7 +446,7 @@ class GpuDot22Scalar(GpuOp): ...@@ -451,7 +446,7 @@ class GpuDot22Scalar(GpuOp):
if not tensor.blas._as_scalar(a): if not tensor.blas._as_scalar(a):
raise TypeError(a) raise TypeError(a)
otype = CudaNdarrayType( otype = CudaNdarrayType(
(x.type.broadcastable[0], y.type.broadcastable[1])) (x.type.broadcastable[0], y.type.broadcastable[1]))
return Apply(self, [x, y, a], [otype()]) return Apply(self, [x, y, a], [otype()])
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -537,8 +532,8 @@ class GpuGemm(GpuOp): ...@@ -537,8 +532,8 @@ class GpuGemm(GpuOp):
return 'GpuGemm{no_inplace}' return 'GpuGemm{no_inplace}'
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other)\ return (type(self) == type(other) and
and self.inplace == other.inplace) self.inplace == other.inplace)
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ hash(self.inplace) return hash(type(self)) ^ hash(self.inplace)
...@@ -562,7 +557,7 @@ class GpuGemm(GpuOp): ...@@ -562,7 +557,7 @@ class GpuGemm(GpuOp):
return (4,) return (4,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
#z_out = alpha * dot(x,y) + beta * z_in # z_out = alpha * dot(x,y) + beta * z_in
# inplace version, set set z_out = z_in # inplace version, set set z_out = z_in
# not inplace version, we copy z_in to z_out. # not inplace version, we copy z_in to z_out.
z_in, a, x, y, b = inputs z_in, a, x, y, b = inputs
...@@ -657,8 +652,8 @@ class GpuGemv(GpuOp): ...@@ -657,8 +652,8 @@ class GpuGemv(GpuOp):
return 'GpuGemv{no_inplace}' return 'GpuGemv{no_inplace}'
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other)\ return (type(self) == type(other) and
and self.inplace == other.inplace) self.inplace == other.inplace)
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ hash(self.inplace) return hash(type(self)) ^ hash(self.inplace)
...@@ -682,7 +677,7 @@ class GpuGemv(GpuOp): ...@@ -682,7 +677,7 @@ class GpuGemv(GpuOp):
return (3,) return (3,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
#z_out = alpha * dot(x,y) + beta * z_in # z_out = alpha * dot(x,y) + beta * z_in
# inplace version, set set z_out = z_in # inplace version, set set z_out = z_in
# not inplace version, we copy z_in to z_out. # not inplace version, we copy z_in to z_out.
z_in, a, x, y, b = inputs z_in, a, x, y, b = inputs
...@@ -757,8 +752,8 @@ class GpuGer(GpuOp): ...@@ -757,8 +752,8 @@ class GpuGer(GpuOp):
return 'GpuGer{no_inplace}' return 'GpuGer{no_inplace}'
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other)\ return (type(self) == type(other) and
and self.inplace == other.inplace) self.inplace == other.inplace)
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ hash(self.inplace) return hash(type(self)) ^ hash(self.inplace)
...@@ -782,7 +777,7 @@ class GpuGer(GpuOp): ...@@ -782,7 +777,7 @@ class GpuGer(GpuOp):
return (2,) return (2,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
#z_out = alpha * dot(x,y) + beta * z_in # z_out = alpha * dot(x,y) + beta * z_in
# inplace version, set set z_out = z_in # inplace version, set set z_out = z_in
# not inplace version, we copy z_in to z_out. # not inplace version, we copy z_in to z_out.
z_in, a, x, y = inputs z_in, a, x, y = inputs
...@@ -934,7 +929,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -934,7 +929,7 @@ class BaseGpuCorrMM(GpuOp):
# these files # these files
files = ['corr_gemm.cu'] files = ['corr_gemm.cu']
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read() codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files] for f in files]
return reduce(str.__add__, codes) return reduce(str.__add__, codes)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None): def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
...@@ -947,7 +942,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -947,7 +942,7 @@ class BaseGpuCorrMM(GpuOp):
Parameters Parameters
---------- ----------
bottom bottom
Variable name of the input images in the forward pass, Variable name of the input images in the forward pass,
or the gradient of the input images in backprop wrt. inputs or the gradient of the input images in backprop wrt. inputs
weights weights
...@@ -1001,7 +996,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1001,7 +996,7 @@ class BaseGpuCorrMM(GpuOp):
out = bottom out = bottom
else: else:
raise ValueError("direction must be one of 'forward', " raise ValueError("direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'") "'backprop weights', 'backprop inputs'")
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size. # Similarly, when pad="half", we cannot infer the weight size.
...@@ -1158,7 +1153,7 @@ class GpuCorrMM(BaseGpuCorrMM): ...@@ -1158,7 +1153,7 @@ class GpuCorrMM(BaseGpuCorrMM):
Parameters Parameters
---------- ----------
border_mode border_mode
The width of a border of implicit zeros to pad the The width of a border of implicit zeros to pad the
input with. Must be a tuple with 2 elements giving the numbers of rows input with. Must be a tuple with 2 elements giving the numbers of rows
and columns to pad on each side, or a single integer to pad the same and columns to pad on each side, or a single integer to pad the same
on all sides, or a string shortcut setting the padding at runtime: on all sides, or a string shortcut setting the padding at runtime:
...@@ -1174,7 +1169,7 @@ class GpuCorrMM(BaseGpuCorrMM): ...@@ -1174,7 +1169,7 @@ class GpuCorrMM(BaseGpuCorrMM):
but faster. but faster.
Set to `(1, 1)` to disable subsampling. Set to `(1, 1)` to disable subsampling.
pad pad
Deprecated alias for `border_mode`. Deprecated alias for `border_mode`.
Notes Notes
----- -----
...@@ -1247,8 +1242,8 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -1247,8 +1242,8 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
""" """
def __init__(self, border_mode="valid", def __init__(self, border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
pad=(0, 0)): pad=(0, 0)):
super(GpuCorrMM_gradWeights, self).__init__(border_mode, subsample, pad) super(GpuCorrMM_gradWeights, self).__init__(border_mode, subsample, pad)
def make_node(self, img, topgrad, shape=None): def make_node(self, img, topgrad, shape=None):
...@@ -1283,11 +1278,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -1283,11 +1278,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
bottom, top = inp[:2] bottom, top = inp[:2]
weights, = grads weights, = grads
weights = gpu_contiguous(weights) weights = gpu_contiguous(weights)
d_bottom = GpuCorrMM_gradInputs(self.border_mode, self.subsample)( d_bottom = GpuCorrMM_gradInputs(
weights, top, bottom.shape[-2:]) self.border_mode, self.subsample)(weights,
d_top = GpuCorrMM(self.border_mode, self.subsample)( top,
bottom, weights) bottom.shape[-2:])
d_height_width = (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () d_top = GpuCorrMM(
self.border_mode, self.subsample)(bottom, weights)
d_height_width = (
theano.gradient.DisconnectedType()(),
) * 2 if len(inp) == 4 else ()
return (d_bottom, d_top) + d_height_width return (d_bottom, d_top) + d_height_width
def connection_pattern(self, node): def connection_pattern(self, node):
...@@ -1309,8 +1308,8 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM): ...@@ -1309,8 +1308,8 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
""" """
def __init__(self, border_mode="valid", def __init__(self, border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
pad=(0, 0)): pad=(0, 0)):
super(GpuCorrMM_gradInputs, self).__init__(border_mode, subsample, pad) super(GpuCorrMM_gradInputs, self).__init__(border_mode, subsample, pad)
def make_node(self, kern, topgrad, shape=None): def make_node(self, kern, topgrad, shape=None):
...@@ -1342,11 +1341,14 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM): ...@@ -1342,11 +1341,14 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
weights, top = inp[:2] weights, top = inp[:2]
bottom, = grads bottom, = grads
bottom = gpu_contiguous(bottom) bottom = gpu_contiguous(bottom)
d_weights = GpuCorrMM_gradWeights(self.border_mode, self.subsample)( d_weights = GpuCorrMM_gradWeights(
self.border_mode, self.subsample)(
bottom, top, weights.shape[-2:]) bottom, top, weights.shape[-2:])
d_top = GpuCorrMM(self.border_mode, self.subsample)( d_top = GpuCorrMM(
bottom, weights) self.border_mode, self.subsample)(bottom, weights)
d_height_width = (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () d_height_width = (
theano.gradient.DisconnectedType()(),
) * 2 if len(inp) == 4 else ()
return (d_weights, d_top) + d_height_width return (d_weights, d_top) + d_height_width
def connection_pattern(self, node): def connection_pattern(self, node):
...@@ -1412,7 +1414,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1412,7 +1414,7 @@ class BaseGpuCorr3dMM(GpuOp):
# these files # these files
files = ['corr3d_gemm.cu'] files = ['corr3d_gemm.cu']
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read() codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files] for f in files]
return reduce(str.__add__, codes) return reduce(str.__add__, codes)
def c_code_helper(self, bottom, weights, def c_code_helper(self, bottom, weights,
...@@ -1459,7 +1461,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1459,7 +1461,7 @@ class BaseGpuCorr3dMM(GpuOp):
If self.pad == 'half', a variable giving the width of the filters If self.pad == 'half', a variable giving the width of the filters
for direction="backprop weights". for direction="backprop weights".
Ignored otherwise. Ignored otherwise.
depth depth
If self.subsample[2] != 1, a variable giving the depth If self.subsample[2] != 1, a variable giving the depth
of the filters for direction="backprop weights" or the depth of the of the filters for direction="backprop weights" or the depth of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
...@@ -1488,7 +1490,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1488,7 +1490,7 @@ class BaseGpuCorr3dMM(GpuOp):
out = bottom out = bottom
else: else:
raise ValueError("direction must be one of 'forward', " raise ValueError("direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'") "'backprop weights', 'backprop inputs'")
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size. # Similarly, when pad="half", we cannot infer the weight size.
...@@ -1755,10 +1757,16 @@ class GpuCorr3dMM(BaseGpuCorr3dMM): ...@@ -1755,10 +1757,16 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
bottom, weights = inp bottom, weights = inp
top, = grads top, = grads
top = gpu_contiguous(top) top = gpu_contiguous(top)
d_bottom = GpuCorr3dMM_gradInputs(self.border_mode, self.subsample, self.pad)( d_bottom = GpuCorr3dMM_gradInputs(self.border_mode,
weights, top, bottom.shape[-3:]) self.subsample,
d_weights = GpuCorr3dMM_gradWeights(self.border_mode, self.subsample, self.pad)( self.pad)(weights,
bottom, top, weights.shape[-3:]) top,
bottom.shape[-3:])
d_weights = GpuCorr3dMM_gradWeights(self.border_mode,
self.subsample,
self.pad)(bottom,
top,
weights.shape[-3:])
return d_bottom, d_weights return d_bottom, d_weights
...@@ -1863,11 +1871,14 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): ...@@ -1863,11 +1871,14 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
weights, top = inp[:2] weights, top = inp[:2]
bottom, = grads bottom, = grads
bottom = gpu_contiguous(bottom) bottom = gpu_contiguous(bottom)
d_weights = GpuCorr3dMM_gradWeights(self.border_mode, self.subsample, self.pad)( d_weights = GpuCorr3dMM_gradWeights(
bottom, top, weights.shape[-3:]) self.border_mode, self.subsample, self.pad)(
d_top = GpuCorr3dMM(self.border_mode, self.subsample, self.pad)( bottom, top, weights.shape[-3:])
d_top = GpuCorr3dMM(
self.border_mode, self.subsample, self.pad)(
bottom, weights) bottom, weights)
d_height_width_depth = (theano.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () d_height_width_depth = (theano.gradient.DisconnectedType()(),)\
* 3 if len(inp) == 5 else ()
return (d_weights, d_top) + d_height_width_depth return (d_weights, d_top) + d_height_width_depth
def connection_pattern(self, node): def connection_pattern(self, node):
...@@ -1938,19 +1949,19 @@ class GpuConv(GpuOp): ...@@ -1938,19 +1949,19 @@ class GpuConv(GpuOp):
raise ValueError(mode) raise ValueError(mode)
def __init__(self, border_mode, def __init__(self, border_mode,
subsample=(1, 1), subsample=(1, 1),
logical_img_hw=None, logical_img_hw=None,
logical_kern_hw=None, logical_kern_hw=None,
logical_kern_align_top=True, logical_kern_align_top=True,
version=-1, version=-1,
direction_hint=None, direction_hint=None,
verbose=0, verbose=0,
kshp=None, kshp=None,
imshp=None, imshp=None,
max_threads_dim0=None, max_threads_dim0=None,
nkern=None, nkern=None,
bsize=None, bsize=None,
fft_opt=True): fft_opt=True):
self.border_mode = border_mode self.border_mode = border_mode
if version != -1: if version != -1:
raise Exception( raise Exception(
...@@ -2107,7 +2118,7 @@ class GpuConv(GpuOp): ...@@ -2107,7 +2118,7 @@ class GpuConv(GpuOp):
# these files # these files
files = ['conv_kernel.cu', 'conv_full_kernel.cu', 'conv.cu'] files = ['conv_kernel.cu', 'conv_full_kernel.cu', 'conv.cu']
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read() codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files] for f in files]
return reduce(str.__add__, codes) return reduce(str.__add__, codes)
def c_code(self, node, nodename, inp, out_, sub): def c_code(self, node, nodename, inp, out_, sub):
...@@ -2186,7 +2197,7 @@ class GpuDownsampleFactorMax(GpuOp): ...@@ -2186,7 +2197,7 @@ class GpuDownsampleFactorMax(GpuOp):
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
# def perform(self, node, input_storage, output_storage): # def perform(self, node, input_storage, output_storage):
#raise NotImplementedError('only C is implemented') # raise NotImplementedError('only C is implemented')
def c_code_cache_version(self): def c_code_cache_version(self):
return (6) return (6)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论