提交 2425cd11 作者: Pascal Lamblin

Merge pull request #3011 from nouiz/mixed

Mixed stuff.
...@@ -144,8 +144,11 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None): ...@@ -144,8 +144,11 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None):
clients = [[c[0] for c in var.clients] for var in node.outputs] clients = [[c[0] for c in var.clients] for var in node.outputs]
detailed_err_msg += ("Inputs shapes: %s" % shapes + detailed_err_msg += ("Inputs shapes: %s" % shapes +
"\nInputs strides: %s" % strides + "\nInputs strides: %s" % strides +
"\nInputs values: %s" % scalar_values + "\nInputs values: %s" % scalar_values)
"\nOutputs clients: %s\n" % clients) if hasattr(node.op, '__input_name__'):
detailed_err_msg += "\nInputs name: %s\n" % str(node.op.__input_name__)
detailed_err_msg += "\nOutputs clients: %s\n" % clients
else: else:
hints.append( hints.append(
"HINT: Use another linker then the c linker to" "HINT: Use another linker then the c linker to"
......
...@@ -96,9 +96,9 @@ def load_reduce(self): ...@@ -96,9 +96,9 @@ def load_reduce(self):
except Exception: except Exception:
pass pass
if self.is_verbose: # if self.is_verbose:
print(sys.exc_info()) # print(sys.exc_info())
print(func, args) # print(func, args)
raise raise
......
...@@ -377,6 +377,8 @@ class GpuDnnConv(DnnBase, COp): ...@@ -377,6 +377,8 @@ class GpuDnnConv(DnnBase, COp):
:param descr: the convolution descriptor :param descr: the convolution descriptor
""" """
__props__ = ('workmem', 'inplace') __props__ = ('workmem', 'inplace')
__input_name__ = ('image', 'kernel', 'output',
'descriptor', 'alpha', 'beta')
def __init__(self, workmem=None, inplace=False): def __init__(self, workmem=None, inplace=False):
""" """
...@@ -501,6 +503,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -501,6 +503,7 @@ class GpuDnnConvGradW(DnnBase, COp):
""" """
__props__ = ('inplace',) __props__ = ('inplace',)
__input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False): def __init__(self, inplace=False):
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"], COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"],
...@@ -573,6 +576,8 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -573,6 +576,8 @@ class GpuDnnConvGradI(DnnBase, COp):
""" """
__props__ = ('inplace',) __props__ = ('inplace',)
__input_name__ = ('kernel', 'grad', 'output',
'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False): def __init__(self, inplace=False):
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"], COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"],
......
...@@ -5,6 +5,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -5,6 +5,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
CudaNdarray *om, cudnnConvolutionDescriptor_t desc, CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, CudaNdarray **output) { float alpha, float beta, CudaNdarray **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
......
...@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
float alpha, float beta, CudaNdarray **input) { float alpha, float beta, CudaNdarray **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(im)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
......
...@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
float alpha, float beta, CudaNdarray **kerns) { float alpha, float beta, CudaNdarray **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
......
...@@ -184,11 +184,6 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1), ...@@ -184,11 +184,6 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]] kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]] npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]
t2 = None
t0 = time.time()
cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time()
i = cuda.CudaNdarrayType( i = cuda.CudaNdarrayType(
broadcastable=[sh == 1 for sh in npy_img.shape])() broadcastable=[sh == 1 for sh in npy_img.shape])()
k = cuda.CudaNdarrayType( k = cuda.CudaNdarrayType(
...@@ -198,21 +193,28 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1), ...@@ -198,21 +193,28 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
version=version, version=version,
verbose=verbose, verbose=verbose,
kshp=compile_kshp)(i, k) kshp=compile_kshp)(i, k)
assert [(sh == 1) is br for
sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]
f = theano.function([i, k], op, mode=theano_mode) f = theano.function([i, k], op, mode=theano_mode)
if cls is not None: if cls is not None:
assert any([isinstance(node.op, cls) assert any([isinstance(node.op, cls)
for node in f.maker.fgraph.toposort()]), "Cannot find class %r in %r" % (cls, f.maker.fgraph.toposort()) for node in f.maker.fgraph.toposort()]), "Cannot find class %r in %r" % (cls, f.maker.fgraph.toposort())
gpuval = f(img, kern)
t2 = time.time() t2 = time.time()
gpuval = f(img, kern)
t3 = time.time()
for i in range(nb_iter): for i in range(nb_iter):
gpuval2 = f(img, kern) gpuval2 = f(img, kern)
assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all() assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
gpuval = numpy.asarray(gpuval) gpuval = numpy.asarray(gpuval)
# CPU val computed after GPU val to get the GPU errors.
t0 = time.time()
cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time()
assert gpuval.shape == cpuval.shape, ("shape mismatch", gpuval.shape, cpuval.shape) assert gpuval.shape == cpuval.shape, ("shape mismatch", gpuval.shape, cpuval.shape)
assert_allclose(cpuval, gpuval, rtol=rtol, atol=atol) assert_allclose(cpuval, gpuval, rtol=rtol, atol=atol)
assert numpy.all(numpy.isfinite(gpuval)), gpuval assert numpy.all(numpy.isfinite(gpuval)), gpuval
assert [(sh == 1) is br for
sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]
if (t2 is not None): if (t2 is not None):
if mode == 'valid': if mode == 'valid':
...@@ -222,7 +224,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1), ...@@ -222,7 +224,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
kshape[3] * ishape[2] * ishape[3] * 2) kshape[3] * ishape[2] * ishape[3] * 2)
approx_fp /= 1e6 approx_fp /= 1e6
cpu_mflops = approx_fp / (t1 - t0) cpu_mflops = approx_fp / (t1 - t0)
gpu_mflops = approx_fp / (t2 - t1) gpu_mflops = approx_fp / (t3 - t2)
if verbose > 0: if verbose > 0:
print('%15s' % str(ishape), '%15s' % str(kshape), end=' ', file=sys.stdout) print('%15s' % str(ishape), '%15s' % str(kshape), end=' ', file=sys.stdout)
print('%12.5f %7.2f %7.2f %7.1f' % (approx_fp, print('%12.5f %7.2f %7.2f %7.1f' % (approx_fp,
...@@ -410,6 +412,12 @@ def test_dnn_valid(): ...@@ -410,6 +412,12 @@ def test_dnn_valid():
yield t yield t
# Checks that running a 'valid' convolution through the cuDNN-enabled mode
# raises ValueError when the image shape (1, 2, 4, 4) and the kernel shape
# (1, 1, 2, 2) disagree on dimension 1 (2 vs. 1).
# NOTE(review): presumably this exercises the "must have the same stack size"
# check added to the cuDNN C code in the same commit -- confirm.
def test_dnn_valid_err():
assert_raises(ValueError, _params_allgood, (1, 2, 4, 4), (1, 1, 2, 2),
'valid', theano_mode=theano_mode.including("cudnn"),
cls=DnnBase)
def test_default_conv(): def test_default_conv():
"""Just test that we introduce the right GPU convolution """Just test that we introduce the right GPU convolution
version. version.
......
...@@ -475,7 +475,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -475,7 +475,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
dtype='float32' dtype='float32'
) )
kern_vals = numpy.asarray( kern_vals = numpy.asarray(
numpy.random.rand(3, 4, 5, 6), numpy.random.rand(13, 14, 15, 16),
dtype='float32' dtype='float32'
) )
......
...@@ -6,6 +6,8 @@ from nose.tools import assert_raises ...@@ -6,6 +6,8 @@ from nose.tools import assert_raises
import numpy import numpy
from theano import config from theano import config
from theano.compat import PY3
from theano.misc.pkl_utils import CompatUnpickler
from theano.sandbox.cuda import cuda_available from theano.sandbox.cuda import cuda_available
if cuda_available: if cuda_available:
...@@ -30,16 +32,18 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag0(): ...@@ -30,16 +32,18 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag0():
try: try:
testfile_dir = os.path.dirname(os.path.realpath(__file__)) testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'CudaNdarray.pkl' fname = 'CudaNdarray.pkl'
if sys.version_info[0] == 3:
fname = 'CudaNdarray_py3.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp: with open(os.path.join(testfile_dir, fname), 'rb') as fp:
if PY3:
u = CompatUnpickler(fp, encoding="latin1")
else:
u = CompatUnpickler(fp)
if cuda_available: if cuda_available:
mat = cPickle.load(fp) mat = u.load()
assert isinstance(mat, CudaNdarray) assert isinstance(mat, CudaNdarray)
assert numpy.asarray(mat)[0] == -42.0 assert numpy.asarray(mat)[0] == -42.0
else: else:
assert_raises(ImportError, cPickle.load, fp) assert_raises(ImportError, u.load)
finally: finally:
config.experimental.unpickle_gpu_on_cpu = oldflag config.experimental.unpickle_gpu_on_cpu = oldflag
...@@ -52,11 +56,13 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag1(): ...@@ -52,11 +56,13 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag1():
try: try:
testfile_dir = os.path.dirname(os.path.realpath(__file__)) testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'CudaNdarray.pkl' fname = 'CudaNdarray.pkl'
if sys.version_info[0] == 3:
fname = 'CudaNdarray_py3.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp: with open(os.path.join(testfile_dir, fname), 'rb') as fp:
mat = cPickle.load(fp) if PY3:
u = CompatUnpickler(fp, encoding="latin1")
else:
u = CompatUnpickler(fp)
mat = u.load()
assert isinstance(mat, numpy.ndarray) assert isinstance(mat, numpy.ndarray)
assert mat[0] == -42.0 assert mat[0] == -42.0
......
...@@ -176,13 +176,18 @@ def default_blas_ldflags(): ...@@ -176,13 +176,18 @@ def default_blas_ldflags():
yield flags yield flags
# Now test it! # Now test it!
x = theano.tensor.fmatrix()
try: try:
theano.function([x], theano.tensor.blas._dot22(x,x), old = config.compute_test_value
profile=False) config.compute_test_value = 'off'
except Exception as e: x = theano.tensor.fmatrix()
print(e) try:
yield "" theano.function([x], theano.tensor.blas._dot22(x, x),
profile=False)
except Exception as e:
print(e)
yield ""
finally:
config.compute_test_value = old
def static_default_blas_flags(): def static_default_blas_flags():
......
...@@ -345,7 +345,7 @@ def use_c_ger(node): ...@@ -345,7 +345,7 @@ def use_c_ger(node):
@local_optimizer([CGer(False)]) @local_optimizer([CGer(False)])
def make_c_ger_destructive(node): def make_c_ger_destructive(node):
if node.op == cger_no_inplace: if isinstance(node.op, CGer) and not node.op.destructive:
return [cger_inplace(*node.inputs)] return [cger_inplace(*node.inputs)]
...@@ -800,7 +800,7 @@ def use_c_gemv(node): ...@@ -800,7 +800,7 @@ def use_c_gemv(node):
@local_optimizer([CGemv(inplace=False)]) @local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node): def make_c_gemv_destructive(node):
if node.op == cgemv_no_inplace: if isinstance(node.op, CGemv) and not node.op.inplace:
return [cgemv_inplace(*node.inputs)] return [cgemv_inplace(*node.inputs)]
......
...@@ -233,7 +233,8 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -233,7 +233,8 @@ def inplace_elemwise_optimizer_op(OP):
for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)): for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)):
op = node.op op = node.op
if not isinstance(op, OP): # gpuarray GpuElemwise inherit from Elemwise
if not type(op) == OP:
continue continue
baseline = op.inplace_pattern baseline = op.inplace_pattern
protected_inputs = [ protected_inputs = [
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论