提交 2425cd11 作者: Pascal Lamblin

Merge pull request #3011 from nouiz/mixed

Mixed stuff.
......@@ -144,8 +144,11 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None):
clients = [[c[0] for c in var.clients] for var in node.outputs]
detailed_err_msg += ("Inputs shapes: %s" % shapes +
"\nInputs strides: %s" % strides +
"\nInputs values: %s" % scalar_values +
"\nOutputs clients: %s\n" % clients)
"\nInputs values: %s" % scalar_values)
if hasattr(node.op, '__input_name__'):
detailed_err_msg += "\nInputs name: %s\n" % str(node.op.__input_name__)
detailed_err_msg += "\nOutputs clients: %s\n" % clients
else:
hints.append(
"HINT: Use another linker then the c linker to"
......
......@@ -96,9 +96,9 @@ def load_reduce(self):
except Exception:
pass
if self.is_verbose:
print(sys.exc_info())
print(func, args)
# if self.is_verbose:
# print(sys.exc_info())
# print(func, args)
raise
......
......@@ -377,6 +377,8 @@ class GpuDnnConv(DnnBase, COp):
:param descr: the convolution descriptor
"""
__props__ = ('workmem', 'inplace')
__input_name__ = ('image', 'kernel', 'output',
'descriptor', 'alpha', 'beta')
def __init__(self, workmem=None, inplace=False):
"""
......@@ -501,6 +503,7 @@ class GpuDnnConvGradW(DnnBase, COp):
"""
__props__ = ('inplace',)
__input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False):
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"],
......@@ -573,6 +576,8 @@ class GpuDnnConvGradI(DnnBase, COp):
"""
__props__ = ('inplace',)
__input_name__ = ('kernel', 'grad', 'output',
'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False):
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"],
......
......@@ -5,6 +5,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, CudaNdarray **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
return 1;
......
......@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
float alpha, float beta, CudaNdarray **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(im)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
......
......@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
float alpha, float beta, CudaNdarray **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
......
......@@ -184,11 +184,6 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]
t2 = None
t0 = time.time()
cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time()
i = cuda.CudaNdarrayType(
broadcastable=[sh == 1 for sh in npy_img.shape])()
k = cuda.CudaNdarrayType(
......@@ -198,21 +193,28 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
version=version,
verbose=verbose,
kshp=compile_kshp)(i, k)
assert [(sh == 1) is br for
sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]
f = theano.function([i, k], op, mode=theano_mode)
if cls is not None:
assert any([isinstance(node.op, cls)
for node in f.maker.fgraph.toposort()]), "Cannot find class %r in %r" % (cls, f.maker.fgraph.toposort())
gpuval = f(img, kern)
t2 = time.time()
gpuval = f(img, kern)
t3 = time.time()
for i in range(nb_iter):
gpuval2 = f(img, kern)
assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
gpuval = numpy.asarray(gpuval)
# CPU val computed after GPU val to get the GPU errors.
t0 = time.time()
cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time()
assert gpuval.shape == cpuval.shape, ("shape mismatch", gpuval.shape, cpuval.shape)
assert_allclose(cpuval, gpuval, rtol=rtol, atol=atol)
assert numpy.all(numpy.isfinite(gpuval)), gpuval
assert [(sh == 1) is br for
sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]
if (t2 is not None):
if mode == 'valid':
......@@ -222,7 +224,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
kshape[3] * ishape[2] * ishape[3] * 2)
approx_fp /= 1e6
cpu_mflops = approx_fp / (t1 - t0)
gpu_mflops = approx_fp / (t2 - t1)
gpu_mflops = approx_fp / (t3 - t2)
if verbose > 0:
print('%15s' % str(ishape), '%15s' % str(kshape), end=' ', file=sys.stdout)
print('%12.5f %7.2f %7.2f %7.1f' % (approx_fp,
......@@ -410,6 +412,12 @@ def test_dnn_valid():
yield t
def test_dnn_valid_err():
    """A stack-size mismatch between image and kernel must raise ValueError.

    Uses shapes whose channel dimensions differ (2 vs. 1) so the cuDNN
    convolution's input-validation path is exercised.
    """
    bad_img_shape = (1, 2, 4, 4)
    bad_kern_shape = (1, 1, 2, 2)
    assert_raises(ValueError, _params_allgood,
                  bad_img_shape, bad_kern_shape, 'valid',
                  theano_mode=theano_mode.including("cudnn"),
                  cls=DnnBase)
def test_default_conv():
"""Just test that we introduce the right GPU convolution
version.
......
......@@ -475,7 +475,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(3, 4, 5, 6),
numpy.random.rand(13, 14, 15, 16),
dtype='float32'
)
......
......@@ -6,6 +6,8 @@ from nose.tools import assert_raises
import numpy
from theano import config
from theano.compat import PY3
from theano.misc.pkl_utils import CompatUnpickler
from theano.sandbox.cuda import cuda_available
if cuda_available:
......@@ -30,16 +32,18 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag0():
try:
testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'CudaNdarray.pkl'
if sys.version_info[0] == 3:
fname = 'CudaNdarray_py3.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp:
if PY3:
u = CompatUnpickler(fp, encoding="latin1")
else:
u = CompatUnpickler(fp)
if cuda_available:
mat = cPickle.load(fp)
mat = u.load()
assert isinstance(mat, CudaNdarray)
assert numpy.asarray(mat)[0] == -42.0
else:
assert_raises(ImportError, cPickle.load, fp)
assert_raises(ImportError, u.load)
finally:
config.experimental.unpickle_gpu_on_cpu = oldflag
......@@ -52,11 +56,13 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag1():
try:
testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'CudaNdarray.pkl'
if sys.version_info[0] == 3:
fname = 'CudaNdarray_py3.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp:
mat = cPickle.load(fp)
if PY3:
u = CompatUnpickler(fp, encoding="latin1")
else:
u = CompatUnpickler(fp)
mat = u.load()
assert isinstance(mat, numpy.ndarray)
assert mat[0] == -42.0
......
......@@ -176,13 +176,18 @@ def default_blas_ldflags():
yield flags
# Now test it!
x = theano.tensor.fmatrix()
try:
theano.function([x], theano.tensor.blas._dot22(x,x),
profile=False)
except Exception as e:
print(e)
yield ""
old = config.compute_test_value
config.compute_test_value = 'off'
x = theano.tensor.fmatrix()
try:
theano.function([x], theano.tensor.blas._dot22(x, x),
profile=False)
except Exception as e:
print(e)
yield ""
finally:
config.compute_test_value = old
def static_default_blas_flags():
......
......@@ -345,7 +345,7 @@ def use_c_ger(node):
@local_optimizer([CGer(False)])
def make_c_ger_destructive(node):
    """Graph optimizer: replace a non-destructive CGer with its in-place form.

    The diff residue left the pre-change test (``node.op == cger_no_inplace``)
    stacked above its replacement, which is syntactically invalid; only the
    post-change ``isinstance`` test is kept.  Matching with ``isinstance``
    (rather than equality against a single instance) also catches CGer
    subclasses, as long as they are not already destructive.

    Returns a one-element list with the in-place replacement node, or None
    (implicitly) when the node does not match.
    """
    if isinstance(node.op, CGer) and not node.op.destructive:
        return [cger_inplace(*node.inputs)]
......@@ -800,7 +800,7 @@ def use_c_gemv(node):
@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node):
    """Graph optimizer: replace a non-inplace CGemv with its in-place form.

    The diff residue left the pre-change test (``node.op == cgemv_no_inplace``)
    stacked above its replacement, which is syntactically invalid; only the
    post-change ``isinstance`` test is kept.  Matching with ``isinstance``
    (rather than equality against a single instance) also catches CGemv
    subclasses, as long as they are not already in-place.

    Returns a one-element list with the in-place replacement node, or None
    (implicitly) when the node does not match.
    """
    if isinstance(node.op, CGemv) and not node.op.inplace:
        return [cgemv_inplace(*node.inputs)]
......
......@@ -233,7 +233,8 @@ def inplace_elemwise_optimizer_op(OP):
for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)):
op = node.op
if not isinstance(op, OP):
# gpuarray GpuElemwise inherit from Elemwise
if not type(op) == OP:
continue
baseline = op.inplace_pattern
protected_inputs = [
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论