提交 5b276d14 作者: notoraptor

Fixes, and tests extended.

上级 196346a0
...@@ -98,7 +98,7 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input, ...@@ -98,7 +98,7 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input,
return 1; return 1;
} }
if (rsz == 1 && cudnnGetVersion() <= 7004) { if (rsz == 1) {
/* We must reduce some dimensions which have all size 1. /* We must reduce some dimensions which have all size 1.
* cuDNN (up to 7004) does not support this case. Let's use GpuElemwise. */ * cuDNN (up to 7004) does not support this case. Let's use GpuElemwise. */
switch (params->red_op) { switch (params->red_op) {
......
...@@ -1258,9 +1258,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1258,9 +1258,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
greduce = op2( greduce = op2(
op.scalar_op, op.scalar_op,
axis=new_axis, reduce_mask=new_mask, axis=new_axis, reduce_mask=new_mask,
dtype=getattr(op, 'dtype', outputs[0].dtype), dtype=odtype,
acc_dtype=getattr(op, 'acc_dtype', None)) acc_dtype=adtype)
with inherit_stack_trace(outputs): with inherit_stack_trace(outputs):
reshaped_x = x.reshape(tensor.stack(new_in_shp)) reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name) gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name)
......
...@@ -1614,26 +1614,40 @@ def test_dnn_reduction_absmax(): ...@@ -1614,26 +1614,40 @@ def test_dnn_reduction_absmax():
def test_dnn_reduction_axis_size_one(): def test_dnn_reduction_axis_size_one():
if not dnn.dnn_available(test_ctx_name) or dnn.version(raises=False) < 6000: if not dnn.dnn_available(test_ctx_name) or dnn.version(raises=False) < 6000:
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
for dtype in ('float16', 'float32', 'float64'): for dtype in ('float16', 'float32', 'float64'):
for shape, axis in [[(1, 2, 3), 0], [(2, 1, 3), 1], [(2, 3, 1), 2]]: for shape, axis in [[(1, 2, 3), 0],
x = theano.tensor.tensor3(dtype=dtype) [(2, 1, 3), 1],
[(2, 3, 1), 2],
[(1, 5, 1), (0, 2)],
[(4, 1, 6, 1), (1, 3)]]:
x = theano.tensor.TensorType(dtype=dtype, broadcastable=[False] * len(shape))()
sum = x.sum(axis=axis) sum = x.sum(axis=axis)
sum_squares = (x**2).sum(axis=axis) sum_squares = (x**2).sum(axis=axis)
sum_abs = abs(x).sum(axis=axis) sum_abs = abs(x).sum(axis=axis)
absmax = abs(x).max(axis=axis) absmax = abs(x).max(axis=axis)
cpu_f = theano.function([x], [sum, sum_squares, sum_abs, absmax], mode=mode_without_gpu) cpu_f = theano.function([x], [sum, sum_squares, sum_abs, absmax], mode=mode_without_gpu)
f1 = theano.function([x], sum, mode=mode_with_gpu) f1 = theano.function([x], sum, mode=mode_with_gpu)
f2 = theano.function([x], sum_squares, mode=mode_with_gpu) f2 = theano.function([x], sum_squares, mode=mode_with_gpu)
f3 = theano.function([x], sum_abs, mode=mode_with_gpu) f3 = theano.function([x], sum_abs, mode=mode_with_gpu)
f4 = theano.function([x], absmax, mode=mode_with_gpu) f4 = theano.function([x], absmax, mode=mode_with_gpu)
for fn, red_op in ((f1, 'add'), (f2, 'norm2'), (f3, 'norm1'), (f4, 'absmax')): for fn, red_op in ((f1, 'add'), (f2, 'norm2'), (f3, 'norm1'), (f4, 'absmax')):
assert any(isinstance(node.op, dnn.GpuDnnReduction) and node.op.red_op == red_op assert any(isinstance(node.op, dnn.GpuDnnReduction) and node.op.red_op == red_op
for node in fn.maker.fgraph.apply_nodes) for node in fn.maker.fgraph.apply_nodes)
xval = np.random.uniform(-10, -1, size=shape).astype(dtype) xval = np.random.uniform(-10, -1, size=shape).astype(dtype)
xval_reshaped = xval.reshape(shape[:axis] + shape[(axis + 1):]) if isinstance(axis, int):
xval_reshaped = xval.reshape(shape[:axis] + shape[(axis + 1):])
else:
xval_reshaped = xval.reshape([n for i, n in enumerate(shape) if i not in axis])
test_val = abs(xval_reshaped) test_val = abs(xval_reshaped)
val_sum, val_sum_squares, val_sum_abs, val_absmax = f1(xval), f2(xval), f3(xval), f4(xval) val_sum, val_sum_squares, val_sum_abs, val_absmax = f1(xval), f2(xval), f3(xval), f4(xval)
cpu_val_sum, cpu_val_sum_squares, cpu_val_sum_abs, cpu_val_absmax = cpu_f(xval) cpu_val_sum, cpu_val_sum_squares, cpu_val_sum_abs, cpu_val_absmax = cpu_f(xval)
utt.assert_allclose(cpu_val_sum, val_sum) utt.assert_allclose(cpu_val_sum, val_sum)
utt.assert_allclose(cpu_val_sum_squares, val_sum_squares) utt.assert_allclose(cpu_val_sum_squares, val_sum_squares)
utt.assert_allclose(cpu_val_sum_abs, val_sum_abs) utt.assert_allclose(cpu_val_sum_abs, val_sum_abs)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论