提交 7c07a3ce authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5950 from lamblin/fix_5730

Test for fix in libgpuarray, plus fix in batched_dot opt
......@@ -1208,9 +1208,31 @@ def local_gpua_gemmbatch(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float32', 'float64']:
return
a, b = inputs
c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
a, b, np.asarray(0.0, dtype=a.dtype))
# Since GpuGemmBatch only supports 3D inputs and output,
# we need to add broadcastable dims to the inputs, and drop
# them from outputs
output_dims = [0, 1, 2]
if a.ndim == 2:
a = GpuDimShuffle(a.broadcastable, (0, 1, 'x'))(a)
del output_dims[1]
if b.ndim == 2:
b = GpuDimShuffle(b.broadcastable, (0, 'x', 1))(b)
del output_dims[-1]
# In case of mismatched dtypes, we also have to upcast
out_dtype = outputs[0].dtype
if a.dtype != out_dtype or b.dtype != out_dtype:
gpu_cast_op = GpuElemwise(Cast(Scalar(out_dtype)))
if a.dtype != out_dtype:
a = gpu_cast_op(a)
if b.dtype != out_dtype:
b = gpu_cast_op(b)
c = tensor.AllocEmpty(out_dtype)(a.shape[0], a.shape[1], b.shape[2])
out = gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=out_dtype),
a, b, np.asarray(0.0, dtype=out_dtype))
if len(output_dims) != 3:
out = GpuDimShuffle(out.broadcastable, output_dims)(out)
return out
@register_opt()
......
......@@ -5,6 +5,7 @@ import itertools
import numpy as np
import theano
from theano import config
from theano import tensor
from theano.tests import unittest_tools as utt
from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot
......@@ -13,7 +14,6 @@ from theano.tensor.tests.test_blas import TestGer, BaseGemv
from .. import gpuarray_shared_constructor
from .config import mode_with_gpu, test_ctx_name
from .test_basic_ops import makeTester, rand
from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace, gpugemm_no_inplace,
gpugemmbatch_no_inplace,
......@@ -135,6 +135,18 @@ GpuGemmBatchTester = makeTester(
)
class TestGpuGemmBatchStrided(TestCase):
    def test0(self):
        """Regression test for https://github.com/Theano/Theano/issues/5730.

        Feeding batched_dot a strided operand built via slicing plus
        ``np.newaxis`` used to break the GPU GemmBatch path; successfully
        compiling and running the function is the whole check.
        """
        lhs = tensor.tensor3()
        rhs = tensor.tensor3()
        # rhs[:, 0, :, np.newaxis] produces a strided (batch, k, 1) operand.
        out = tensor.batched_dot(lhs, rhs[:, 0, :, np.newaxis])
        fn = theano.function([lhs, rhs], out, mode=mode_with_gpu)
        lhs_val = np.arange(32 * 19 * 600,
                            dtype=config.floatX).reshape((32, 19, 600))
        rhs_val = np.arange(7 * 32 * 600,
                            dtype=config.floatX).reshape((32, 7, 600))
        fn(lhs_val, rhs_val)
class TestGpuSger(TestGer):
def setUp(self):
self.mode = mode_with_gpu
......
......@@ -656,3 +656,27 @@ def test_local_gpua_advanced_incsubtensor():
w = tensor.set_subtensor(w[tensor.eq(y, 1.0).nonzero()], 100)
w = tensor.set_subtensor(w[tensor.eq(y, -1.0).nonzero()], 0)
theano.function([target], w)
def test_batched_dot_lifter():
    """Exercise the GPU lifter for the CPU batched_dot Op.

    The CPU Op accepts 2D as well as 3D inputs, and mixed dtypes; compile
    and run each rank/dtype combination so the lifter must insert the
    appropriate dimshuffles and casts.
    """
    rng = np.random.RandomState(utt.fetch_seed())

    def rand_floatX(*shape):
        # Random data in the configured default float dtype.
        return rng.rand(*shape).astype(theano.config.floatX)

    # NOTE: the rng.rand call order below matches the original exactly,
    # so the generated data is reproducible under the same seed.
    cases = [
        (rand_floatX(3, 5, 7), rand_floatX(3, 7)),   # 3D x 2D
        (rand_floatX(3, 5), rand_floatX(3, 5, 7)),   # 2D x 3D
        (rand_floatX(3, 5), rand_floatX(3, 5)),      # 2D x 2D
        # Mixed dtypes force the lifter to insert casts.
        (rng.rand(3, 5, 7).astype('float32'), rand_floatX(3, 7, 9)),
        (rng.rand(3, 5, 7).astype('float64'), rand_floatX(3, 7, 9)),
    ]
    for lhs_val, rhs_val in cases:
        # Mark as broadcastable exactly the size-1 axes of the value.
        lhs = tensor.TensorType(
            broadcastable=[dim == 1 for dim in lhs_val.shape],
            dtype=lhs_val.dtype)('x')
        rhs = tensor.TensorType(
            broadcastable=[dim == 1 for dim in rhs_val.shape],
            dtype=rhs_val.dtype)('y')
        fn = theano.function([lhs, rhs], tensor.batched_dot(lhs, rhs),
                             mode=mode_with_gpu)
        fn(lhs_val, rhs_val)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论