Commit c930d96a, authored by Frederic

Fix the optimization `local_gpu_dot_to_dot22` to move more `dot` operations to the GPU in fast_compile mode.
Parent f8a19f5b
......@@ -350,7 +350,7 @@ def local_gpu_specifyShape_0(node):
@register_opt()
@local_optimizer([gpu_from_host]) # XXX: broken: tensor.basic.dot is not an op
@local_optimizer([GpuFromHost, tensor.basic.Dot])
def local_gpu_dot_to_dot22(node):
"""
gpu_from_host(dot) -> gpudot(gpu_from_host)
......@@ -361,6 +361,8 @@ def local_gpu_dot_to_dot22(node):
the output.
A more suitable solution would be to use the right cublas call
This is needed in fast_compile
"""
# In case the dot did an input upcast, we must check that we can
......@@ -369,17 +371,18 @@ def local_gpu_dot_to_dot22(node):
if node.outputs[0].type.dtype != 'float32':
return False
host_input = node.inputs[0]
if host_input.owner and host_input.owner.op == tensor.basic.dot:
if host_input.owner and isinstance(host_input.owner.op,
tensor.basic.Dot):
x, y = host_input.owner.inputs
# case one: vector X matrix
if _is_real_vector(x) and _is_real_matrix(y):
new_op = GpuDimShuffle((False,), ['x', 0])
new_op = GpuDimShuffle((False,), ('x', 0))
shape_out = y.shape[1].dimshuffle(['x'])
gpu_x = new_op(gpu_from_host(x))
gpu_y = gpu_from_host(y)
# case two: matrix X vector
elif _is_real_matrix(x) and _is_real_vector(y):
new_op = GpuDimShuffle((False,), [0, 'x'])
new_op = GpuDimShuffle((False,), (0, 'x'))
shape_out = x.shape[0].dimshuffle(['x'])
gpu_x = gpu_from_host(x)
gpu_y = new_op(gpu_from_host(y))
......@@ -387,20 +390,20 @@ def local_gpu_dot_to_dot22(node):
return False
return [GpuReshape(1)(gpu_dot22(gpu_x, gpu_y), shape_out)]
if node.op == tensor.basic.dot:
if isinstance(node.op, tensor.basic.Dot):
if node.outputs[0].type.dtype != 'float32':
return False
if any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]):
x, y = node.inputs
if _is_real_vector(x) and _is_real_matrix(y):
new_op = GpuDimShuffle((False,), ['x', 0])
new_op = GpuDimShuffle((False,), ('x', 0))
shape_out = y.shape[1].dimshuffle(['x'])
gpu_x = new_op(gpu_from_host(x))
gpu_y = gpu_from_host(y)
elif _is_real_matrix(x) and _is_real_vector(y):
new_op = GpuDimShuffle((False,), [0, 'x'])
new_op = GpuDimShuffle((False,), (0, 'x'))
shape_out = x.shape[0].dimshuffle(['x'])
gpu_x = gpu_from_host(x)
gpu_y = new_op(gpu_from_host(y))
......
......@@ -404,6 +404,32 @@ def test_erfinvgpu():
assert numpy.allclose(f(xv),f2(xv))
def test_local_gpu_dot_to_dot22dot():
    """Check that tensor.dot is lifted to the GPU (gpu_dot22) for both
    vector-x-matrix and matrix-x-vector products, and that the GPU result
    matches numpy.dot, including for inputs with negative strides.
    """
    def cmp(a_shp, b_shp):
        a0 = numpy.random.rand(*a_shp).astype('float32')
        a = cuda.shared_constructor(a0, 'a')
        b0 = numpy.random.rand(*b_shp).astype('float32')
        # Fixed copy-paste bug: the second shared variable was also
        # named 'a'; give it its own name 'b'.
        b = cuda.shared_constructor(b0, 'b')
        f = pfunc([], tensor.dot(a, b), mode=mode_with_gpu)
        # The local optimizer itself must accept this dot node.
        assert cuda.opt.local_gpu_dot_to_dot22.transform(
            tensor.dot(a, b).owner)
        out = f()
        assert numpy.allclose(numpy.dot(a0, b0), out)
        # Try with a matrix equal to a0, but with strides in both dims
        # (the [::-1] reversal makes the internal GPU storage strided).
        a.set_value(a0)
        a.set_value(
            a.get_value(borrow=True,
                        return_internal_type=True)[::-1],
            borrow=True)
        f()
    cmp((4,), (4, 5))
    cmp((3, 4), (4,))
class test_diag(theano.tensor.tests.test_nlinalg.test_diag):
    """Re-run the base diag test suite with GPU compilation mode and
    CUDA shared variables."""
    shared = staticmethod(cuda.shared_constructor)
    mode = mode_with_gpu
......
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment