提交 ad6ff6e2 作者: Frederic Bastien

Small fix to the optimization that moves gemv to the GPU, and added tests for those optimizations.

上级 009251df
...@@ -175,18 +175,21 @@ def local_gpu_dot_to_dot22(node): ...@@ -175,18 +175,21 @@ def local_gpu_dot_to_dot22(node):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and host_input.owner.op == tensor.basic.dot: if host_input.owner and host_input.owner.op == tensor.basic.dot:
x, y = host_input.owner.inputs x, y = host_input.owner.inputs
# case one vector X matrix # case one: vector X matrix
if _is_real_vector(x) and _is_real_matrix(y): if _is_real_vector(x) and _is_real_matrix(y):
new_op = GpuDimShuffle((False,), ['x',0]) new_op = GpuDimShuffle((False,), ['x',0])
shape_out = y.shape[0],dimshuffle(['x']) shape_out = y.shape[0].dimshuffle(['x'])
gpu_x = new_op(gpu_from_host(x)) gpu_x = new_op(gpu_from_host(x))
gpu_y = gpu_from_host(y) gpu_y = gpu_from_host(y)
# case two matrix X vector # case two: matrix X vector
elif _is_real_matrix(x) and _is_real_vector(y): elif _is_real_matrix(x) and _is_real_vector(y):
new_op = GpuDimShuffle((False,), [0,'x']) new_op = GpuDimShuffle((False,), [0,'x'])
shape_out = x.shape[1].dimshuffle(['x']) shape_out = x.shape[1].dimshuffle(['x'])
gpu_x = gpu_from_host(x) gpu_x = gpu_from_host(x)
gpu_y = new_op(gpu_from_host(y)) gpu_y = new_op(gpu_from_host(y))
else:
return False
return [GpuReshape(1)(gpu_dot22(gpu_x, gpu_y), shape_out)] return [GpuReshape(1)(gpu_dot22(gpu_x, gpu_y), shape_out)]
if node.op == tensor.basic.dot: if node.op == tensor.basic.dot:
if numpy.any([(i.owner and i.owner.op == host_from_gpu) for i in node.inputs]): if numpy.any([(i.owner and i.owner.op == host_from_gpu) for i in node.inputs]):
...@@ -202,6 +205,9 @@ def local_gpu_dot_to_dot22(node): ...@@ -202,6 +205,9 @@ def local_gpu_dot_to_dot22(node):
shape_out = x.shape[1].dimshuffle(['x']) shape_out = x.shape[1].dimshuffle(['x'])
gpu_x = gpu_from_host(x) gpu_x = gpu_from_host(x)
gpu_y = new_op(gpu_from_host(y)) gpu_y = new_op(gpu_from_host(y))
else:
return False
return [host_from_gpu(GpuReshape(1)(gpu_dot22(gpu_x, gpu_y), return [host_from_gpu(GpuReshape(1)(gpu_dot22(gpu_x, gpu_y),
shape_out))] shape_out))]
return False return False
......
...@@ -40,11 +40,18 @@ def test_dot_vm(): ...@@ -40,11 +40,18 @@ def test_dot_vm():
dtype='float32')) dtype='float32'))
no_gpu_f = theano.function([], theano.dot(v,m), mode = mode_without_gpu) no_gpu_f = theano.function([], theano.dot(v,m), mode = mode_without_gpu)
gpu_f = theano.function([], theano.dot(v,m), mode = mode_with_gpu) gpu_f = theano.function([], theano.dot(v,m), mode = mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu.
gpu_f2 = theano.function([], cuda.gpu_from_host(theano.dot(v,m)), mode = mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol = atol)
# Assert that the gpu version actually uses gpu # Assert that the gpu version actually uses gpu
assert sum([isinstance(node.op, blasop.GpuDot22) for node in assert sum([isinstance(node.op, blasop.GpuDot22) for node in
gpu_f.maker.env.toposort() ]) == 1 gpu_f.maker.env.toposort() ]) == 1
assert sum([isinstance(node.op, blasop.GpuDot22) for node in
gpu_f2.maker.env.toposort() ]) == 1
def test_dot_mv(): def test_dot_mv():
''' Test matrix dot vector ''' ''' Test matrix dot vector '''
...@@ -53,42 +60,61 @@ def test_dot_mv(): ...@@ -53,42 +60,61 @@ def test_dot_mv():
dtype='float32')) dtype='float32'))
no_gpu_f = theano.function([], theano.dot(m,v), mode = mode_without_gpu) no_gpu_f = theano.function([], theano.dot(m,v), mode = mode_without_gpu)
gpu_f = theano.function([], theano.dot(m,v), mode = mode_with_gpu) gpu_f = theano.function([], theano.dot(m,v), mode = mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu.
gpu_f2 = theano.function([], cuda.gpu_from_host(theano.dot(m,v)), mode = mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol = atol)
# Assert that the gpu version actually uses gpu # Assert that the gpu version actually uses gpu
assert sum([isinstance(node.op, blasop.GpuDot22) for node in assert sum([isinstance(node.op, blasop.GpuDot22) for node in
gpu_f.maker.env.toposort() ]) == 1 gpu_f.maker.env.toposort() ]) == 1
assert sum([isinstance(node.op, blasop.GpuDot22) for node in
gpu_f2.maker.env.toposort() ]) == 1
def test_gemv1(): def test_gemv1():
''' Is this the same test as test_gemv2 ? ''' ''' test vector1+dot(matrix,vector2) '''
v1 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32')) v1 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32'))
v2 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32')) v2 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32'))
m = theano.shared( numpy.array(numpy.random.rand(2,2), dtype='float32')) m = theano.shared( numpy.array(numpy.random.rand(2,2), dtype='float32'))
no_gpu_f = theano.function([], v2+theano.dot(m,v1), mode = mode_without_gpu) no_gpu_f = theano.function([], v2+theano.dot(m,v1), mode = mode_without_gpu)
gpu_f = theano.function([], v2+theano.dot(m,v1), mode = mode_with_gpu) gpu_f = theano.function([], v2+theano.dot(m,v1), mode = mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu.
gpu_f2 = theano.function([], cuda.gpu_from_host(v2+theano.dot(m,v1)), mode = mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol = atol)
# Assert that the gpu version actually uses gpu # Assert that the gpu version actually uses gpu
assert sum([isinstance(node.op, blasop.GpuGemm) for node in assert sum([isinstance(node.op, blasop.GpuGemm) for node in
gpu_f.maker.env.toposort() ]) == 1 gpu_f.maker.env.toposort() ]) == 1
assert sum([isinstance(node.op, blasop.GpuGemm) for node in
gpu_f2.maker.env.toposort() ]) == 1
def test_gemv2(): def test_gemv2():
''' Is this the same test as test_gemv1 ? ''' ''' test vector1+dot(vector2,matrix) '''
v1 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32')) v1 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32'))
v2 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32')) v2 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32'))
m = theano.shared( numpy.array(numpy.random.rand(2,2), dtype='float32')) m = theano.shared( numpy.array(numpy.random.rand(2,2), dtype='float32'))
no_gpu_f = theano.function([], v2+theano.dot(v1,m), mode = mode_without_gpu) no_gpu_f = theano.function([], v2+theano.dot(v1,m), mode = mode_without_gpu)
gpu_f = theano.function([], v2+theano.dot(v1,m), mode = mode_with_gpu) gpu_f = theano.function([], v2+theano.dot(v1,m), mode = mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu.
gpu_f2 = theano.function([], cuda.gpu_from_host(v2+theano.dot(v1,m)), mode = mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol = atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol = atol)
# Assert that the gpu version actually uses gpu # Assert that the gpu version actually uses gpu
assert sum([isinstance(node.op, blasop.GpuGemm) for node in assert sum([isinstance(node.op, blasop.GpuGemm) for node in
gpu_f.maker.env.toposort() ]) == 1 gpu_f.maker.env.toposort() ]) == 1
assert sum([isinstance(node.op, blasop.GpuGemm) for node in
gpu_f2.maker.env.toposort() ]) == 1
if __name__=='__main__': if __name__=='__main__':
test_dot_vm() test_dot_vm()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论