提交 9ad7030e authored 作者: Frederic's avatar Frederic

Do the constant folding on the new back-end gpualloc.

Add other gpualloc tests. They currently fail because ger isn't implemented on the new back-end.
上级 a82957e5
...@@ -694,6 +694,36 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -694,6 +694,36 @@ class GpuReshape(HideC, tensor.Reshape):
raise ValueError("total size of new array must be unchanged") raise ValueError("total size of new array must be unchanged")
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
def do_constant_folding(self, node):
    """Decide whether this node's output may be replaced by a constant.

    Returns False whenever folding would hurt: the value would be
    deep-copied on every function call (graph output), a client op
    would otherwise work in place on the allocated buffer (folding
    would force a copy of the constant and raise peak memory), or a
    transfer op still needs the moving optimizations to settle first.
    """
    # Ops that operate in place on their input at index 0; if the
    # alloc were folded, each of these would have to copy the
    # resulting constant, which is less efficient and can also
    # increase peak memory use, since the "constant" would always
    # be kept alive.
    # theano.tensor.subtensor.AdvancedIncSubtensor could belong
    # here too, and GpuGer once it is implemented on this back-end.
    inplace_on_first_input = (
        GpuIncSubtensor,
        GpuAdvancedIncSubtensor1,
        theano.sandbox.gpuarray.blas.GpuGemm,
        theano.sandbox.gpuarray.blas.GpuGemv,
    )
    for client, input_index in node.outputs[0].clients:
        # A folded constant used as a graph output gets deep-copied
        # on every call, so do not fold.
        if client == 'output':
            return False
        if input_index == 0 and isinstance(client.op,
                                           inplace_on_first_input):
            return False
        # A transfer back to the host: wait for the moving
        # optimizations to finish before deciding what to do.
        if isinstance(client.op, HostFromGpu):
            return False
    return True
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
def __init__(self, dtype=None): def __init__(self, dtype=None):
......
...@@ -32,8 +32,10 @@ if not theano.sandbox.gpuarray.pygpu_activated: ...@@ -32,8 +32,10 @@ if not theano.sandbox.gpuarray.pygpu_activated:
from theano.sandbox.gpuarray.type import (GpuArrayType, from theano.sandbox.gpuarray.type import (GpuArrayType,
gpuarray_shared_constructor) gpuarray_shared_constructor)
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (
gpu_alloc, gpu_from_cuda, host_from_gpu, gpu_from_host,
gpu_alloc, GpuAlloc,
gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuReshape, GpuFromHost, GpuReshape,
GpuEye) GpuEye)
...@@ -290,6 +292,13 @@ GpuAllocTester = makeTester( ...@@ -290,6 +292,13 @@ GpuAllocTester = makeTester(
) )
class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
    """Run the generic Alloc test suite against the gpuarray back-end."""
    # Use float32 data on the GPU, with gpuarray-backed shared variables.
    dtype = "float32"
    shared = staticmethod(gpuarray_shared_constructor)
    mode = mode_with_gpu
    # Alloc op classes the base tests expect to find in the compiled graphs.
    allocs = [GpuAlloc] * 2 + [T.Alloc]
def test_shape(): def test_shape():
x = GpuArrayType(dtype='float32', broadcastable=[False, False, False])() x = GpuArrayType(dtype='float32', broadcastable=[False, False, False])()
v = gpuarray.zeros((3, 4, 5), dtype='float32') v = gpuarray.zeros((3, 4, 5), dtype='float32')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论