Commit fff9c1f7 authored by Pierre Luc Carrier

Mostly adapted Op and tests to new backend. TODO: Remove faulty python implementation from _dev20 version of op

Parent: 6936dd28
@@ -10,6 +10,7 @@ from theano.gof import (local_optimizer, EquilibriumDB,
 from theano.gof.python25 import all, any
 from theano.tensor.nnet.conv import ConvOp
+from theano.sandbox.cuda.basic_ops import device_properties
 from theano.sandbox.gpuarray.type import GpuArrayType
 from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
                                                gpu_from_host,
@@ -25,7 +26,9 @@ from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBi
                                           GpuSoftmax)
 from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
                                               GpuDimShuffle, GpuCAReduceCuda)
-from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
+from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
+                                               GpuAdvancedIncSubtensor1,
+                                               GpuAdvancedIncSubtensor1_dev20)
 from theano.sandbox.gpuarray.type import GpuArrayConstant

 gpu_optimizer = EquilibriumDB()
@@ -241,6 +244,23 @@ def local_gpua_incsubtensor(node):
     return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
                            node.op.set_instead_of_inc,
                            node.op.destroyhandler_tolerate_aliased)
+
+
+@register_opt()
+@op_lifter([tensor.AdvancedIncSubtensor1])
+def local_gpua_advanced_incsubtensor(node):
+    x, y = node.inputs[0:2]
+    coords = node.inputs[2:]
+    set_instead_of_inc = node.op.set_instead_of_inc
+    active_device_no = theano.sandbox.cuda.active_device_number()
+    compute_capability = device_properties(active_device_no)['major']
+    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
+        return GpuAdvancedIncSubtensor1(
+            set_instead_of_inc=set_instead_of_inc)
+    else:
+        return GpuAdvancedIncSubtensor1_dev20(
+            set_instead_of_inc=set_instead_of_inc)
+
 @register_opt()
...
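
The lifter above chooses between two GPU implementations of the same op, so both must reproduce the host semantics of AdvancedIncSubtensor1. As a point of reference, a minimal NumPy sketch of those semantics; the helper name advanced_inc_subtensor1_ref is hypothetical, not part of Theano:

import numpy as np

def advanced_inc_subtensor1_ref(x, y, ilist, set_instead_of_inc=False):
    # Reference semantics: set or increment the rows of x selected by
    # ilist with the corresponding rows of y, without mutating x.
    out = x.copy()
    if set_instead_of_inc:
        out[ilist] = y            # plain fancy-index assignment
    else:
        np.add.at(out, ilist, y)  # accumulates over duplicate indices
    return out

# Mirrors the test below: rows 0 and 2 of a (3, 3) array get +10.
xval = np.arange(9, dtype='float32').reshape(3, 3) + 1
yval = np.empty((2, 3), dtype='float32')
yval[:] = 10
print(advanced_inc_subtensor1_ref(xval, yval, [0, 2]))
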
+import numpy
+import theano
 from theano.tensor.tests.test_subtensor import T_subtensor
 from theano.sandbox.gpuarray.basic_ops import (HostFromGpu, GpuFromHost)
-from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
+from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
+                                               GpuAdvancedIncSubtensor1)
 from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
@@ -21,6 +25,7 @@ class G_subtensor(T_subtensor):
         shared=gpuarray_shared_constructor,
         sub=GpuSubtensor,
         inc_sub=GpuIncSubtensor,
+        adv_incsub1=GpuAdvancedIncSubtensor1,
         mode=mode_with_gpu,
         # avoid errors with limited devices
         dtype='float32',
@@ -34,17 +39,17 @@ class G_subtensor(T_subtensor):
 def test_advinc_subtensor1():
     """ Test the second case in the opt local_gpu_advanced_incsubtensor1 """
     for shp in [(3, 3), (3, 3, 3)]:
-        shared = cuda.shared_constructor
+        shared = gpuarray_shared_constructor
         xval = numpy.arange(numpy.prod(shp), dtype='float32').reshape(shp) + 1
         yval = numpy.empty((2,) + shp[1:], dtype='float32')
         yval[:] = 10
         x = shared(xval, name='x')
-        y = T.tensor(dtype='float32',
-                     broadcastable=(False,) * len(shp),
-                     name='y')
-        expr = T.advanced_inc_subtensor1(x, y, [0, 2])
+        y = tensor.tensor(dtype='float32',
+                          broadcastable=(False,) * len(shp),
+                          name='y')
+        expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
         f = theano.function([y], expr, mode=mode_with_gpu)
-        assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
+        assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1)
                     for node in f.maker.fgraph.toposort()]) == 1
         rval = f(yval)
         rep = xval.copy()
...
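
For orientation, a hedged end-to-end sketch of how the new lifter is exercised outside the test suite. The mode construction is an assumption (the test module builds its mode_with_gpu along these lines); the other names come from the diff above:

import numpy
import theano
import theano.tensor as tensor
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.sandbox.gpuarray.subtensor import (GpuAdvancedIncSubtensor1,
                                               GpuAdvancedIncSubtensor1_dev20)

# Assumed: enabling the gpuarray optimizations via their optdb tag,
# similar to the mode_with_gpu used by the tests.
mode_with_gpu = theano.compile.get_default_mode().including('gpuarray')

# x lives on the GPU via the shared constructor; y stays symbolic.
x = gpuarray_shared_constructor(
    numpy.arange(9, dtype='float32').reshape(3, 3) + 1, name='x')
y = tensor.fmatrix('y')

# Increment rows 0 and 2 of x by the rows of y.
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)

# The lifter should pick the _dev20 op on compute capability >= 2 when
# both x and y are 2-d, and the generic op otherwise.
gpu_ops = [type(node.op) for node in f.maker.fgraph.toposort()]
assert (GpuAdvancedIncSubtensor1 in gpu_ops or
        GpuAdvancedIncSubtensor1_dev20 in gpu_ops)
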