提交 b8e3e943 authored 作者: Frederic Bastien's avatar Frederic Bastien

'Implemented GpuAdvancedIncSubtensor1 and tested it. It also works inplace.'

上级 be4d06b0
......@@ -138,7 +138,8 @@ outdated!""")
import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuAdvancedSubtensor1, GpuIncSubtensor,
GpuSubtensor, GpuIncSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol,
ftensor3, ftensor4, scalar, vector, matrix, row, col,
......
......@@ -1746,6 +1746,37 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
o[j] = x[i]
out[0] = o
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
    """GPU version of AdvancedIncSubtensor1: out = x; out[ilist] += y.

    Increments the rows of a CudaNdarray selected by a 1-d integer index
    vector.  The computation itself is generated elsewhere (this class
    inherits the flags of the host op); see ``perform_`` below.
    """
    def make_node(self, x, y, ilist):
        """Build the Apply node after moving `x` and `y` to the GPU.

        :param x: tensor whose rows are incremented (ndim >= 1).
        :param y: increment values; must match `x` in dtype and ndim.
        :param ilist: 1-d integer vector of row indices into `x`.
        :raises TypeError: if `ilist` is not an integer vector, or `x` is
            a scalar or broadcastable along its first dimension.
        """
        x_ = as_cuda_ndarray_variable(x)
        y_ = as_cuda_ndarray_variable(y)
        ilist_ = tensor.as_tensor_variable(ilist)
        assert x_.type.dtype == y_.type.dtype
        assert x_.type.ndim == y_.type.ndim
        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if ilist_.type.broadcastable != (False,):
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if x_.type.broadcastable[0]:
            # the caller should have made a copy of x len(ilist) times
            raise TypeError('cannot index into a broadcastable dimension')
        return Apply(self, [x_, y_, ilist_], [x_.type()])

    def perform_(self, node, inp, out_):
        # NOTE(review): intentionally dead code -- the trailing underscore
        # keeps the parent class's perform() in use, because
        # CudaNdarray_Subscript() does not support the required indexing.
        # Fixed vs. the original anyway: the node has three inputs, so the
        # original `x, idx = inp` raised ValueError, and the body gathered
        # rows (copy/paste from GpuAdvancedSubtensor1) instead of
        # incrementing them.
        x, y, idx = inp
        out, = out_
        o = x.copy()
        for (j, i) in enumerate(idx):
            o[i] += y[j]
        out[0] = o
class GpuIncSubtensor(tensor.IncSubtensor):
def make_node(self, x, y, *inputs):
assert isinstance(x.type, CudaNdarrayType)
......
......@@ -517,6 +517,38 @@ def local_gpu_advanced_subtensor1(node):
return [host_from_gpu(GpuAdvancedSubtensor1()(gpu_x, *coords))]
return False
@register_opt()
@local_optimizer([])
def local_gpu_advanced_incsubtensor1(node):
    """Lift AdvancedIncSubtensor1 onto the GPU.

    Handles both directions:
      gpu_from_host(adv_incsub1(x, y, ...))  -> GpuAdvancedIncSubtensor1(gpu_x, gpu_y, ...)
      adv_incsub1(host_from_gpu(x), y, ...)  -> host_from_gpu(GpuAdvancedIncSubtensor1(...))
    Returns False when no rewrite applies.
    """
    if node.op == gpu_from_host:
        host_input = node.inputs[0]
        # `__class__ is` (not isinstance) so the rewrite does not fire on
        # the GPU subclass GpuAdvancedIncSubtensor1 itself.
        if (host_input.owner and
                host_input.owner.op.__class__ is tensor.AdvancedIncSubtensor1):
            x, y = host_input.owner.inputs[0:2]
            coords = host_input.owner.inputs[2:]
            return [GpuAdvancedIncSubtensor1()(gpu_from_host(x),
                                               gpu_from_host(y), *coords)]
    # Fixed vs. the original: compare against AdvancedIncSubtensor1 (the op
    # this optimizer handles -- AdvancedSubtensor1 takes no `y` input), and
    # removed a leftover `import pdb;pdb.set_trace()` debugging call.
    if node.op.__class__ is tensor.AdvancedIncSubtensor1:
        x, y = node.inputs[0:2]
        coords = node.inputs[2:]
        go_gpu = False
        if x.owner and x.owner.op == host_from_gpu:
            go_gpu = True
            gpu_x, = x.owner.inputs
        else:
            gpu_x = gpu_from_host(x)
        if y.owner and y.owner.op == host_from_gpu:
            go_gpu = True
            gpu_y, = y.owner.inputs
        else:
            gpu_y = gpu_from_host(y)
        if go_gpu:
            return [host_from_gpu(GpuAdvancedIncSubtensor1()(gpu_x, gpu_y,
                                                             *coords))]
    return False
@register_opt()
@local_optimizer([])
def local_gpu_incsubtensor(node):
......
......@@ -787,9 +787,10 @@ import theano.tensor.tests.test_basic
# Reuse the generic subtensor test suite with the GPU ops substituted, so
# the tests are not duplicated here.
class T_subtensor(theano.tensor.tests.test_basic.T_subtensor):
    # Fixed vs. the original (diff-merge artifacts): each attribute is
    # assigned exactly once, and the stray trailing commas that turned
    # `sub` and `inc_sub` into 1-tuples are removed.
    shared = staticmethod(cuda.shared_constructor)
    sub = cuda.GpuSubtensor
    inc_sub = cuda.GpuIncSubtensor
    adv_sub1 = cuda.GpuAdvancedSubtensor1
    adv_incsub1 = cuda.GpuAdvancedIncSubtensor1
    mode = mode_with_gpu
    dtype = 'float32'
    ignore_topo = (B.HostFromGpu, B.GpuFromHost)
......
......@@ -1160,6 +1160,7 @@ compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsu
@gof.local_optimizer([None])
def local_inplace_incsubtensor1(node):
""" also work for GpuAdvancedIncSubtensor1 """
if isinstance(node.op, T.AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.__class__(inplace=True)
new_node = new_op(*node.inputs)
......
......@@ -1376,16 +1376,18 @@ class T_subtensor(unittest.TestCase):
This is build in a way that allow to reuse it to test the equivalent gpu op.
"""
def __init__(self, name, shared=shared,
             sub=theano.tensor.basic.Subtensor,
             inc_sub=theano.tensor.basic.IncSubtensor,
             adv_sub1=theano.tensor.basic.AdvancedSubtensor1,
             adv_incsub1=theano.tensor.basic.AdvancedIncSubtensor1,
             mode=None,
             dtype=theano.config.floatX,
             ignore_topo=(theano.compile.function_module.DeepCopyOp)):
    """Record the op classes / constructors under test.

    GPU subclasses override these keyword defaults with their GPU
    equivalents so the same test bodies exercise both backends.
    Fixed vs. the original (diff-merge artifact): the `adv_sub1`
    parameter appeared twice in the signature -- a SyntaxError -- and
    `self.adv_sub1` was assigned twice.
    """
    self.shared = shared
    self.sub = sub
    self.inc_sub = inc_sub
    self.adv_sub1 = adv_sub1
    self.adv_incsub1 = adv_incsub1
    self.mode = mode
    self.dtype = dtype
    self.ignore_topo = ignore_topo
......@@ -1696,9 +1698,9 @@ class T_subtensor(unittest.TestCase):
f = function([], [gn, gn.shape], mode=self.mode)
topo = f.maker.env.toposort()
if not fast_compile:
assert any([isinstance(node.op, AdvancedIncSubtensor1) and node.op.inplace for node in topo])
assert any([isinstance(node.op, self.adv_incsub1) and node.op.inplace for node in topo])
else:
assert any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo])
assert any([isinstance(node.op, self.adv_incsub1) for node in topo])
assert any([isinstance(node.op, self.adv_sub1) for node in topo])
gval, gshape = f()
good = numpy.zeros_like(data)
......@@ -1723,7 +1725,7 @@ class T_subtensor(unittest.TestCase):
f = function([], [gn.shape, n[idx_].shape], mode=self.mode)
topo = f.maker.env.toposort()
if not fast_compile:
self.failUnless(not any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo]))
self.failUnless(not any([isinstance(node.op, self.adv_incsub1) for node in topo]))
self.failUnless(not any([isinstance(node.op, self.adv_sub1) for node in topo]))
f()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论