提交 c9d5297c authored 作者: Gijs van Tulder's avatar Gijs van Tulder

Split GpuAdvancedSubtensor in non-boolean and boolean ops (like CPU).

上级 8372a28f
......@@ -70,9 +70,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedBooleanIncSubtensor,
GpuAllocDiag, GpuExtractDiag)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
......@@ -1079,7 +1081,7 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
# NOTE(review): this span is a rendered diff hunk -- the two `return`
# statements below appear to be the removed (old) and added (new) versions of
# the same line; after this commit only the GpuAdvancedBooleanSubtensor()
# return should remain. Confirm against the original repository.
@op_lifter([tensor.AdvancedBooleanSubtensor])
@register_opt2([tensor.AdvancedBooleanSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_subtensor(op, context_name, inputs, outputs):
# Lift the CPU boolean advanced subtensor to its GPU counterpart.
return GpuAdvancedSubtensor()
return GpuAdvancedBooleanSubtensor()
@register_opt('fast_compile')
......@@ -1134,7 +1136,7 @@ def local_gpua_advanced_boolean_incsubtensor(op, context_name, inputs, outputs):
# GpuAdvancedIncSubtensor only works with a single boolean mask,
# but not with fancy combinations.
if not op.set_instead_of_inc and len(inputs) == 3:
return GpuAdvancedIncSubtensor()
return GpuAdvancedBooleanIncSubtensor()
else:
return False
......
......@@ -512,19 +512,7 @@ def check_and_convert_boolean_masks(input, idx_list):
return out_idx_list
class GpuAdvancedSubtensor(HideC, tensor.AdvancedBooleanSubtensor):
    """
    AdvancedBooleanSubtensor On the GPU.
    """

    def make_node(self, x, *inputs):
        # Resolve the GPU context from the variable being indexed.
        context = infer_context_name(x)
        # Reuse the CPU op's make_node to perform type/broadcast inference.
        base = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
        out_var = base.outputs[0]
        out_type = GpuArrayType(dtype=out_var.type.dtype,
                                broadcastable=out_var.type.broadcastable,
                                context_name=context)
        gpu_x = as_gpuarray_variable(x, context)
        # Keep the index inputs inferred by the CPU op; only x moves to GPU.
        return gof.Apply(self, [gpu_x] + base.inputs[1:], [out_type()])
class BaseGpuAdvancedSubtensor(object):
def perform(self, node, inputs, out_):
out, = out_
x = inputs[0]
......@@ -634,21 +622,35 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedBooleanSubtensor):
out[0] = o
class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedBooleanIncSubtensor):
class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedSubtensor):
    """
    AdvancedSubtensor on the GPU.
    """

    def make_node(self, x, *inputs):
        # The GPU context is taken from the indexed variable.
        context = infer_context_name(x)
        # Delegate shape/type inference to the CPU op's make_node.
        base = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
        out_var = base.outputs[0]
        out_type = GpuArrayType(dtype=out_var.type.dtype,
                                broadcastable=out_var.type.broadcastable,
                                context_name=context)
        gpu_x = as_gpuarray_variable(x, context)
        # Index inputs (everything after x) are reused unchanged.
        return gof.Apply(self, [gpu_x] + base.inputs[1:], [out_type()])
# NOTE(review): rendered diff hunk -- removed (old, y-taking IncSubtensor) and
# added (new, x-only BooleanSubtensor) lines of make_node are interleaved below
# with no +/- markers, so this span is not valid Python as shown. The live
# definition after this commit is presumably the x-only variant that delegates
# to tensor.AdvancedBooleanSubtensor.make_node -- confirm against the original
# repository before editing.
class GpuAdvancedBooleanSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedBooleanSubtensor):
"""
def make_node(self, x, y, *inputs):
ctx_name = infer_context_name(x, y)
rval = tensor.AdvancedBooleanIncSubtensor.make_node(self, x, y, *inputs)
AdvancedBooleanSubtensor on the GPU.
"""
def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
rval = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
class BaseGpuAdvancedIncSubtensor(object):
def perform(self, node, inp, out_):
out, = out_
x = inp[0]
......@@ -764,6 +766,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedBooleanIncSubtensor):
out[0] = x_
class GpuAdvancedIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedIncSubtensor):
    """
    Implement AdvancedIncSubtensor on the gpu.
    """

    def make_node(self, x, y, *inputs):
        # Both x (target) and y (increment values) determine the GPU context.
        context = infer_context_name(x, y)
        # Let the CPU op infer the output type and broadcast pattern.
        base = tensor.AdvancedIncSubtensor.make_node(self, x, y, *inputs)
        out_var = base.outputs[0]
        out_type = GpuArrayType(dtype=out_var.type.dtype,
                                broadcastable=out_var.type.broadcastable,
                                context_name=context)
        gpu_x = as_gpuarray_variable(x, context)
        gpu_y = as_gpuarray_variable(y, context)
        # Index inputs (everything after x and y) pass through unchanged.
        return gof.Apply(self, [gpu_x, gpu_y] + base.inputs[2:], [out_type()])
class GpuAdvancedBooleanIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedBooleanIncSubtensor):
    """
    Implement AdvancedBooleanIncSubtensor on the gpu.
    """

    def make_node(self, x, y, *inputs):
        # Derive the GPU context from both the target and the values.
        context = infer_context_name(x, y)
        # Delegate output typing to the CPU boolean inc-subtensor op.
        base = tensor.AdvancedBooleanIncSubtensor.make_node(self, x, y, *inputs)
        out_var = base.outputs[0]
        out_type = GpuArrayType(dtype=out_var.type.dtype,
                                broadcastable=out_var.type.broadcastable,
                                context_name=context)
        gpu_x = as_gpuarray_variable(x, context)
        gpu_y = as_gpuarray_variable(y, context)
        # Boolean-mask index inputs after x and y are kept as-is.
        return gof.Apply(self, [gpu_x, gpu_y] + base.inputs[2:], [out_type()])
class GpuAdvancedIncSubtensor1(Op):
"""
Implement AdvancedIncSubtensor1 on the gpu.
......
......@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedSubtensor,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20,
......@@ -40,7 +41,7 @@ class G_subtensor(test_subtensor.T_subtensor):
adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle,
mode=mode_with_gpu,
# avoid errors with limited devices
......@@ -69,7 +70,7 @@ class G_subtensorF16(test_subtensor.T_subtensor):
adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle,
mode=mode_with_gpu,
# avoid errors with limited devices
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论