提交 c9d5297c 作者: Gijs van Tulder

Split GpuAdvancedSubtensor into non-boolean and boolean ops (like CPU).

上级 8372a28f
...@@ -70,9 +70,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, ...@@ -70,9 +70,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
from .subtensor import (GpuIncSubtensor, GpuSubtensor, from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedBooleanIncSubtensor,
GpuAllocDiag, GpuExtractDiag) GpuAllocDiag, GpuExtractDiag)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax from .reduction import GpuMaxAndArgmax
...@@ -1079,7 +1081,7 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs): ...@@ -1079,7 +1081,7 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
@op_lifter([tensor.AdvancedBooleanSubtensor]) @op_lifter([tensor.AdvancedBooleanSubtensor])
@register_opt2([tensor.AdvancedBooleanSubtensor], 'fast_compile') @register_opt2([tensor.AdvancedBooleanSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_subtensor(op, context_name, inputs, outputs): def local_gpua_advanced_boolean_subtensor(op, context_name, inputs, outputs):
return GpuAdvancedSubtensor() return GpuAdvancedBooleanSubtensor()
@register_opt('fast_compile') @register_opt('fast_compile')
...@@ -1134,7 +1136,7 @@ def local_gpua_advanced_boolean_incsubtensor(op, context_name, inputs, outputs): ...@@ -1134,7 +1136,7 @@ def local_gpua_advanced_boolean_incsubtensor(op, context_name, inputs, outputs):
# GpuAdvancedIncSubtensor only works with a single boolean mask, # GpuAdvancedIncSubtensor only works with a single boolean mask,
# but not with fancy combinations. # but not with fancy combinations.
if not op.set_instead_of_inc and len(inputs) == 3: if not op.set_instead_of_inc and len(inputs) == 3:
return GpuAdvancedIncSubtensor() return GpuAdvancedBooleanIncSubtensor()
else: else:
return False return False
......
...@@ -512,19 +512,7 @@ def check_and_convert_boolean_masks(input, idx_list): ...@@ -512,19 +512,7 @@ def check_and_convert_boolean_masks(input, idx_list):
return out_idx_list return out_idx_list
class GpuAdvancedSubtensor(HideC, tensor.AdvancedBooleanSubtensor): class BaseGpuAdvancedSubtensor(object):
"""
AdvancedBooleanSubtensor On the GPU.
"""
def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
rval = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name)
x = as_gpuarray_variable(x, ctx_name)
return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
x = inputs[0] x = inputs[0]
...@@ -634,21 +622,35 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedBooleanSubtensor): ...@@ -634,21 +622,35 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedBooleanSubtensor):
out[0] = o out[0] = o
class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedBooleanIncSubtensor): class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedSubtensor):
""" """
Implement AdvancedBooleanIncSubtensor on the gpu. AdvancedSubtensor on the GPU.
"""
def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name)
x = as_gpuarray_variable(x, ctx_name)
return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
class GpuAdvancedBooleanSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedBooleanSubtensor):
""" """
def make_node(self, x, y, *inputs): AdvancedBooleanSubtensor on the GPU.
ctx_name = infer_context_name(x, y) """
rval = tensor.AdvancedBooleanIncSubtensor.make_node(self, x, y, *inputs) def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
rval = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype, otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable, broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name) context_name=ctx_name)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
class BaseGpuAdvancedIncSubtensor(object):
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
out, = out_ out, = out_
x = inp[0] x = inp[0]
...@@ -764,6 +766,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedBooleanIncSubtensor): ...@@ -764,6 +766,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedBooleanIncSubtensor):
out[0] = x_ out[0] = x_
class GpuAdvancedIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedIncSubtensor):
    """
    Implement AdvancedIncSubtensor on the gpu.

    Only graph-node construction is defined here; the remaining behaviour
    comes from the base classes listed above.

    """

    def make_node(self, x, y, *inputs):
        """
        Build the Apply node for this op.

        The CPU ``tensor.AdvancedIncSubtensor.make_node`` is called first and
        its result supplies the output dtype, the output broadcastable
        pattern and the inputs that follow ``x`` and ``y``. ``x`` and ``y``
        themselves are converted to GPU array variables in the context
        inferred from them.

        """
        context = infer_context_name(x, y)
        cpu_node = tensor.AdvancedIncSubtensor.make_node(self, x, y, *inputs)
        cpu_out_type = cpu_node.outputs[0].type
        gpu_out_type = GpuArrayType(dtype=cpu_out_type.dtype,
                                    broadcastable=cpu_out_type.broadcastable,
                                    context_name=context)
        gpu_x = as_gpuarray_variable(x, context)
        gpu_y = as_gpuarray_variable(y, context)
        # Keep every input after x and y exactly as the CPU node produced it.
        node_inputs = [gpu_x, gpu_y] + cpu_node.inputs[2:]
        return gof.Apply(self, node_inputs, [gpu_out_type()])
class GpuAdvancedBooleanIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedBooleanIncSubtensor):
    """
    Implement AdvancedBooleanIncSubtensor on the gpu.

    Only graph-node construction is defined here; the remaining behaviour
    comes from the base classes listed above.

    """

    def make_node(self, x, y, *inputs):
        """
        Build the Apply node for this op.

        The CPU ``tensor.AdvancedBooleanIncSubtensor.make_node`` is called
        first and its result supplies the output dtype, the output
        broadcastable pattern and the inputs that follow ``x`` and ``y``.
        ``x`` and ``y`` themselves are converted to GPU array variables in
        the context inferred from them.

        """
        context = infer_context_name(x, y)
        cpu_node = tensor.AdvancedBooleanIncSubtensor.make_node(
            self, x, y, *inputs)
        cpu_out_type = cpu_node.outputs[0].type
        gpu_out_type = GpuArrayType(dtype=cpu_out_type.dtype,
                                    broadcastable=cpu_out_type.broadcastable,
                                    context_name=context)
        gpu_x = as_gpuarray_variable(x, context)
        gpu_y = as_gpuarray_variable(y, context)
        # Keep every input after x and y exactly as the CPU node produced it.
        node_inputs = [gpu_x, gpu_y] + cpu_node.inputs[2:]
        return gof.Apply(self, node_inputs, [gpu_out_type()])
class GpuAdvancedIncSubtensor1(Op): class GpuAdvancedIncSubtensor1(Op):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
......
...@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle ...@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor, from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
...@@ -40,7 +41,7 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -40,7 +41,7 @@ class G_subtensor(test_subtensor.T_subtensor):
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor, adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedSubtensor, adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle, dimshuffle=GpuDimShuffle,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
...@@ -69,7 +70,7 @@ class G_subtensorF16(test_subtensor.T_subtensor): ...@@ -69,7 +70,7 @@ class G_subtensorF16(test_subtensor.T_subtensor):
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor, adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedSubtensor, adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle, dimshuffle=GpuDimShuffle,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论