提交 179e4085 作者: Frederic Bastien

Make GpuOp(old back-end) and GpuElemwise(new back-end) use prepare_node

上级 0e107ac0
...@@ -2640,11 +2640,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2640,11 +2640,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
def get_params(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def make_thunk(self, node, storage_map, compute_map, no_recycling): def prepare_node(self, node, storage_map, compute_map):
# cache the kernel object # cache the kernel object
self.get_kernel_cache(node) self.get_kernel_cache(node)
return super(GpuCAReduceCPY, self).make_thunk(
node, storage_map, compute_map, no_recycling)
def get_kernel_cache(self, node): def get_kernel_cache(self, node):
attr = '@cache_reduction_k' attr = '@cache_reduction_k'
......
...@@ -246,18 +246,14 @@ class GpuOp(theano.gof.Op): ...@@ -246,18 +246,14 @@ class GpuOp(theano.gof.Op):
""" """
def make_thunk(self, node, storage_map, compute_map, no_recycling): def prepare_node(self, node, storage_map, compute_map):
if use.device_number is None: if use.device_number is None:
use("gpu", use("gpu",
force=True, force=True,
default_to_move_computation_to_gpu=False, default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False, move_shared_float32_to_gpu=False,
enable_cuda=False) enable_cuda=False)
return super(GpuOp, self).make_thunk(node, storage_map,
compute_map, no_recycling)
theano.compile.debugmode.default_make_thunk.append(
get_unbound_function(GpuOp.make_thunk))
# We must do those import to be able to create the full doc when # We must do those import to be able to create the full doc when
# nvcc is not available # nvcc is not available
......
...@@ -2120,6 +2120,8 @@ class GpuConv(GpuOp): ...@@ -2120,6 +2120,8 @@ class GpuConv(GpuOp):
return flops return flops
def prepare_node(self, node, storage_map, compute_map): def prepare_node(self, node, storage_map, compute_map):
super(GpuConv, self).prepare_node(node, storage_map, compute_map, impl)
if node.op.max_threads_dim0 is None: if node.op.max_threads_dim0 is None:
cuda = theano.sandbox.cuda cuda = theano.sandbox.cuda
device_id = cuda.use.device_number device_id = cuda.use.device_number
......
...@@ -1513,6 +1513,8 @@ class GpuDnnPool(DnnBase): ...@@ -1513,6 +1513,8 @@ class GpuDnnPool(DnnBase):
self.mode = mode self.mode = mode
def prepare_node(self, node, storage_map, compute_map): def prepare_node(self, node, storage_map, compute_map):
super(GpuDnnPool, self).prepare_node(node, storage_map, compute_map)
if len(node.inputs) == 2: if len(node.inputs) == 2:
warnings.warn("Theano GPUDnnPoolGrad internal changed.", stacklevel=3) warnings.warn("Theano GPUDnnPoolGrad internal changed.", stacklevel=3)
# Old interface # Old interface
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论