提交 6349790c,作者:Frederic Bastien

Make ScipyGer use prepare_node

上级 ca40ef22
...@@ -2172,8 +2172,8 @@ class GpuConv(GpuOp): ...@@ -2172,8 +2172,8 @@ class GpuConv(GpuOp):
bmode = 0 bmode = 0
if max_threads_dim0 is None: if max_threads_dim0 is None:
raise NotImplementedError("GpuConv.c_code should not be called " raise NotImplementedError("GpuConv.c_code should not be called "
"directly. It should be called by " "directly. It should be called after "
"make_thunk() that add some information " "prepare_node() that add some information "
"related to the selected GPU.") "related to the selected GPU.")
sub.update(locals()) sub.update(locals())
return """ return """
......
...@@ -22,46 +22,34 @@ if have_fblas: ...@@ -22,46 +22,34 @@ if have_fblas:
class ScipyGer(Ger):
    """Ger (rank-1 update) Op implemented via scipy's BLAS ``?ger`` routines.

    Computes ``Z = A + alpha * outer(x, y)``; when ``self.destructive`` is
    true, A may be overwritten in place.
    """

    def prepare_node(self, node, storage_map, compute_map, impl=None):
        """Cache the dtype-specific BLAS ger callable on ``node.tag``.

        Parameters
        ----------
        node : Apply
            The node this Op instance was applied to; its first input's
            dtype selects the BLAS routine (e.g. sger vs dger).
        storage_map, compute_map : dict
            Standard prepare_node arguments; unused here.
        impl : str or None
            Implementation selector. ``None`` keeps the call
            backward-compatible with 4-argument call sites.

        Notes
        -----
        BUG FIX: the previous code referenced an undefined name ``impl``
        (a guaranteed NameError). It is now an explicit parameter with a
        default so existing callers keep working, and the BLAS callable is
        cached whenever the Python implementation (``perform``) may run.
        """
        if impl is None or impl == 'py':
            node.tag.local_ger = _blas_ger_fns[numpy.dtype(
                node.inputs[0].type.dtype)]

    def perform(self, node, inputs, output_storage):
        """Python implementation: rank-1 update through scipy BLAS."""
        cA, calpha, cx, cy = inputs
        cZ, = output_storage
        # N.B. some versions of scipy (e.g. mine) don't actually work
        # in-place on a, even when I tell it to.
        A = cA
        local_ger = node.tag.local_ger  # set by prepare_node
        if A.size == 0:
            # We don't have to compute anything, A is empty.
            # We need this special case because Numpy considers it
            # C-contiguous, which is confusing.
            if not self.destructive:
                # Sometimes numpy thinks empty matrices can share memory,
                # so here to stop DebugMode from complaining.
                A = A.copy()
        elif A.flags['C_CONTIGUOUS']:
            # BLAS ger works on Fortran-ordered arrays; a C-contiguous A
            # is the transpose of an F-contiguous array, so update A.T
            # with x and y swapped, then transpose back.
            A = local_ger(calpha, cy, cx, a=A.T,
                          overwrite_a=int(self.destructive)).T
        else:
            A = local_ger(calpha, cx, cy, a=A,
                          overwrite_a=int(self.destructive))
        cZ[0] = A


scipy_ger_no_inplace = ScipyGer(False)
scipy_ger_inplace = ScipyGer(True)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论