提交 c5848291 authored 作者: Adam Becker's avatar Adam Becker

small fixes

上级 4ed05072
...@@ -1369,7 +1369,7 @@ class LocalOptGroup(LocalOptimizer): ...@@ -1369,7 +1369,7 @@ class LocalOptGroup(LocalOptimizer):
if isinstance(new_repl, (tuple, list)): if isinstance(new_repl, (tuple, list)):
new_vars = new_repl new_vars = new_repl
else: # It must be a dict else: # It must be a dict
new_vars = new_repl.values() new_vars = list(new_repl.values())
if self.profile: if self.profile:
self.node_created[opt] += len(graph.ops(fgraph.variables, new_vars)) self.node_created[opt] += len(graph.ops(fgraph.variables, new_vars))
self.applied_true[opt] += 1 self.applied_true[opt] += 1
......
...@@ -151,7 +151,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp): ...@@ -151,7 +151,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
fail = sub['fail'] fail = sub['fail']
ctx = sub['params'] ctx = sub['params']
k_dtype = node.inputs[1].type.dtype_specs()[1] k_dtype = node.inputs[1].type.dtype_specs()[1]
MAX_TPB = 1024 # max thread per block MAX_TPB = 1024 # max threads per block
WARP_SIZE = 32 WARP_SIZE = 32
ndim = node.inputs[0].ndim ndim = node.inputs[0].ndim
...@@ -195,20 +195,19 @@ class GpuTopKOp(GpuKernelBase, TopKOp): ...@@ -195,20 +195,19 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
for (int i=0; i<%(ndim)d; i++) for (int i=0; i<%(ndim)d; i++)
odims[i] = dims[i]; odims[i] = dims[i];
odims[%(axis)d] = k_>=0 ? k_ : -k_; odims[%(axis)d] = k_>=0 ? k_ : -k_;
if (0 == odims[%(axis)d]) { if (0 == odims[%(axis)d]) {
PyErr_SetString( PyErr_SetString(
PyExc_ValueError, PyExc_ValueError,
"topk: k must not be zero"); "topk: kth must not be zero");
%(fail)s; %(fail)s;
} else if (dims[%(axis)d] < odims[%(axis)d]){ } else if (dims[%(axis)d] < odims[%(axis)d]){
PyErr_SetString( PyErr_SetString(
PyExc_ValueError, PyExc_ValueError,
"topk: k cannot larger than size on specified axis %(axis)d"); "topk: kth cannot be larger than size on specified axis %(axis)d");
%(fail)s; %(fail)s;
} else if (dims[%(axis)d] > INT_MAX) { } else if (dims[%(axis)d] >= (1u << 31)) {
PyErr_SetString( PyErr_SetString(
PyExc_ValueError, PyExc_ValueError,
"topk: on GPU, array size on specified axis cannot larger or equal than 2^31"); "topk: on GPU, array size on specified axis cannot be larger than or equal to 2^31");
......
...@@ -7566,12 +7566,8 @@ def local_useless_topk(node): ...@@ -7566,12 +7566,8 @@ def local_useless_topk(node):
return False return False
x, k = node.inputs x, k = node.inputs
ret_val = False ret_val = bool(node.outputs[0].clients)
ret_idx = False ret_idx = bool(node.outputs[1].clients)
if op.return_values:
ret_val = bool(node.outputs[0].clients)
if op.return_indices:
ret_idx = bool(node.outputs[-1].clients)
if not (ret_val ^ ret_idx): if not (ret_val ^ ret_idx):
# both true -> nothing to remove # both true -> nothing to remove
...@@ -7584,4 +7580,4 @@ def local_useless_topk(node): ...@@ -7584,4 +7580,4 @@ def local_useless_topk(node):
idx_dtype=op.idx_dtype, idx_dtype=op.idx_dtype,
return_values=ret_val, return_values=ret_val,
return_indices=ret_idx)(x, k) return_indices=ret_idx)(x, k)
return {old_output:new_output} return {old_output: new_output}
...@@ -334,9 +334,7 @@ class TopKOp(theano.Op): ...@@ -334,9 +334,7 @@ class TopKOp(theano.Op):
# TODO c_code # TODO c_code
# TODO add opt, if k==1, use max/min reduce # TODO add opt, if k==1, use max/min reduce
# also if k is axis size, just copy input tensor # also if k is axis size, just copy input tensor
# TODO add opt to merge argtopk / topk, or split topk_and_argtopk when only # TODO add opt, to merge argtopk / topk
# one result is needed
__props__ = ('axis', 'return_values', 'return_indices', 'idx_dtype') __props__ = ('axis', 'return_values', 'return_indices', 'idx_dtype')
def __init__( def __init__(
...@@ -466,7 +464,8 @@ def topk(x, kth, axis=-1, sorted=True, idx_dtype='int64'): ...@@ -466,7 +464,8 @@ def topk(x, kth, axis=-1, sorted=True, idx_dtype='int64'):
""" """
if sorted: if sorted:
raise NotImplementedError("sorted=True is not supported yet.") raise NotImplementedError(
"We are still working on sorted topk. Use sorted=False for now.")
if axis is None: if axis is None:
x = theano.tensor.flatten(x) x = theano.tensor.flatten(x)
axis = 0 axis = 0
...@@ -511,7 +510,8 @@ def argtopk(x, kth, axis=-1, sorted=True, idx_dtype='int64'): ...@@ -511,7 +510,8 @@ def argtopk(x, kth, axis=-1, sorted=True, idx_dtype='int64'):
""" """
if sorted: if sorted:
raise NotImplementedError("sorted=True is not supported yet.") raise NotImplementedError(
"We are still working on sorted topk. Use sorted=False for now.")
if axis is None: if axis is None:
x = theano.tensor.flatten(x) x = theano.tensor.flatten(x)
axis = 0 axis = 0
...@@ -532,7 +532,8 @@ def topk_and_argtopk(x, kth, axis=-1, sorted=True, idx_dtype='int64'): ...@@ -532,7 +532,8 @@ def topk_and_argtopk(x, kth, axis=-1, sorted=True, idx_dtype='int64'):
""" """
if sorted: if sorted:
raise NotImplementedError("sorted=True is not supported yet.") raise NotImplementedError(
"We are still working on sorted topk. Use sorted=False for now.")
if axis is None: if axis is None:
x = theano.tensor.flatten(x) x = theano.tensor.flatten(x)
axis = 0 axis = 0
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论