提交 51964e4e authored 作者: abergeron's avatar abergeron

Merge pull request #1955 from nouiz/debugmode

Speed up Debugmode
...@@ -38,6 +38,7 @@ script: ...@@ -38,6 +38,7 @@ script:
- ulimit -a - ulimit -a
- echo $PART - echo $PART
- theano-nose --with-timelimit -v $PART - theano-nose --with-timelimit -v $PART
- theano-cache list
#after_script: #after_script:
......
...@@ -138,6 +138,13 @@ default values. ...@@ -138,6 +138,13 @@ default values.
:return: the number of bytes taken by the object described by :return: the number of bytes taken by the object described by
``shape_info``. ``shape_info``.
.. method:: may_share_memory(a, b)
Optional. Only needed for DebugMode. Return True if the Python
objects `a` and `b` could share memory. Return False
otherwise. It is used to debug cases where Ops didn't declare
memory aliasing between variables. Must be a static method.
For each method, the *default* is what ``Type`` defines For each method, the *default* is what ``Type`` defines
for you. So, if you create an instance of ``Type`` or an for you. So, if you create an instance of ``Type`` or an
instance of a subclass of ``Type``, you instance of a subclass of ``Type``, you
......
...@@ -685,9 +685,10 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, ...@@ -685,9 +685,10 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
actually_inplace_outputs = [] actually_inplace_outputs = []
dmap = getattr(node.op, 'destroy_map', {}) dmap = getattr(node.op, 'destroy_map', {})
for oo, ii in dmap.iteritems(): for oo, ii in dmap.iteritems():
out_var = storage_map[node.outputs[oo]][0] var = node.outputs[oo]
out_var = storage_map[var][0]
in_var = storage_map[node.inputs[ii[0]]][0] in_var = storage_map[node.inputs[ii[0]]][0]
if _may_share_memory(out_var, in_var): if var.type.may_share_memory(out_var, in_var):
actually_inplace_outputs.append(node.outputs[oo]) actually_inplace_outputs.append(node.outputs[oo])
if warn_input_not_reused and destroyed_res_list: if warn_input_not_reused and destroyed_res_list:
...@@ -702,9 +703,11 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, ...@@ -702,9 +703,11 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
vmap = getattr(node.op, 'view_map', {}) vmap = getattr(node.op, 'view_map', {})
for oo, ii in vmap.iteritems(): for oo, ii in vmap.iteritems():
out_var = storage_map[node.outputs[oo]][0] var = node.outputs[oo]
out_var = storage_map[var][0]
in_var = storage_map[node.inputs[ii[0]]][0] in_var = storage_map[node.inputs[ii[0]]][0]
if _may_share_memory(out_var, in_var): may_share = var.type.may_share_memory(out_var, in_var)
if may_share:
actually_inplace_outputs.append(node.outputs[oo]) actually_inplace_outputs.append(node.outputs[oo])
if warn_input_not_reused: if warn_input_not_reused:
...@@ -717,7 +720,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, ...@@ -717,7 +720,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
if isinstance(node.op, OutputGuard): if isinstance(node.op, OutputGuard):
# This class is not in the final graph. # This class is not in the final graph.
continue continue
if not _may_share_memory(out_var, in_var): if not may_share:
_logger.warning("Optimization Warning: input idx %d marked " _logger.warning("Optimization Warning: input idx %d marked "
"as viewed but new memory allocated by node '%s'", "as viewed but new memory allocated by node '%s'",
ii[0], str(node)) ii[0], str(node))
...@@ -766,7 +769,7 @@ def _check_viewmap(node, storage_map): ...@@ -766,7 +769,7 @@ def _check_viewmap(node, storage_map):
for ii, inode in enumerate(node.inputs): for ii, inode in enumerate(node.inputs):
if _may_share_memory(outstorage, storage_map[inode][0]): if inode.type.may_share_memory(outstorage, storage_map[inode][0]):
nodeid = id(inode) nodeid = id(inode)
bad_alias[nodeid] = ii bad_alias[nodeid] = ii
...@@ -794,26 +797,18 @@ def _check_viewmap(node, storage_map): ...@@ -794,26 +797,18 @@ def _check_viewmap(node, storage_map):
other_storage = storage_map[other_onode][0] other_storage = storage_map[other_onode][0]
# check to see if we share memory with this other output # check to see if we share memory with this other output
# this is not a problem if the node is not actually used # this is not a problem if the node is not actually used
if _is_used_in_graph(other_onode) and \ if (_is_used_in_graph(other_onode) and
_may_share_memory(outstorage, other_storage): other_onode.type.may_share_memory(outstorage,
other_storage)):
raise BadViewMap(node, oi, outstorage, raise BadViewMap(node, oi, outstorage,
out_alias_idx=other_oi) out_alias_idx=other_oi)
def _may_share_memory(a, b): def _is_used_in_graph(var):
from theano.misc.may_share_memory import may_share_memory
return may_share_memory(a, b, False)
def _is_function_output(node):
""" """
Returns True if the node in question is the a final output of the graph Returns True if `var` is used by another node in the graph
""" """
return node.clients == [('output', 1)] return not(var.clients == [('output', 1)] or var.clients == [])
def _is_used_in_graph(node):
return not(_is_function_output(node) or node.clients == [])
def _check_strides_match(a, b, warn_err, op): def _check_strides_match(a, b, warn_err, op):
...@@ -1111,6 +1106,9 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1111,6 +1106,9 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# is less relevant. # is less relevant.
# Dimensions should be align by the innermost index, so we iterate # Dimensions should be align by the innermost index, so we iterate
# from the end of shapes. # from the end of shapes.
if ('strided' in prealloc_modes or
'wrong_size' in prealloc_modes or
'ALL' in prealloc_modes):
max_ndim = 0 max_ndim = 0
rev_out_broadcastable = [] rev_out_broadcastable = []
for r in considered_outputs: for r in considered_outputs:
......
...@@ -677,6 +677,11 @@ class CLinker(link.Linker): ...@@ -677,6 +677,11 @@ class CLinker(link.Linker):
raise NotImplementedError("%s cannot produce C code" % op) raise NotImplementedError("%s cannot produce C code" % op)
assert isinstance(behavior, basestring), ( assert isinstance(behavior, basestring), (
str(node.op) + " didn't return a string for c_code") str(node.op) + " didn't return a string for c_code")
# Prefix the generated code with the Op class name; it helps when
# reading the C code. This prevents different ops that generate the
# same C code from being merged; I suppose this won't happen...
behavior = ("// Op class " + node.op.__class__.__name__ + "\n" +
behavior)
try: try:
cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub) cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)
......
...@@ -218,6 +218,7 @@ if __name__ == "__main__": ...@@ -218,6 +218,7 @@ if __name__ == "__main__":
GTX Titan Black 0.05s GTX Titan Black 0.05s
GTX Titan(D15U-50) 0.06s 0.06s don't work GTX Titan(D15U-50) 0.06s 0.06s don't work
GTX 780 0.06s
GTX 680 0.11s 0.12s 0.154s 0.218s GTX 680 0.11s 0.12s 0.154s 0.218s
GTX 580 0.16s 0.16s 0.164s 0.203s GTX 580 0.16s 0.16s 0.164s 0.203s
GTX 480 0.19s 0.19s 0.192s 0.237s 0.27s GTX 480 0.19s 0.19s 0.192s 0.237s 0.27s
......
...@@ -15,12 +15,14 @@ try: ...@@ -15,12 +15,14 @@ try:
def _is_sparse(a): def _is_sparse(a):
return scipy.sparse.issparse(a) return scipy.sparse.issparse(a)
except ImportError: except ImportError:
#scipy not imported, there can be only ndarray and cudandarray # scipy not imported, there can be only ndarray and cudandarray
def _is_sparse(a): def _is_sparse(a):
return False return False
from theano.sandbox import cuda from theano.sandbox import cuda
if cuda.cuda_available: if cuda.cuda_available:
from theano.sandbox.cuda.type import CudaNdarrayType
def _is_cuda(a): def _is_cuda(a):
return isinstance(a, cuda.CudaNdarray) return isinstance(a, cuda.CudaNdarray)
else: else:
...@@ -40,13 +42,19 @@ else: ...@@ -40,13 +42,19 @@ else:
def may_share_memory(a, b, raise_other_type=True): def may_share_memory(a, b, raise_other_type=True):
a_ndarray = isinstance(a, numpy.ndarray) a_ndarray = isinstance(a, numpy.ndarray)
b_ndarray = isinstance(b, numpy.ndarray) b_ndarray = isinstance(b, numpy.ndarray)
a_sparse = _is_sparse(a) if a_ndarray and b_ndarray:
b_sparse = _is_sparse(b) return TensorType.may_share_memory(a, b)
a_cuda = _is_cuda(a) a_cuda = _is_cuda(a)
b_cuda = _is_cuda(b) b_cuda = _is_cuda(b)
if a_cuda and b_cuda:
return CudaNdarrayType.may_share_memory(a, b)
a_gpua = _is_gpua(a) a_gpua = _is_gpua(a)
b_gpua = _is_gpua(b) b_gpua = _is_gpua(b)
if a_gpua and b_gpua:
return gpuarray.pygpu.gpuarray.may_share_memory(a, b)
a_sparse = _is_sparse(a)
b_sparse = _is_sparse(b)
if (not(a_ndarray or a_sparse or a_cuda or a_gpua) or if (not(a_ndarray or a_sparse or a_cuda or a_gpua) or
not(b_ndarray or b_sparse or b_cuda or b_gpua)): not(b_ndarray or b_sparse or b_cuda or b_gpua)):
if raise_other_type: if raise_other_type:
...@@ -54,13 +62,6 @@ def may_share_memory(a, b, raise_other_type=True): ...@@ -54,13 +62,6 @@ def may_share_memory(a, b, raise_other_type=True):
" and scipy.sparse, CudaNdarray or GpuArray type") " and scipy.sparse, CudaNdarray or GpuArray type")
return False return False
if a_ndarray and b_ndarray:
return TensorType.may_share_memory(a, b)
if a_cuda and b_cuda:
from theano.sandbox.cuda.type import CudaNdarrayType
return CudaNdarrayType.may_share_memory(a, b)
if a_gpua and b_gpua:
return gpuarray.pygpu.gpuarray.may_share_memory(a, b)
if a_cuda or b_cuda or a_gpua or b_gpua: if a_cuda or b_cuda or a_gpua or b_gpua:
return False return False
return SparseType.may_share_memory(a, b) return SparseType.may_share_memory(a, b)
# This is work in progress # This is work in progress
from theano import Op, Apply from theano import Op, Apply, tensor
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp
...@@ -7,7 +7,8 @@ from theano.sandbox.neighbours import Images2Neibs ...@@ -7,7 +7,8 @@ from theano.sandbox.neighbours import Images2Neibs
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import (
as_cuda_ndarray_variable, host_from_gpu, gpu_from_host)
from theano.sandbox.cuda.opt import register_opt as register_gpu_opt from theano.sandbox.cuda.opt import register_opt as register_gpu_opt
...@@ -21,13 +22,16 @@ class GpuImages2Neibs(Images2Neibs, GpuOp): ...@@ -21,13 +22,16 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
self.mode = mode self.mode = mode
def make_node(self, ten4, neib_shape, neib_step): def make_node(self, ten4, neib_shape, neib_step):
assert ten4.dtype == 'float32' ten4 = as_cuda_ndarray_variable(ten4)
if not isinstance(ten4.type, CudaNdarrayType): neib_shape = tensor.as_tensor_variable(neib_shape)
raise TypeError('ten4 must be cudandarray', ten4) neib_step = tensor.as_tensor_variable(neib_step)
assert ten4.ndim == 4 assert ten4.ndim == 4
assert ten4.dtype == 'float32'
assert neib_shape.ndim == 1 assert neib_shape.ndim == 1
assert neib_step.ndim == 1 assert neib_step.ndim == 1
assert "int" in neib_shape.dtype
assert "int" in neib_step.dtype
return Apply(self, [ten4, neib_shape, neib_step], return Apply(self, [ten4, neib_shape, neib_step],
[CudaNdarrayType(broadcastable=(False, False), [CudaNdarrayType(broadcastable=(False, False),
......
...@@ -29,6 +29,9 @@ class GpuImages2Neibs(Images2Neibs, Op): ...@@ -29,6 +29,9 @@ class GpuImages2Neibs(Images2Neibs, Op):
self.mode = mode self.mode = mode
def make_node(self, ten4, neib_shape, neib_step): def make_node(self, ten4, neib_shape, neib_step):
ten4 = as_gpuarray_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape)
neib_step = T.as_tensor_variable(neib_step)
assert ten4.ndim == 4 assert ten4.ndim == 4
assert neib_shape.ndim == 1 assert neib_shape.ndim == 1
...@@ -36,10 +39,6 @@ class GpuImages2Neibs(Images2Neibs, Op): ...@@ -36,10 +39,6 @@ class GpuImages2Neibs(Images2Neibs, Op):
assert "int" in neib_shape.dtype assert "int" in neib_shape.dtype
assert "int" in neib_step.dtype assert "int" in neib_step.dtype
ten4 = as_gpuarray_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape)
neib_step = T.as_tensor_variable(neib_step)
return Apply(self, [ten4, neib_shape, neib_step], return Apply(self, [ten4, neib_shape, neib_step],
[GpuArrayType(broadcastable=(False, False), [GpuArrayType(broadcastable=(False, False),
dtype=ten4.type.dtype)()]) dtype=ten4.type.dtype)()])
......
...@@ -145,6 +145,13 @@ class Scalar(Type): ...@@ -145,6 +145,13 @@ class Scalar(Type):
self.dtype = dtype self.dtype = dtype
self.dtype_specs() # error checking self.dtype_specs() # error checking
@staticmethod
def may_share_memory(a, b):
# This class represents basic C types, represented in Python
# with numpy scalars. They are read-only, so from Python they
# can never share memory.
return False
def filter(self, data, strict=False, allow_downcast=None): def filter(self, data, strict=False, allow_downcast=None):
py_type = self.dtype_specs()[0] py_type = self.dtype_specs()[0]
if strict and not isinstance(data, py_type): if strict and not isinstance(data, py_type):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论