提交 81727020 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2741 from abergeron/fix_allocempty

Fix allocempty
...@@ -721,6 +721,13 @@ class DataDestroyed(): ...@@ -721,6 +721,13 @@ class DataDestroyed():
data_destroyed = DataDestroyed() data_destroyed = DataDestroyed()
def check_eq(var, val1, val2):
    """Compare two values with the variable's comparison function.

    A ``values_eq_approx`` callable attached to ``var.tag`` takes
    precedence over the one defined on ``var.type``.
    """
    if hasattr(var.tag, 'values_eq_approx'):
        comparator = var.tag.values_eq_approx
    else:
        comparator = var.type.values_eq_approx
    return comparator(val1, val2)
def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
clobber_dr_vals=True, clobber_dr_vals=True,
perform=None, warn_input_not_reused=True): perform=None, warn_input_not_reused=True):
...@@ -945,7 +952,10 @@ def _find_bad_optimizations0(order, reasons, r_vals): ...@@ -945,7 +952,10 @@ def _find_bad_optimizations0(order, reasons, r_vals):
r_val = r_vals[r] r_val = r_vals[r]
assert r.type == new_r.type assert r.type == new_r.type
if hasattr(new_r, 'values_eq_approx'): if hasattr(new_r.tag, 'values_eq_approx'):
check = new_r.tag.values_eq_approx(r_val, new_r_val)
elif hasattr(new_r, 'values_eq_approx'):
# This way will be deprecated later, but not right now
check = new_r.values_eq_approx(r_val, new_r_val) check = new_r.values_eq_approx(r_val, new_r_val)
else: else:
check = r.type.values_eq_approx(r_val, new_r_val) check = r.type.values_eq_approx(r_val, new_r_val)
...@@ -1372,7 +1382,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val, ...@@ -1372,7 +1382,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
_check_viewmap(node, storage_map) _check_viewmap(node, storage_map)
for r in node.outputs: for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]): if not check_eq(r, r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem # TODO: indicate it is not a C/Py problem
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs] inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r, raise BadThunkOutput(r,
...@@ -2002,22 +2012,20 @@ class _Linker(gof.link.LocalLinker): ...@@ -2002,22 +2012,20 @@ class _Linker(gof.link.LocalLinker):
# Check with Python result # Check with Python result
for r in node.outputs: for r in node.outputs:
if r in r_vals: if r in r_vals:
#print >> sys.stderr, i, "DEBUGMODE clearing output", r # compares the version from thunk_py
# compares the version from thunk_py (in r_vals) # (in r_vals) to the version produced
# to the version produced by thunk_c (in storage_map) # by thunk_c (in storage_map)
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]): if not check_eq(r, r_vals[r], storage_map[r][0]):
#import pdb; pdb.set_trace()
#r.type.values_eq_approx(r_vals[r], storage_map[r][0])
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs] inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r, raise BadThunkOutput(
thunk1='perform', val1=r_vals[r], r, thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0], thunk2='c_code', val2=storage_map[r][0],
inputs_val=inputs_val) inputs_val=inputs_val)
else: else:
#print >> sys.stderr, i, "DEBUGMODE storing reference output %x" % id(storage_map[r][0])
# retrieve each output from the storage_map # retrieve each output from the storage_map
r_vals[r] = storage_map[r][0] r_vals[r] = storage_map[r][0]
storage_map[r][0] = None # clear the storage_map for the thunk_c # clear the storage_map for the thunk_c
storage_map[r][0] = None
if self.maker.mode.check_preallocated_output: if self.maker.mode.check_preallocated_output:
prealloc_modes = \ prealloc_modes = \
......
...@@ -450,6 +450,9 @@ class Constant(Variable): ...@@ -450,6 +450,9 @@ class Constant(Variable):
def signature(self): def signature(self):
return (self.type, self.data) return (self.type, self.data)
def merge_signature(self):
    """Return the signature used when deciding whether two constants
    can be merged into one.

    Defaults to the regular ``signature()``; subclasses (e.g.
    ``CDataTypeConstant`` below) override this when merge equality
    must be stricter than the cache-key signature.
    """
    return self.signature()
def __str__(self): def __str__(self):
if self.name is not None: if self.name is not None:
return self.name return self.name
......
...@@ -499,7 +499,7 @@ class MergeFeature(object): ...@@ -499,7 +499,7 @@ class MergeFeature(object):
"""Check if a constant can be merged, and queue that replacement""" """Check if a constant can be merged, and queue that replacement"""
if id(c) in self.seen_constants: if id(c) in self.seen_constants:
return return
sig = c.signature() sig = c.merge_signature()
other_c = self.const_sig_inv.get(sig, None) other_c = self.const_sig_inv.get(sig, None)
if other_c is not None: if other_c is not None:
# multiple names will clobber each other.. # multiple names will clobber each other..
......
...@@ -627,11 +627,13 @@ if (py_%(name)s == NULL) { %(freefunc)s(%(name)s); } ...@@ -627,11 +627,13 @@ if (py_%(name)s == NULL) { %(freefunc)s(%(name)s); }
class CDataTypeConstant(graph.Constant): class CDataTypeConstant(graph.Constant):
def merge_signature(self):
    # Use the object's identity as the merge signature: two
    # CDataType constants are only merged when they wrap the very
    # same underlying object, never merely equal-looking ones.
    return id(self.data)
def signature(self): def signature(self):
# The Op.c_code* methoss can't access the data, so it can't # There is no way to put the data in the signature, so we
# change the code depending of it. So there is no need to put # don't even try
# it in the signature. Also, under Python 2, PyCObject aren't
# pickable. So using the PyCObject in the signature would
# disable the c code cache for op that have it as an input.
return (self.type,) return (self.type,)
CDataType.Constant = CDataTypeConstant CDataType.Constant = CDataTypeConstant
...@@ -3291,7 +3291,7 @@ class GpuAllocEmpty(GpuOp): ...@@ -3291,7 +3291,7 @@ class GpuAllocEmpty(GpuOp):
def make_node(self, *shape): def make_node(self, *shape):
shape, output = self.validate_shape(shape) shape, output = self.validate_shape(shape)
output.values_eq_approx = tensor.type.values_eq_approx_always_true output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return Apply(self, shape, [output]) return Apply(self, shape, [output])
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
......
...@@ -28,6 +28,26 @@ else: ...@@ -28,6 +28,26 @@ else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_dnn_conv_desc_merge():
    """Two GpuDnnConvDesc built with different parameters must not be
    merged into a single node by the optimizer."""
    shape_arrays = [numpy.asarray(s).astype('int64')
                    for s in ([2, 1, 8, 8], [3, 1, 2, 2])]
    img_shp, kern_shp = [T.as_tensor_variable(a) for a in shape_arrays]
    desc_valid = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                                    conv_mode='conv')(img_shp, kern_shp)
    desc_full = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
                                   conv_mode='cross')(img_shp, kern_shp)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    outs = [theano.Out(d, borrow=True) for d in (desc_valid, desc_full)]
    f = theano.function([], outs)
    d1, d2 = f()
    # This will be the case if they are merged, which would be bad.
    assert d1 != d2
def pool_2d_i2n(input, ds=(2, 2), strides=None, def pool_2d_i2n(input, ds=(2, 2), strides=None,
pad=(0, 0), pad=(0, 0),
pool_function=T.max, mode='ignore_borders'): pool_function=T.max, mode='ignore_borders'):
......
Markdown 格式
0%
您在此讨论中添加了 0 人。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论