Commit 81727020 authored by Frédéric Bastien

Merge pull request #2741 from abergeron/fix_allocempty

Fix allocempty
......@@ -721,6 +721,13 @@ class DataDestroyed():
data_destroyed = DataDestroyed()
def check_eq(var, val1, val2):
    """Compare two values for a variable, approximately.

    A ``values_eq_approx`` attached to the variable's tag takes
    precedence; otherwise the one defined on the variable's type is
    used.
    """
    _missing = object()
    comparator = getattr(var.tag, 'values_eq_approx', _missing)
    if comparator is _missing:
        comparator = var.type.values_eq_approx
    return comparator(val1, val2)
def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
clobber_dr_vals=True,
perform=None, warn_input_not_reused=True):
......@@ -945,7 +952,10 @@ def _find_bad_optimizations0(order, reasons, r_vals):
r_val = r_vals[r]
assert r.type == new_r.type
if hasattr(new_r, 'values_eq_approx'):
if hasattr(new_r.tag, 'values_eq_approx'):
check = new_r.tag.values_eq_approx(r_val, new_r_val)
elif hasattr(new_r, 'values_eq_approx'):
# This way will be deprecated later, but not right now
check = new_r.values_eq_approx(r_val, new_r_val)
else:
check = r.type.values_eq_approx(r_val, new_r_val)
......@@ -1372,7 +1382,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
_check_viewmap(node, storage_map)
for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
if not check_eq(r, r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r,
......@@ -2002,22 +2012,20 @@ class _Linker(gof.link.LocalLinker):
# Check with Python result
for r in node.outputs:
if r in r_vals:
#print >> sys.stderr, i, "DEBUGMODE clearing output", r
# compares the version from thunk_py (in r_vals)
# to the version produced by thunk_c (in storage_map)
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
#import pdb; pdb.set_trace()
#r.type.values_eq_approx(r_vals[r], storage_map[r][0])
# compares the version from thunk_py
# (in r_vals) to the version produced
# by thunk_c (in storage_map)
if not check_eq(r, r_vals[r], storage_map[r][0]):
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r,
thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0],
inputs_val=inputs_val)
raise BadThunkOutput(
r, thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0],
inputs_val=inputs_val)
else:
#print >> sys.stderr, i, "DEBUGMODE storing reference output %x" % id(storage_map[r][0])
# retrieve each output from the storage_map
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None # clear the storage_map for the thunk_c
# clear the storage_map for the thunk_c
storage_map[r][0] = None
if self.maker.mode.check_preallocated_output:
prealloc_modes = \
......
......@@ -450,6 +450,9 @@ class Constant(Variable):
def signature(self):
    """Return the (type, data) pair identifying this constant.

    Constants with equal signatures are candidates for merging.
    """
    return self.type, self.data
def merge_signature(self):
    """Signature used by the merge optimization.

    Defaults to the plain :meth:`signature`; subclasses may override
    it (e.g. to compare constants by identity instead of by value).
    """
    return self.signature()
def __str__(self):
if self.name is not None:
return self.name
......
......@@ -499,7 +499,7 @@ class MergeFeature(object):
"""Check if a constant can be merged, and queue that replacement"""
if id(c) in self.seen_constants:
return
sig = c.signature()
sig = c.merge_signature()
other_c = self.const_sig_inv.get(sig, None)
if other_c is not None:
# multiple names will clobber each other..
......
......@@ -627,11 +627,13 @@ if (py_%(name)s == NULL) { %(freefunc)s(%(name)s); }
class CDataTypeConstant(graph.Constant):
    # Constant holding an opaque C data pointer (CDataType).

    def merge_signature(self):
        # We don't want to merge constants that don't point to the
        # same underlying object, so the merge signature is the
        # identity of the wrapped data.
        return id(self.data)

    def signature(self):
        # The Op.c_code* methods can't access the data, so the
        # generated code cannot depend on it; there is no need to put
        # it in the signature.  Also, under Python 2, PyCObjects aren't
        # picklable, so using the PyCObject in the signature would
        # disable the C code cache for ops that take one as input.
        # There is no way to put the data in the signature, so we
        # don't even try.
        return (self.type,)


CDataType.Constant = CDataTypeConstant
......@@ -3291,7 +3291,7 @@ class GpuAllocEmpty(GpuOp):
def make_node(self, *shape):
shape, output = self.validate_shape(shape)
output.values_eq_approx = tensor.type.values_eq_approx_always_true
output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return Apply(self, shape, [output])
def perform(self, node, inputs, out_):
......
......@@ -28,6 +28,26 @@ else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_dnn_conv_desc_merge():
    """Descriptors built with different parameters must remain distinct
    outputs — the merge optimization must not unify them."""
    def as_i64_tensor(values):
        # Shapes are passed to GpuDnnConvDesc as int64 tensor variables.
        return T.as_tensor_variable(
            numpy.asarray(values).astype('int64'))

    image_shape = as_i64_tensor([2, 1, 8, 8])
    kernel_shape = as_i64_tensor([3, 1, 2, 2])
    valid_desc = dnn.GpuDnnConvDesc(
        border_mode='valid', subsample=(2, 2),
        conv_mode='conv')(image_shape, kernel_shape)
    full_desc = dnn.GpuDnnConvDesc(
        border_mode='full', subsample=(1, 1),
        conv_mode='cross')(image_shape, kernel_shape)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    fn = theano.function([], [theano.Out(valid_desc, borrow=True),
                              theano.Out(full_desc, borrow=True)])
    out1, out2 = fn()
    # Equal outputs would mean the two descriptors were merged, which
    # would be bad.
    assert out1 != out2
def pool_2d_i2n(input, ds=(2, 2), strides=None,
pad=(0, 0),
pool_function=T.max, mode='ignore_borders'):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论