提交 18708214 authored 作者: lamblin's avatar lamblin

Merge pull request #942 from nouiz/gpu_alloc2

Gpu alloc2
...@@ -164,7 +164,7 @@ if compile_cuda_ndarray and cuda_available: ...@@ -164,7 +164,7 @@ if compile_cuda_ndarray and cuda_available:
code, code,
location=cuda_ndarray_loc, location=cuda_ndarray_loc,
include_dirs=[cuda_path], libs=['cublas'], include_dirs=[cuda_path], libs=['cublas'],
preargs=compiler.compile_args()) preargs=['-O3'] + compiler.compile_args())
from cuda_ndarray.cuda_ndarray import * from cuda_ndarray.cuda_ndarray import *
except Exception, e: except Exception, e:
_logger.error("Failed to compile cuda_ndarray.cu: %s", str(e)) _logger.error("Failed to compile cuda_ndarray.cu: %s", str(e))
......
...@@ -2255,20 +2255,28 @@ gpu_join = GpuJoin() ...@@ -2255,20 +2255,28 @@ gpu_join = GpuJoin()
class GpuAlloc(GpuOp): class GpuAlloc(GpuOp):
"""Implement Alloc on the gpu.
The memset_0 param is an optimization. When True, we call
cudaMalloc that is faster.
""" """
Implement Alloc on the gpu. def __init__(self, memset_0=False):
""" self.memset_0 = memset_0
def __init__(self):
pass
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other) and self.memset_0 == other.memset_0
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self)) ^ hash(self.memset_0)
def __str__(self): def __str__(self):
return self.__class__.__name__ #Hide the memset parameter when not used to prevent confusion.
if self.memset_0:
s = "%s{memset_0=%s}" % (self.__class__.__name__, self.memset_0)
else:
s = self.__class__.__name__
return s
def make_node(self, value, *shape): def make_node(self, value, *shape):
#if their is unneeded transfert generated by the next line #if their is unneeded transfert generated by the next line
...@@ -2307,6 +2315,7 @@ class GpuAlloc(GpuOp): ...@@ -2307,6 +2315,7 @@ class GpuAlloc(GpuOp):
value = inputs[0] value = inputs[0]
shps = inputs[1:] shps = inputs[1:]
nd = len(shps) nd = len(shps)
memset_0 = int(self.memset_0)
str = "int dims[%(nd)s];\n" % locals() str = "int dims[%(nd)s];\n" % locals()
for idx, sh in enumerate(shps): for idx, sh in enumerate(shps):
str += "dims[%(idx)s] = PyInt_AsLong((PyObject*)%(sh)s);\n" % locals() str += "dims[%(idx)s] = PyInt_AsLong((PyObject*)%(sh)s);\n" % locals()
...@@ -2330,7 +2339,21 @@ class GpuAlloc(GpuOp): ...@@ -2330,7 +2339,21 @@ class GpuAlloc(GpuOp):
%(fail)s; %(fail)s;
} }
} }
if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(value)s, true)) if (%(memset_0)s)
{
if (cudaSuccess != cudaMemset(%(out)s->devdata, 0,
CudaNdarray_SIZE(%(out)s) * 4))
{
PyErr_Format(PyExc_MemoryError,
"GpuAlloc: Error memsetting %%d"
" bytes of device memory.",
CudaNdarray_SIZE(%(out)s) * 4);
Py_XDECREF(%(out)s);
%(out)s = NULL;
%(fail)s;
}
}
else if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(value)s, true))
{ {
// exception already set // exception already set
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
...@@ -2348,7 +2371,7 @@ class GpuAlloc(GpuOp): ...@@ -2348,7 +2371,7 @@ class GpuAlloc(GpuOp):
return [None for i in inputs] return [None for i in inputs]
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (5,)
def do_constant_folding(self, node): def do_constant_folding(self, node):
for client in node.outputs[0].clients: for client in node.outputs[0].clients:
......
...@@ -3133,24 +3133,6 @@ CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj) ...@@ -3133,24 +3133,6 @@ CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
return 0; return 0;
} }
bool
CudaNdarray_is_c_contiguous(const CudaNdarray * self)
{
bool c_contiguous = true;
int size = 1;
for (int i = self->nd-1; (i >= 0) && c_contiguous; --i)
{
if (CudaNdarray_HOST_DIMS(self)[i] == 1)
continue;
if (CudaNdarray_HOST_STRIDES(self)[i] != size)
{
c_contiguous = false;
}
size = size * CudaNdarray_HOST_DIMS(self)[i];
}
return c_contiguous;
}
PyObject * PyObject *
CudaNdarray_new_nd(int nd) CudaNdarray_new_nd(int nd)
{ {
...@@ -4346,12 +4328,6 @@ CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self) ...@@ -4346,12 +4328,6 @@ CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self)
return self->host_structure + 2*self->nd; return self->host_structure + 2*self->nd;
} }
void
cnda_mark_dev_structure_dirty(CudaNdarray * self)
{
self->dev_structure_fresh = 0;
}
int int
CudaNdarray_EqualAndIgnore(CudaNdarray *cnda1, CudaNdarray *cnda2, int ignoreSync, int ignoreBase) CudaNdarray_EqualAndIgnore(CudaNdarray *cnda1, CudaNdarray *cnda2, int ignoreSync, int ignoreBase)
{ {
...@@ -4406,43 +4382,33 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2) ...@@ -4406,43 +4382,33 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2)
return CudaNdarray_EqualAndIgnore(cnda1, cnda2, 0, 0); return CudaNdarray_EqualAndIgnore(cnda1, cnda2, 0, 0);
} }
void int
CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) cnda_copy_structure_to_device(const CudaNdarray * self)
{ {
if ((idx >= self->nd) || (idx < 0) || (d < 0)) //If the device structure do not exists, create it.
//We allocate it here as we do not need it often.
//In fact, we need it so infrequently that we expect
//that most object won't need it. Not allocating it
//save a significant when creating object.
//This speed up a benchmark by 8% with the gc.
if (!self->dev_structure)
{
int struct_size = cnda_structure_size(self->nd);
if (struct_size)
{ {
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: %i %i\n", idx, d); self->dev_structure = (int*)device_malloc(struct_size* sizeof(int));
} if (NULL == self->dev_structure)
if (d != self->host_structure[idx])
{ {
self->host_structure[idx] = d; return -1;
int log2d = (int)log2((double)d);
self->host_structure[idx + 2*self->nd] = (d == (1 << log2d)) ? log2d : -1;
cnda_mark_dev_structure_dirty(self);
} }
}
void
CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
{
if ((idx >= self->nd) || (idx < 0))
{
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_stride arguments: %i %i\n", idx, s);
} }
if (s != CudaNdarray_HOST_STRIDES(self)[idx])
{
self->host_structure[idx+self->nd] = s;
cnda_mark_dev_structure_dirty(self);
} }
} cublasSetVector(cnda_structure_size(self->nd),
sizeof(int),
self->host_structure,
int 1,
cnda_copy_structure_to_device(const CudaNdarray * self) self->dev_structure,
{ 1);
cublasSetVector(cnda_structure_size(self->nd), sizeof(int), self->host_structure, 1, self->dev_structure, 1);
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
if (CUBLAS_STATUS_SUCCESS != cublasGetError()) if (CUBLAS_STATUS_SUCCESS != cublasGetError())
{ {
...@@ -4510,56 +4476,6 @@ CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure) ...@@ -4510,56 +4476,6 @@ CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure)
return PyInt_FromLong(CudaNdarray_SIZE(self)); return PyInt_FromLong(CudaNdarray_SIZE(self));
} }
int CudaNdarray_set_nd(CudaNdarray * self, const int nd)
{
if (nd != self->nd)
{
if (self->dev_structure)
{
if (device_free(self->dev_structure))
{
return -1;
}
self->dev_structure = NULL;
}
if (self->host_structure)
{
free(self->host_structure);
self->host_structure = NULL;
self->nd = -1;
}
if (nd == -1) return 0;
self->host_structure = (int*)malloc(cnda_structure_size(nd)*sizeof(int));
if (NULL == self->host_structure)
{
PyErr_SetString(PyExc_MemoryError, "Failed to allocate dim or str");
return -1;
}
//initialize all dimensions and strides to 0
for (int i = 0; i < cnda_structure_size(nd); ++i)
{
self->host_structure[i] = 0;
}
int struct_size = cnda_structure_size(nd);
if (struct_size)
{
self->dev_structure = (int*)device_malloc(struct_size* sizeof(int));
if (NULL == self->dev_structure)
{
free(self->host_structure);
self->host_structure = NULL;
self->dev_structure = NULL;
return -1;
}
}
self->nd = nd;
self->dev_structure_fresh = 0;
}
return 0;
}
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, const CudaNdarray * base) int CudaNdarray_set_device_data(CudaNdarray * self, float * data, const CudaNdarray * base)
{ {
return CudaNdarray_set_device_data(self, data, (PyObject *) base); return CudaNdarray_set_device_data(self, data, (PyObject *) base);
......
...@@ -82,8 +82,9 @@ struct CudaNdarray ...@@ -82,8 +82,9 @@ struct CudaNdarray
//device pointers (allocated by cudaMalloc) //device pointers (allocated by cudaMalloc)
mutable int dev_structure_fresh; mutable int dev_structure_fresh;
//dev_structure should be accessed via macros, otherwise may not be synchronized //dev_structure should be accessed via macros, otherwise may not be
int * dev_structure; //dim0, dim1, ..., stride0, stride1, ... //synchronized. The macro will allocate it when needed.
mutable int * dev_structure; //dim0, dim1, ..., stride0, stride1, ...
real* devdata; //pointer to data element [0,..,0]. real* devdata; //pointer to data element [0,..,0].
}; };
...@@ -126,8 +127,12 @@ CudaNdarray_HOST_STRIDES(const CudaNdarray * self); ...@@ -126,8 +127,12 @@ CudaNdarray_HOST_STRIDES(const CudaNdarray * self);
DllExport const int * DllExport const int *
CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self); CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self);
DllExport void DllExport inline void __attribute__((always_inline))
cnda_mark_dev_structure_dirty(CudaNdarray * self); cnda_mark_dev_structure_dirty(CudaNdarray * self)
{
self->dev_structure_fresh = 0;
}
DllExport int DllExport int
CudaNdarray_EqualAndIgnore(CudaNdarray *cnda1, CudaNdarray *cnda2, int ignoreSync, int ignoreBase); CudaNdarray_EqualAndIgnore(CudaNdarray *cnda1, CudaNdarray *cnda2, int ignoreSync, int ignoreBase);
...@@ -143,11 +148,38 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2); ...@@ -143,11 +148,38 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2);
* *
* Does not sync structure to host. * Does not sync structure to host.
*/ */
DllExport void DllExport inline void __attribute__((always_inline))
CudaNdarray_set_dim(CudaNdarray * self, int idx, int d); CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
{
if ((idx >= self->nd) || (idx < 0) || (d < 0))
{
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: %i %i\n", idx, d);
}
if (d != self->host_structure[idx])
{
self->host_structure[idx] = d;
int log2d = (int)log2((double)d);
self->host_structure[idx + 2*self->nd] = (d == (1 << log2d)) ? log2d : -1;
cnda_mark_dev_structure_dirty(self);
}
}
DllExport void DllExport inline void __attribute__((always_inline))
CudaNdarray_set_stride(CudaNdarray * self, int idx, int s); CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
{
if ((idx >= self->nd) || (idx < 0))
{
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_stride arguments: %i %i\n", idx, s);
}
if (s != CudaNdarray_HOST_STRIDES(self)[idx])
{
self->host_structure[idx+self->nd] = s;
cnda_mark_dev_structure_dirty(self);
}
}
/*** /***
* Update dependent variables from the contents of CudaNdarray_HOST_DIMS(self) and CudaNdarray_HOST_STRIDES(self) * Update dependent variables from the contents of CudaNdarray_HOST_DIMS(self) and CudaNdarray_HOST_STRIDES(self)
...@@ -188,7 +220,46 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd); ...@@ -188,7 +220,46 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd);
* *
* Note: This does not allocate storage for data. * Note: This does not allocate storage for data.
*/ */
DllExport int CudaNdarray_set_nd(CudaNdarray * self, const int nd); DllExport inline int __attribute__((always_inline))
CudaNdarray_set_nd(CudaNdarray * self, const int nd)
{
if (nd != self->nd)
{
if (self->dev_structure)
{
if (device_free(self->dev_structure))
{
return -1;
}
self->dev_structure = NULL;
}
if (self->host_structure)
{
free(self->host_structure);
self->host_structure = NULL;
self->nd = -1;
}
if (nd == -1) return 0;
self->host_structure = (int*)malloc(cnda_structure_size(nd)*sizeof(int));
if (NULL == self->host_structure)
{
PyErr_SetString(PyExc_MemoryError, "Failed to allocate dim or str");
return -1;
}
//initialize all dimensions and strides to 0
for (int i = 0; i < cnda_structure_size(nd); ++i)
{
self->host_structure[i] = 0;
}
//The device structure will be created in cnda_copy_structure_to_device
//if needed.
self->nd = nd;
self->dev_structure_fresh = 0;
}
return 0;
}
/** /**
* CudaNdarray_alloc_contiguous * CudaNdarray_alloc_contiguous
...@@ -218,7 +289,7 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i ...@@ -218,7 +289,7 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
size = size * dim[i]; size = size * dim[i];
} }
if (CudaNdarray_is_c_contiguous(self) && (self->data_allocated == size)) if ((self->data_allocated == size) && CudaNdarray_is_c_contiguous(self))
{ {
return 0; return 0;
} }
...@@ -333,7 +404,24 @@ CudaNdarray_ZEROS(int n, int * dims); ...@@ -333,7 +404,24 @@ CudaNdarray_ZEROS(int n, int * dims);
/** /**
* True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1] * True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1]
*/ */
DllExport bool CudaNdarray_is_c_contiguous(const CudaNdarray * self); DllExport inline bool __attribute__((always_inline))
CudaNdarray_is_c_contiguous(const CudaNdarray * self)
{
bool c_contiguous = true;
int size = 1;
for (int i = self->nd-1; (i >= 0) && c_contiguous; --i)
{
if (CudaNdarray_HOST_DIMS(self)[i] == 1)
continue;
if (CudaNdarray_HOST_STRIDES(self)[i] != size)
{
c_contiguous = false;
}
size = size * CudaNdarray_HOST_DIMS(self)[i];
}
return c_contiguous;
}
DllExport PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self); DllExport PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self);
DllExport int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C); DllExport int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C);
......
...@@ -33,6 +33,7 @@ from theano.sandbox.cuda.nnet import ( ...@@ -33,6 +33,7 @@ from theano.sandbox.cuda.nnet import (
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmax, GpuSoftmaxWithBias) GpuSoftmax, GpuSoftmaxWithBias)
from theano.sandbox.cuda.elemwise import SupportCodeError from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.scan_module import scan_utils, scan_op from theano.scan_module import scan_utils, scan_op
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
...@@ -1337,6 +1338,19 @@ def local_gpualloc(node): ...@@ -1337,6 +1338,19 @@ def local_gpualloc(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([tensor.Alloc])
def local_gpualloc_memset_0(node):
replace = False
if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
inp = node.inputs[0]
if (isinstance(inp, CudaNdarrayConstant) and
inp.data.size == 1 and
(numpy.asarray(inp.data) == 0).all()):
new_out = GpuAlloc(memset_0=True)(*node.inputs)
return [new_out]
def safe_to_gpu(x): def safe_to_gpu(x):
if (isinstance(x.type, tensor.TensorType) and if (isinstance(x.type, tensor.TensorType) and
x.type.dtype == 'float32'): x.type.dtype == 'float32'):
......
...@@ -775,11 +775,11 @@ def test_gpujoin_gpualloc(): ...@@ -775,11 +775,11 @@ def test_gpujoin_gpualloc():
assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2 assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2
assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1 assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1
assert sum([node.op == B.gpu_alloc assert sum([isinstance(node.op, B.GpuAlloc)
for node in f_gpu.maker.fgraph.toposort()]) == 2 for node in f_gpu.maker.fgraph.toposort()]) == 2
assert sum([node.op == B.gpu_join assert sum([node.op == B.gpu_join
for node in f_gpu.maker.fgraph.toposort()]) == 1 for node in f_gpu.maker.fgraph.toposort()]) == 1
assert sum([node.op == B.gpu_alloc assert sum([isinstance(node.op, B.GpuAlloc)
for node in f_gpu2.maker.fgraph.toposort()]) == 2 for node in f_gpu2.maker.fgraph.toposort()]) == 2
assert sum([node.op == B.gpu_join assert sum([node.op == B.gpu_join
for node in f_gpu2.maker.fgraph.toposort()]) == 1 for node in f_gpu2.maker.fgraph.toposort()]) == 1
......
...@@ -62,7 +62,7 @@ def test_memory(): ...@@ -62,7 +62,7 @@ def test_memory():
# When dtype is float64, only the shared is on the gpu and it is transferd # When dtype is float64, only the shared is on the gpu and it is transferd
# to the cpu for computation. So no extra alloc after compilation. # to the cpu for computation. So no extra alloc after compilation.
# more_alloc1 if after the first compilation, more_alloc2 after the second. # more_alloc1 if after the first compilation, more_alloc2 after the second.
for dtype, more_alloc1, more_alloc2 in [("float32", 2, 9), for dtype, more_alloc1, more_alloc2 in [("float32", 1, 4),
("float64", 0, 0)]: ("float64", 0, 0)]:
print dtype print dtype
test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype) test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype)
...@@ -129,7 +129,7 @@ def test_memory_lazy(): ...@@ -129,7 +129,7 @@ def test_memory_lazy():
# When dtype is float64, only the shared is on the gpu and it is transferd # When dtype is float64, only the shared is on the gpu and it is transferd
# to the cpu for computation. So no extra alloc after compilation. # to the cpu for computation. So no extra alloc after compilation.
# more_alloc1 if after the first compilation, more_alloc2 after the second. # more_alloc1 if after the first compilation, more_alloc2 after the second.
for dtype, more_alloc1 in [("float32", 3), for dtype, more_alloc1 in [("float32", 2),
("float64", 0)]: ("float64", 0)]:
print dtype print dtype
test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype) test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype)
......
...@@ -70,6 +70,39 @@ def test_gpualloc(): ...@@ -70,6 +70,39 @@ def test_gpualloc():
assert numpy.any(ininstance(x.op, cuda.GpuAlloc) for x in l ) assert numpy.any(ininstance(x.op, cuda.GpuAlloc) for x in l )
def test_alloc_memset_0():
i = tensor.iscalar()
z = numpy.zeros((1,), dtype='float32')
o = numpy.ones((1,), dtype='float32')
ones = numpy.ones((2,), dtype='float32')
# Test with 0
a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(z)), i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, basic_ops.GpuAlloc) and topo[0].op.memset_0
assert (numpy.asarray(f(6)) == 0).all()
# Test with 1
a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(o)), i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, basic_ops.GpuAlloc)
assert not topo[0].op.memset_0
assert (numpy.asarray(f(6)) == 1).all()
# Test with 1, 1
a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(ones)), i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, basic_ops.GpuAlloc)
assert not topo[0].op.memset_0
assert (numpy.asarray(f(2)) == 1).all()
def test_gpuspecifyshape(): def test_gpuspecifyshape():
x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x') x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x')
m = theano.tensor.specify_shape(x + numpy.float32(1), (3,)) m = theano.tensor.specify_shape(x + numpy.float32(1), (3,))
......
...@@ -62,8 +62,8 @@ def test_merge_with_weird_eq(): ...@@ -62,8 +62,8 @@ def test_merge_with_weird_eq():
g = Env([x, y], [x+y]) g = Env([x, y], [x+y])
MergeOptimizer().optimize(g) MergeOptimizer().optimize(g)
assert len(g.nodes) == 1 assert len(g.apply_nodes) == 1
node = list(g.nodes)[0] node = list(g.apply_nodes)[0]
assert len(node.inputs) == 2 assert len(node.inputs) == 2
assert node.inputs[0] is node.inputs[1] assert node.inputs[0] is node.inputs[1]
...@@ -74,8 +74,7 @@ def test_merge_with_weird_eq(): ...@@ -74,8 +74,7 @@ def test_merge_with_weird_eq():
g = Env([x, y], [x+y]) g = Env([x, y], [x+y])
MergeOptimizer().optimize(g) MergeOptimizer().optimize(g)
assert len(g.nodes) == 1 assert len(g.apply_nodes) == 1
node = list(g.nodes)[0] node = list(g.apply_nodes)[0]
assert len(node.inputs) == 2 assert len(node.inputs) == 2
assert node.inputs[0] is node.inputs[1] assert node.inputs[0] is node.inputs[1]
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论