Commit 4b81dbd1 authored by: Melanie Ducoffe

done with the current change

Parent: 27de245b
......@@ -2275,11 +2275,10 @@ def gpuScanOptimization(node):
return outputs
return False
# awaiting tests and correction
@register_opt()
@local_optimizer([tensor.AllocEmpty, gpu_from_host])
def local_gpu_allocempty(node):
    """Move a float32 ``AllocEmpty`` onto the GPU.

    Rewrites an ``AllocEmpty`` node whose dtype is float32 into
    ``GpuAllocEmpty`` wrapped in ``host_from_gpu``, so the allocation
    happens on the GPU and later optimizations can keep it there.
    Returns the replacement outputs, or ``False`` when the node does
    not match.
    """
    # Only float32 is supported by the CUDA backend's GpuAllocEmpty.
    if (isinstance(node.op, tensor.AllocEmpty) and
            node.op.dtype == "float32"):
        return [host_from_gpu(GpuAllocEmpty()(*node.inputs))]
    return False
......
......@@ -380,6 +380,17 @@ def test_alloc_empty():
assert out.shape == (2, 3)
assert out.dtype == 'float32'
# Test that we merge them.
f = theano.function([], [cuda.basic_ops.gpu_alloc_empty(2, 3),
cuda.basic_ops.gpu_alloc_empty(2, 3)])
out = f()
assert out[0].shape == (2, 3)
assert out[0].dtype == 'float32'
assert out[1].shape == (2, 3)
assert out[1].dtype == 'float32'
assert len([node for node in f.maker.fgraph.apply_nodes
if isinstance(node.op, cuda.basic_ops.GpuAllocEmpty)]) == 1
def test_elemwise_empty():
# test with 0 element
......
......@@ -153,6 +153,7 @@ def test_gpualloc():
l = f.maker.fgraph.toposort()
assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l])
def test_gpuallocempty():
......
......@@ -5550,8 +5550,7 @@ class AllocEmpty(gof.Op):
return [node.inputs]
def c_code_cache_version(self):
    """Return the version tag used to key the compiled-C-code cache.

    Bumping this tuple invalidates previously cached compiled code
    for this Op.
    """
    # NOTE(review): merge residue had left unreachable `return None`
    # and `return (2,)` lines stacked above the final return; only the
    # newest version tag is kept.
    return (3,)
def do_constant_folding(self, node):
    # Never constant-fold this Op. NOTE(review): the enclosing class is
    # AllocEmpty, which presumably returns uninitialized memory, so
    # folding it would bake arbitrary garbage values into the graph --
    # confirm against the class's perform/c_code.
    return False
......@@ -2043,7 +2043,7 @@ def local_dot22_to_dot22scalar(node):
assert not a.type.ndim
z = T.AllocEmpty(d.owner.inputs[0].dtype)(d.owner.inputs[0].shape[0],
d.owner.inputs[1].shape[1])
d.owner.inputs[1].shape[1])
zero = T.as_tensor_variable(numpy.asarray(0, dtype=a.dtype))
dot = gemm(z, a, d.owner.inputs[0], d.owner.inputs[1], zero)
......
......@@ -875,7 +875,7 @@ def test_dot22scalar():
cst = theano.tensor.basic.constant(.2, dtype=dtype4)
cst2 = theano.tensor.basic.constant(.1, dtype=dtype4)
def check_dot22scalar(func, len_topo_scalar=-1):
def check_dot22scalar_gemm(func, len_topo_scalar=-1):
topo = func.maker.fgraph.toposort()
ops = [x.op for x in topo]
classes = [type(x.op) for x in topo]
......@@ -920,7 +920,7 @@ def test_dot22scalar():
f = theano.function([a, b], cst * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.fgraph.toposort()
check_dot22scalar(f, 1)
check_dot22scalar_gemm(f, 1)
f(av, bv)
......@@ -929,7 +929,7 @@ def test_dot22scalar():
cst * c * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.fgraph.toposort()
check_dot22scalar(f, 5)
check_dot22scalar_gemm(f, 5)
#print (av.dtype, bv.dtype, cv.dtype)
f(av, bv, cv)
......@@ -938,7 +938,7 @@ def test_dot22scalar():
c * cst * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.fgraph.toposort()
check_dot22scalar(f, 5)
check_dot22scalar_gemm(f, 5)
f(av, bv, cv)
# Here, canonicalize also seems needed
......@@ -948,7 +948,7 @@ def test_dot22scalar():
cst2 * c * cst * T.dot(a, b),
mode=m2)
topo = f.maker.fgraph.toposort()
check_dot22scalar(f, 5)
check_dot22scalar_gemm(f, 5)
f(av, bv, cv)
if dtype1 == dtype2 == dtype3:
......@@ -956,7 +956,7 @@ def test_dot22scalar():
c * cst * a * T.dot(a, b),
mode=m2)
topo = f.maker.fgraph.toposort()
check_dot22scalar(f, 5)
check_dot22scalar_gemm(f, 5)
f(sv, sv, sv)
f = theano.function([a, b, c],
......@@ -979,7 +979,7 @@ def test_dot22scalar():
c * a * cst * T.dot(a, b),
mode=m2)
topo = f.maker.fgraph.toposort()
check_dot22scalar(f, 5)
check_dot22scalar_gemm(f, 5)
f(sv, sv, sv)
cmp((3, 4), (4, 5), (3, 5))
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment