提交 7858ffe2 authored 作者: abergeron's avatar abergeron

Merge pull request #2914 from nouiz/ifelse

Ifelse remove warning and speed up
......@@ -49,7 +49,7 @@ script:
- cd $(python -c 'import theano; import os; print(os.path.split(theano.__file__)[0])')
- echo "$PART"
- theano-nose -v $PART
- theano-cache list
# - theano-cache list
#after_script:
after_failure:
......
......@@ -15,11 +15,12 @@ from copy import deepcopy
from itertools import izip
import logging
from theano.gof import PureOp, Apply
import numpy
import theano.tensor
from theano.tensor import TensorType
from theano import gof
from theano.gof import PureOp, Apply
from theano.compile import optdb
from theano.tensor import opt
......@@ -226,7 +227,6 @@ class IfElse(PureOp):
if_false_op(*if_false, **dict(return_list=True)))
def make_thunk(self, node, storage_map, compute_map, no_recycling):
outtypes = [out.type for out in node.outputs]
cond = node.inputs[0]
ts = node.inputs[1:][:self.n_outs]
fs = node.inputs[1:][self.n_outs:]
......@@ -243,14 +243,16 @@ class IfElse(PureOp):
if len(ls) > 0:
return ls
else:
for out, outtype, t in izip(outputs, outtypes, ts):
for out, t in izip(outputs, ts):
compute_map[out][0] = 1
val = storage_map[t][0]
if self.as_view:
oval = outtype.filter(storage_map[t][0])
storage_map[out][0] = val
# Work around broken numpy deepcopy
elif type(val) in (numpy.ndarray, numpy.memmap):
storage_map[out][0] = val.copy()
else:
oval = outtype.filter(
deepcopy(storage_map[t][0]))
storage_map[out][0] = oval
storage_map[out][0] = deepcopy(val)
return []
else:
ls = [1 + idx + self.n_outs for idx in xrange(self.n_outs)
......@@ -258,13 +260,16 @@ class IfElse(PureOp):
if len(ls) > 0:
return ls
else:
for out, outtype, f in izip(outputs, outtypes, fs):
for out, f in izip(outputs, fs):
compute_map[out][0] = 1
# can't view both outputs unless destroyhandler
# improves
oval = outtype.filter(
deepcopy(storage_map[f][0]))
storage_map[out][0] = oval
# Work around broken numpy deepcopy
val = storage_map[f][0]
if type(val) in (numpy.ndarray, numpy.memmap):
storage_map[out][0] = val.copy()
else:
storage_map[out][0] = deepcopy(val)
return []
thunk.lazy = True
......
......@@ -497,12 +497,19 @@ def local_gpu_lazy_ifelse(node):
# Should not happen, but just in case
if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c)
if all([isinstance(o.type, CudaNdarrayType) or o.dtype != 'float32'
for o in outs]):
return
for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType):
if (not isinstance(outs[i].type, CudaNdarrayType) and
outs[i].dtype == 'float32'):
outs[i] = gpu_from_host(outs[i])
return [host_from_gpu(out) for out in
gpu_ifelse.make_node(c, *outs).outputs]
outs = gpu_ifelse(c, *outs, return_list=True)
for i in range(len(outs)):
if isinstance(outs[i].type, CudaNdarrayType):
outs[i] = host_from_gpu(outs[i])
return outs
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -522,11 +529,14 @@ def local_gpu_lazy_ifelse(node):
# Should not happen, but just in case
if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c)
if all([isinstance(o.type, CudaNdarrayType) or o.dtype != 'float32'
for o in outs]):
return
for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType):
if (not isinstance(outs[i].type, CudaNdarrayType) and
outs[i].dtype == 'float32'):
outs[i] = gpu_from_host(outs[i])
outs = gpu_ifelse.make_node(c, *outs).outputs
return outs
......
......@@ -51,6 +51,33 @@ class test_ifelse(unittest.TestCase, utt.TestOptimizationMixin):
assert numpy.allclose(vx, f(1, vx, vy))
assert numpy.allclose(vy, f(0, vx, vy))
def test_mixed_dtype(self):
    """Exercise a two-output ifelse whose outputs have different dtypes.

    Each branch pairs an int32 vector with a ``self.dtype`` vector.  The
    compiled function is called once with a true condition and once with
    a false one, and the returned pair must match the inputs of the
    selected branch.
    """
    int_then = tensor.vector('x1', dtype='int32')
    flt_then = tensor.vector('x2', dtype=self.dtype)
    int_else = tensor.vector('y1', dtype='int32')
    flt_else = tensor.vector('y2', dtype=self.dtype)
    cond = tensor.iscalar('c')

    selected = ifelse(cond, (int_then, flt_then), (int_else, flt_else))
    f = theano.function(
        [cond, int_then, flt_then, int_else, flt_else],
        selected,
        mode=self.mode)
    self.assertFunctionContains1(f, self.get_ifelse(2))

    rng = numpy.random.RandomState(utt.fetch_seed())
    # The draws below stay in this exact order (two lengths, then the four
    # value arrays) so the generated data is the same for a given seed.
    then_len = rng.randint(200)
    else_len = rng.randint(200)
    then_int = numpy.asarray(rng.uniform(size=(then_len,)) * 3, 'int32')
    then_flt = numpy.asarray(rng.uniform(size=(then_len,)), self.dtype)
    else_int = numpy.asarray(rng.uniform(size=(else_len,)) * 3, 'int32')
    else_flt = numpy.asarray(rng.uniform(size=(else_len,)), self.dtype)

    # (condition value, expected first output, expected second output)
    cases = (
        (1, then_int, then_flt),
        (0, else_int, else_flt),
    )
    for flag, want_int, want_flt in cases:
        got_int, got_flt = f(flag, then_int, then_flt, else_int, else_flt)
        assert numpy.allclose(want_int, got_int)
        assert numpy.allclose(want_flt, got_flt)
def test_lazy_if_on_generics(self):
x = theano.generic()
y = theano.generic()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论