提交 7858ffe2 authored 作者: abergeron's avatar abergeron

Merge pull request #2914 from nouiz/ifelse

Ifelse remove warning and speed up
...@@ -49,7 +49,7 @@ script: ...@@ -49,7 +49,7 @@ script:
- cd $(python -c 'import theano; import os; print(os.path.split(theano.__file__)[0])') - cd $(python -c 'import theano; import os; print(os.path.split(theano.__file__)[0])')
- echo "$PART" - echo "$PART"
- theano-nose -v $PART - theano-nose -v $PART
- theano-cache list # - theano-cache list
#after_script: #after_script:
after_failure: after_failure:
......
...@@ -15,11 +15,12 @@ from copy import deepcopy ...@@ -15,11 +15,12 @@ from copy import deepcopy
from itertools import izip from itertools import izip
import logging import logging
from theano.gof import PureOp, Apply import numpy
import theano.tensor import theano.tensor
from theano.tensor import TensorType from theano.tensor import TensorType
from theano import gof from theano import gof
from theano.gof import PureOp, Apply
from theano.compile import optdb from theano.compile import optdb
from theano.tensor import opt from theano.tensor import opt
...@@ -226,7 +227,6 @@ class IfElse(PureOp): ...@@ -226,7 +227,6 @@ class IfElse(PureOp):
if_false_op(*if_false, **dict(return_list=True))) if_false_op(*if_false, **dict(return_list=True)))
def make_thunk(self, node, storage_map, compute_map, no_recycling): def make_thunk(self, node, storage_map, compute_map, no_recycling):
outtypes = [out.type for out in node.outputs]
cond = node.inputs[0] cond = node.inputs[0]
ts = node.inputs[1:][:self.n_outs] ts = node.inputs[1:][:self.n_outs]
fs = node.inputs[1:][self.n_outs:] fs = node.inputs[1:][self.n_outs:]
...@@ -243,14 +243,16 @@ class IfElse(PureOp): ...@@ -243,14 +243,16 @@ class IfElse(PureOp):
if len(ls) > 0: if len(ls) > 0:
return ls return ls
else: else:
for out, outtype, t in izip(outputs, outtypes, ts): for out, t in izip(outputs, ts):
compute_map[out][0] = 1 compute_map[out][0] = 1
val = storage_map[t][0]
if self.as_view: if self.as_view:
oval = outtype.filter(storage_map[t][0]) storage_map[out][0] = val
# Work around broken numpy deepcopy
elif type(val) in (numpy.ndarray, numpy.memmap):
storage_map[out][0] = val.copy()
else: else:
oval = outtype.filter( storage_map[out][0] = deepcopy(val)
deepcopy(storage_map[t][0]))
storage_map[out][0] = oval
return [] return []
else: else:
ls = [1 + idx + self.n_outs for idx in xrange(self.n_outs) ls = [1 + idx + self.n_outs for idx in xrange(self.n_outs)
...@@ -258,13 +260,16 @@ class IfElse(PureOp): ...@@ -258,13 +260,16 @@ class IfElse(PureOp):
if len(ls) > 0: if len(ls) > 0:
return ls return ls
else: else:
for out, outtype, f in izip(outputs, outtypes, fs): for out, f in izip(outputs, fs):
compute_map[out][0] = 1 compute_map[out][0] = 1
# can't view both outputs unless destroyhandler # can't view both outputs unless destroyhandler
# improves # improves
oval = outtype.filter( # Work around broken numpy deepcopy
deepcopy(storage_map[f][0])) val = storage_map[f][0]
storage_map[out][0] = oval if type(val) in (numpy.ndarray, numpy.memmap):
storage_map[out][0] = val.copy()
else:
storage_map[out][0] = deepcopy(val)
return [] return []
thunk.lazy = True thunk.lazy = True
......
...@@ -497,12 +497,19 @@ def local_gpu_lazy_ifelse(node): ...@@ -497,12 +497,19 @@ def local_gpu_lazy_ifelse(node):
# Should not happen, but just in case # Should not happen, but just in case
if isinstance(c.type, CudaNdarrayType): if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c) c = host_from_gpu(c)
if all([isinstance(o.type, CudaNdarrayType) or o.dtype != 'float32'
for o in outs]):
return
for i in range(len(outs)): for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType): if (not isinstance(outs[i].type, CudaNdarrayType) and
outs[i].dtype == 'float32'):
outs[i] = gpu_from_host(outs[i]) outs[i] = gpu_from_host(outs[i])
return [host_from_gpu(out) for out in outs = gpu_ifelse(c, *outs, return_list=True)
gpu_ifelse.make_node(c, *outs).outputs] for i in range(len(outs)):
if isinstance(outs[i].type, CudaNdarrayType):
outs[i] = host_from_gpu(outs[i])
return outs
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -522,11 +529,14 @@ def local_gpu_lazy_ifelse(node): ...@@ -522,11 +529,14 @@ def local_gpu_lazy_ifelse(node):
# Should not happen, but just in case # Should not happen, but just in case
if isinstance(c.type, CudaNdarrayType): if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c) c = host_from_gpu(c)
if all([isinstance(o.type, CudaNdarrayType) or o.dtype != 'float32'
for o in outs]):
return
for i in range(len(outs)): for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType): if (not isinstance(outs[i].type, CudaNdarrayType) and
outs[i].dtype == 'float32'):
outs[i] = gpu_from_host(outs[i]) outs[i] = gpu_from_host(outs[i])
outs = gpu_ifelse.make_node(c, *outs).outputs outs = gpu_ifelse.make_node(c, *outs).outputs
return outs return outs
......
...@@ -51,6 +51,33 @@ class test_ifelse(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -51,6 +51,33 @@ class test_ifelse(unittest.TestCase, utt.TestOptimizationMixin):
assert numpy.allclose(vx, f(1, vx, vy)) assert numpy.allclose(vx, f(1, vx, vy))
assert numpy.allclose(vy, f(0, vx, vy)) assert numpy.allclose(vy, f(0, vx, vy))
def test_mixed_dtype(self):
    """Check that a single IfElse node can route branch pairs whose
    outputs have *different* dtypes (an int32 vector next to a
    ``self.dtype`` vector) and still return the correct branch.
    """
    cond = tensor.iscalar('c')
    x1 = tensor.vector('x1', dtype='int32')
    x2 = tensor.vector('x2', dtype=self.dtype)
    y1 = tensor.vector('y1', dtype='int32')
    y2 = tensor.vector('y2', dtype=self.dtype)

    f = theano.function([cond, x1, x2, y1, y2],
                        ifelse(cond, (x1, x2), (y1, y2)),
                        mode=self.mode)
    # The two outputs should be produced by exactly one 2-output
    # IfElse apply node after optimization.
    self.assertFunctionContains1(f, self.get_ifelse(2))

    rng = numpy.random.RandomState(utt.fetch_seed())
    xlen = rng.randint(200)
    ylen = rng.randint(200)
    vx1 = numpy.asarray(rng.uniform(size=(xlen,)) * 3, 'int32')
    vx2 = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy1 = numpy.asarray(rng.uniform(size=(ylen,)) * 3, 'int32')
    vy2 = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

    # cond==1 selects the (x1, x2) branch, cond==0 the (y1, y2) branch.
    for flag, (exp1, exp2) in ((1, (vx1, vx2)), (0, (vy1, vy2))):
        res1, res2 = f(flag, vx1, vx2, vy1, vy2)
        assert numpy.allclose(exp1, res1)
        assert numpy.allclose(exp2, res2)
def test_lazy_if_on_generics(self): def test_lazy_if_on_generics(self):
x = theano.generic() x = theano.generic()
y = theano.generic() y = theano.generic()
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论