Commit b6410099, authored by Frédéric Bastien

Merge pull request #2243 from RoyXue/fix_crash_and_GPU_support

Fix ifelse crash and gpu support
...@@ -8,6 +8,7 @@ import numpy ...@@ -8,6 +8,7 @@ import numpy
import theano import theano
import theano.tensor as T import theano.tensor as T
from theano.ifelse import ifelse
def test_profiling(): def test_profiling():
...@@ -20,11 +21,11 @@ def test_profiling(): ...@@ -20,11 +21,11 @@ def test_profiling():
theano.config.profile_memory = True theano.config.profile_memory = True
theano.config.profiling.min_peak_memory = True theano.config.profiling.min_peak_memory = True
x = [T.dvector("val%i" % i) for i in range(3)] x = [T.fvector("val%i" % i) for i in range(3)]
z = [] z = []
z += [T.outer(x[i], x[i+1]).sum(axis=1) for i in range(len(x)-1)] z += [T.outer(x[i], x[i + 1]).sum(axis=1) for i in range(len(x) - 1)]
z += [x[i] + x[i+1] for i in range(len(x)-1)] z += [x[i] + x[i + 1] for i in range(len(x) - 1)]
p = theano.ProfileStats(False) p = theano.ProfileStats(False)
...@@ -36,7 +37,7 @@ def test_profiling(): ...@@ -36,7 +37,7 @@ def test_profiling():
f = theano.function(x, z, profile=p, name="test_profiling", f = theano.function(x, z, profile=p, name="test_profiling",
mode=m) mode=m)
inp = [numpy.arange(1024) + 1 for i in range(len(x))] inp = [numpy.arange(1024, dtype='float32') + 1 for i in range(len(x))]
output = f(*inp) output = f(*inp)
buf = StringIO.StringIO() buf = StringIO.StringIO()
...@@ -46,8 +47,16 @@ def test_profiling(): ...@@ -46,8 +47,16 @@ def test_profiling():
the_string = buf.getvalue() the_string = buf.getvalue()
lines1 = [l for l in the_string.split("\n") if "Max if linker" in l] lines1 = [l for l in the_string.split("\n") if "Max if linker" in l]
lines2 = [l for l in the_string.split("\n") if "Minimum peak" in l] lines2 = [l for l in the_string.split("\n") if "Minimum peak" in l]
assert "Max if linker=cvm(default): 8224KB (16408KB)" in the_string, (lines1, lines2) if theano.config.device == 'cpu':
assert "Minimum peak from all valid apply node order is 8208KB" in the_string, (lines1, lines2) assert "Max if linker=cvm(default): 4112KB (8204KB)" in the_string, (
lines1, lines2)
assert "Minimum peak from all valid apply node order is 4104KB" in the_string, (
lines1, lines2)
else:
assert "Max if linker=cvm(default): 8220KB (8220KB)" in the_string, (
lines1, lines2)
assert "Minimum peak from all valid apply node order is 4116KB" in the_string, (
lines1, lines2)
finally: finally:
theano.config.profile = config1 theano.config.profile = config1
...@@ -55,5 +64,41 @@ def test_profiling(): ...@@ -55,5 +64,41 @@ def test_profiling():
theano.config.profiling.min_peak_memory = config3 theano.config.profiling.min_peak_memory = config3
def test_ifelse():
    """Check that a profiled function containing ``ifelse`` runs correctly.

    Compiles ``ifelse(a < b, x * 2, y * 2)`` with profiling and memory
    profiling enabled, calls it once and verifies the returned value.
    The two global config flags touched here are restored on exit, even
    if the test fails.
    """
    # Remember the global flags so the ``finally`` clause can restore them.
    saved_profile = theano.config.profile
    saved_profile_memory = theano.config.profile_memory

    try:
        theano.config.profile = True
        theano.config.profile_memory = True

        a, b = T.scalars('a', 'b')
        x, y = T.scalars('x', 'y')

        z = ifelse(T.lt(a, b), x * 2, y * 2)

        p = theano.ProfileStats(False)

        # NOTE(review): these modes are swapped for FAST_RUN, presumably
        # because they do not fit this profiling setup -- confirm.
        if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            m = "FAST_RUN"
        else:
            m = None

        f_ifelse = theano.function([a, b, x, y], z, profile=p,
                                   name="test_ifelse", mode=m)

        a_val = 0.
        b_val = 1.
        x_val = 10
        y_val = 11

        out = f_ifelse(a_val, b_val, x_val, y_val)

        # a_val < b_val, so the first branch (x * 2) must be selected.
        # Without this check the test only proves the call does not crash.
        assert out == x_val * 2, out

    finally:
        theano.config.profile = saved_profile
        theano.config.profile_memory = saved_profile_memory
if __name__ == '__main__': if __name__ == '__main__':
test_profiling() test_profiling()
test_ifelse()
...@@ -56,6 +56,7 @@ raise_with_op = link.raise_with_op ...@@ -56,6 +56,7 @@ raise_with_op = link.raise_with_op
class VM(object): class VM(object):
""" """
A VM object's __call__ method evaluates a Theano program. A VM object's __call__ method evaluates a Theano program.
...@@ -83,6 +84,7 @@ class VM(object): ...@@ -83,6 +84,7 @@ class VM(object):
storage. False means it *must not* repeat that feedback. storage. False means it *must not* repeat that feedback.
""" """
def __init__(self, nodes, thunks, pre_call_clear): def __init__(self, nodes, thunks, pre_call_clear):
""" """
Allocate a virtual machine. Allocate a virtual machine.
...@@ -159,10 +161,12 @@ class VM(object): ...@@ -159,10 +161,12 @@ class VM(object):
class Loop(VM): class Loop(VM):
""" """
Unconditional start-to-finish program execution in Python. Unconditional start-to-finish program execution in Python.
No garbage collection is allowed on intermediate results. No garbage collection is allowed on intermediate results.
""" """
def __call__(self): def __call__(self):
if self.time_thunks: if self.time_thunks:
for cont in self.pre_call_clear: for cont in self.pre_call_clear:
...@@ -188,10 +192,12 @@ class Loop(VM): ...@@ -188,10 +192,12 @@ class Loop(VM):
class LoopGC(VM): class LoopGC(VM):
""" """
Unconditional start-to-finish program execution in Python. Unconditional start-to-finish program execution in Python.
Garbage collection is possible on intermediate results. Garbage collection is possible on intermediate results.
""" """
def __init__(self, nodes, thunks, pre_call_clear, post_thunk_clear): def __init__(self, nodes, thunks, pre_call_clear, post_thunk_clear):
super(LoopGC, self).__init__(nodes, thunks, pre_call_clear) super(LoopGC, self).__init__(nodes, thunks, pre_call_clear)
self.post_thunk_clear = post_thunk_clear self.post_thunk_clear = post_thunk_clear
...@@ -231,6 +237,7 @@ class LoopGC(VM): ...@@ -231,6 +237,7 @@ class LoopGC(VM):
class Stack(VM): class Stack(VM):
""" """
Finish-to-start evalution order of thunks. Finish-to-start evalution order of thunks.
...@@ -340,7 +347,7 @@ class Stack(VM): ...@@ -340,7 +347,7 @@ class Stack(VM):
apply_stack = list(self.base_apply_stack) apply_stack = list(self.base_apply_stack)
last_apply_stack_len = -1 last_apply_stack_len = -1
#This record all function inputs/shared varibles and constants # This record all function inputs/shared varibles and constants
for var, data in self.storage_map.iteritems(): for var, data in self.storage_map.iteritems():
if data[0] is None: if data[0] is None:
continue continue
...@@ -396,7 +403,7 @@ class Stack(VM): ...@@ -396,7 +403,7 @@ class Stack(VM):
current_idx = self.node_idx[current_apply] current_idx = self.node_idx[current_apply]
self.call_counts[current_idx] += 1 self.call_counts[current_idx] += 1
self.call_times[current_idx] += dt self.call_times[current_idx] += dt
## Computing the memory footprint of the the op # Computing the memory footprint of the the op
# ?? What about inplace .. if the op is inplace # ?? What about inplace .. if the op is inplace
# you don't actually ask for more memory! # you don't actually ask for more memory!
for (idx, o) in enumerate( for (idx, o) in enumerate(
...@@ -436,15 +443,16 @@ class Stack(VM): ...@@ -436,15 +443,16 @@ class Stack(VM):
if all(compute_map[v][0] if all(compute_map[v][0]
for v in dependencies[i]): for v in dependencies[i]):
storage_map[i][0] = None storage_map[i][0] = None
input_index.append(current_apply.inputs.index(i)) input_index.append(
current_apply.inputs.index(i))
#DO NOT set compute_map to 0 # DO NOT set compute_map to 0
#If values become False and the # If values become False and the
#current_apply is still in the #current_apply is still in the
#stack, this will cause it to be # stack, this will cause it to be
#recomputed! This can cause wrong value # recomputed! This can cause wrong value
#with some combination of inplace op. # with some combination of inplace op.
compute_map[i][0] = 2 compute_map[i][0] = 2
if (config.warn.vm_gc_bug and if (config.warn.vm_gc_bug and
current_apply in apply_stack and current_apply in apply_stack and
...@@ -456,7 +464,8 @@ class Stack(VM): ...@@ -456,7 +464,8 @@ class Stack(VM):
" only in the development version between July 5th 2012" " only in the development version between July 5th 2012"
" and July 30th 2012. This was not in a released version." " and July 30th 2012. This was not in a released version."
" The bug was affecting this script.", " The bug was affecting this script.",
#The stack level is not good when inside a Scan. # The stack level is not good when
# inside a Scan.
stacklevel=3 stacklevel=3
) )
self.node_cleared_order.append(input_index) self.node_cleared_order.append(input_index)
...@@ -468,7 +477,6 @@ class Stack(VM): ...@@ -468,7 +477,6 @@ class Stack(VM):
for inp in current_deps for inp in current_deps
if inp.owner) if inp.owner)
elif not computed_outs: elif not computed_outs:
# #
# stack loop: Lazy Evaluation Case # stack loop: Lazy Evaluation Case
...@@ -531,9 +539,10 @@ class Stack(VM): ...@@ -531,9 +539,10 @@ class Stack(VM):
break break
if empty_storage_map: if empty_storage_map:
storage_map[i][0] = None storage_map[i][0] = None
input_index.append(current_apply.inputs.index(i)) input_index.append(
#See the not lazy gc code for explanations current_apply.inputs.index(i))
#of compute_map change # See the not lazy gc code for explanations
# of compute_map change
compute_map[i][0] = 2 compute_map[i][0] = 2
self.node_cleared_order.append(input_index) self.node_cleared_order.append(input_index)
...@@ -560,6 +569,7 @@ try: ...@@ -560,6 +569,7 @@ try:
import lazylinker_c import lazylinker_c
class CVM(lazylinker_c.CLazyLinker, VM): class CVM(lazylinker_c.CLazyLinker, VM):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
lazylinker_c.CLazyLinker.__init__(self, *args, **kwargs) lazylinker_c.CLazyLinker.__init__(self, *args, **kwargs)
# skip VM.__init__ # skip VM.__init__
...@@ -576,6 +586,7 @@ except (OSError, theano.gof.cmodule.MissingGXX), e: ...@@ -576,6 +586,7 @@ except (OSError, theano.gof.cmodule.MissingGXX), e:
class VM_Linker(link.LocalLinker): class VM_Linker(link.LocalLinker):
""" """
Class that satisfies the Linker interface by acting as a VM factory. Class that satisfies the Linker interface by acting as a VM factory.
""" """
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论