pytensor · Commits

Commit e6a4c073
Authored Jun 04, 2015 by Pascal Lamblin

Merge pull request #2423 from carriepl/breakpoint

Add breakpoint Op to Theano

Parents: 944e36dd, 495e144e
Showing 6 changed files with 398 additions and 2 deletions.
- theano/sandbox/cuda/opt.py (+65 −0)
- theano/sandbox/cuda/tests/test_opt.py (+22 −2)
- theano/sandbox/gpuarray/opt.py (+64 −0)
- theano/sandbox/gpuarray/tests/test_opt.py (+20 −0)
- theano/tests/breakpoint.py (+146 −0)
- theano/tests/test_breakpoint.py (+81 −0)
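Before the diffs, here is a minimal sketch of what the new Op does, adapted from the docstring added in theano/tests/breakpoint.py (the variable names and the condition are illustrative, not from the commit): PdbBreakpoint acts as an identity on the variables it monitors, but drops into pdb when a symbolic scalar condition evaluates to true.

```python
import theano
import theano.tensor as T
from theano.tests.breakpoint import PdbBreakpoint

x = T.fvector()

# Identity-like: x_monitored computes to the same values as x, but pdb is
# entered whenever the scalar condition (any element negative) holds.
condition = T.lt(x.min(), 0)
x_monitored = PdbBreakpoint("negative input")(condition, x)

f = theano.function([x], x_monitored ** 2)
f([1., 2., 3.])   # runs through without stopping
f([1., -2., 3.])  # pauses in pdb before returning
```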
theano/sandbox/cuda/opt.py

```python
@@ -63,6 +63,7 @@ from theano.tensor import nlinalg
from theano.tensor import slinalg
from theano.tensor.nnet.Conv3D import Conv3D
from theano.tests.breakpoint import PdbBreakpoint

try:
    # We need to be able to import this file even if cuda isn't avail.
```

```python
@@ -1141,6 +1142,69 @@ def local_gpu_print_op(node):
    return False


@register_opt()
@local_optimizer([PdbBreakpoint])
def local_gpu_pdbbreakpoint_op(node):
    if isinstance(node.op, PdbBreakpoint):

        old_inputs = node.inputs
        old_outputs = node.outputs

        new_inputs = node.inputs[:1]
        input_transfered = []

        # Go through the monitored variables, only transferring to the GPU
        # those for which the input comes from the GPU or the output will
        # be transferred to the GPU.
        nb_monitored_vars = len(node.outputs)
        for i in range(nb_monitored_vars):

            inp = old_inputs[i + 1]
            out = old_outputs[i]

            input_is_from_gpu = (inp.owner and
                                 isinstance(inp.owner.op, HostFromGpu))
            output_goes_to_gpu = any([c[0] != "output" and
                                      isinstance(c[0].op, GpuFromHost)
                                      for c in out.clients])

            if input_is_from_gpu:
                # The op should be applied on the GPU version of the input
                new_inputs.append(inp.owner.inputs[0])
                input_transfered.append(True)
            elif output_goes_to_gpu:
                # The input should be transferred to the GPU
                new_inputs.append(gpu_from_host(inp))
                input_transfered.append(True)
            else:
                # No transfer is required.
                new_inputs.append(inp)
                input_transfered.append(False)

        # Only continue the optimization if at least one input has been
        # transferred to the GPU
        if not any(input_transfered):
            return False

        # Apply the op on the new inputs
        new_op_outputs = node.op(*new_inputs, return_list=True)

        # Propagate the transfer to the GPU through the outputs that
        # require it
        new_outputs = []
        for i in range(len(new_op_outputs)):
            if input_transfered[i]:
                new_outputs.append(host_from_gpu(new_op_outputs[i]))
            else:
                new_outputs.append(new_op_outputs[i])

        return new_outputs

    return False


def cast(x, dtype):
    stype = scal.Scalar(dtype)
    cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype)))
```

```python
@@ -2303,6 +2367,7 @@ def local_gpu_allocempty(node):
        return [ret]
    return False


optdb.register('gpu_scanOp_make_inplace',
               scan_opt.ScanInplaceOptimizer(typeConstructor=typeConstructor,
                                             gpu_flag=True),
```
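The decision rule above relies on Theano's client bookkeeping: inside a FunctionGraph, `var.clients` is a list of `(consumer, index)` pairs, where `consumer` is either an Apply node or the literal string `"output"` for variables returned from the graph. As a hedged restatement, the two predicates could be factored out like this (the helper names are ours, not the commit's):

```python
from theano.sandbox.cuda.basic_ops import HostFromGpu, GpuFromHost

def input_is_from_gpu(inp):
    # The monitored input is the host copy of a GPU variable: its owner is
    # a HostFromGpu transfer, so the optimizer can reuse owner.inputs[0].
    return inp.owner is not None and isinstance(inp.owner.op, HostFromGpu)

def output_goes_to_gpu(out):
    # Some consumer of the monitored output, other than the graph-output
    # marker (the string "output"), immediately transfers it to the GPU.
    return any(c[0] != "output" and isinstance(c[0].op, GpuFromHost)
               for c in out.clients)
```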
theano/sandbox/cuda/tests/test_opt.py

```python
@@ -14,6 +14,7 @@ from theano import config, tensor
import theano.tensor.tests.test_nlinalg
import theano.tensor.tests.test_opt as test_opt
from theano.tests.breakpoint import PdbBreakpoint
from theano.tests import unittest_tools as utt

import theano.sandbox.cuda as cuda
```

```python
@@ -164,7 +165,7 @@ def test_gpuallocempty():
    f_cpu = theano.function([], tensor.AllocEmpty('int32')(2, 3))
    l_cpu = f_cpu.maker.fgraph.toposort()
    assert not numpy.any([isinstance(x.op, basic_ops.GpuAllocEmpty)
                          for x in l_cpu])


class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
    dtype = 'float32'
```

```python
@@ -322,7 +323,7 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
def test_opt_gpujoin_joinvectors_negativeaxes():
    """
    Test that negative axis concatenation works as expected.
    """
```

```python
@@ -477,6 +478,25 @@ def test_print_op():
    f(numpy.random.random((5, 5)).astype('float32'))


def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    b = tensor.fmatrix()

    # Create a function composed of a breakpoint followed by
    # some computation
    condition = tensor.gt(b.sum(), 0)
    b_monitored = PdbBreakpoint(name='TestBreakpoint')(condition, b)
    output = b_monitored ** 2

    f = theano.function([b], output, mode=mode_with_gpu)

    # Ensure that, in the compiled function, the computation following
    # the breakpoint has been moved to the GPU.
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-2].op, cuda.GpuElemwise)
    assert topo[-1].op == cuda.host_from_gpu


def test_huge_elemwise_fusion():
    """ Test that the GpuElemwise fusion works correctly

    We check that we fuse one node with part of its input
```
theano/sandbox/gpuarray/opt.py

```python
@@ -15,6 +15,7 @@ from theano.gof import (local_optimizer, EquilibriumDB,
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.nnet.conv import ConvOp
from theano.tests.breakpoint import PdbBreakpoint

from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host, HostFromGpu,
                        GpuFromHost,
```

```python
@@ -330,6 +331,69 @@ def local_gpu_print_op(node):
        return new_op(gpu_x)


@register_opt('fast_compile')
@local_optimizer([PdbBreakpoint])
def local_gpu_pdbbreakpoint_op(node):
    # The body is identical, line for line, to the CUDA version of
    # local_gpu_pdbbreakpoint_op in theano/sandbox/cuda/opt.py above. The
    # only differences are that this copy is registered under
    # 'fast_compile', and that HostFromGpu, GpuFromHost, host_from_gpu and
    # gpu_from_host here come from the gpuarray backend's basic_ops.
    ...


@register_opt('fast_compile')
@op_lifter([tensor.Join])
def local_gpua_join(node):
```
theano/sandbox/gpuarray/tests/test_opt.py

```python
@@ -2,6 +2,7 @@ import numpy
import theano
from theano import tensor
from theano.tests.breakpoint import PdbBreakpoint
from theano.tests import unittest_tools as utt
from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic
```

```python
@@ -186,6 +187,25 @@ def test_print_op():
    f(numpy.random.random((5, 5)).astype('float32'))


def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    b = tensor.fmatrix()

    # Create a function composed of a breakpoint followed by
    # some computation
    condition = tensor.gt(b.sum(), 0)
    b_monitored = PdbBreakpoint(name='TestBreakpoint')(condition, b)
    output = b_monitored ** 2

    f = theano.function([b], output, mode=mode_with_gpu)

    # Ensure that, in the compiled function, the computation following
    # the breakpoint has been moved to the GPU.
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-2].op, GpuElemwise)
    assert topo[-1].op == host_from_gpu


def test_local_gpu_elemwise_careduce():
    x = theano.tensor.matrix()
    o = (x * x).sum()
```
theano/tests/breakpoint.py (new file, 0 → 100644)

```python
import numpy

import pdb
import theano
from theano.gof import Op, Apply
from theano.gradient import DisconnectedType


class PdbBreakpoint(Op):
    """
    This is an identity-like op with the side effect of enforcing a
    conditional breakpoint, inside a theano function, based on a symbolic
    scalar condition.

    @type name: String
    @param name: name of the conditional breakpoint. To be printed when the
                 breakpoint is activated.

    :note: WARNING. At least one of the outputs of the op must be used,
           otherwise the op will be removed from the Theano graph
           due to its outputs being unused.

    :note: WARNING. Employing the function inside a theano graph can prevent
           Theano from applying certain optimizations to improve
           performance, reduce memory consumption and/or reduce
           numerical instability.

           Detailed explanation:
           As of 2014-12-01 the PdbBreakpoint op is not known by any
           optimization. Setting a PdbBreakpoint op in the middle of a
           pattern that is usually optimized out will block the
           optimization.

    Example:

    .. code-block:: python

        import theano
        import theano.tensor as T
        from theano.tests.breakpoint import PdbBreakpoint

        input = T.fvector()
        target = T.fvector()

        # Mean squared error between input and target
        mse = (input - target) ** 2

        # Conditional breakpoint to be activated if the total MSE is higher
        # than 100. The breakpoint will monitor the inputs, targets as well
        # as the individual error values.
        breakpointOp = PdbBreakpoint("MSE too high")
        condition = T.gt(mse.sum(), 100)
        mse, monitored_input, monitored_target = breakpointOp(condition, mse,
                                                              input, target)

        # Compile the theano function
        fct = theano.function([input, target], mse)

        # Use the function
        print(fct([10, 0], [10, 5]))  # Will NOT activate the breakpoint
        print(fct([0, 0], [10, 5]))   # Will activate the breakpoint
    """

    __props__ = ("name",)

    def __init__(self, name):
        self.name = name

    def make_node(self, condition, *monitored_vars):

        # Ensure that condition is a theano tensor
        if not isinstance(condition, theano.Variable):
            condition = theano.tensor.as_tensor_variable(condition)

        # Validate that the condition is a scalar (else it is not obvious
        # how it should be evaluated)
        assert (condition.ndim == 0)

        # Because the user might be tempted to instantiate PdbBreakpoint
        # only once and apply it many times on different numbers of inputs,
        # we must create a new instance of the op here, define the instance
        # attributes (view_map and var_types) in that instance and then
        # apply it on the inputs.
        new_op = PdbBreakpoint(name=self.name)
        new_op.view_map = {}
        new_op.inp_types = []
        for i in range(len(monitored_vars)):
            # Every output i is a view of the input i+1 because of the
            # input condition.
            new_op.view_map[i] = [i + 1]
            new_op.inp_types.append(monitored_vars[i].type)

        # Build the Apply node
        inputs = [condition] + list(monitored_vars)
        outputs = [inp.type() for inp in monitored_vars]
        return Apply(op=new_op, inputs=inputs, outputs=outputs)

    def perform(self, node, inputs, output_storage):
        condition = inputs[0]

        if condition:
            try:
                monitored = [numpy.asarray(inp) for inp in inputs[1:]]
            except:
                raise ValueError("Some of the inputs to the PdbBreakpoint op "
                                 "'%s' could not be cast to NumPy arrays" %
                                 self.name)

            print("\n")
            print("-------------------------------------------------")
            print("Conditional breakpoint '%s' activated\n" % self.name)
            print("The monitored variables are stored, in order,")
            print("in the list variable 'monitored' as NumPy arrays.\n")
            print("Their contents can be altered and, when execution")
            print("resumes, the updated values will be used.")
            print("-------------------------------------------------")

            pdb.set_trace()

            # Take the new values in monitored, cast them back to their
            # original type and store them in the output_storage
            for i in range(len(output_storage)):
                output_storage[i][0] = self.inp_types[i].filter(monitored[i])

        else:
            # Simply return views on the monitored variables
            for i in range(len(output_storage)):
                output_storage[i][0] = inputs[i + 1]

    def grad(self, inputs, output_gradients):
        return ([DisconnectedType()()] + output_gradients)

    def infer_shape(self, inputs, input_shapes):
        # Return the shape of every input but the condition (first input)
        return input_shapes[1:]

    def connection_pattern(self, node):

        nb_inp = len(node.inputs)
        nb_out = nb_inp - 1

        # The first input is connected to no output; every other input n
        # is connected to output n-1.
        connections = [[out_idx == inp_idx - 1 for out_idx in range(nb_out)]
                       for inp_idx in range(nb_inp)]
        return connections
```
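The `view_map` assembled in `make_node` is the piece that tells Theano each output merely aliases the monitored input one position later (input 0 is the condition, which no output views). A small illustration of the resulting dictionary for two monitored variables; this snippet is ours, not part of the commit:

```python
import theano.tensor as T
from theano.tests.breakpoint import PdbBreakpoint

x = T.fmatrix()
y = T.fscalar()
node = PdbBreakpoint("demo").make_node(T.gt(x.sum(), 0), x, y)

# Output 0 views input 1 (x) and output 1 views input 2 (y); the
# condition at input 0 is not aliased by any output.
assert node.op.view_map == {0: [1], 1: [2]}
```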
theano/tests/test_breakpoint.py (new file, 0 → 100644)

```python
import numpy

import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tests.breakpoint import PdbBreakpoint


class TestPdbBreakpoint(utt.InferShapeTester):

    def setUp(self):
        super(TestPdbBreakpoint, self).setUp()

        # Sample computation that involves tensors with different numbers
        # of dimensions
        self.input1 = T.fmatrix()
        self.input2 = T.fscalar()
        self.output = T.dot((self.input1 - self.input2),
                            (self.input1 - self.input2).transpose())

        # Declare the conditional breakpoint
        self.breakpointOp = PdbBreakpoint("Sum of output too high")
        self.condition = T.gt(self.output.sum(), 1000)
        (self.monitored_input1,
         self.monitored_input2,
         self.monitored_output) = self.breakpointOp(self.condition,
                                                    self.input1,
                                                    self.input2,
                                                    self.output)

    def test_infer_shape(self):

        input1_value = numpy.arange(6).reshape(2, 3).astype("float32")
        input2_value = 10.0

        self._compile_and_check([self.input1, self.input2],
                                [self.monitored_input1,
                                 self.monitored_input2,
                                 self.monitored_output],
                                [input1_value, input2_value],
                                PdbBreakpoint)

    def test_grad(self):

        input1_value = numpy.arange(9).reshape(3, 3).astype("float32")
        input2_value = 10.0

        grads = [T.grad(self.monitored_input1.sum(), self.input1),
                 T.grad(self.monitored_input2.sum(), self.input2)]

        # Add self.monitored_input1 as an output to the Theano function to
        # prevent Theano from optimizing the PdbBreakpoint op out of the
        # function graph
        fct = theano.function([self.input1, self.input2],
                              grads + [self.monitored_input1])

        gradients = fct(input1_value, input2_value)[:-1]

        expected_gradients = [numpy.ones((3, 3), dtype="float32"),
                              numpy.array(1., dtype="float32")]

        for i in range(len(gradients)):
            numpy.testing.assert_allclose(gradients[i],
                                          expected_gradients[i])

    def test_fprop(self):

        input1_value = numpy.arange(9).reshape(3, 3).astype("float32")
        input2_value = 10.0

        fct = theano.function([self.input1, self.input2],
                              [self.monitored_input1,
                               self.monitored_input2])

        output = fct(input1_value, input2_value)
        numpy.testing.assert_allclose(output[0], input1_value)
        numpy.testing.assert_allclose(output[1], input2_value)

    def test_connection_pattern(self):

        node = self.monitored_output.owner
        connection_pattern = self.breakpointOp.connection_pattern(node)

        expected_pattern = [[0, 0, 0],
                            [1, 0, 0],
                            [0, 1, 0],
                            [0, 0, 1]]

        assert connection_pattern == expected_pattern
```
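One detail worth spelling out: `connection_pattern` builds its rows with boolean comparisons, while the test compares against a matrix of 0s and 1s; the assertion passes because Python's `bool` is an `int` subclass (`True == 1`, `False == 0`). A standalone check of the rule, independent of Theano (our sketch, not the commit's):

```python
# Rule from PdbBreakpoint.connection_pattern, for one condition input plus
# three monitored inputs: input 0 feeds no output, and input n (n >= 1)
# feeds only output n - 1.
nb_inp, nb_out = 4, 3
pattern = [[out_idx == inp_idx - 1 for out_idx in range(nb_out)]
           for inp_idx in range(nb_inp)]

# Booleans compare equal to 0/1, so this matches the test's expectation.
assert pattern == [[0, 0, 0],
                   [1, 0, 0],
                   [0, 1, 0],
                   [0, 0, 1]]
```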