提交 6030673d authored 作者: Frederic Bastien

use the new hack in profile mode to print the MFlops of the op.

上级 60b6facf
......@@ -156,12 +156,16 @@ def local_gpu_conv(node):
conv(host_from_gpu) -> host_from_gpu(conv)
"""
def GpuConvOp_from_ConvOp(op):
    """Build a GpuConv op mirroring the given CPU ConvOp.

    Copies the border mode, subsampling and logical-shape parameters
    from *op* onto a new GpuConv instance.

    :param op: a ConvOp whose configuration is to be replicated on GPU.
    :return: the configured GpuConv instance.
    """
    ret = GpuConv(border_mode=op.out_mode,
                  subsample=(op.dx, op.dy),
                  logical_img_hw=op.imshp_logical[1:3],
                  logical_kern_hw=op.kshp_logical,
                  logical_kern_align_top=op.kshp_logical_top_aligned)
    # HACK: carry the flop count over so profile mode can report the
    # MFlops of the GPU op just as it would for the CPU op.
    if hasattr(op, 'flops'):
        ret.flops = op.flops
    return ret
if node.op == gpu_from_host:
host_input = node.inputs[0]
......
......@@ -173,6 +173,14 @@ def run_conv_nnet2(shared_fn): # pretend we are training LeNet for MNIST
conv_op = theano.sandbox.conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)
conv_op1 = theano.sandbox.conv.ConvOp((n_kern,logical_hid_shape[0]/2, logical_hid_shape[1]/2), shape_kern1[2:], n_kern1, n_batch, 1, 1)
flops=shape_kern[2]*shape_kern[3]*2#nb mul and add by output pixed
flops*=logical_hid_shape[0]*logical_hid_shape[1]#nb mul by output image
flops*=n_kern*n_batch#for all outputs images#n_stack==1
conv_op.flops = flops
flops=shape_kern1[2]*shape_kern1[3]*2#nb mul and add by output pixed
flops*=logical_hid_shape1[0]*logical_hid_shape1[1]#nb mul by output image
flops*=n_kern1*n_batch*n_kern#for all outputs images#n_stack==n_kern
conv_op1.flops = flops
hid = tensor.tanh(conv_op(x, w0)+b0)
hid1 = tensor.tanh(conv_op1(hid[:,:,::2,::2], w1) + b1)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论