提交 948f7266 authored 作者: carriepl's avatar carriepl

Merge pull request #3224 from nouiz/mixed3

Mixed stuff
......@@ -140,6 +140,17 @@ variables to achieve this. Then you can call it like this: ``f.fn()`` or
``f.fn(n_calls=N)`` to speed it up. In the last case, only the last
function output (out of N calls) is returned.
You can also use the ``C`` linker that will put all nodes in the same C
compilation unit. This removes some overhead between nodes in the graph,
but requires that all nodes in the graph have a C implementation:
.. code-block:: python
x = theano.tensor.scalar('x')
f = function([x], (x + 1.) * 2, mode=theano.Mode(linker='c'))
f(10.)
Out of memory... but not really
-------------------------------
......
......@@ -30,7 +30,7 @@ from theano.gof import graph
from theano.configparser import AddConfigVar, BoolParam, IntParam, StrParam
import_time = time.time()
theano_imported_time = time.time()
config = theano.config
_atexit_print_list = []
......@@ -657,6 +657,8 @@ class ProfileStats(object):
def summary_globals(self, file):
print('Time in all call to theano.grad() %es' %
theano.gradient.grad_time, file=file)
total_time = time.time() - theano_imported_time
print('Time since theano import %.3fs' % (total_time))
def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->[outputs size])
......@@ -1305,7 +1307,7 @@ if False: # old code still to be ported from ProfileMode
sum(t for f, t, a, ci, nb_call, nb_op in
sotimes[n_ops_to_print:])))
total_time = time.time() - import_time
total_time = time.time() - theano_imported_time
total_fct_time = sum(fct_call_time.values())
total_fct_call = sum(fct_call.values())
other_time = total_time - total_fct_time - compile_time
......
......@@ -244,7 +244,7 @@ if __name__ == "__main__":
cuda version 7.5 7.0 6.5
gpu
K6000/NOECC
K6000/NOECC 0.69s
K40 0.88s
K20m/ECC
K20/NOECC
......@@ -257,16 +257,20 @@ if __name__ == "__main__":
C1060
K600
GTX Titan Black
GTX Titan X 0.47s
GTX Titan Black 0.64s
GTX Titan(D15U-50)
GTX 780
GTX 980
GTX 970
GTX 680
GTX 680 1.57s
GRID K520
GTX 580
GTX 480
GTX 750 Ti
GTX 750 Ti 2.01s
GTX 580 2.47s
GTX 480 2.88s
GTX 660 2.32s
GTX 750 2.37s
GT 610 33.5s
""")
if options.M == 0:
......
......@@ -398,7 +398,8 @@ def use(device,
assert isinstance(device, int)
gpu_init(device, config.lib.cnmem)
use.device_number = device
assert active_device_number() == device
active_device = active_device_number()
assert active_device == device, (active_device, device)
else:
# This mean the driver should select the GPU. As we
# need to get the device number now, we force the
......
......@@ -860,7 +860,8 @@ def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
def test_conv_grads():
if cuda.device_properties(cuda.active_device_number())['major'] < 3:
if (not cuda.dnn.dnn_available() or
cuda.device_properties(cuda.active_device_number())['major'] < 3):
ops = [gemm_op]
else:
ops = [gemm_op, dnn_op]
......
......@@ -260,9 +260,10 @@ def softmax_unittest_template(dtypeInput):
x = T.dmatrix('x')
z = T.nnet.softmax(x)
mode = mode_with_gpu.excluding('cudnn')
f = theano.function([x], z, mode=mode_without_gpu)
f_gpu = theano.function([x], z, mode=mode_with_gpu)
assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax
f_gpu = theano.function([x], z, mode=mode)
assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax_op
assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op,
GpuSoftmax)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论