提交 948f7266 authored 作者: carriepl's avatar carriepl

Merge pull request #3224 from nouiz/mixed3

Mixed stuff
......@@ -140,6 +140,17 @@ variables to achieve this. Then you can call it like this: ``f.fn()`` or
``f.fn(n_calls=N)`` to speed it up. In the last case, only the last
function output (out of N calls) is returned.
You can also use the ``C`` linker that will put all nodes in the same C
compilation unit. This removes some overhead between nodes in the graph,
but requires that all nodes in the graph have a C implementation:
.. code-block:: python
x = theano.tensor.scalar('x')
f = function([x], (x + 1.) * 2, mode=theano.Mode(linker='c'))
f(10.)
Out of memory... but not really
-------------------------------
......
......@@ -30,7 +30,7 @@ from theano.gof import graph
from theano.configparser import AddConfigVar, BoolParam, IntParam, StrParam
import_time = time.time()
theano_imported_time = time.time()
config = theano.config
_atexit_print_list = []
......@@ -657,6 +657,8 @@ class ProfileStats(object):
def summary_globals(self, file):
print('Time in all call to theano.grad() %es' %
theano.gradient.grad_time, file=file)
total_time = time.time() - theano_imported_time
print('Time since theano import %.3fs' % (total_time))
def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->[outputs size])
......@@ -1305,7 +1307,7 @@ if False: # old code still to be ported from ProfileMode
sum(t for f, t, a, ci, nb_call, nb_op in
sotimes[n_ops_to_print:])))
total_time = time.time() - import_time
total_time = time.time() - theano_imported_time
total_fct_time = sum(fct_call_time.values())
total_fct_call = sum(fct_call.values())
other_time = total_time - total_fct_time - compile_time
......
......@@ -244,7 +244,7 @@ if __name__ == "__main__":
cuda version 7.5 7.0 6.5
gpu
K6000/NOECC
K6000/NOECC 0.69s
K40 0.88s
K20m/ECC
K20/NOECC
......@@ -257,16 +257,20 @@ if __name__ == "__main__":
C1060
K600
GTX Titan Black
GTX Titan X 0.47s
GTX Titan Black 0.64s
GTX Titan(D15U-50)
GTX 780
GTX 980
GTX 970
GTX 680
GTX 680 1.57s
GRID K520
GTX 580
GTX 480
GTX 750 Ti
GTX 750 Ti 2.01s
GTX 580 2.47s
GTX 480 2.88s
GTX 660 2.32s
GTX 750 2.37s
GT 610 33.5s
""")
if options.M == 0:
......
......@@ -398,7 +398,8 @@ def use(device,
assert isinstance(device, int)
gpu_init(device, config.lib.cnmem)
use.device_number = device
assert active_device_number() == device
active_device = active_device_number()
assert active_device == device, (active_device, device)
else:
# This mean the driver should select the GPU. As we
# need to get the device number now, we force the
......
......@@ -860,7 +860,8 @@ def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
def test_conv_grads():
if cuda.device_properties(cuda.active_device_number())['major'] < 3:
if (not cuda.dnn.dnn_available() or
cuda.device_properties(cuda.active_device_number())['major'] < 3):
ops = [gemm_op]
else:
ops = [gemm_op, dnn_op]
......
......@@ -260,9 +260,10 @@ def softmax_unittest_template(dtypeInput):
x = T.dmatrix('x')
z = T.nnet.softmax(x)
mode = mode_with_gpu.excluding('cudnn')
f = theano.function([x], z, mode=mode_without_gpu)
f_gpu = theano.function([x], z, mode=mode_with_gpu)
assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax
f_gpu = theano.function([x], z, mode=mode)
assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax_op
assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op,
GpuSoftmax)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论