提交 23a061f4 authored 作者: Frederic Bastien's avatar Frederic Bastien

now when we import theano.sandbox.cuda, we don't register gpu optimization in…

Now, when we import theano.sandbox.cuda, we don't register the GPU optimizations in fast_run and inplace. When we call theano.sandbox.cuda.use(), we add the gpu optimizer tags to fast_run and inplace. This way, they don't get applied when use() is not called.
上级 5ad0642b
...@@ -16,6 +16,7 @@ import theano.compile.sandbox ...@@ -16,6 +16,7 @@ import theano.compile.sandbox
import os import os
import theano.config as config import theano.config as config
from theano.compile import optdb
import logging, copy import logging, copy
_logger_name = 'theano_cuda_ndarray' _logger_name = 'theano_cuda_ndarray'
...@@ -44,6 +45,9 @@ def use(device=config.THEANO_GPU): ...@@ -44,6 +45,9 @@ def use(device=config.THEANO_GPU):
logging.getLogger('theano_cuda_ndarray').warning("WARNING: Won't use the GPU as the initialisation of device %i failed. %s" %(device, e)) logging.getLogger('theano_cuda_ndarray').warning("WARNING: Won't use the GPU as the initialisation of device %i failed. %s" %(device, e))
elif use.device_number != device: elif use.device_number != device:
logging.getLogger('theano_cuda_ndarray').warning("WARNING: ignoring call to use(%s), GPU number %i is already in use." %(str(device), use.device_number)) logging.getLogger('theano_cuda_ndarray').warning("WARNING: ignoring call to use(%s), GPU number %i is already in use." %(str(device), use.device_number))
optdb.add_tags('gpu',
'fast_run',
'inplace')
use.device_number = None use.device_number = None
......
...@@ -20,11 +20,7 @@ gpu_cut_copies = EquilibriumDB() ...@@ -20,11 +20,7 @@ gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB() gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1, 'fast_run', 'inplace') gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1, 'fast_run', 'inplace')
gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, 'fast_run', 'inplace') gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, 'fast_run', 'inplace')
optdb.register('gpu', optdb.register('gpu', gpu_seqopt, optdb.__priority__.get('inplace_opt', 75) + 5)
gpu_seqopt,
optdb.__priority__.get('inplace_opt', 75) + 5,
'fast_run',
'inplace')
def register_opt(*tags, **kwargs): def register_opt(*tags, **kwargs):
def f(local_opt): def f(local_opt):
......
...@@ -13,6 +13,9 @@ except ImportError: ...@@ -13,6 +13,9 @@ except ImportError:
raise SkipTest('Optional package cuda_ndarray not available') raise SkipTest('Optional package cuda_ndarray not available')
import theano.sandbox.cuda as tcn import theano.sandbox.cuda as tcn
import theano.compile.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def tes_use(): def tes_use():
tcn.use() tcn.use()
...@@ -23,7 +26,7 @@ def test_elemwise0(): ...@@ -23,7 +26,7 @@ def test_elemwise0():
b = tensor.fmatrix() b = tensor.fmatrix()
f = pfunc([b], [], updates=[(a, a+b)]) f = pfunc([b], [], updates=[(a, a+b)], mode=mode_with_gpu)
a0 = a.value * 1.0 a0 = a.value * 1.0
print 'BEFORE ADD', a.value print 'BEFORE ADD', a.value
...@@ -43,21 +46,21 @@ def test_elemwise1(): ...@@ -43,21 +46,21 @@ def test_elemwise1():
#let debugmode catch any mistakes #let debugmode catch any mistakes
print >> sys.stderr, "STARTING FUNCTION 1" print >> sys.stderr, "STARTING FUNCTION 1"
f = pfunc([b], [], updates=[(a, b**a)]) f = pfunc([b], [], updates=[(a, b**a)], mode=mode_with_gpu)
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
f(numpy.random.rand(*shape)+0.3) f(numpy.random.rand(*shape)+0.3)
print >> sys.stderr, "STARTING FUNCTION 2" print >> sys.stderr, "STARTING FUNCTION 2"
#let debugmode catch any mistakes #let debugmode catch any mistakes
f = pfunc([b], [], updates=[(a, tensor.exp(b**a))]) f = pfunc([b], [], updates=[(a, tensor.exp(b**a))], mode=mode_with_gpu)
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
f(numpy.random.rand(*shape)+0.3) f(numpy.random.rand(*shape)+0.3)
print >> sys.stderr, "STARTING FUNCTION 3" print >> sys.stderr, "STARTING FUNCTION 3"
#let debugmode catch any mistakes #let debugmode catch any mistakes
f = pfunc([b], [], updates=[(a, a+b * tensor.exp(b**a))]) f = pfunc([b], [], updates=[(a, a+b * tensor.exp(b**a))], mode=mode_with_gpu)
f(numpy.random.rand(*shape)+0.3) f(numpy.random.rand(*shape)+0.3)
def test_elemwise2(): def test_elemwise2():
...@@ -68,7 +71,7 @@ def test_elemwise2(): ...@@ -68,7 +71,7 @@ def test_elemwise2():
for pattern in [(0,1), (1,0)]: for pattern in [(0,1), (1,0)]:
a = tcn.shared_constructor(rng.rand(*shape), name=None) a = tcn.shared_constructor(rng.rand(*shape), name=None)
b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))() b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))]) f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node print >> sys.stderr, i, node
...@@ -82,7 +85,7 @@ def test_elemwise2(): ...@@ -82,7 +85,7 @@ def test_elemwise2():
a = tcn.shared_constructor(rng.rand(*shape), 'a') a = tcn.shared_constructor(rng.rand(*shape), 'a')
b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))() b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) *
tensor.exp(b**a).dimshuffle([2,0,3,1]))]) tensor.exp(b**a).dimshuffle([2,0,3,1]))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
...@@ -103,7 +106,7 @@ def test_elemwise3(): ...@@ -103,7 +106,7 @@ def test_elemwise3():
print (1 + b**a).type print (1 + b**a).type
print tensor.exp((1 + b**a)).type print tensor.exp((1 + b**a)).type
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * tensor.exp(1 + f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * tensor.exp(1 +
b**a).dimshuffle([2,0,3,1]))]) b**a).dimshuffle([2,0,3,1]))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node print >> sys.stderr, i, node
...@@ -119,7 +122,7 @@ def test_elemwise4(): ...@@ -119,7 +122,7 @@ def test_elemwise4():
a = tcn.shared_constructor(numpy.random.rand(*shape), 'a') a = tcn.shared_constructor(numpy.random.rand(*shape), 'a')
b = tensor.fvector() b = tensor.fvector()
c = tensor.fvector() c = tensor.fvector()
f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))]) f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node print >> sys.stderr, i, node
......
...@@ -16,13 +16,17 @@ import theano.sandbox.cuda as tcn ...@@ -16,13 +16,17 @@ import theano.sandbox.cuda as tcn
from theano.sandbox.downsample import DownsampleFactorMax from theano.sandbox.downsample import DownsampleFactorMax
import theano.compile.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def test_dot(): def test_dot():
a = tcn.shared_constructor(numpy.random.rand(4,4), 'a') a = tcn.shared_constructor(numpy.random.rand(4,4), 'a')
b = tensor.fmatrix() b = tensor.fmatrix()
f = pfunc([b], [], updates=[(a, tensor.dot(a,b))]) f = pfunc([b], [], updates=[(a, tensor.dot(a,b))], mode=mode_with_gpu)
a0 = a.value * 1.0 a0 = a.value * 1.0
print a0 print a0
...@@ -41,7 +45,7 @@ def test_gemm(): ...@@ -41,7 +45,7 @@ def test_gemm():
b = tensor.fmatrix('b') b = tensor.fmatrix('b')
c = tensor.fmatrix('c') c = tensor.fmatrix('c')
f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))]) f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))], mode=mode_with_gpu)
a0 = a.value * 1.0 a0 = a.value * 1.0
print a0 print a0
...@@ -69,7 +73,7 @@ if 0: ...@@ -69,7 +73,7 @@ if 0:
a = tcn.blas.DownsampleFactorMax((2,2),border) a = tcn.blas.DownsampleFactorMax((2,2),border)
dmatrix4 = tensor.TensorType("float32", (False, False, False, False)) dmatrix4 = tensor.TensorType("float32", (False, False, False, False))
b = dmatrix4() b = dmatrix4()
f = pfunc([b], [a(b)]) f = pfunc([b], [a(b)], mode=mode_with_gpu)
bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1) bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1)
r = f(bval)[0] r = f(bval)[0]
...@@ -109,7 +113,7 @@ def test_downsample(): ...@@ -109,7 +113,7 @@ def test_downsample():
ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border) ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
a = tcn.shared_constructor(numpy.random.rand(*shp), 'a') a = tcn.shared_constructor(numpy.random.rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a))) f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
worked = False worked = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
......
...@@ -12,6 +12,10 @@ try: ...@@ -12,6 +12,10 @@ try:
except ImportError: except ImportError:
raise SkipTest('Optional package cuda_ndarray not available') raise SkipTest('Optional package cuda_ndarray not available')
import theano.compile.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -20,7 +24,7 @@ def test_no_shared_var_graph(): ...@@ -20,7 +24,7 @@ def test_no_shared_var_graph():
""" """
a=tensor.fmatrix() a=tensor.fmatrix()
b=tensor.fmatrix() b=tensor.fmatrix()
f = theano.function([a,b],[a+b]) f = theano.function([a,b],[a+b], mode=mode_with_gpu)
l = f.maker.env.toposort() l = f.maker.env.toposort()
assert len(l)==4 assert len(l)==4
assert any(isinstance(x.op,cuda.GpuElemwise) for x in l) assert any(isinstance(x.op,cuda.GpuElemwise) for x in l)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论