提交 69338f33 authored 作者: abergeron's avatar abergeron

Merge pull request #4454 from slefrancois/gpu_out_sandbox

Move new GPU backend out of sandbox
.. _libdoc_gpuarray_dnn: .. _libdoc_gpuarray_dnn:
=========================================== ===========================================
:mod:`theano.sandbox.gpuarray.dnn` -- cuDNN :mod:`gpuarray.dnn` -- cuDNN
=========================================== ===========================================
.. moduleauthor:: LISA .. moduleauthor:: LISA
...@@ -135,27 +135,27 @@ To get an error if Theano can not use cuDNN, use this Theano flag: ...@@ -135,27 +135,27 @@ To get an error if Theano can not use cuDNN, use this Theano flag:
Functions Functions
========= =========
.. automodule:: theano.sandbox.gpuarray.dnn .. automodule:: theano.gpuarray.dnn
:noindex: :noindex:
:members: dnn_conv, dnn_pool :members: dnn_conv, dnn_pool
Convolution Ops Convolution Ops
=============== ===============
.. automodule:: theano.sandbox.gpuarray.dnn .. automodule:: theano.gpuarray.dnn
:noindex: :noindex:
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI :members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
Pooling Ops Pooling Ops
=========== ===========
.. automodule:: theano.sandbox.gpuarray.dnn .. automodule:: theano.gpuarray.dnn
:noindex: :noindex:
:members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad :members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad
Softmax Ops Softmax Ops
=========== ===========
.. automodule:: theano.sandbox.gpuarray.dnn .. automodule:: theano.gpuarray.dnn
:noindex: :noindex:
:members: GpuDnnSoftmax, GpuDnnSoftmaxGrad :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
...@@ -7,11 +7,11 @@ Utility functions ...@@ -7,11 +7,11 @@ Utility functions
Optimisation Optimisation
------------ ------------
.. automodule:: theano.sandbox.gpuarray.opt_util .. automodule:: theano.gpuarray.opt_util
:members: :members:
Kernel generation Kernel generation
----------------- -----------------
.. automodule:: theano.sandbox.gpuarray.kernel_codegen .. automodule:: theano.gpuarray.kernel_codegen
:members: :members:
...@@ -2,10 +2,10 @@ ...@@ -2,10 +2,10 @@
.. _libdoc_gpuarray: .. _libdoc_gpuarray:
======================================================= =======================================================
:mod:`theano.sandbox.gpuarray` -- The (new) GPU backend :mod:`gpuarray` -- The (new) GPU backend
======================================================= =======================================================
.. module:: theano.sandbox.gpuarray .. module:: theano.gpuarray
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Code for GPU programming (new) :synopsis: Code for GPU programming (new)
.. moduleauthor:: MILA .. moduleauthor:: MILA
......
...@@ -13,35 +13,35 @@ is just useful to let people know what is implemented on the gpu. ...@@ -13,35 +13,35 @@ is just useful to let people know what is implemented on the gpu.
Basic Op Basic Op
======== ========
.. automodule:: theano.sandbox.gpuarray.basic_ops .. automodule:: theano.gpuarray.basic_ops
:members: :members:
Blas Op Blas Op
======= =======
.. automodule:: theano.sandbox.gpuarray.blas .. automodule:: theano.gpuarray.blas
:members: :members:
.. automodule:: theano.sandbox.gpuarray.nerv .. automodule:: theano.gpuarray.nerv
:members: :members:
Elemwise Op Elemwise Op
=========== ===========
.. automodule:: theano.sandbox.gpuarray.elemwise .. automodule:: theano.gpuarray.elemwise
:members: :members:
Subtensor Op Subtensor Op
============ ============
.. automodule:: theano.sandbox.gpuarray.subtensor .. automodule:: theano.gpuarray.subtensor
:members: :members:
Nnet Op Nnet Op
======= =======
.. automodule:: theano.sandbox.gpuarray.nnet .. automodule:: theano.gpuarray.nnet
:members: :members:
.. automodule:: theano.sandbox.gpuarray.neighbours .. automodule:: theano.gpuarray.neighbours
:members: :members:
.. _libdoc_gpuarray_type: .. _libdoc_gpuarray_type:
=================================================== ===================================================
:mod:`theano.sandbox.gpuarray.type` -- Type classes :mod:`gpuarray.type` -- Type classes
=================================================== ===================================================
.. automodule:: theano.sandbox.gpuarray.type .. automodule:: theano.gpuarray.type
:members: :members:
...@@ -17,6 +17,7 @@ Types and Ops that you can use to build and compile expression graphs. ...@@ -17,6 +17,7 @@ Types and Ops that you can use to build and compile expression graphs.
printing printing
d3viz/index d3viz/index
compile/index compile/index
gpuarray/index
sparse/index sparse/index
sparse/sandbox sparse/sandbox
scalar/index scalar/index
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
:maxdepth: 1 :maxdepth: 1
cuda/index cuda/index
gpuarray/index
linalg linalg
neighbours neighbours
rng_mrg rng_mrg
...@@ -393,7 +393,7 @@ into a file and run it. ...@@ -393,7 +393,7 @@ into a file and run it.
.. testcode:: .. testcode::
from theano import function, config, shared, tensor, sandbox from theano import function, config, shared, tensor
import numpy import numpy
import time import time
...@@ -461,7 +461,7 @@ the GPU object directly. The following code is modifed to do just that. ...@@ -461,7 +461,7 @@ the GPU object directly. The following code is modifed to do just that.
.. testcode:: .. testcode::
from theano import function, config, shared, tensor, sandbox from theano import function, config, shared, tensor, gpuarray
import numpy import numpy
import time import time
...@@ -470,7 +470,7 @@ the GPU object directly. The following code is modifed to do just that. ...@@ -470,7 +470,7 @@ the GPU object directly. The following code is modifed to do just that.
rng = numpy.random.RandomState(22) rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], sandbox.gpuarray.basic_ops.gpu_from_host(tensor.exp(x))) f = function([], gpuarray.basic_ops.GpuFromHost(None)(tensor.exp(x)))
print(f.maker.fgraph.toposort()) print(f.maker.fgraph.toposort())
t0 = time.time() t0 = time.time()
for i in range(iters): for i in range(iters):
...@@ -485,9 +485,10 @@ the GPU object directly. The following code is modifed to do just that. ...@@ -485,9 +485,10 @@ the GPU object directly. The following code is modifed to do just that.
else: else:
print('Used the gpu') print('Used the gpu')
Here the :func:`theano.sandbox.gpuarray.basic.gpu_from_host` call Here the :func:`theano.gpuarray.basic_ops.GpuFromHost(None)` call
means "copy input to the GPU". However during the optimization phase, means "copy input to the GPU", with ``None`` the default GPU context when not
since the result will already be on th gpu, it will be removed. It is explicitly given. However during the optimization phase,
since the result will already be on the gpu, it will be removed. It is
used here to tell theano that we want the result on the GPU. used here to tell theano that we want the result on the GPU.
The output is The output is
......
...@@ -116,7 +116,7 @@ if (config.device.startswith('cuda') or ...@@ -116,7 +116,7 @@ if (config.device.startswith('cuda') or
config.init_gpu_device.startswith('cuda') or config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl') or config.init_gpu_device.startswith('opencl') or
config.contexts != ''): config.contexts != ''):
import theano.sandbox.gpuarray import theano.gpuarray
# Use config.numpy to call numpy.seterr # Use config.numpy to call numpy.seterr
import numpy import numpy
......
from __future__ import absolute_import, print_function, division
import sys
import logging
import sys
import warnings
import theano
from theano import config
from theano.compile import optdb
from theano.tensor.basic import register_transfer
# Name under which this backend logs; a child of the root 'theano' logger.
_logger_name = 'theano.gpuarray'
_logger = logging.getLogger(_logger_name)
# Module-level shorthands used throughout this file.
error = _logger.error
info = _logger.info
# Becomes True once init_dev() has successfully set up at least one device.
pygpu_activated = False
try:
    import pygpu
    import pygpu.gpuarray
except ImportError:
    # pygpu is an optional dependency; keep the name bound to None so the
    # `if pygpu:` checks at the bottom of this module still work.
    pygpu = None
# This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
                   GpuArraySharedVariable, gpuarray_shared_constructor,
                   reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable
from . import dnn, opt, nerv, extra_ops
def transfer(x, target):
    """Transfer hook registered with ``register_transfer``.

    Move `x` onto the GPU context registered under the name `target`.
    Returns a GpuArray variable on success, or None (implicitly, by
    falling through) when no context named `target` has been defined,
    which tells the transfer machinery this backend declines the move.
    """
    try:
        # get_context raises ContextNotDefined for unknown names; in
        # that case we silently decline rather than fail.
        get_context(target)
        return as_gpuarray_variable(x, target)
    except ContextNotDefined:
        return None


# Route tensor `.transfer(target)` requests through this backend.
register_transfer(transfer)
def init_dev(dev, name=None):
    """Initialize the device `dev` (e.g. 'cuda0', 'opencl0:0') and register
    its context under `name` (None registers it as the default context).

    Side effects: creates (and caches) the pygpu context, optionally
    preallocates GPU memory, sets the module-global `pygpu_activated`,
    and may print device/cuDNN info to stderr.

    Raises RuntimeError when the installed libgpuarray/pygpu API version
    does not match what this Theano build expects.
    """
    v = pygpu.gpuarray.api_version()
    # Major version must match exactly; -9998 is the expected value for
    # this Theano release.
    if v[0] != -9998:
        raise RuntimeError("Wrong major API version for gpuarray:", v[0],
                           "Make sure Theano and libgpuarray/pygpu "
                           "are in sync.")
    # Minor version only needs to be recent enough.
    if v[1] < 0:
        raise RuntimeError("Wrong minor API version for gpuarray:", v[1],
                           "Please update libgpuarray/pygpu.")
    global pygpu_activated
    if dev not in init_dev.devmap:
        # First time we see this device: create and cache its context.
        ctx = pygpu.init(dev)
        init_dev.devmap[dev] = ctx
        if config.gpuarray.preallocate != 0:
            if config.gpuarray.preallocate < 1:
                # Value in (0, 1): fraction of total GPU memory (capped
                # at 98% to leave room for the driver).
                gmem = min(config.gpuarray.preallocate, 0.98) * ctx.total_gmem
            else:
                # Value >= 1: an absolute amount in megabytes.
                gmem = config.gpuarray.preallocate * (1024*1024)
            # This will allocate and immediately free an object of size gmem
            # which will reserve that amount of memory on the GPU.
            pygpu.empty((gmem,), dtype='int8', context=ctx)
    context = init_dev.devmap[dev]
    # This will map the context name to the real context object.
    reg_context(name, context)
    pygpu_activated = True
    if config.print_active_device:
        warn = None
        cudnn_version = ""
        if dev.startswith('cuda'):
            # Default message, overwritten below if cuDNN is usable.
            cudnn_version = " (cuDNN not available)"
            try:
                cudnn_version = dnn.version()
                # 5100 should not print warning with cudnn 5 final.
                if cudnn_version > 5100:
                    warn = ("Your cuDNN version is more recent than Theano."
                            " If you see problems, try updating Theano or"
                            " downgrading cuDNN to version 5.")
                cudnn_version = " (cuDNN version %s)" % cudnn_version
            except Exception:
                # dnn.version() failed; report why cuDNN is not present.
                cudnn_version = dnn.dnn_present.msg
        print("Mapped name %s to device %s: %s%s" % (
            name, dev, context.devname, cudnn_version),
            file=sys.stderr)
        if warn:
            warnings.warn(warn)
# This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {}
# Module-import-time activation: depending on the Theano flags, initialize
# the configured device(s) and hook this backend into shared-variable
# construction and the optimizer database.
if pygpu:
    try:
        if (config.device.startswith('cuda') or
                config.device.startswith('opencl')):
            # device=cudaN/openclN: full activation — computations move to
            # the GPU by default.
            init_dev(config.device)
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
            optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
            optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
        elif (config.init_gpu_device.startswith('cuda') or
                config.init_gpu_device.startswith('opencl')):
            # init_gpu_device: initialize the GPU but keep computing on the
            # CPU by default. Incompatible with device != cpu.
            if config.device != 'cpu':
                raise ValueError('you must set device=cpu to use init_gpu_device.')
            if config.contexts != '':
                print("Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want.")
            init_dev(config.init_gpu_device)
        if config.contexts != '':
            # contexts flag: "name->dev;name->dev" pairs, each mapped to its
            # own named context; activates the backend like device= does.
            for n, d in (c.split('->') for c in config.contexts.split(';')):
                init_dev(d.strip(), n.strip())
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
            optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
            optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
        # Re-export the common Ops at package level once setup succeeded.
        from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
                                GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
                                HostFromGpu)
        from .basic_ops import host_from_gpu, GpuFromHost
        from .elemwise import GpuElemwise
        from .subtensor import (GpuSubtensor, GpuIncSubtensor,
                                GpuAdvancedIncSubtensor1)
    except Exception:
        # Any failure disables GPU support but must not break importing
        # theano itself.
        error("Could not initialize pygpu, support disabled", exc_info=True)
else:
    # pygpu could not be imported: only complain if the user's flags show
    # they actually wanted a GPU.
    if (config.init_gpu_device.startswith('cuda') or
            config.init_gpu_device.startswith('opencl') or
            config.device.startswith('opencl') or
            config.device.startswith('cuda') or
            config.contexts != ''):
        error("pygpu was configured but could not be imported", exc_info=True)
...@@ -12,7 +12,7 @@ from theano.gradient import grad_undefined ...@@ -12,7 +12,7 @@ from theano.gradient import grad_undefined
from .type import gpu_context_type from .type import gpu_context_type
from .basic_ops import as_gpuarray_variable, infer_context_name from .basic_ops import as_gpuarray_variable, infer_context_name
_logger = logging.getLogger('theano.sandbox.gpuarray.blocksparse') _logger = logging.getLogger('theano.gpuarray.blocksparse')
class GpuSparseBlockGemv(COp): class GpuSparseBlockGemv(COp):
......
...@@ -12,7 +12,7 @@ import theano.sandbox.multinomial ...@@ -12,7 +12,7 @@ import theano.sandbox.multinomial
from theano import Apply, config from theano import Apply, config
from theano.gof import Op from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano.sandbox import gpuarray from theano import gpuarray
from .basic_ops import as_gpuarray_variable, infer_context_name from .basic_ops import as_gpuarray_variable, infer_context_name
from .opt import register_opt, op_lifter from .opt import register_opt, op_lifter
from .type import GpuArrayType from .type import GpuArrayType
......
...@@ -46,7 +46,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -46,7 +46,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge from .opt_util import alpha_merge, output_merge
_logger = logging.getLogger("theano.sandbox.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
gpu_optimizer = EquilibriumDB() gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB() gpu_cut_copies = EquilibriumDB()
......
ctheano.sandbox.gpuarray.type ctheano.gpuarray.type
GpuArray_unpickler GpuArray_unpickler
p0 p0
(cnumpy.core.multiarray (cnumpy.core.multiarray
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano.sandbox.gpuarray import theano.gpuarray
if theano.sandbox.gpuarray.pygpu is None: if theano.gpuarray.pygpu is None:
raise SkipTest("pygpu not installed") raise SkipTest("pygpu not installed")
if (not theano.sandbox.gpuarray.pygpu_activated and if (not theano.gpuarray.pygpu_activated and
not theano.config.init_gpu_device.startswith('gpu')): not theano.config.init_gpu_device.startswith('gpu')):
theano.sandbox.gpuarray.init_dev('cuda') theano.gpuarray.init_dev('cuda')
if not theano.sandbox.gpuarray.pygpu_activated: if not theano.gpuarray.pygpu_activated:
raise SkipTest("pygpu disabled") raise SkipTest("pygpu disabled")
test_ctx_name = None test_ctx_name = None
......
...@@ -302,7 +302,7 @@ class G_reshape(test_basic.T_reshape): ...@@ -302,7 +302,7 @@ class G_reshape(test_basic.T_reshape):
mode=mode_with_gpu, mode=mode_with_gpu,
ignore_topo=(HostFromGpu, GpuFromHost, ignore_topo=(HostFromGpu, GpuFromHost,
theano.compile.DeepCopyOp, theano.compile.DeepCopyOp,
theano.sandbox.gpuarray.elemwise.GpuElemwise, theano.gpuarray.elemwise.GpuElemwise,
theano.tensor.opt.Shape_i, theano.tensor.opt.Shape_i,
theano.tensor.opt.MakeVector)) theano.tensor.opt.MakeVector))
assert self.op == GpuReshape assert self.op == GpuReshape
...@@ -405,7 +405,7 @@ def test_hostfromgpu_shape_i(): ...@@ -405,7 +405,7 @@ def test_hostfromgpu_shape_i():
'local_dot22_to_dot22scalar', 'local_dot22_to_dot22scalar',
'specialize') 'specialize')
a = T.fmatrix('a') a = T.fmatrix('a')
ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))() ca = theano.gpuarray.type.GpuArrayType('float32', (False, False))()
av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32') av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
cv = gpuarray.asarray(numpy.random.rand(5, 4), cv = gpuarray.asarray(numpy.random.rand(5, 4),
dtype='float32', dtype='float32',
......
...@@ -7,7 +7,7 @@ from theano.tests.breakpoint import PdbBreakpoint ...@@ -7,7 +7,7 @@ from theano.tests.breakpoint import PdbBreakpoint
from theano.tests import unittest_tools as utt, test_ifelse from theano.tests import unittest_tools as utt, test_ifelse
from theano.tensor.tests import test_basic from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray import theano.gpuarray
from .. import basic_ops from .. import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor, get_context from ..type import GpuArrayType, gpuarray_shared_constructor, get_context
from ..basic_ops import ( from ..basic_ops import (
......
...@@ -14,24 +14,15 @@ from nose.plugins.skip import SkipTest ...@@ -14,24 +14,15 @@ from nose.plugins.skip import SkipTest
from nose.tools import assert_raises from nose.tools import assert_raises
import numpy import numpy
import theano.sandbox.gpuarray
from theano.compat import PY3 from theano.compat import PY3
from theano import config from theano import config
from theano.misc.pkl_utils import CompatUnpickler from theano.misc.pkl_utils import CompatUnpickler
if not theano.sandbox.gpuarray.pygpu_activated: try:
try: from . import config # noqa
import pygpu have_pygpu = True
except ImportError: except SkipTest:
pygpu = None have_pygpu = False
import theano.sandbox.cuda as cuda_ndarray
if pygpu and cuda_ndarray.cuda_available:
cuda_ndarray.use('gpu', default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
theano.sandbox.gpuarray.init_dev('cuda')
from .. import pygpu_activated # noqa
def test_unpickle_gpuarray_as_numpy_ndarray_flag1(): def test_unpickle_gpuarray_as_numpy_ndarray_flag1():
...@@ -40,8 +31,8 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag1(): ...@@ -40,8 +31,8 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag1():
test_type.py test it when pygpu is there. test_type.py test it when pygpu is there.
""" """
if pygpu_activated: if have_pygpu:
raise SkipTest("pygpu disabled") raise SkipTest("pygpu active")
oldflag = config.experimental.unpickle_gpu_on_cpu oldflag = config.experimental.unpickle_gpu_on_cpu
config.experimental.unpickle_gpu_on_cpu = False config.experimental.unpickle_gpu_on_cpu = False
......
...@@ -233,7 +233,7 @@ class GpuArrayType(Type): ...@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return data return data
def filter_variable(self, other, allow_convert=True): def filter_variable(self, other, allow_convert=True):
from theano.sandbox.gpuarray import GpuFromHost from theano.gpuarray import GpuFromHost
if hasattr(other, '_as_GpuArrayVariable'): if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name) other = other._as_GpuArrayVariable(self.context_name)
......
...@@ -12,8 +12,8 @@ import time ...@@ -12,8 +12,8 @@ import time
import numpy import numpy
import theano import theano
from theano.sandbox.gpuarray import init_dev from theano.gpuarray import init_dev
from theano.sandbox.gpuarray.blas import gpu_dot22 from theano.gpuarray.blas import gpu_dot22
def main(dev1, dev2): def main(dev1, dev2):
......
...@@ -19,7 +19,7 @@ except ImportError: ...@@ -19,7 +19,7 @@ except ImportError:
return False return False
from theano.sandbox import cuda from theano.sandbox import cuda
from theano.sandbox import gpuarray from theano import gpuarray
if cuda.cuda_available: if cuda.cuda_available:
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
......
from __future__ import absolute_import, print_function, division """Placeholder for new gpuarray backend in sandbox. Supports old pickles
import sys which refered to theano.sandbox.gpuarray."""
import logging
import sys
import warnings
import theano
from theano import config
from theano.compile import optdb
from theano.tensor.basic import register_transfer
_logger_name = 'theano.sandbox.gpuarray'
_logger = logging.getLogger(_logger_name)
error = _logger.error
info = _logger.info
pygpu_activated = False
try:
import pygpu
import pygpu.gpuarray
except ImportError:
pygpu = None
# This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable
from . import dnn, opt, nerv, extra_ops
def transfer(x, target): import warnings
try: from theano.gpuarray import *
get_context(target)
return as_gpuarray_variable(x, target)
except ContextNotDefined:
pass
register_transfer(transfer)
def init_dev(dev, name=None):
v = pygpu.gpuarray.api_version()
if v[0] != -9998:
raise RuntimeError("Wrong major API version for gpuarray:", v[0],
"Make sure Theano and libgpuarray/pygpu "
"are in sync.")
if v[1] < 0:
raise RuntimeError("Wrong minor API version for gpuarray:", v[1],
"Please update libgpuarray/pygpu.")
global pygpu_activated
if dev not in init_dev.devmap:
ctx = pygpu.init(dev)
init_dev.devmap[dev] = ctx
if config.gpuarray.preallocate != 0:
if config.gpuarray.preallocate < 1:
gmem = min(config.gpuarray.preallocate, 0.98) * ctx.total_gmem
else:
gmem = config.gpuarray.preallocate * (1024*1024)
# This will allocate and immediatly free an object of size gmem
# which will reserve that amount of memory on the GPU.
pygpu.empty((gmem,), dtype='int8', context=ctx)
context = init_dev.devmap[dev]
# This will map the context name to the real context object.
reg_context(name, context)
pygpu_activated = True
if config.print_active_device:
warn = None
cudnn_version = ""
if dev.startswith('cuda'):
cudnn_version = " (cuDNN not available)"
try:
cudnn_version = dnn.version()
# 5100 should not print warning with cudnn 5 final.
if cudnn_version > 5100:
warn = ("Your cuDNN version is more recent than Theano."
" If you see problems, try updating Theano or"
" downgrading cuDNN to version 5.")
cudnn_version = " (cuDNN version %s)" % cudnn_version
except Exception:
cudnn_version = dnn.dnn_present.msg
print("Mapped name %s to device %s: %s%s" % (
name, dev, context.devname, cudnn_version),
file=sys.stderr)
if warn:
warnings.warn(warn)
# This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {}
if pygpu:
try:
if (config.device.startswith('cuda') or
config.device.startswith('opencl')):
init_dev(config.device)
import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
elif (config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl')):
if config.device != 'cpu':
raise ValueError('you must set device=cpu to use init_gpu_device.')
if config.contexts != '':
print("Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want.")
init_dev(config.init_gpu_device)
if config.contexts != '':
for n, d in (c.split('->') for c in config.contexts.split(';')):
init_dev(d.strip(), n.strip())
import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
HostFromGpu)
from .basic_ops import host_from_gpu, GpuFromHost
from .elemwise import GpuElemwise
from .subtensor import (GpuSubtensor, GpuIncSubtensor,
GpuAdvancedIncSubtensor1)
except Exception: message = "theano.sandbox.gpuarray has been moved to theano.gpuarray." + \
error("Could not initialize pygpu, support disabled", exc_info=True) " Please update your code and pickles."
else: warnings.warn(message)
if (config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl') or
config.device.startswith('opencl') or
config.device.startswith('cuda') or
config.contexts != ''):
error("pygpu was configured but could not be imported", exc_info=True)
from __future__ import absolute_import, print_function, division
import os
import numpy
import theano
from theano import config
# This is a big hack to avoid creating a second context on the card.
from theano.sandbox.cuda.nvcc_compiler import (NVCC_compiler as NVCC_base,
hash_from_file)
class NVCC_compiler(NVCC_base):
    """NVCC compiler wrapper that reuses the device context set up by the
    gpuarray backend instead of letting the old cuda backend create a
    second context on the card.
    """

    @staticmethod
    def compile_args():
        """
        Re-implementation of compile_args that does not create an
        additional context on the GPU.

        Returns the list of extra nvcc flags: the user's
        ``config.nvcc.flags``, fastmath if enabled, a hash define for
        cuda_ndarray.cuh, numpy-compatibility defines for numpy < 1.7,
        and an ``-arch=sm_XX`` flag matching the active cuda device.

        Raises Exception when no context is initialized or when the
        active context is OpenCL (nvcc only targets CUDA).
        """
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.split(theano.sandbox.cuda.__file__)[0],
                         'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)
        # numpy 1.7 deprecated the following macros but they didn't
        # exist in the past, so map the new names to the old ones when
        # compiling against an older numpy.
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            flags.append("-DNPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
            flags.append("-DNPY_ARRAY_ALIGNED=NPY_ALIGNED")
            flags.append("-DNPY_ARRAY_WRITEABLE=NPY_WRITEABLE")
            flags.append("-DNPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
            flags.append("-DNPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
            flags.append("-DNPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")
        # If the user didn't specify architecture flags, derive them from
        # the device the gpuarray backend was initialized on.
        if not any('-arch=sm_' in f for f in flags):
            # NOTE(review): assumes init_dev exposes a `.device` attribute;
            # confirm against the gpuarray backend in use.
            dev = theano.sandbox.gpuarray.init_dev.device
            if dev is None:
                raise Exception("Trying to compile GPU code without a context")
            if dev.startswith("opencl"):
                raise Exception("Trying to call nvcc with an OpenCL context")
            assert dev.startswith('cuda')
            if dev == 'cuda':
                # Bare 'cuda': ask the old backend which device is in use.
                n = theano.sandbox.cuda.use.device_number
            else:
                # 'cudaN': the device number is encoded in the name.
                n = int(dev[4:])
            p = theano.sandbox.cuda.device_properties(n)
            flags.append('-arch=sm_' + str(p['major']) + str(p['minor']))
        return flags
...@@ -24,11 +24,11 @@ from . import multinomial ...@@ -24,11 +24,11 @@ from . import multinomial
import theano.sandbox.cuda import theano.sandbox.cuda
from theano.sandbox.cuda import GpuOp from theano.sandbox.cuda import GpuOp
from theano.sandbox.gpuarray.basic_ops import GpuKernelBase, Kernel from theano.gpuarray.basic_ops import GpuKernelBase, Kernel
from theano.sandbox.gpuarray.type import GpuArrayType from theano.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.fp16_help import write_w from theano.gpuarray.fp16_help import write_w
from theano.sandbox.gpuarray.opt import (register_opt as register_gpua, from theano.gpuarray.opt import (register_opt as register_gpua,
host_from_gpu as host_from_gpua) host_from_gpu as host_from_gpua)
if theano.sandbox.cuda.cuda_available: if theano.sandbox.cuda.cuda_available:
from theano.sandbox.cuda import (CudaNdarrayType, from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor) float32_shared_constructor)
......
...@@ -366,9 +366,9 @@ def test_consistency_GPUA_serial(): ...@@ -366,9 +366,9 @@ def test_consistency_GPUA_serial():
are the same as the reference (Java) implementation by L'Ecuyer et al. are the same as the reference (Java) implementation by L'Ecuyer et al.
""" """
from theano.sandbox.gpuarray.tests.test_basic_ops import \ from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode mode_with_gpu as mode
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
n_samples = 5 n_samples = 5
...@@ -421,9 +421,9 @@ def test_consistency_GPUA_parallel(): ...@@ -421,9 +421,9 @@ def test_consistency_GPUA_parallel():
L'Ecuyer et al. L'Ecuyer et al.
""" """
from theano.sandbox.gpuarray.tests.test_basic_ops import \ from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode mode_with_gpu as mode
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
n_samples = 5 n_samples = 5
...@@ -1107,9 +1107,9 @@ def test_overflow_gpu_old_backend(): ...@@ -1107,9 +1107,9 @@ def test_overflow_gpu_old_backend():
def test_overflow_gpu_new_backend(): def test_overflow_gpu_new_backend():
# run with THEANO_FLAGS=mode=FAST_RUN,init_gpu_device=cuda1,device=cpu # run with THEANO_FLAGS=mode=FAST_RUN,init_gpu_device=cuda1,device=cpu
from theano.sandbox.gpuarray.tests.test_basic_ops import \ from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode mode_with_gpu as mode
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
n_substreams = 7 n_substreams = 7
curr_rstate = numpy.array([seed] * 6, dtype='int32') curr_rstate = numpy.array([seed] * 6, dtype='int32')
......
...@@ -982,7 +982,8 @@ def scan(fn, ...@@ -982,7 +982,8 @@ def scan(fn,
# the file because that would force on the user some dependencies that we # the file because that would force on the user some dependencies that we
# might do not want to. Currently we are working on removing the # might do not want to. Currently we are working on removing the
# dependencies on sandbox code completely. # dependencies on sandbox code completely.
from theano.sandbox import cuda, gpuarray from theano.sandbox import cuda
from theano import gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated: if cuda.cuda_available or gpuarray.pygpu_activated:
# very often we end up in this situation when we want to # very often we end up in this situation when we want to
# replace w with w_copy, where w is a GPU variable # replace w with w_copy, where w is a GPU variable
......
...@@ -272,7 +272,7 @@ class Scan(PureOp): ...@@ -272,7 +272,7 @@ class Scan(PureOp):
# If scan has the flag 'gpua' set to false (meaning that is shouldn't # If scan has the flag 'gpua' set to false (meaning that is shouldn't
# use the gpuarray gpu backend ), ensure that is has no input and no # use the gpuarray gpu backend ), ensure that is has no input and no
# output with type GpuArrayType # output with type GpuArrayType
from theano.sandbox.gpuarray import GpuArrayType from theano.gpuarray import GpuArrayType
if not self.info.get("gpua", False): if not self.info.get("gpua", False):
for inp in self.inputs: for inp in self.inputs:
if isinstance(inp.type, GpuArrayType): if isinstance(inp.type, GpuArrayType):
......
...@@ -1008,8 +1008,8 @@ class ScanInplaceOptimizer(Optimizer): ...@@ -1008,8 +1008,8 @@ class ScanInplaceOptimizer(Optimizer):
# gpuarray might be imported but not its GpuAlloc and # gpuarray might be imported but not its GpuAlloc and
# GpuAllopEmpty ops. # GpuAllopEmpty ops.
try: try:
alloc_ops += (theano.sandbox.gpuarray.GpuAlloc, alloc_ops += (theano.gpuarray.GpuAlloc,
theano.sandbox.gpuarray.GpuAllocEmpty) theano.gpuarray.GpuAllocEmpty)
except: except:
pass pass
......
...@@ -151,7 +151,8 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -151,7 +151,8 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited: if out in visited:
return d return d
visited.add(out) visited.add(out)
from theano.sandbox import cuda, gpuarray from theano.sandbox import cuda
from theano import gpuarray
if out == x: if out == x:
if isinstance(x.type, cuda.CudaNdarrayType): if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy) d[out] = cuda.gpu_from_host(x_copy)
......
...@@ -4939,7 +4939,7 @@ class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests): ...@@ -4939,7 +4939,7 @@ class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests):
""" """
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
from theano.sandbox import gpuarray from theano import gpuarray
self.gpu_backend = gpuarray self.gpu_backend = gpuarray
# This is unfortunate, but required # This is unfortunate, but required
......
...@@ -39,6 +39,8 @@ whitelist_flake8 = [ ...@@ -39,6 +39,8 @@ whitelist_flake8 = [
"compile/profiling.py", "compile/profiling.py",
"compile/sandbox/__init__.py", "compile/sandbox/__init__.py",
"compile/tests/__init__.py", "compile/tests/__init__.py",
"gpuarray/__init__.py",
"gpuarray/tests/__init__.py",
"typed_list/__init__.py", "typed_list/__init__.py",
"typed_list/tests/__init__.py", "typed_list/tests/__init__.py",
"tensor/__init__.py", "tensor/__init__.py",
...@@ -89,7 +91,7 @@ whitelist_flake8 = [ ...@@ -89,7 +91,7 @@ whitelist_flake8 = [
"sandbox/tests/__init__.py", "sandbox/tests/__init__.py",
"sandbox/cuda/__init__.py", "sandbox/cuda/__init__.py",
"sandbox/cuda/tests/__init__.py", "sandbox/cuda/tests/__init__.py",
"sandbox/gpuarray/tests/__init__.py", "sandbox/gpuarray/__init__.py",
"sandbox/scan_module/scan_utils.py", "sandbox/scan_module/scan_utils.py",
"sandbox/scan_module/scan.py", "sandbox/scan_module/scan.py",
"sandbox/scan_module/scan_op.py", "sandbox/scan_module/scan_op.py",
...@@ -100,7 +102,6 @@ whitelist_flake8 = [ ...@@ -100,7 +102,6 @@ whitelist_flake8 = [
"sandbox/linalg/__init__.py", "sandbox/linalg/__init__.py",
"sandbox/linalg/tests/__init__.py", "sandbox/linalg/tests/__init__.py",
"sandbox/linalg/tests/test_linalg.py", "sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/__init__.py",
"scan_module/scan_utils.py", "scan_module/scan_utils.py",
"scan_module/scan_views.py", "scan_module/scan_views.py",
"scan_module/scan.py", "scan_module/scan.py",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论