提交 3a190f98 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #3533 from abergeron/multi_gpu_followup

Multi gpu followup
...@@ -24,8 +24,8 @@ before_install: ...@@ -24,8 +24,8 @@ before_install:
- conda update --yes conda - conda update --yes conda
install: install:
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda create --yes -q -n pyenv mkl python=2.6 numpy=1.7.1 scipy=0.11 nose=1.3.0 pyparsing=1.5 pip flake8==2.3 six==1.9.0 pep8==1.6.2 pyflakes==0.8.1; fi - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda create --yes -q -n pyenv python=2.6 numpy=1.7.1 scipy=0.11 nose=1.3.0 pyparsing=1.5 pip flake8=2.3 six=1.9.0 pep8=1.6.2 pyflakes=0.8.1; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then conda create --yes -q -n pyenv mkl python=3.3 numpy=1.9.1 scipy=0.14.0 nose=1.3.4 pyparsing=1.5 pip flake8==2.3 six==1.9.0 pep8==1.6.2 pyflakes==0.8.1; fi - if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then conda create --yes -q -n pyenv python=3.3 numpy=1.9.1 scipy=0.14.0 nose=1.3.4 pyparsing=1.5 pip flake8=2.3 six=1.9.0 pep8=1.6.2 pyflakes=0.8.1; fi
- source activate pyenv - source activate pyenv
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
- pip install . --no-deps - pip install . --no-deps
......
...@@ -167,6 +167,25 @@ overridden. ...@@ -167,6 +167,25 @@ overridden.
For more details you can go see the documentation for :ref:`type`. For more details you can go see the documentation for :ref:`type`.
Additional definitions
----------------------
For certain mechanisms, you can register functions and other such
things to plug your type into Theano's mechanisms. These are optional
but will allow people to use your type with familiar interfaces.
`transfer()`
~~~~~~~~~~~~
To plug in additional options for the transfer target, define a
function which takes a theano variable and a target argument and
returns either a new transferred variable (which can be the same as
the input if no transfer is necessary) or returns None if the transfer
can't be done.
Then register that function by calling :func:`register_transfer()`
with it as argument.
Defining double Defining double
=============== ===============
......
...@@ -427,7 +427,8 @@ TensorVariable ...@@ -427,7 +427,8 @@ TensorVariable
you'll want to call. you'll want to call.
.. class:: _tensor_py_operators(object) .. autoclass:: _tensor_py_operators
:members:
This mix-in class adds convenient attributes, methods, and support This mix-in class adds convenient attributes, methods, and support
to TensorVariable, TensorConstant and TensorSharedVariable for to TensorVariable, TensorConstant and TensorSharedVariable for
......
...@@ -121,6 +121,9 @@ class ContextsParam(ConfigParam): ...@@ -121,6 +121,9 @@ class ContextsParam(ConfigParam):
s = v.split('->') s = v.split('->')
if len(s) != 2: if len(s) != 2:
raise ValueError("Malformed context map: %s" % (v,)) raise ValueError("Malformed context map: %s" % (v,))
if (s[0] == 'cpu' or s[0].startswith('cuda') or
s[0].startswith('opencl')):
raise ValueError("Cannot use %s as context name" % (s[0],))
return val return val
ConfigParam.__init__(self, '', filter, False) ConfigParam.__init__(self, '', filter, False)
...@@ -132,6 +135,8 @@ AddConfigVar( ...@@ -132,6 +135,8 @@ AddConfigVar(
'name->dev_name' format. An example that would map name 'test' to 'name->dev_name' format. An example that would map name 'test' to
device 'cuda0' and name 'test2' to device 'opencl0:0' follows: device 'cuda0' and name 'test2' to device 'opencl0:0' follows:
"test->cuda0;test2->opencl0:0". "test->cuda0;test2->opencl0:0".
Invalid context names are 'cpu', 'cuda*' and 'opencl*'
""", ContextsParam(), in_c_key=False) """, ContextsParam(), in_c_key=False)
AddConfigVar( AddConfigVar(
...@@ -150,7 +155,7 @@ def default_cuda_root(): ...@@ -150,7 +155,7 @@ def default_cuda_root():
return '' return ''
for dir in s.split(os.path.pathsep): for dir in s.split(os.path.pathsep):
if os.path.exists(os.path.join(dir, "nvcc")): if os.path.exists(os.path.join(dir, "nvcc")):
return os.path.split(dir)[0] return os.path.dirname(os.path.abspath(dir))
return '' return ''
AddConfigVar( AddConfigVar(
......
...@@ -276,7 +276,18 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -276,7 +276,18 @@ def struct_gen(args, struct_builders, blocks, sub):
%(storage_decl)s %(storage_decl)s
%(struct_decl)s %(struct_decl)s
%(name)s() {} %(name)s() {
// This is only somewhat safe because we:
// 1) Are not a virtual class
// 2) Do not use any virtual classes in the members
// 3) Deal with mostly POD and pointers
// If this changes, we would have to revise this, but for
// now I am tired of chasing segfaults because
// initialization code had an error and some pointer has
// a junk value.
memset(this, 0, sizeof(*this));
}
~%(name)s(void) { ~%(name)s(void) {
cleanup(); cleanup();
} }
......
...@@ -294,7 +294,7 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None): ...@@ -294,7 +294,7 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None):
detailed_err_msg += "\n" detailed_err_msg += "\n"
detailed_err_msg += " TotalSize: %s Byte(s) %.3f GB\n" % ( detailed_err_msg += " TotalSize: %s Byte(s) %.3f GB\n" % (
total_size, total_size / 1024. / 1024 / 1024) total_size, total_size / 1024. / 1024 / 1024)
detailed_err_msg += " TotalSize inputs: %s Byte(s) %.3f BG\n" % ( detailed_err_msg += " TotalSize inputs: %s Byte(s) %.3f GB\n" % (
total_size_inputs, total_size_inputs / 1024. / 1024 / 1024) total_size_inputs, total_size_inputs / 1024. / 1024 / 1024)
else: else:
......
...@@ -17,6 +17,8 @@ from theano.configparser import ( ...@@ -17,6 +17,8 @@ from theano.configparser import (
config, AddConfigVar, BoolParam, FloatParam, StrParam) config, AddConfigVar, BoolParam, FloatParam, StrParam)
from . import nvcc_compiler from . import nvcc_compiler
from theano.tensor.basic import register_transfer
# ignore_newtrees is to speed the optimization as this is the pattern # ignore_newtrees is to speed the optimization as this is the pattern
# we use for optimization. Otherwise, we can iterate 100s of time on # we use for optimization. Otherwise, we can iterate 100s of time on
# the graph and apply only a few optimizations each time. # the graph and apply only a few optimizations each time.
...@@ -327,6 +329,12 @@ if cuda_available: ...@@ -327,6 +329,12 @@ if cuda_available:
from . import opt, dnn from . import opt, dnn
from .rng_curand import CURAND_RandomStreams from .rng_curand import CURAND_RandomStreams
def transfer(x, target):
    """
    Transfer hook for the `'gpu'` target.

    Parameters
    ----------
    x : variable
        The variable to move.
    target : str
        The requested transfer target.

    Returns
    -------
    The result of `as_cuda_ndarray_variable(x)` when `target` is
    `'gpu'`, otherwise None so that the caller can try other hooks.

    """
    # Any target other than 'gpu' is not ours to handle.
    if target != 'gpu':
        return None
    return as_cuda_ndarray_variable(x)
register_transfer(transfer)
def use(device, def use(device,
force=False, force=False,
......
...@@ -162,11 +162,15 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable ...@@ -162,11 +162,15 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def cuda_shared_constructor(value, name=None, strict=False, def cuda_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False, broadcastable=None): allow_downcast=None, borrow=False,
broadcastable=None, target='gpu'):
""" """
SharedVariable Constructor for CudaNdarrayType. SharedVariable Constructor for CudaNdarrayType.
""" """
if target != 'gpu':
raise TypeError('not for gpu')
# THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD # THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD
# SO INT shared vars, float64 shared vars, etc. all end up on the card. # SO INT shared vars, float64 shared vars, etc. all end up on the card.
# THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT. # THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT.
...@@ -196,12 +200,15 @@ def cuda_shared_constructor(value, name=None, strict=False, ...@@ -196,12 +200,15 @@ def cuda_shared_constructor(value, name=None, strict=False,
def float32_shared_constructor(value, name=None, strict=False, def float32_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False, broadcastable=None): allow_downcast=None, borrow=False,
broadcastable=None, target='gpu'):
""" """
SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or
CudaNdarray. CudaNdarray.
""" """
if target != 'gpu':
raise TypeError('not for gpu')
if theano.sandbox.cuda.use.device_number is None: if theano.sandbox.cuda.use.device_number is None:
theano.sandbox.cuda.use("gpu", theano.sandbox.cuda.use("gpu",
force=True, force=True,
......
...@@ -6,6 +6,8 @@ import theano ...@@ -6,6 +6,8 @@ import theano
from theano.configparser import config, AddConfigVar, BoolParam from theano.configparser import config, AddConfigVar, BoolParam
from theano.compile import optdb from theano.compile import optdb
from theano.tensor.basic import register_transfer
_logger_name = 'theano.sandbox.gpuarray' _logger_name = 'theano.sandbox.gpuarray'
_logger = logging.getLogger(_logger_name) _logger = logging.getLogger(_logger_name)
...@@ -22,9 +24,19 @@ except ImportError: ...@@ -22,9 +24,19 @@ except ImportError:
# This is for documentation not to depend on the availability of pygpu # This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor, GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context) reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable
from . import opt, nerv from . import opt, nerv
def transfer(x, target):
    """
    Transfer hook for targets that name a registered context.

    Parameters
    ----------
    x : variable
        The variable to move.
    target : str
        The requested transfer target, looked up with `get_context`.

    Returns
    -------
    The result of `as_gpuarray_variable(x, target)`, or None when
    `ContextNotDefined` is raised while handling the request.

    """
    try:
        # The lookup is only a validity check; its value is not used.
        get_context(target)
        return as_gpuarray_variable(x, target)
    except ContextNotDefined:
        # Not a context we know about: decline the request.
        return None
register_transfer(transfer)
def init_dev(dev, name=None): def init_dev(dev, name=None):
if pygpu.gpuarray.api_version() != (-10000, 0): if pygpu.gpuarray.api_version() != (-10000, 0):
......
...@@ -21,7 +21,8 @@ try: ...@@ -21,7 +21,8 @@ try:
except ImportError: except ImportError:
pass pass
from .type import GpuArrayType, GpuArrayConstant, gpu_context_type, get_context from .type import (GpuArrayType, GpuArrayConstant, gpu_context_type,
get_context, ContextNotDefined)
from .fp16_help import write_w from .fp16_help import write_w
...@@ -96,8 +97,12 @@ def infer_context_name(*vars): ...@@ -96,8 +97,12 @@ def infer_context_name(*vars):
return v.owner.inputs[0].type.context_name return v.owner.inputs[0].type.context_name
if len(v.owner.inputs) == 1: if len(v.owner.inputs) == 1:
todo.extendleft(v.owner.inputs) todo.extendleft(v.owner.inputs)
# If we can't find a context we infer None, which is the default # If we can't find a context try None if it exists
return None try:
get_context(None)
return None
except ContextNotDefined:
raise ValueError("Could not infer context from inputs")
class Kernel(object): class Kernel(object):
...@@ -386,29 +391,49 @@ class GpuFromHost(Op): ...@@ -386,29 +391,49 @@ class GpuFromHost(Op):
def infer_shape(self, node, xshp): def infer_shape(self, node, xshp):
return xshp return xshp
def c_headers(self):
return ["gpuarray_helper.h"]
def c_header_dirs(self):
return [os.path.dirname(__file__)]
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
return """ return """
PyArrayObject *%(name)s_tmp; PyArrayObject *%(name)s_tmp;
%(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s); %(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s);
if (%(name)s_tmp == NULL) if (%(name)s_tmp == NULL)
%(fail)s %(fail)s
Py_XDECREF(%(out)s);
%(out)s = pygpu_fromhostdata(PyArray_DATA(%(name)s_tmp), if (%(out)s != NULL && GpuArray_IS_C_CONTIGUOUS(&%(out)s->ga) &&
get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)), theano_size_check(%(out)s, PyArray_NDIM(%(name)s_tmp),
PyArray_NDIM(%(name)s_tmp), (size_t *)PyArray_DIMS(%(name)s_tmp),
(size_t *)PyArray_DIMS(%(name)s_tmp), get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)))) {
(ssize_t *)PyArray_STRIDES(%(name)s_tmp), int err = GpuArray_write(&%(out)s->ga, PyArray_DATA(%(name)s_tmp),
%(ctx)s, PyArray_NBYTES(%(name)s_tmp));
Py_None); Py_DECREF(%(name)s_tmp);
Py_DECREF(%(name)s_tmp); if (err != GA_NO_ERROR) {
if (%(out)s == NULL) { PyErr_Format(PyExc_RuntimeError, "Could not write data to gpu");
%(fail)s %(fail)s;
}
} else {
Py_XDECREF(%(out)s);
%(out)s = pygpu_fromhostdata(PyArray_DATA(%(name)s_tmp),
get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)),
PyArray_NDIM(%(name)s_tmp),
(size_t *)PyArray_DIMS(%(name)s_tmp),
(ssize_t *)PyArray_STRIDES(%(name)s_tmp),
%(ctx)s,
Py_None);
Py_DECREF(%(name)s_tmp);
if (%(out)s == NULL) {
%(fail)s
}
} }
""" % {'name': name, 'inp': inputs[0], 'ctx': sub['context'], """ % {'name': name, 'inp': inputs[0], 'ctx': sub['context'],
'out': outputs[0], 'fail': sub['fail']} 'out': outputs[0], 'fail': sub['fail']}
def c_code_cache_version(self): def c_code_cache_version(self):
return (7,) return (8,)
class GpuToGpu(Op): class GpuToGpu(Op):
......
...@@ -17,7 +17,8 @@ from theano.scan_module import scan_utils, scan_op, scan_opt ...@@ -17,7 +17,8 @@ from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.nnet.conv import ConvOp from theano.tensor.nnet.conv import ConvOp
from theano.tests.breakpoint import PdbBreakpoint from theano.tests.breakpoint import PdbBreakpoint
from .type import GpuArrayType, GpuArrayConstant, get_context from .type import (GpuArrayType, GpuArrayConstant, get_context,
ContextNotDefined)
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu, host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
...@@ -164,9 +165,9 @@ class InputToGpuOptimizer(Optimizer): ...@@ -164,9 +165,9 @@ class InputToGpuOptimizer(Optimizer):
if isinstance(input.type, GpuArrayType): if isinstance(input.type, GpuArrayType):
continue continue
if (len(input.clients) == 1 and # If all clients are outputs or transfers don't do anything.
(input.clients[0][0] == 'output' or if (all(cl[0] == 'output' or isinstance(cl[0].op, GpuFromHost)
isinstance(input.clients[0][0].op, GpuFromHost))): for cl in input.clients)):
continue continue
ctx_name = getattr(input.tag, 'context_name', None) ctx_name = getattr(input.tag, 'context_name', None)
...@@ -177,11 +178,11 @@ class InputToGpuOptimizer(Optimizer): ...@@ -177,11 +178,11 @@ class InputToGpuOptimizer(Optimizer):
except TypeError: except TypeError:
# This could fail if the inputs are not TensorTypes # This could fail if the inputs are not TensorTypes
pass pass
except ValueError: except ContextNotDefined:
if hasattr(input.tag, 'context_name'):
raise
# If there is no context tag and no default context # If there is no context tag and no default context
# then it stays on the CPU # then it stays on the CPU
if not hasattr(input.tag, 'context_name'):
raise
pass pass
...@@ -194,7 +195,7 @@ def local_cut_gpu_transfers(node): ...@@ -194,7 +195,7 @@ def local_cut_gpu_transfers(node):
# gpu[ab] -> host -> gpub # gpu[ab] -> host -> gpub
if (isinstance(node.op, GpuFromHost) and if (isinstance(node.op, GpuFromHost) and
node.inputs[0].owner and node.inputs[0].owner and
node.inputs[0].owner.op == host_from_gpu): isinstance(node.inputs[0].owner.op, HostFromGpu)):
other = node.inputs[0].owner.inputs[0] other = node.inputs[0].owner.inputs[0]
if node.op.context_name == other.type.context_name: if node.op.context_name == other.type.context_name:
return [other] return [other]
...@@ -202,7 +203,7 @@ def local_cut_gpu_transfers(node): ...@@ -202,7 +203,7 @@ def local_cut_gpu_transfers(node):
return [GpuToGpu(node.op.context_name)(other)] return [GpuToGpu(node.op.context_name)(other)]
# ? -> gpua -> host # ? -> gpua -> host
elif (node.op == host_from_gpu and elif (isinstance(node.op, HostFromGpu) and
node.inputs[0].owner): node.inputs[0].owner):
n2 = node.inputs[0].owner n2 = node.inputs[0].owner
...@@ -255,7 +256,7 @@ def local_gpuaalloc2(node): ...@@ -255,7 +256,7 @@ def local_gpuaalloc2(node):
""" """
try: try:
get_context(None) get_context(None)
except ValueError: except ContextNotDefined:
# If there is no default context then we do not perform the move here. # If there is no default context then we do not perform the move here.
return return
if (isinstance(node.op, tensor.Alloc) and if (isinstance(node.op, tensor.Alloc) and
...@@ -620,6 +621,7 @@ def local_gpua_careduce(node, context_name): ...@@ -620,6 +621,7 @@ def local_gpua_careduce(node, context_name):
node.op.scalar_op, axis=node.op.axis, node.op.scalar_op, axis=node.op.axis,
dtype=getattr(node.op, 'dtype', None), dtype=getattr(node.op, 'dtype', None),
acc_dtype=getattr(node.op, 'acc_dtype', None)) acc_dtype=getattr(node.op, 'acc_dtype', None))
x.tag.context_name = context_name
gvar = greduce(x) gvar = greduce(x)
# We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
# be None # be None
......
...@@ -17,6 +17,10 @@ except ImportError: ...@@ -17,6 +17,10 @@ except ImportError:
_context_reg = {} _context_reg = {}
class ContextNotDefined(ValueError):
    """
    Error raised when a context lookup finds no registered entry.

    It derives from ValueError, so handlers written as
    `except ValueError` also catch it.
    """
def reg_context(name, ctx): def reg_context(name, ctx):
""" """
Register a context by mapping it to a name. Register a context by mapping it to a name.
...@@ -56,7 +60,7 @@ def get_context(name): ...@@ -56,7 +60,7 @@ def get_context(name):
""" """
if name not in _context_reg: if name not in _context_reg:
raise ValueError("context name %s not defined" % (name,)) raise ContextNotDefined("context name %s not defined" % (name,))
return _context_reg[name] return _context_reg[name]
...@@ -72,7 +76,7 @@ def _name_for_ctx(ctx): ...@@ -72,7 +76,7 @@ def _name_for_ctx(ctx):
for k, v in _context_reg: for k, v in _context_reg:
if v == ctx: if v == ctx:
return k return k
raise ValueError('context is not registered') raise ContextNotDefined('context is not registered')
# This is a private method for use by the tests only # This is a private method for use by the tests only
...@@ -88,6 +92,8 @@ class GpuArrayType(Type): ...@@ -88,6 +92,8 @@ class GpuArrayType(Type):
self.ndim = len(self.broadcastable) self.ndim = len(self.broadcastable)
self.name = name self.name = name
self.context_name = context_name self.context_name = context_name
# This will check that the passed context name is valid and registered.
get_context(self.context_name)
try: try:
self.typecode = gpuarray.dtype_to_typecode(self.dtype) self.typecode = gpuarray.dtype_to_typecode(self.dtype)
except gpuarray.GpuArrayException: except gpuarray.GpuArrayException:
...@@ -468,27 +474,29 @@ GpuArrayType.SharedVariable = GpuArraySharedVariable ...@@ -468,27 +474,29 @@ GpuArrayType.SharedVariable = GpuArraySharedVariable
def gpuarray_shared_constructor(value, name=None, strict=False, def gpuarray_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False, allow_downcast=None, borrow=False,
broadcastable=None, broadcastable=None, target=None):
context_name=None):
""" """
SharedVariable constructor for GpuArrayType. SharedVariable constructor for GpuArrayType.
""" """
if target == 'gpu' or target == 'cpu':
raise TypeError('not for me')
if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)): if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)):
raise TypeError('ndarray or GpuArray required') raise TypeError('ndarray or GpuArray required')
try: try:
get_context(context_name) get_context(target)
except ValueError: except ContextNotDefined:
# Don't make this a hard error if we attempt to make a shared # Don't make this a hard error if we attempt to make a shared
# variable while there is no default context. # variable while there is no default context.
if context_name is None: if target is None:
raise TypeError('No default context and no context specified') raise TypeError('No default context and no context specified')
raise raise
if broadcastable is None: if broadcastable is None:
broadcastable = (False,) * value.ndim broadcastable = (False,) * value.ndim
type = GpuArrayType(value.dtype, broadcastable, context_name=context_name) type = GpuArrayType(value.dtype, broadcastable, context_name=target)
deviceval = pygpu.gpuarray.array(value, copy=(not borrow), deviceval = pygpu.gpuarray.array(value, copy=(not borrow),
context=type.context) context=type.context)
return GpuArraySharedVariable(type=type, value=deviceval, name=name, return GpuArraySharedVariable(type=type, value=deviceval, name=name,
......
...@@ -2851,11 +2851,46 @@ class Alloc(gof.Op): ...@@ -2851,11 +2851,46 @@ class Alloc(gof.Op):
return False return False
return True return True
alloc = Alloc() alloc = Alloc()
pprint.assign(alloc, printing.FunctionPrinter('alloc')) pprint.assign(alloc, printing.FunctionPrinter('alloc'))
def transfer(var, target):
    """
    Return a version of `var` transferred to `target`.

    The target `'cpu'` is handled here by passing `var` to
    `as_tensor_variable`. Every other target is offered, in
    registration order, to the functions added with
    `register_transfer`; the first result that is not None is
    returned.

    Parameters
    ----------
    var : variable
        A theano variable
    target : str
        The target of the transfer

    Returns
    -------
    The result of `as_tensor_variable(var)` for `'cpu'`, otherwise
    the first non-None value returned by a registered function.

    Raises
    ------
    ValueError
        If `target` is not `'cpu'` and no registered function
        returned a result for it.

    """
    if target == 'cpu':
        return as_tensor_variable(var)

    for handler in transfer._others:
        moved = handler(var, target)
        if moved is not None:
            return moved
    raise ValueError("Can't transfer to target %s" % (target,))

# Registry of additional transfer functions, consulted in order.
transfer._others = []


def register_transfer(fn):
    """
    Register a transfer function for alternative targets.

    Parameters
    ----------
    fn : callable
        Called as `fn(var, target)` by `transfer`. It should return
        None for targets it does not handle.

    """
    transfer._others.append(fn)
"""Create a duplicate of `a` (with duplicated storage)""" """Create a duplicate of `a` (with duplicated storage)"""
tensor_copy = elemwise.Elemwise(scal.identity) tensor_copy = elemwise.Elemwise(scal.identity)
pprint.assign(tensor_copy, printing.IgnorePrinter()) pprint.assign(tensor_copy, printing.IgnorePrinter())
......
...@@ -24,7 +24,7 @@ class TensorSharedVariable(_tensor_py_operators, SharedVariable): ...@@ -24,7 +24,7 @@ class TensorSharedVariable(_tensor_py_operators, SharedVariable):
@shared_constructor @shared_constructor
def tensor_constructor(value, name=None, strict=False, allow_downcast=None, def tensor_constructor(value, name=None, strict=False, allow_downcast=None,
borrow=False, broadcastable=None): borrow=False, broadcastable=None, target='cpu'):
""" """
SharedVariable Constructor for TensorType. SharedVariable Constructor for TensorType.
...@@ -36,6 +36,9 @@ def tensor_constructor(value, name=None, strict=False, allow_downcast=None, ...@@ -36,6 +36,9 @@ def tensor_constructor(value, name=None, strict=False, allow_downcast=None,
The optional `broadcastable` argument will override this default. The optional `broadcastable` argument will override this default.
""" """
if target != 'cpu':
raise TypeError('not for cpu')
if not isinstance(value, numpy.ndarray): if not isinstance(value, numpy.ndarray):
raise TypeError() raise TypeError()
...@@ -65,7 +68,7 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable): ...@@ -65,7 +68,7 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable):
@shared_constructor @shared_constructor
def scalar_constructor(value, name=None, strict=False, allow_downcast=None, def scalar_constructor(value, name=None, strict=False, allow_downcast=None,
borrow=False): borrow=False, target='cpu'):
""" """
SharedVariable constructor for scalar values. Default: int64 or float64. SharedVariable constructor for scalar values. Default: int64 or float64.
...@@ -78,6 +81,9 @@ def scalar_constructor(value, name=None, strict=False, allow_downcast=None, ...@@ -78,6 +81,9 @@ def scalar_constructor(value, name=None, strict=False, allow_downcast=None,
borrow, as it is a hint to Theano that we can reuse it. borrow, as it is a hint to Theano that we can reuse it.
""" """
if target != 'cpu':
raise TypeError('not for cpu')
if not isinstance(value, (numpy.number, float, int, complex)): if not isinstance(value, (numpy.number, float, int, complex)):
raise TypeError() raise TypeError()
try: try:
......
...@@ -29,7 +29,7 @@ class AsTensorError(TypeError): ...@@ -29,7 +29,7 @@ class AsTensorError(TypeError):
pass pass
class _tensor_py_operators: class _tensor_py_operators(object):
# UNARY # UNARY
def __abs__(self): def __abs__(self):
return theano.tensor.basic.abs_(self) return theano.tensor.basic.abs_(self)
...@@ -369,6 +369,19 @@ class _tensor_py_operators: ...@@ -369,6 +369,19 @@ class _tensor_py_operators:
def diagonal(self, offset=0, axis1=0, axis2=1): def diagonal(self, offset=0, axis1=0, axis2=1):
return theano.tensor.basic.diagonal(self, offset, axis1, axis2) return theano.tensor.basic.diagonal(self, offset, axis1, axis2)
# Transfer the data to another device
def transfer(self, target):
    """
    Transfer this variable to `target`.

    If `target` is `'cpu'` this will transfer to a TensorType (if
    not already one). Other types may define additional targets.

    Parameters
    ----------
    target : str
        The desired location of the output variable

    """
    # Delegate to the module-level function so that registered
    # targets are handled the same way for every variable.
    moved = theano.tensor.transfer(self, target)
    return moved
# Elemwise # Elemwise
def arccos(self): def arccos(self):
return theano.tensor.arccos(self) return theano.tensor.arccos(self)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论