提交 1f34a482 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Remove tentacles in tensor.

上级 1b22389f
from .config import test_ctx_name
from .config import test_ctx_name, mode_with_gpu
from ..type import get_context, GpuArrayType, GpuArraySharedVariable
from ..type import (get_context, GpuArrayType, GpuArraySharedVariable,
gpuarray_shared_constructor)
import pygpu
import numpy as np
from theano.misc.tests.test_may_share_memory import may_share_memory_core
from theano.misc.pkl_utils import dump, load
from theano.tensor.tests.test_opt import test_fusion as t_fusion
class test_fusion(t_fusion):
    # Re-run the elemwise-fusion test suite from
    # theano.tensor.tests.test_opt.test_fusion on the gpuarray backend:
    # same test methods, but compiled with a GPU mode and GPU shared
    # variables.  (Presumably the parent class reads these two class
    # attributes -- confirm against t_fusion.)
    mode = mode_with_gpu
    shared = gpuarray_shared_constructor
def test_may_share_memory():
ctx = get_context(test_ctx_name)
......
......@@ -9,6 +9,8 @@ from theano import config
from theano.compile import DeepCopyOp
from theano.misc.pkl_utils import CompatUnpickler
from theano.tensor.tests.test_sharedvar import makeSharedTester
from .config import test_ctx_name
from .test_basic_ops import rand_gpuarray
from ..type import GpuArrayType, gpuarray_shared_constructor
......@@ -76,3 +78,41 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag0():
assert np.asarray(mat)[0] == -42.0
finally:
config.experimental.unpickle_gpu_on_cpu = oldflag
# Instantiate the generic shared-variable test suite for the gpuarray
# backend.  The *_alias_ flags declare which borrow/set_value operations
# are expected to alias memory for this backend; makeSharedTester builds
# a test class named by `name` from them.
test_shared_options = makeSharedTester(
    shared_constructor_=gpuarray_shared_constructor,
    dtype_=theano.config.floatX,
    get_value_borrow_true_alias_=True,
    shared_borrow_true_alias_=True,
    set_value_borrow_true_alias_=True,
    set_value_inplace_=True,
    set_cast_value_inplace_=False,
    shared_constructor_accept_ndarray_=True,
    # Internal representation: a pygpu GpuArray bound to the test context.
    internal_type_=lambda v: pygpu.array(v, context=get_context(test_ctx_name),
                                         cls=pygpu._array.ndgpuarray),
    test_internal_type_=lambda a: isinstance(a, pygpu.gpuarray.GpuArray),
    theano_fct_=theano.tensor.exp,
    ref_fct_=numpy.exp,
    cast_value_=lambda v: pygpu.asarray(v, context=get_context(test_ctx_name),
                                        cls=pygpu._array.ndgpuarray),
    name='test_shared_options')
# Second instantiation of the generic shared-variable test suite for the
# gpuarray backend, with the opposite borrow-aliasing expectations
# (borrow=True is expected NOT to alias for this configuration).
test_shared_options2 = makeSharedTester(
    # Fixed: the comma after this argument was missing (syntax error).
    shared_constructor_=gpuarray_shared_constructor,
    dtype_=theano.config.floatX,
    get_value_borrow_true_alias_=False,
    shared_borrow_true_alias_=False,
    set_value_borrow_true_alias_=False,
    set_value_inplace_=True,
    set_cast_value_inplace_=True,
    shared_constructor_accept_ndarray_=True,
    # Internal representation: a pygpu GpuArray bound to the test context.
    internal_type_=lambda v: pygpu.array(v, context=get_context(test_ctx_name),
                                         cls=pygpu._array.ndgpuarray),
    test_internal_type_=lambda a: isinstance(a, pygpu.gpuarray.GpuArray),
    theano_fct_=theano.tensor.exp,
    ref_fct_=numpy.exp,
    cast_value_=lambda v: pygpu.asarray(v, context=get_context(test_ctx_name),
                                        cls=pygpu._array.ndgpuarray),
    name='test_shared_options2')
......@@ -107,22 +107,6 @@ def __oplist_tag(thing, tag):
thing.__oplist_tags = tags
if 0:
    # Dead code: deliberately disabled with `if 0:`; never executed.
    # this starts to feel like we're enumerating all the types
    # the one place where this is used we should also allow for sparse
    # variables
    # - JB 20100226
    def as_cuda_or_tensor_variable(x, name=None, ndim=None):
        """
        Do the same as as_tensor_variable, but do not transfer the
        value to the GPU.
        """
        if hasattr(x, '_as_CudaNdarrayVariable'):
            # TODO: pass name and ndim arguments
            return x._as_CudaNdarrayVariable()
        return as_tensor_variable(x, name, ndim)
def as_tensor_variable(x, name=None, ndim=None):
"""Return `x`, transformed into a `TensorType`.
......
......@@ -15,7 +15,7 @@ There are four kinds of BLAS Ops in Theano:
- Python implementations (this file)
- SciPy-based (blas_scipy)
- C-based (blas_c)
- CUDA-based (theano.sandbox.cuda.blas)
- GPU-based (theano.gpuarray)
Notes
-----
......
from __future__ import absolute_import, print_function, division
import sys
from copy import copy
import numpy as np
......
......@@ -573,10 +573,7 @@ def conv3D(V, W, b, d):
The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization.
The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead.
Please use nnet.conv3d instead of this for a faster GPU implementation.
See Also
--------
......
......@@ -903,7 +903,6 @@ class ConvOp(OpenMPOp):
newin = inputs.dimshuffle((1, 0, 2, 3))
newgz = gz.dimshuffle((1, 0, 2, 3))
un_p = self.unroll_patch
if self.out_mode == 'valid':
(img, filters) = (newin, newgz)
kshp_logical = self.fulloutshp
......@@ -912,8 +911,6 @@ class ConvOp(OpenMPOp):
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = (self.bsize, self.imshp[1], self.imshp[2])
kshp = self.outshp
un_b = self.unroll_batch
un_k = self.unroll_kern
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
kshp_logical = None
......@@ -924,8 +921,6 @@ class ConvOp(OpenMPOp):
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = (self.bsize, self.outshp[0], self.outshp[1])
kshp = self.imshp[1:]
un_b = self.unroll_kern
un_k = self.unroll_batch
else:
raise NotImplementedError(
'Only [full,valid] modes are currently supported.')
......
......@@ -4,7 +4,6 @@ from theano.gradient import DisconnectedType
from theano.gof import Op, Apply, TopoOptimizer
from theano.gof.opt import copy_stack_trace
from theano import tensor
import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1):
......@@ -16,7 +15,7 @@ def get_diagonal_subtensor_view(x, i0, i1):
It returns a partial view of x, not a partial copy.
"""
# We have to cast i0 and i0 to int because python 2.4 (and maybe later)
# We have to cast i0 and i1 to int because python
# do not support indexing with 0-dim, 'int*' ndarrays.
i0 = int(i0)
i1 = int(i1)
......@@ -198,8 +197,7 @@ def conv3d(signals, filters,
Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel,time,in channel, row, column)
For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
For the GPU, use nnet.conv3d.
See Also
--------
......@@ -295,67 +293,6 @@ def conv3d(signals, filters,
return out_5d
def make_gpu_optimizer(op, to_gpu):
    """
    Create and register a local optimizer that moves some inputs of
    `op` to the GPU, for ops that work on both CPU and GPU.

    The op object is created by calling op(), so good default values
    are needed.

    We suppose the same op works with CPU and GPU inputs.

    Parameters
    ----------
    op
        The op class that supports GPU inputs.
    to_gpu
        A list of input indices of `op` that are moved to the GPU.

    """
    @theano.gof.local_optimizer([op, cuda.gpu_from_host])
    def local_to_gpu(node):
        """
        op(host_from_gpu()) -> host_from_gpu(op)
        gpu_from_host(op) -> op(gpu_from_host)
        """
        if isinstance(node.op, op):
            # op(host_from_gpu()) -> host_from_gpu(op)
            # If any of the inputs that go on the GPU are on the GPU,
            # move the op to the gpu.
            if any(node.inputs[idx].owner and
                   isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
                   for idx in to_gpu):
                new_inp = list(node.inputs)
                for idx in to_gpu:
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                # Rebuild the op on GPU inputs, then transfer the result
                # back to the CPU so the graph's interface is unchanged.
                result_node = op()(*new_inp)
                copy_stack_trace(node.outputs[0], result_node)
                transfer_node = result_node.transfer('cpu')
                copy_stack_trace(node.outputs[0], transfer_node)
                return [transfer_node]
        if node.op == cuda.gpu_from_host:
            # gpu_from_host(op) -> op(gpu_from_host)
            host_input = node.inputs[0]
            if host_input.owner and isinstance(host_input.owner.op,
                                               op):
                op_node = host_input.owner
                new_inp = list(op_node.inputs)
                for idx in to_gpu:
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                new_node = op()(*new_inp)
                copy_stack_trace(host_input, new_node)
                return [new_node]
        # No rewrite applies.
        return False
    # Give each generated optimizer a distinct name for registration.
    local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
    cuda.opt.register_opt()(local_to_gpu)
if cuda.cuda_available:
    # Register GPU-transfer optimizers for the two diagonal-subtensor ops.
    make_gpu_optimizer(DiagonalSubtensor, [0])
    make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
@theano.gof.local_optimizer([DiagonalSubtensor, IncDiagonalSubtensor])
def local_inplace_DiagonalSubtensor(node):
"""Also work for IncDiagonalSubtensor."""
......
......@@ -16,12 +16,6 @@ from theano.tensor.nnet.conv3d2d import conv3d, get_diagonal_subtensor_view, Dia
import theano.tests.unittest_tools as utt
if theano.config.mode == 'FAST_COMPILE':
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_get_diagonal_subtensor_view(wrap=lambda a: a):
x = numpy.arange(20).reshape(5, 4).astype('float32')
x = wrap(x)
......@@ -106,17 +100,11 @@ def check_diagonal_subtensor_view_traces(fn):
@parameterized.expand(('valid', 'full', 'half'), utt.custom_name_func)
def test_conv3d(border_mode):
check_conv3d(border_mode=border_mode,
mode=mode_without_gpu,
shared=theano.tensor._shared)
# This function will also be used in theano/sandbox/cuda/tests/test_tensor_op.py,
# which is not possible if it is decorated by @parameterized.expand
def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._shared):
if ndimage is None or not theano.config.cxx:
raise SkipTest("conv3d2d tests need SciPy and a c++ compiler")
shared = theano.tensor._shared
Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
Nf, Tf, C, Hf, Wf = 32, 5, 3, 5, 5
......@@ -137,8 +125,7 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
border_mode=border_mode)
newconv3d = theano.function([], [],
updates={s_output: out},
mode=mode)
updates={s_output: out})
check_diagonal_subtensor_view_traces(newconv3d)
t0 = time.time()
......@@ -149,7 +136,6 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
check_diagonal_subtensor_view_traces(gnewconv3d)
......@@ -163,7 +149,7 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
signals = numpy.random.rand(Ns, Ts, C, Hs, Ws).astype('float32')
filters = numpy.random.rand(Nf, Tf, C, Hf, Wf).astype('float32')
utt.verify_grad(lambda s, f: conv3d(s, f, border_mode=border_mode),
[signals, filters], eps=1e-1, mode=mode)
[signals, filters], eps=1e-1)
# Additional Test that covers the case of patched implementation for filter with Tf=1
Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
......@@ -186,8 +172,7 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
border_mode=border_mode)
newconv3d = theano.function([], [],
updates={s_output: out},
mode=mode)
updates={s_output: out})
t0 = time.time()
newconv3d()
......@@ -197,7 +182,6 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
t0 = time.time()
......@@ -210,4 +194,4 @@ def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._share
signals = numpy.random.rand(Ns, Ts, C, Hs, Ws).astype('float32')
filters = numpy.random.rand(Nf, Tf, C, Hf, Wf).astype('float32')
utt.verify_grad(lambda s, f: conv3d(s, f, border_mode=border_mode),
[signals, filters], eps=1e-1, mode=mode)
[signals, filters], eps=1e-1)
......@@ -640,7 +640,7 @@ class Subtensor(Op):
strides_mul=None):
"""
The parameters c_prefix are there to allow reusing this
function on PyArray and CudaNdarray object.
function on PyArray and GpuArray object.
This fct take as input the x.
......@@ -1373,7 +1373,7 @@ class IncSubtensor(Op):
# but subclasses may override the helper methods
# to change the particulars, e.g. GpuIncSubtensor
# turns the view/copy operations on numpy arrays
# into the same operations on cuda arrays.
# into the same operations on gpu arrays.
self.do_type_checking(node)
......
......@@ -2,8 +2,6 @@ from __future__ import absolute_import, print_function, division
import copy
import logging
import os
import sys
import time
import unittest
......@@ -14,7 +12,7 @@ from nose.tools import assert_raises, assert_true
import theano
import theano.scalar as scal
from six import PY3, StringIO
from six import StringIO
from theano import compile
from theano.compile import deep_copy_op, DeepCopyOp
from theano.compile import get_mode
......@@ -909,7 +907,10 @@ def test_const_type_in_mul_canonizer():
class test_fusion(unittest.TestCase):
def do(self, mode, shared_fn, shp, gpu=False, nb_repeat=1, assert_len_topo=True, slice=None):
mode = copy.copy(compile.mode.get_default_mode())
_shared = shared
def do(self, mode, shared_fn, shp, nb_repeat=1, assert_len_topo=True, slice=None):
"""
param shared_fn: if None, will use compile.function
verify that the elemwise fusion work
......@@ -1103,14 +1104,9 @@ class test_fusion(unittest.TestCase):
nb_elemwise, answer, out_dtype] in enumerate(cases):
if isinstance(out_dtype, dict):
out_dtype = out_dtype[config.cast_policy]
if (gpu and (out_dtype != 'float32' or
any(i.dtype != 'float32' for i in g.owner.inputs))):
print("Skip test %d as the gpu code currently supports only float32" % id)
continue
print("new cases", id)
if shared_fn is None:
assert gpu is False
f = compile.function(list(sym_inputs), g, mode=mode)
for x in xrange(nb_repeat):
out = f(*val_inputs)
......@@ -1139,17 +1135,7 @@ class test_fusion(unittest.TestCase):
print(out)
print(answer * nb_repeat)
topo = f.maker.fgraph.toposort()
if gpu:
import theano.sandbox.cuda as cuda
topo_ = [x for x in topo if not isinstance(
x.op, (cuda.basic_ops.GpuFromHost, cuda.basic_ops.HostFromGpu))]
gpu_ = [x for x in topo
if isinstance(x.op, cuda.basic_ops.GpuFromHost)]
if not len(gpu_) == len(sym_inputs):
fail2.append((id, gpu_, sym_inputs))
else:
topo_ = topo
topo_ = topo
if assert_len_topo:
if not len(topo_) == nb_elemwise:
fail3.append((id, topo_, nb_elemwise))
......@@ -1177,62 +1163,24 @@ class test_fusion(unittest.TestCase):
def test_elemwise_fusion(self):
shp = (5, 5)
mode = copy.copy(compile.mode.get_default_mode())
mode = copy.copy(self.mode)
# we need the optimisation enabled and the canonicalize.
# the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp)
self.do(mode, self._shared, shp)
@attr('slow')
def test_elemwise_fusion_4d(self):
shp = (3, 3, 3, 3)
mode = copy.copy(compile.mode.get_default_mode())
mode = copy.copy(self.mode)
# we need the optimisation enabled and the canonicalize.
# the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp)
def test_gpu_fusion(self):
shp = (5, 5)
# we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else:
mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
self.do(mode, cuda.float32_shared_constructor, shp, gpu=True)
@attr('slow')
def test_gpu_fusion_Xd(self):
# we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else:
mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
sizes = cuda.opt.get_device_type_sizes()
if sizes['int_size'] == 4:
shp = (5, 5, 5, 5)
else:
shp = (5, 5, 5)
self.do(mode, cuda.float32_shared_constructor, shp, gpu=True)
self.do(mode, self._shared, shp)
def test_fusion_35inputs(self):
# Make sure a fused graph with more than 35 inputs does not segfault
......@@ -1244,7 +1192,7 @@ class test_fusion(unittest.TestCase):
for idx in xrange(1, 35):
out = tensor.sin(inpts[idx] + out)
f = function(inpts, out)
f = function(inpts, out, mode=self.mode)
# Test it on some dummy values
f(*[list(range(i, 4 + i)) for i in xrange(35)])
......@@ -1280,7 +1228,7 @@ class test_fusion(unittest.TestCase):
dlogp = function(vars, [theano.grad(logp, v) for v in vars])
dlogp(2, np.random.rand(n))
def speed_fusion(self, shared_fn=shared, gpu=False, s=None):
def speed_fusion(self, s=None):
"""
param type s: a slice object
param s: a slice to apply to the case to execute. If None, exec all case.
......@@ -1292,18 +1240,18 @@ class test_fusion(unittest.TestCase):
# linker=gof.CLinker
# linker=gof.OpWiseCLinker
mode1 = copy.copy(compile.get_default_mode())
mode1 = copy.copy(self.mode)
mode1._optimizer = mode1._optimizer.including('local_elemwise_fusion')
# TODO:clinker is much faster... but use to much memory
# Possible cause: as there is no deletion of intermediate values when we don't keep the fct.
# More plausible cause: we keep a link to the output data?
# Follow up. Clinker do the same... second cause?
mode2 = copy.copy(compile.get_default_mode())
mode2 = copy.copy(self.mode)
mode2._optimizer = mode2._optimizer.excluding('local_elemwise_fusion')
print("test with linker", str(mode1.linker))
times1 = self.do(mode1, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat,
times1 = self.do(mode1, self._shared, shp, nb_repeat=nb_repeat,
assert_len_topo=False, slice=s)
times2 = self.do(mode2, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat,
times2 = self.do(mode2, self._shared, shp, nb_repeat=nb_repeat,
assert_len_topo=False, slice=s)
print("times1 with local_elemwise_fusion")
print(times1, times1.min(), times1.max(), times1.sum())
......@@ -1317,7 +1265,7 @@ class test_fusion(unittest.TestCase):
"mean", d.mean(), "std", d.std())
def test_fusion_inplace(self):
mode = copy.copy(compile.mode.get_default_mode())
mode = copy.copy(self.mode)
# we need the optimisation enabled and the canonicalize.
# the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
......@@ -1332,14 +1280,9 @@ class test_fusion(unittest.TestCase):
f(np.random.random((5, 5)), np.random.random((5, 5)),
np.random.random((5, 5)))
def speed_fusion_gpu(self):
import theano.sandbox.cuda as cuda
self.speed_fusion(shared_fn=cuda.float32_shared_constructor,
gpu=True, s=slice(0, 15))
def speed_log_exp(self):
s = slice(31, 36)
print("time", self.do(None, shared, shp=(1000, 1000), gpu=False,
print("time", self.do(self.mode, self._shared, shp=(1000, 1000),
assert_len_topo=False, slice=s, nb_repeat=100))
def tes_memory_leak(self, mode=compile.mode.Mode('c', 'merge'),
......@@ -1505,27 +1448,6 @@ class TestCompositeCodegen(unittest.TestCase):
fval = f([1, 2, 3])
assert np.all(fval == [6, 12, 18])
def test_nested_gpu(self):
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
import theano.sandbox.cuda.opt
y = self.times_2(self.x)
z = self.times_3(y)
f = theano.function(
[self.x], cuda.gpu_from_host(z),
mode=theano.compile.mode.get_default_mode().including('gpu'))
topo = f.maker.fgraph.toposort()
if config.mode != "FAST_COMPILE":
assert len(topo) == 2
assert topo[1].op == cuda.gpu_from_host
# topo1 is doing the composite work on the CPU. Auto-generation of
# GPU code for ops with support code is not possible.
fval = np.asarray(f([1, 2, 3]))
assert np.all(fval == [6, 12, 18]), fval
def test_local_useless_composite(self):
x = theano.scalar.float32()
c = theano.scalar.Composite([x], [x + 1, x - 1])
......@@ -4213,31 +4135,6 @@ class test_shapeoptimizer(unittest.TestCase):
f = theano.function([X], expr, mode=mode)
print(f([[1, 2], [2, 3]]))
def test_no_cycle(self):
# Optimizing this graph resulted in a cycle, see gh-1549
# This test depends on cuda
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
if sys.version_info[:2] < (2, 5):
raise SkipTest("Test skipped due to a too old python")
# This pickle file has undergone manual surgery due to changes
# in scan and may or may not run correctly. It does passes
# the test below.
pkl_filename = os.path.join(os.path.dirname(theano.__file__),
'tensor', 'tests', 'shape_opt_cycle.pkl')
# Due to incompatibilities between python 2 and 3 in the format
# of pickled numpy ndarray, we have to force an encoding
from theano.misc.pkl_utils import CompatUnpickler
with open(pkl_filename, "rb") as pkl_file:
if PY3:
u = CompatUnpickler(pkl_file, encoding="latin1")
else:
u = CompatUnpickler(pkl_file)
fn_args = u.load()
theano.function(**fn_args)
class test_assert(utt.InferShapeTester):
......
......@@ -25,7 +25,6 @@ def makeSharedTester(shared_constructor_,
theano_fct_,
ref_fct_,
cast_value_=np.asarray,
op_by_matrix_=False,
name=None,
):
"""
......@@ -49,7 +48,6 @@ def makeSharedTester(shared_constructor_,
:param theano_fct_: A theano op that will be used to do some computation on the shared variable
:param ref_fct_: A reference function that should return the same value as the theano_fct_
:param cast_value_: A callable that cast an ndarray into the internal shared variable representation
:param op_by_matrix_: When we do inplace operation on the an internal type object, should we do it with a scalar or a matrix of the same value.
:param name: This string is used to set the returned class' __name__
attribute. This is needed for nosetests to properly tag the
test with its correct name, rather than use the generic
......@@ -75,7 +73,6 @@ def makeSharedTester(shared_constructor_,
set_cast_value_inplace = set_cast_value_inplace_
shared_constructor_accept_ndarray = shared_constructor_accept_ndarray_
cast_value = staticmethod(cast_value_)
op_by_matrix = op_by_matrix_
def test_shared_dont_alias(self):
dtype = self.dtype
......@@ -96,11 +93,7 @@ def makeSharedTester(shared_constructor_,
assert np.allclose(self.ref_fct(x), total_val)
values_to_div = .5
if self.op_by_matrix:
values_to_div = self.internal_type(np.ones(x.shape, dtype=dtype)/2) # supported for cudandarray, but not ndarray.
assert self.test_internal_type(values_to_div)
x /= values_to_div
x /= .5
total_val_2 = total_func()
# value used to construct should not alias with internal
......@@ -108,7 +101,7 @@ def makeSharedTester(shared_constructor_,
x = x_shared.get_value(borrow=False)
x /= values_to_div
x /= .5
total_val_3 = total_func()
......@@ -117,7 +110,7 @@ def makeSharedTester(shared_constructor_,
# in this case we can alias
x = x_shared.get_value(borrow=True)
x /= values_to_div
x /= .5
# this is not required by the contract but it is a feature we've
# implemented for some type of SharedVariable.
......@@ -189,12 +182,7 @@ def makeSharedTester(shared_constructor_,
x = x_shared.get_value(borrow=True, return_internal_type=True)
assert self.test_internal_type(x)
values_to_div = .5
if self.op_by_matrix:
# supported for cudandarray, but not ndarray.
values_to_div = self.internal_type(
np.ones(x.shape, dtype=dtype)/2)
x /= values_to_div # supported by ndarray and CudaNdarray
x /= .5
# this is not required by the contract but it is a feature we can
# implement for some type of SharedVariable.
......@@ -203,7 +191,7 @@ def makeSharedTester(shared_constructor_,
x = x_shared.get_value(borrow=False, return_internal_type=True)
assert self.test_internal_type(x)
assert x is not x_shared.container.value
x /= values_to_div # supported by ndarray and CudaNdarray
x /= .5
# this is required by the contract
assert not np.allclose(self.ref_fct(x), total_func())
......@@ -244,16 +232,10 @@ def makeSharedTester(shared_constructor_,
total_func = theano.function([], total)
total_func()
values_to_div = .5
if self.op_by_matrix:
# supported for cudandarray, but not ndarray.
values_to_div = self.internal_type(np.ones(x.shape, dtype=dtype)/2)
assert self.test_internal_type(values_to_div)
# test if that theano shared variable optimize set_value(borrow=True)
get_x = x_shared.get_value(borrow=True)
assert get_x is not x_orig # borrow=False to shared_constructor
get_x /= values_to_div
get_x /= .5
x_shared.set_value(get_x, borrow=True)
x = x_shared.get_value(borrow=True)
if self.set_value_borrow_true_alias:
......@@ -267,7 +249,7 @@ def makeSharedTester(shared_constructor_,
assert get_x is not x_orig # borrow=False to shared_constructor
assert self.test_internal_type(get_x)
get_x /= values_to_div # supported by ndarray and CudaNdarray
get_x /= .5
assert self.test_internal_type(get_x)
x_shared.set_value(get_x, borrow=True)
x = x_shared.get_value(borrow=True, return_internal_type=True)
......@@ -295,12 +277,7 @@ def makeSharedTester(shared_constructor_,
assert np.allclose(self.ref_fct(x), total_val)
values_to_div = .5
if self.op_by_matrix:
# supported for cudandarray, but not ndarray.
values_to_div = self.internal_type(np.ones(x.shape, dtype=dtype)/2)
assert self.test_internal_type(values_to_div)
x /= values_to_div
x /= .5
# not required by the contract but it is a feature we've implemented
if self.shared_borrow_true_alias:
......@@ -345,9 +322,9 @@ def makeSharedTester(shared_constructor_,
if x.__class__.__name__ != 'csr_matrix':
# sparse matrix don't support inplace affectation
nd += 1
# THIS DON't DO WHAT WE EXPECT the contain of a is not updated for CudaNdarray, but it is for ndarray
# THIS DOESN'T DO WHAT WE EXPECT: the content of a is
# not updated for GpuArray, but it is for ndarray
x_shared.get_value(borrow=True)[:] = nd
#assert (np.asarray(x_shared.get_value(borrow=True))!=nd).all()
assert may_share_memory(old_data, x_shared.container.storage[0])
x_shared.get_value(borrow=True)
......@@ -617,7 +594,6 @@ test_shared_options = makeSharedTester(
theano_fct_=lambda a: a*2,
ref_fct_=lambda a: np.asarray((a*2)),
cast_value_=np.asarray,
op_by_matrix_=False,
name='test_shared_options')
......
......@@ -203,10 +203,9 @@ class TensorType(Type):
"""
Convert a symbolic Variable into a TensorType, if compatible.
For the moment, only a TensorType, GpuArrayType and
CudaNdarrayType will be
converted, provided they have the same number of dimensions and
dtype and have "compatible" broadcastable pattern.
For the moment, only a TensorType and GpuArrayType will be
converted, provided they have the same number of dimensions
and dtype and have "compatible" broadcastable pattern.
"""
if hasattr(other, '_as_TensorVariable'):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论