提交 a24fd9bb authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #4570 from Sentient07/new_graph2gpu

New graph2gpu
......@@ -402,6 +402,14 @@ class Shape_i(gof.Op):
def infer_shape(self, node, input_shapes):
    """Return the symbolic shape of the single output.

    The answer does not depend on ``node`` or ``input_shapes``: the one
    output always has an empty shape tuple (zero dimensions).
    """
    output_shape = ()
    return [output_shape]
def connection_pattern(self, node):
    """Declare the output as not connected to the input.

    Always returns ``[[False]]``, whatever ``node`` is.
    """
    # The gradient is taken with respect to the elements of a tensor
    # variable.  Those elements play no role in computing the shape,
    # so they are not really part of the graph.
    disconnected = [False]
    return [disconnected]
def grad(self, inp, grads):
return [theano.gradient.grad_not_implemented(
op=self, x_pos=0, x=inp[0],
......@@ -455,6 +463,14 @@ def shape_i(var, i, fgraph=None):
return var.shape[i]
def shape_i_op(i):
    """Return the ``Shape_i`` op for index ``i``, building it at most once.

    Instances are memoised in ``shape_i_op.cache`` (keyed by ``i``), so
    repeated calls with the same index hand back the same op object.
    """
    known_ops = shape_i_op.cache
    if i in known_ops:
        return known_ops[i]
    op = known_ops[i] = Shape_i(i)
    return op


# Memo table for shape_i_op: index -> Shape_i instance.
shape_i_op.cache = {}
def register_shape_i_c_code(typ, code, check_input, version=()):
"""
Tell Shape_i how to generate C code for a Theano Type.
......
......@@ -54,7 +54,7 @@ def _atexit_print_fn():
destination_file = open(config.profiling.destination, 'w')
for ps in _atexit_print_list:
if ps.fct_callcount or ps.compile_time > 0:
if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
......
......@@ -2413,7 +2413,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for (t, count, n_created, o) in count_opt[::-1]:
print(blanc, ' %.3fs - %d - %d - %s' % (
t, count, n_created, o), file=stream)
print(blanc, ' %.3fs - in %d optimization that where not used (display only those with a runtime > 0)' % (
print(blanc, ' %.3fs - in %d optimization that were not used (display only those with a runtime > 0)' % (
not_used_time, len(not_used)), file=stream)
not_used.sort(key=lambda nu: (nu[0], str(nu[1])))
for (t, o) in not_used[::-1]:
......
......@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x)
return gpu_from_host(context_name)(x)
# Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'):
......@@ -544,7 +544,7 @@ class HostFromGpu(Op):
def grad(self, inputs, grads):
gz, = grads
return [GpuFromHost(inputs[0].type.context_name)(gz)]
return [gpu_from_host(inputs[0].type.context_name)(gz)]
def R_op(self, inputs, eval_points):
ev, = eval_points
......@@ -647,6 +647,14 @@ class GpuFromHost(Op):
return (9,)
# Caching GpuFromHost
def gpu_from_host(ctx):
    """Return the ``GpuFromHost`` op for context ``ctx``, building it once.

    Instances are memoised in ``gpu_from_host.cache`` (keyed by ``ctx``),
    so repeated calls with the same context return the same op object.

    Parameters
    ----------
    ctx
        Context name forwarded to ``GpuFromHost``; must be hashable since
        it is used as the cache key.
    """
    # The lookup has to consult this function's own cache.  Testing
    # ``gpu_alloc.cache`` (keyed by ``(ctx, memset_0)`` tuples) would not
    # find a bare ``ctx`` and the op would be rebuilt on every call.
    if ctx not in gpu_from_host.cache:
        gpu_from_host.cache[ctx] = GpuFromHost(ctx)
    return gpu_from_host.cache[ctx]


# Memo table for gpu_from_host: context name -> GpuFromHost instance.
gpu_from_host.cache = {}
class GpuToGpu(Op):
"""
Transfer data between GPUs.
......@@ -870,6 +878,15 @@ class GpuAlloc(HideC, Alloc):
return True
# Caching GpuAlloc
def gpu_alloc(ctx, memset_0=False):
    """Return the ``GpuAlloc`` op for ``(ctx, memset_0)``, building it once.

    Instances are memoised in ``gpu_alloc.cache``; calls with the same
    pair of arguments return the same op object.
    """
    memo = gpu_alloc.cache
    signature = (ctx, memset_0)
    if signature in memo:
        return memo[signature]
    op = memo[signature] = GpuAlloc(ctx, memset_0)
    return op


# Memo table for gpu_alloc: (ctx, memset_0) -> GpuAlloc instance.
gpu_alloc.cache = {}
class GpuAllocEmpty(HideC, Alloc):
"""
Allocate uninitialized memory on the GPU.
......@@ -956,6 +973,14 @@ def empty_like(var):
return GpuAllocEmpty(var.type.dtype, var.type.context_name)(*var.shape)
def gpu_alloc_empty(ctx, dtype):
    """Return the ``GpuAllocEmpty`` op for ``dtype`` on ``ctx``, built once.

    Note the argument order: this helper takes ``(ctx, dtype)`` while the
    op constructor is called as ``GpuAllocEmpty(dtype, ctx)``.  Instances
    are memoised in ``gpu_alloc_empty.cache`` under the key
    ``(dtype, ctx)``.
    """
    memo = gpu_alloc_empty.cache
    signature = (dtype, ctx)
    if signature in memo:
        return memo[signature]
    op = memo[signature] = GpuAllocEmpty(dtype, ctx)
    return op


# Memo table for gpu_alloc_empty: (dtype, ctx) -> GpuAllocEmpty instance.
gpu_alloc_empty.cache = {}
class GpuContiguous(Op):
"""
Return a C contiguous version of the input.
......@@ -1031,6 +1056,7 @@ class GpuReshape(HideC, tensor.Reshape):
def make_node(self, x, shp):
ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name=ctx_name)
shp = tensor.as_tensor_variable(shp)
res = host_from_gpu(x).reshape(shp, ndim=self.ndim)
otype = GpuArrayType(dtype=res.dtype,
broadcastable=res.broadcastable,
......
差异被折叠。
......@@ -2587,6 +2587,18 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
return kernels
# Caching GpuCAReduceCuda
def gpu_ca_reduce_cuda(scalar_op, axis=None, reduce_mask=None, dtype=None, acc_dtype=None,
                       pre_scalar_op=None):
    """Return the ``GpuCAReduceCuda`` op for these arguments, built once.

    The six arguments, in signature order, form the cache key and are
    passed positionally to ``GpuCAReduceCuda``.  Because the key is used
    in a dict, every argument must be hashable (a list passed as ``axis``
    or ``reduce_mask`` raises ``TypeError``).
    """
    params = (scalar_op, axis, reduce_mask, dtype, acc_dtype,
              pre_scalar_op)
    memo = gpu_ca_reduce_cuda.cache
    if params in memo:
        return memo[params]
    op = memo[params] = GpuCAReduceCuda(scalar_op, axis, reduce_mask, dtype,
                                        acc_dtype, pre_scalar_op)
    return op


# Memo table for gpu_ca_reduce_cuda: argument tuple -> GpuCAReduceCuda instance.
gpu_ca_reduce_cuda.cache = {}
class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
"""
CAReduce that reuse the python code from gpuarray.
......
......@@ -2,15 +2,14 @@ from __future__ import absolute_import, print_function, division
import os
from theano import Apply, Op
from theano.tensor.extra_ops import CumsumOp
from .basic_ops import infer_context_name
try:
from pygpu import gpuarray
except ImportError:
pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name, GpuFromHost)
from .opt import register_opt as register_gpu_opt, op_lifter
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape)
from .opt import register_opt, op_lifter, register_opt2
class GpuCumsum(GpuKernelBase, Op):
......@@ -40,7 +39,10 @@ class GpuCumsum(GpuKernelBase, Op):
def make_node(self, x):
assert x.type.dtype == 'float32', "Only float32 supported for GpuCumSum"
x = as_gpuarray_variable(x, infer_context_name(x))
context_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name)
if x.ndim > GpuCumsum.SUPPORTED_NDIMS:
raise NotImplementedError('Only cumsum on 1D, 2D and\
......@@ -451,24 +453,23 @@ class GpuCumsum(GpuKernelBase, Op):
return super(GpuCumsum, self).c_support_code_struct(node, nodename) + code
@register_opt('fast_compile')
@op_lifter([CumsumOp])
def use_gpu_cumsumop(node, ctx_name):
if node.inputs[0].dtype == 'float32':
axis = node.op.axis
x = node.inputs[0]
@register_opt2([CumsumOp], 'fast_compile')
def local_gpua_cumsumop(op, ctx_name, inputs, outputs):
if inputs[0].dtype == 'float32':
axis = op.axis
x = inputs[0]
if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
return None
if axis is None and x.ndim > 1:
x = x.flatten()
x = as_gpuarray_variable(x, ctx_name)
x = GpuFromHost(ctx_name)(x)
if axis is None and x.ndim > 1:
x = GpuReshape(1)(x, (-1,))
# ``gpu_cumsum`` assume array has been flattened if needed.
if axis is None:
axis = 0
return GpuCumsum(axis)(x)
register_gpu_opt()(use_gpu_cumsumop)
......@@ -9,7 +9,7 @@ from theano.gradient import DisconnectedType
from theano.gpuarray import (basic_ops, GpuArrayType)
import theano.tensor.fft
from .opt import register_opt, op_lifter
from .opt import register_opt, op_lifter, register_opt2
try:
import pygpu
......@@ -373,10 +373,12 @@ def _unitary(norm):
if scikits_cuda_available:
@register_opt('fast_compile')
@op_lifter([theano.tensor.fft.RFFTOp])
def local_curfft_op(node, context_name):
@register_opt2([theano.tensor.fft.RFFTOp], 'fast_compile')
def local_gpua_curfft_op(op, ctx_name, inputs, outputs):
return curfft_op
@register_opt('fast_compile')
@op_lifter([theano.tensor.fft.IRFFTOp])
def local_cuirfft_op(node, context_name):
@register_opt2([theano.tensor.fft.IRFFTOp], 'fast_compile')
def local_gpua_cuirfft_op(op, ctx_name, inputs, outputs):
return cuirfft_op
......@@ -14,7 +14,7 @@ from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano import gpuarray
from .basic_ops import as_gpuarray_variable, infer_context_name
from .opt import register_opt, op_lifter
from .opt import register_opt, op_lifter, register_opt2
from .type import GpuArrayType
......@@ -227,23 +227,24 @@ KERNEL void k_multi_warp_multinomial(
return (1,)
@register_opt()
@register_opt('fast_compile')
@op_lifter([theano.sandbox.multinomial.MultinomialFromUniform])
def local_gpua_multinomial(node, context_name):
@register_opt2([theano.sandbox.multinomial.MultinomialFromUniform], 'fast_compile')
def local_gpua_multinomial(op, context_name, inputs, outputs):
# TODO : need description for function
if len(node.inputs) == 2:
p, u = node.inputs
if len(inputs) == 2:
p, u = inputs
n_samples = 1
else:
p, u, n_samples = node.inputs
p, u, n_samples = inputs
try:
if get_scalar_constant_value(n_samples) != 1:
return None
except NotScalarConstantError:
return None
m, = node.outputs
m, = outputs
if (p.dtype == u.dtype == m.dtype == 'float32'):
gpu_op = GPUAMultinomialFromUniform(node.op.odtype)
gpu_op = GPUAMultinomialFromUniform(op.odtype)
return gpuarray.elemwise.GpuDimShuffle([False, False], [1, 0])(
gpu_op(p, u))
......@@ -13,7 +13,7 @@ except ImportError:
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name)
from .opt import register_opt as register_gpu_opt, op_lifter
from .opt import register_opt2, op_lifter, register_opt
from .type import GpuArrayType
......@@ -468,9 +468,9 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
Op.perform(self, node, inp, out, ctx)
@register_opt('fast_compile')
@op_lifter([Images2Neibs])
def use_gpu_images2neibs(node, context_name):
if node.op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
return GpuImages2Neibs(node.op.mode)
register_gpu_opt()(use_gpu_images2neibs)
@register_opt2([Images2Neibs], 'fast_compile')
def local_gpua_images2neibs(op, context_name, inputs, outputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
return GpuImages2Neibs(op.mode)
......@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant
from . import opt
from .basic_ops import (as_gpuarray_variable, GpuAllocEmpty,
infer_context_name)
infer_context_name, gpu_alloc_empty)
from .type import gpu_context_type
from .opt_util import alpha_merge, output_merge
......@@ -147,17 +147,18 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
return '\n'.join(codel)
@opt.register_opt()
@opt.register_opt('fast_compile')
@opt.op_lifter([tensor.Dot])
def local_dot_to_gemm16(node, ctx_name):
@opt.register_opt2([tensor.Dot], 'fast_compile')
def local_gpua_dot_to_gemm16(op, ctx_name, inputs, outputs):
if nerv is None:
return
A = node.inputs[0]
B = node.inputs[1]
A = inputs[0]
B = inputs[1]
if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = node.inputs[0].fgraph
C = GpuAllocEmpty(dtype='float16', context_name=ctx_name)(
fgraph = getattr(outputs[0], 'fgraph', None)
C = gpu_alloc_empty(ctx_name, dtype='float16')(
shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
return Gemm16()(C, 1.0, A, B, 0.0)
......
差异被折叠。
......@@ -8,7 +8,7 @@ from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
......@@ -324,8 +324,7 @@ def inplace_allocempty(op, idx):
if (alloc.owner and
isinstance(alloc.owner.op, GpuAllocEmpty) and
len(alloc.clients) > 1):
alloc_op = GpuAllocEmpty(alloc.owner.op.dtype,
alloc.owner.op.context_name)
alloc_op = gpu_alloc_empty(alloc.owner.op.context_name, dtype=alloc.owner.op.dtype)
inputs[idx] = alloc_op(*alloc.owner.inputs)
return maker(node, inputs)
return opt
......
......@@ -26,9 +26,11 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
if not dnn_available(test_ctx_name):
raise SkipTest(dnn_available.msg)
mode = mode_with_gpu
if fd != (1, 1):
raise SkipTest("Doesn't have CUDNN implementation")
o = self.get_output_shape(i, f, s, b, fd)
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
......
......@@ -396,7 +396,7 @@ def test_gpueye():
k_symb = numpy.asarray(0)
out = T.eye(N_symb, M_symb, k_symb, dtype=dtype)
f = theano.function([N_symb, M_symb],
out,
T.stack(out),
mode=mode_with_gpu)
result = numpy.asarray(f(N, M))
assert numpy.allclose(result, numpy.eye(N, M_, dtype=dtype))
......
......@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0():
ones = numpy.ones((2,), dtype='float32')
# Test with 0 from CPU op.
# Should not be transferred, as the only client is the output
a = tensor.alloc(z, i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (numpy.asarray(f(6)) == 0).all()
# Test with 0 from CPU op.
# Should be transferred, as it is used by another op.
a = tensor.alloc(z, i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc)
assert (numpy.asarray(f(6)) == 0).all()
# Test with 0
......@@ -177,19 +187,30 @@ def test_local_gpualloc_empty():
ii = theano.tensor.iscalar()
# Test with vector
# Should not be moved as the only client is the output
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.AllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3).shape == (3,)
# Test with vector
# Should be moved
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3).shape == (3,)
# Test with matrix
a = tensor.AllocEmpty('float32')(i, ii)
f = theano.function([i, ii], a, mode=mode_with_gpu)
f = theano.function([i, ii], a.cumsum(axis=0), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3, 4).shape == (3, 4)
......@@ -334,7 +355,10 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert any([isinstance(node.op, GpuElemwise) for node in topo])
# Our optimizer isn't smart enough to move the Elemwise to the GPU.
# If it were just a little bit smarter, it could wrongly move it to the GPU.
# If it were super smart, it would know it should not move it to the GPU.
assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
def test_local_gpu_elemwise():
......@@ -427,7 +451,7 @@ def test_local_assert_no_cpu_op():
out = theano.tensor.tanh(ms).dot(ms.T)
mode_local_assert = mode_with_gpu.including("assert_no_cpu_op")
mode_local_assert = mode_local_assert.excluding("local_gpu_elemwise")
mode_local_assert = mode_local_assert.excluding("local_gpua_elemwise")
old = theano.config.assert_no_cpu_op
old2 = theano.config.on_opt_error
......
......@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return data
def filter_variable(self, other, allow_convert=True):
from theano.gpuarray import GpuFromHost
from theano.gpuarray.basic_ops import gpu_from_host
if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name)
......@@ -265,7 +265,7 @@ class GpuArrayType(Type):
str(self.broadcastable)))
other = other2
return GpuFromHost(self.context_name)(other)
return gpu_from_host(self.context_name)(other)
@staticmethod
def values_eq(a, b, force_same_dtype=True):
......
......@@ -24,10 +24,11 @@ from . import multinomial
import theano.sandbox.cuda
from theano.sandbox.cuda import GpuOp
from theano.gpuarray.basic_ops import GpuKernelBase, Kernel
from theano.gpuarray.basic_ops import GpuKernelBase, Kernel, infer_context_name
from theano.gpuarray.type import GpuArrayType
from theano.gpuarray.fp16_help import write_w
from theano.gpuarray.opt import (register_opt as register_gpua,
register_opt2,
host_from_gpu as host_from_gpua)
if theano.sandbox.cuda.cuda_available:
from theano.sandbox.cuda import (CudaNdarrayType,
......@@ -1551,17 +1552,22 @@ class MRG_RandomStreams(object):
return final_samples
@register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg_graph(op, context_name, inputs, outputs):
    """Replace an ``mrg_uniform`` op by ``GPUA_mrg_uniform`` when possible.

    The replacement only happens when ``op`` is exactly an ``mrg_uniform``
    (subclasses are not accepted) and the first input already has a
    ``GpuArrayType``.  Otherwise the function returns ``None``.

    Returns
    -------
    list or None
        ``[new_outs[0], host_from_gpua(new_outs[1])]`` where ``new_outs``
        is the result of ``GPUA_mrg_uniform.new``; ``None`` when the
        conditions above are not met.
    """
    first_input = inputs[0]
    applicable = (type(op) == mrg_uniform and
                  isinstance(first_input.type, GpuArrayType))
    if not applicable:
        return None

    out_type = op.output_type
    new_outs = GPUA_mrg_uniform.new(first_input,
                                    out_type.ndim,
                                    out_type.dtype,
                                    inputs[1])
    # Only the second result is transferred back with host_from_gpua.
    return [new_outs[0], host_from_gpua(new_outs[1])]
@register_gpua('fast_compile')
@local_optimizer([mrg_uniform])
def local_gpua_mrg(node):
# TODO : need description for function
if (type(node.op) == mrg_uniform and
isinstance(node.inputs[0].type, GpuArrayType)):
outs = GPUA_mrg_uniform.new(node.inputs[0],
node.op.output_type.ndim,
node.op.output_type.dtype,
node.inputs[1])
return [outs[0], host_from_gpua(outs[1])]
context_name = infer_context_name(*node.inputs)
return local_gpua_mrg_graph(node.op, context_name, node.inputs, node.outputs)
MRG_RNGs = (mrg_uniform, GPU_mrg_uniform, GPUA_mrg_uniform)
......
......@@ -152,13 +152,15 @@ def traverse(out, x, x_copy, d, visited=None):
return d
visited.add(out)
from theano.sandbox import cuda
from theano import gpuarray
from theano.gpuarray.basic_ops import gpu_from_host, host_from_gpu
from theano.gpuarray import pygpu_activated
from theano.gpuarray.type import GpuArrayType
if out == x:
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.GpuFromHost(x.type.context_name)(x_copy)
assert isinstance(x.type, GpuArrayType)
d[out] = gpu_from_host(x.type.context_name)(x_copy)
return d
elif out.owner is None:
return d
......@@ -167,8 +169,8 @@ def traverse(out, x, x_copy, d, visited=None):
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
elif (gpuarray.pygpu_activated and
out.owner.op == gpuarray.host_from_gpu and
elif (pygpu_activated and
out.owner.op == host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
......
......@@ -630,9 +630,15 @@ def get_scalar_constant_value(orig_v, elemwise=True,
v = v.owner.inputs[0]
continue
elif isinstance(v.owner.op, theano.compile.ops.Shape_i):
if isinstance(v.owner.inputs[0], Constant):
return numpy.asarray(
v.owner.inputs[0].data.shape[v.owner.op.i])
i = v.owner.op.i
inp = v.owner.inputs[0]
if isinstance(inp, Constant):
return numpy.asarray(inp.data.shape[i])
# The shape of a broadcastable dimension is 1
if (hasattr(inp.type, 'broadcastable') and
inp.type.broadcastable[i]):
return numpy.asarray(1)
# Don't act as the constant_folding optimization here as this
# fct is used too early in the optimization phase. This would
# mess with the stabilization optimization and be too slow.
......@@ -2690,15 +2696,18 @@ class Alloc(gof.Op):
sh = [as_tensor_variable(s) for s in shape]
bcast = []
for i, s in enumerate(sh):
if config.exception_verbosity == 'high':
s_as_str = '\n' + min_informative_str(s)
else:
s_as_str = str(s)
def err_str():
if config.exception_verbosity == 'high':
return '\n' + min_informative_str(s)
else:
return str(s)
if s.type.dtype[:3] not in ('int', 'uin'):
s_as_str = err_str()
raise TypeError('Shape arguments to Alloc must be integers, '
'but argument %s is not for apply node: %s' %
(i, s_as_str))
if s.ndim != 0:
s_as_str = err_str()
raise TypeError(
"Each shape dimension to Alloc must be a scalar, ",
'but dimension %s have %d dimensions for apply node: %s' %
......
......@@ -66,8 +66,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
"""
bsize, imshp = image_shape[0], image_shape[2:]
nkern, kshp = kernel_shape[0], kernel_shape[2:]
if filter_dilation is None:
filter_dilation = numpy.ones(len(subsample), dtype='int')
if isinstance(border_mode, tuple):
out_shp = tuple(get_conv_shape_1axis(
imshp[i], kshp[i], border_mode[i],
......@@ -121,7 +123,16 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
pad = border_mode
if pad < 0:
raise ValueError("border_mode must be >= 0")
out_shp = (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1
# In case of symbolic shape, we want to build the smallest graph
# (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1
if pad == 0:
out_shp = (image_shape - dil_kernel_shape)
else:
out_shp = (image_shape + 2 * pad - dil_kernel_shape)
if subsample != 1:
out_shp = out_shp // subsample
out_shp = out_shp + 1
return out_shp
......
......@@ -7003,6 +7003,9 @@ class T_get_scalar_constant_value(unittest.TestCase):
assert get_scalar_constant_value(s) == 3
s = opt.Shape_i(1)(c)
assert get_scalar_constant_value(s) == 4
d = theano.shared(numpy.random.randn(1,1), broadcastable=(True, True))
f = theano.tensor.basic.ScalarFromTensor()(opt.Shape_i(0)(d))
assert get_scalar_constant_value(f) == 1
def test_elemwise(self):
# We test only for a few elemwise, the list of all supported
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论