提交 a536464a authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4323 from abergeron/gpua_newelem

Use the new GpuElemwise from libgpuarray
......@@ -42,7 +42,7 @@ register_transfer(transfer)
def init_dev(dev, name=None):
v = pygpu.gpuarray.api_version()
if v[0] != -10000:
if v[0] != -9999:
raise RuntimeError("Wrong major API version for gpuarray:", v[0],
"Make sure Theano and libgpuarray/pygpu "
"are in sync.")
......
from __future__ import absolute_import, print_function, division
import copy
from theano.compat import izip
import numpy
import theano
from theano import Apply, scalar, config
from theano import scalar as scal
from theano import Apply, scalar, config, Op
from six.moves import StringIO, xrange
from theano.gof.utils import MethodNotDefined
from theano.scalar import Scalar
......@@ -14,41 +12,20 @@ from theano.tensor.elemwise import (Elemwise, DimShuffle, CAReduceDtype)
try:
import pygpu
from pygpu import gpuarray
from pygpu.tools import ScalarArg, ArrayArg
from pygpu.elemwise import ElemwiseKernel
from pygpu.tools import ArrayArg
from pygpu.reduction import ReductionKernel
from pygpu.gpuarray import dtype_to_typecode, dtype_to_ctype
from pygpu.gpuarray import dtype_to_typecode
except ImportError:
pass
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
infer_context_name)
from .type import GpuArrayType
from .type import GpuArrayType, gpu_context_type
from .fp16_help import load_w, write_w
def _is_scalar(v):
False
def make_argument(v, name):
if _is_scalar(v):
return ScalarArg(numpy.dtype(v.type.dtype), name)
else:
return ArrayArg(numpy.dtype(v.type.dtype), name)
def ensure_allocated(storage, shape, dtype, ctx):
odat = storage[0]
if odat is not None:
if odat.shape != shape:
# It is unsafe to try to resize odat,
# we have to allocate output storage.
odat = None
if odat is None:
odat = pygpu.empty(shape, dtype=dtype, context=ctx)
storage[0] = odat
return odat
return ArrayArg(numpy.dtype(v.type.dtype), name)
def as_C_string_const(s):
......@@ -56,11 +33,12 @@ def as_C_string_const(s):
for l in s.split('\n'))
class GpuElemwise(GpuKernelBase, HideC, Elemwise):
class GpuElemwise(HideC, Elemwise):
"""
Elemwise on the GPU.
"""
params_type = gpu_context_type
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True
......@@ -109,20 +87,21 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
def get_params(self, node):
return node.inputs[0].type.context
def generate_kernel(self, node, nodename):
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)]
scal_v_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
def _get_vnames(self, node):
inps = ['i%d' % (n,) for n, _ in enumerate(node.inputs)]
outs = ['o%d' % (n,) for n, _ in enumerate(node.outputs) if n not in self.inplace_pattern]
return inps, outs
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if n not in self.inplace_pattern]
def _generate_op_string(self, node):
scal_v_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
scal_v_outs = [scalar.get_scalar_type(o.dtype) for o in node.outputs]
inps, outs = self._get_vnames(node)
fake_node = Apply(self.scalar_op, [i() for i in scal_v_ins],
[o() for o in scal_v_outs])
scal_in = [i.name + '[i]' if i.dtype != 'float16' else
'__half2float(' + i.name + '[i])' for i in inps]
scal_in = [i if si.dtype != 'float16' else
'load_half(&' + i + ')' for i, si in zip(inps, scal_v_ins)]
scal_out = []
oi = 0
......@@ -133,13 +112,13 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
else:
arg = outs[oi]
oi += 1
if arg.dtype == 'float16':
if node.outputs[n].dtype == 'float16':
scal_f16.append(('tmpf16%i' % (len(scal_f16),), arg))
scal_out.append(scal_f16[-1][0])
else:
scal_out.append(arg.name + '[i]')
scal_out.append(arg)
kop = self.scalar_op.c_code(fake_node, nodename + '_scalar',
kop = self.scalar_op.c_code(fake_node, 'elem_scalar',
scal_in, scal_out,
dict(fail='return;'))
......@@ -154,7 +133,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
# variables in the middle are float32
code.append(kop.replace('npy_float16', 'ga_float'))
for f in scal_f16:
code.append('%s[i] = __float2half_rn(%s);' % (f[1].name, f[0]))
code.append('store_half(&%s, %s);' % (f[1], f[0]))
code.append('}')
kop = '\n'.join(code)
......@@ -178,76 +157,74 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
("npy_float64", "ga_double"),
]:
kop = kop.replace(npy, ga)
return ElemwiseKernel(self.get_params(node), inps + outs, kop,
preamble=support_code)
return support_code, kop
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_support_code(self):
return self.scalar_op.c_support_code()
def _gpu_kernel_code(self, node, nodename):
# This is useless by itself, but will serve an eventual c_code
# implementation
k = self.generate_kernel(node, nodename)
nd = node.inputs[0].type.ndim
res = []
for i in range(0, nd + 1):
res.append(k.render_basic(i, name="elem_" + str(i)) + ';')
res.append(k.contig_src + ';')
return ['<numpy_compat.h>', '<gpuarray/types.h>',
'<gpuarray/elemwise.h>']
def c_support_code_struct(self, node, name):
return "\nGpuElemwise *ge;\n"
def c_init_code_struct(self, node, name, sub):
inps, outs = self._get_vnames(node)
nargs = len(inps) + len(outs)
support_code, kop = self._generate_op_string(node)
res = """
gpuelemwise_arg args[%(nargs)s] = {{0}};
""" % dict(nargs=nargs)
for n, (i, name) in enumerate(zip(node.inputs, inps)):
res += """
args[%(n)s].name = %(name)s;
args[%(n)s].typecode = %(typecode)s;
args[%(n)s].flags = GE_READ;
""" % dict(n=n, name='"%s"' % (name,),
typecode=i.type.typecode)
p = 0
for n, o in enumerate(node.outputs):
if n in self.inplace_pattern:
assert(len(node.outputs) == 1)
res += "\nargs[%(n)s].flags |= GE_WRITE;\n" % dict(n=self.inplace_pattern[n])
else:
nn = len(inps) + p
name = outs[p]
p += 1
res += """
args[%(n)s].name = %(name)s;
args[%(n)s].typecode = %(typecode)s;
args[%(n)s].flags = GE_WRITE;
""" % dict(n=nn, name='"%s"' % (name,),
typecode=o.type.typecode)
res += """
ge = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, %(support)s, %(kop)s, %(nargs)s, args, %(nd)s, 0);
if (ge == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize elemwise support");
%(fail)s
}
""" % dict(nargs=nargs, ctx=sub['params'], fail=sub['fail'],
support=as_C_string_const(support_code),
kop=as_C_string_const(kop), nd=node.inputs[0].ndim)
return '\n'.join(res)
def gpu_kernels(self, node, nodename):
src = self._gpu_kernel_code(node, nodename)
nd = node.outputs[0].ndim
params = ['uintp']
params.extend('uintp' for _ in range(nd))
num_inputs = len(node.inputs)
num_outputs = len(node.outputs)
for n in range(num_inputs + num_outputs):
if (n - len(node.inputs)) in self.inplace_pattern:
continue
params.extend([gpuarray.GpuArray, 'uintp'])
params.extend('intp' for _ in range(nd))
acc_dtype = getattr(self, 'acc_dtype', None)
if acc_dtype is None:
acc_dtype = node.outputs[0].type.dtype
return [Kernel(code=src, name="elem_%d" % nd, params=params,
flags=Kernel.get_flags(node.inputs[0].type.dtype,
acc_dtype,
node.outputs[0].type.dtype),
objvar='elem_%d_%s' % (nd, nodename))]
return res
def c_code(self, node, name, inputs, outputs, sub):
if node.inputs[0].type.context.kind != 'cuda':
raise MethodNotDefined('cuda only')
nd = node.outputs[0].ndim
fail = sub["fail"]
initial_dims = ','.join('1' for i in xrange(nd))
opname = str(self.scalar_op)
ctx = sub['params']
nargs = len(node.inputs) + len(node.outputs) - len(self.inplace_pattern)
# check that all inputs have valid dimensions
emitted_inames = {}
num_kernel_params = 1 + nd + len(inputs + outputs) * (2 + nd)
code = """
size_t n_blocks = 0;
size_t threads_per_block = 0;
size_t numEls = 0;
const ssize_t zero = 0;
void *kernel_params[%(num_kernel_params)d] = {0};
int err;
// +1 is so that MSVC is happy when nd == 0
size_t dims[%(nd)s+1] = {%(initial_dims)s};
void *rargs[%(nargs)s] = {0};
""" % locals()
if nd > 0:
code += """
size_t dims[%(nd)s] = {%(initial_dims)s};
""" % locals()
else:
code += """
size_t *dims = NULL;
"""
for idx, iname in enumerate(inputs):
if iname in emitted_inames:
assert emitted_inames[iname] is node.inputs[idx]
......@@ -256,19 +233,15 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
broadcasts = map(int, node.inputs[idx].broadcastable)
broadcasts = ', '.join(map(str, broadcasts))
nd = node.inputs[idx].ndim
if nd > 0:
code += """
int broadcasts_%(iname)s[%(nd)s] = {%(broadcasts)s};
""" % locals()
else:
code += """
int *broadcasts_%(iname)s = NULL;
""" % locals()
code += """
int broadcasts_%(iname)s[%(nd)s+1] = {%(broadcasts)s};
""" % locals()
emitted_inames[iname] = node.inputs[idx]
# check that all inputs have valid dimensions
emitted_inames = {}
for idx, iname in enumerate(inputs):
code += "rargs[%(idx)s] = &%(iname)s->ga;\n" % dict(idx=idx, iname=iname)
if iname in emitted_inames:
continue
code += """
......@@ -300,6 +273,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
""" % locals()
emitted_inames[iname] = True
# check that all outputs have valid dimensions
p = len(node.inputs)
for idx, oname in enumerate(outputs):
typecode = dtype_to_typecode(node.outputs[idx].dtype)
if idx not in self.inplace_pattern.keys():
......@@ -325,7 +299,9 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
%(fail)s
}
}
""" % locals()
rargs[%(p)s] = &%(oname)s->ga;
""" % locals()
p += 1
else:
input_idx = self.inplace_pattern[idx]
iname = inputs[input_idx]
......@@ -351,92 +327,35 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
}
}
""" % locals()
z = outputs[0]
code += """numEls = PyGpuArray_SIZE(%(z)s);
//first use at least a full warp
threads_per_block = std::min(numEls, (size_t)32); //WARP SIZE
//next start adding multiprocessors
// UP TO NUMBER OF MULTIPROCESSORS, use 30 for now.
n_blocks = std::min(numEls/threads_per_block +
(numEls %% threads_per_block?1:0),
(size_t)30);
// next start adding more warps per multiprocessor
if (threads_per_block * n_blocks < numEls)
threads_per_block = std::min(numEls/n_blocks, (size_t) 256);
""" % locals()
kname = 'elem_%d_%s' % (nd, name)
param = ["(void *)&numEls"]
for i in range(nd):
param.append("(void *)&%(z)s->ga.dimensions[%(i)d]" % dict(z=outputs[0],
i=i))
for n, (name, var) in enumerate(zip(inputs + outputs,
node.inputs + node.outputs)):
if (n - len(inputs)) in self.inplace_pattern:
continue
dtype = dtype_to_ctype(var.dtype)
param.append("(void *)%(name)s->ga.data" % locals())
param.append("(void *)&%(name)s->ga.offset" % locals())
for i in range(nd):
param.append("PyGpuArray_DIMS(%(name)s)[%(i)d] == 1 ? (void *)&zero: (void *)&PyGpuArray_STRIDES(%(name)s)[%(i)d]" % locals())
for n, p in enumerate(param):
code += "kernel_params[%(n)d] = %(p)s;\n" % locals()
code += """
err = GpuKernel_call(&%(kname)s, 1, &threads_per_block, &n_blocks, 0, kernel_params);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"gpuarray error: %(kname)s: %%s.",
GpuKernel_error(&%(kname)s, err));
%(fail)s;
if (GpuElemwise_call(ge, rargs, GE_BROADCAST) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error in the elemwise call");
%(fail)s
}
""" % dict(kname=kname, fail=fail)
""" % dict(fail=sub['fail'])
if config.gpuarray.sync:
z = outputs[0]
code += """
err = GpuArray_sync(&%(z)s->ga);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"gpuarray error: %(kname)s: %%s.",
GpuKernel_error(&%(kname)s, err));
"gpuarray error: %%s.",
GpuArray_error(&%(z)s->ga, err));
%(fail)s;
}
""" % locals()
return str(code)
def perform(self, node, inputs, output_storage, ctx):
# Try to reuse the kernel from a previous call to hopefully
# avoid recompiling
if not hasattr(node, '_cache_elemwise_k'):
node._cache_elemwise_k = self.generate_kernel(node, "kcode")
out_shape = []
for values in izip(*[input.shape for input in inputs]):
if any(v == 0 for v in values):
# All non-broadcasted dimensions should be zero
assert max(values) <= 1
out_shape.append(0)
else:
out_shape.append(max(values))
out_shape = tuple(out_shape)
args = copy.copy(inputs)
for n, (stor, out) in enumerate(izip(output_storage, node.outputs)):
if n in self.inplace_pattern:
stor[0] = inputs[self.inplace_pattern[n]]
else:
args.append(ensure_allocated(stor, out_shape, out.type.dtype, ctx))
return str(code)
node._cache_elemwise_k(*args, broadcast=True)
if config.gpuarray.sync:
output_storage[0][0].sync()
# To disable the superclass perform.
perform = Op.perform
def c_code_cache_version(self):
ver = self.scalar_op.c_code_cache_version()
if ver:
return (4, ver)
return (6, ver)
else:
return ver
......@@ -585,7 +504,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
This op was recently upgraded from just GpuSum a general CAReduce. Not
many code cases are supported for scalar_op being anything other than
scal.Add instances yet.
scalar.Add instances yet.
Important note: if you implement new cases for this op, be sure to
benchmark them and make sure that they actually result in a speedup.
......@@ -735,7 +654,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
# It might be nice to use a property of the op class to do this,
# but tensor.elemwise.CAReduce has this exact same check so I guess
# this is OK to do
if self.scalar_op in [scal.minimum, scal.maximum]:
if self.scalar_op in [scalar.minimum, scalar.maximum]:
conds = ["(PyGpuArray_DIMS(%s)[%d] == 0)" % (x, i)
for i in xrange(nd_in)
if self.reduce_mask[i]]
......@@ -1060,13 +979,13 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
if hasattr(self.scalar_op, 'identity'):
return str(self.scalar_op.identity)
else:
assert isinstance(self.scalar_op, (scal.Maximum,
scal.Minimum))
assert isinstance(self.scalar_op, (scalar.Maximum,
scalar.Minimum))
if self.pre_scalar_op: # TODO: multiple dtypes
# dtype = node.inputs[0].dtype
dtype = 'float32'
dummy_var = scal.Scalar(dtype=dtype)()
dummy_var = scalar.Scalar(dtype=dtype)()
dummy_node = self.pre_scalar_op.make_node(dummy_var)
......
from __future__ import absolute_import, print_function, division
import os
import copy
import numpy
from six import integer_types
from six.moves import StringIO
import theano
from theano import tensor, gof
from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list
import theano.tensor.inplace
try:
import pygpu
......@@ -18,10 +15,9 @@ try:
except ImportError:
pass
from .type import GpuArrayType
from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
infer_context_name)
from .elemwise import GpuElemwise
class GpuSubtensor(HideC, Subtensor):
......@@ -168,7 +164,7 @@ class GpuSubtensor(HideC, Subtensor):
return (6,)
class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
class GpuIncSubtensor(IncSubtensor):
"""
Implement IncSubtensor on the gpu.
......@@ -181,45 +177,20 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
:meth:`copy_of_x`, etc. specialize the c_code for this Op.
"""
@property
def _f16_ok(self):
return self.iadd_node.op._f16_ok
def c_headers(self):
return self.iadd_node.op.c_headers()
def c_init_code(self):
return self.iadd_node.op.c_init_code()
def gpu_kernels(self, node, nodename):
subname = nodename + "_add_to_zview"
return self.iadd_node.op.gpu_kernels(self.iadd_node, subname)
_f16_ok = True
params_type = gpu_context_type
def make_node(self, x, y, *inputs):
ctx_name = infer_context_name(x, y)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
op = copy.copy(self)
ret = gof.Apply(op, [x, y] + rval.inputs[2:], [x.type()])
op.create_iadd_node(ret)
ret = gof.Apply(self, [x, y] + rval.inputs[2:], [x.type()])
return ret
def get_params(self, node):
return node.outputs[0].type.context
def create_iadd_node(self, node):
# We store a iadd_node in the op that contain the info needed
# for the inplace add.
cop = theano.tensor.inplace.add_inplace
gop = GpuElemwise(cop.scalar_op, copy.copy(cop.inplace_pattern),
"Gpu" + cop.name, cop.nfunc_spec)
y = node.inputs[1]
xview = y.type()
iadd_node = gop(xview, y).owner
self.iadd_node = iadd_node
def perform(self, node, inputs, out_, ctx):
out, = out_
x, y = inputs[:2]
......@@ -261,18 +232,6 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
x.__setitem__(cdata, y)
out[0] = x
def __setstate__(self, d):
self.__dict__.update(d)
owner = getattr(self, "owner", None)
if owner:
self.create_iadd_node(owner)
def __getstate__(self):
d = copy.copy(self.__dict__)
if "iadd_node" in d:
d.pop('iadd_node')
return d
def do_type_checking(self, node):
"""
Should raise NotImplementedError if c_code does not support
......@@ -365,47 +324,52 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
"""
return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals()
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/error.h>', '<gpuarray/array.h>',
'<gpuarray/elemwise.h>']
def c_support_code_struct(self, node, nodename):
gop = self.iadd_node.op
sub_name = nodename + "_add_to_zview"
ret = gop.c_support_code_struct(self.iadd_node, sub_name)
ret += """
PyGpuArrayObject* inc_sub_iadd_%(nodename)s(PyGpuArrayObject* dst,
PyGpuArrayObject* src){
PyGpuArrayObject* ret = NULL;
""" % locals()
inputs = ["dst", "src"]
outputs = ["ret"]
sub = {"fail": "return NULL;", "params": "dst->context"}
ret += gop.c_code(self.iadd_node, sub_name, inputs, outputs, sub)
ret += """
return ret;
return "\nGpuElemwise *iadd;\n"
def c_init_code_struct(self, node, name, sub):
return """
gpuelemwise_arg args[2] = {{0}};
args[0].name = "a";
args[0].typecode = %(type1)s;
args[0].flags = GE_READ|GE_WRITE;
args[1].name = "b";
args[1].typecode = %(type2)s;
args[1].flags = GE_READ;
iadd = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, "", "a += b",
2, args, %(nd)s, 0);
if (iadd == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
%(fail)s
}
"""
return ret
""" % dict(ctx=sub['params'], fail=sub['fail'],
type1=node.inputs[0].type.typecode,
type2=node.inputs[1].type.typecode,
nd=node.inputs[1].ndim)
def add_to_zview(self, nodename, x, fail):
return """
PyGpuArrayObject * add_result = inc_sub_iadd_%(nodename)s(zview, %(x)s);
if (! add_result )
{
void *args[2];
args[0] = &zview->ga;
args[1] = &%(x)s->ga;
if (GpuElemwise_call(iadd, args, GE_BROADCAST) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error doing inplace add");
Py_DECREF(zview);
%(fail)s;
}
else
{
Py_DECREF(add_result);
%(fail)s
}
}
""" % locals()
def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
elemwise_version = self.iadd_node.c_code_cache_version()
if not parent_version or not elemwise_version:
if not parent_version:
return
return parent_version + elemwise_version + (3,)
return parent_version + (5,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
......
......@@ -18,40 +18,18 @@ from pygpu import ndgpuarray as gpuarray
# This is actually a test for GpuElemwise
class test_gpu_Broadcast(test_elemwise.test_Broadcast):
op = GpuElemwise
type = GpuArrayType
cop = GpuElemwise
ctype = GpuArrayType
# The order is important
linkers = [gof.PerformLinker, gof.CLinker]
def setUp(self):
if get_context(test_ctx_name).kind != 'cuda':
self.linkers = [gof.PerformLinker]
def rand_val(self, shp):
return rand_gpuarray(*shp, **dict(cls=gpuarray))
def rand_cval(self, shp):
return rand_gpuarray(*shp, **dict(cls=gpuarray))
def test_c(self):
if get_context(test_ctx_name).kind != 'cuda':
raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c()
def test_c_inplace(self):
if get_context(test_ctx_name).kind != 'cuda':
raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c_inplace()
def test_elemwise_pow():
# Test that GpuElemwise(pow) can compile with any combination of integer
# or float input dtype.
if get_context(test_ctx_name).kind != 'cuda':
raise SkipTest("Cuda specific tests")
dtypes = ["uint8", "uint16", "uint32", "uint64",
"int8", "int16", "int32", "int64",
"float16", "float32", "float64"]
......@@ -65,10 +43,10 @@ def test_elemwise_pow():
output = base ** exp
f = theano.function([base, exp], output)
# Call the function to make sure the output is valid
base_val = numpy.random.randint(0, 5, size=10).astype(dtype_base)
exp_val = numpy.random.randint(0, 3, size=10).astype(dtype_exp)
# Call the function to make sure the output is valid
out = f(base_val, exp_val)
expected_out = base_val ** exp_val
assert_allclose(out, expected_out)
......
......@@ -166,10 +166,12 @@ class test_Broadcast(unittest.TestCase):
linkers = [gof.PerformLinker, gof.CLinker]
def rand_val(self, shp):
return numpy.asarray(numpy.random.rand(*shp))
return numpy.asarray(numpy.random.rand(*shp),
dtype=theano.config.floatX)
def rand_cval(self, shp):
return numpy.asarray(numpy.random.rand(*shp))
return numpy.asarray(numpy.random.rand(*shp),
dtype=theano.config.floatX)
def setUp(self):
unittest_tools.seed_rng()
......@@ -189,8 +191,10 @@ class test_Broadcast(unittest.TestCase):
((2, 3, 4, 5), (1, 3, 1, 5)),
((2, 3, 4, 5), (1, 1, 1, 1)),
((), ())]:
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
xv = rand_val(xsh)
......@@ -202,8 +206,10 @@ class test_Broadcast(unittest.TestCase):
# test Elemwise.infer_shape
# the Shape op don't implement c_code!
if isinstance(linker, gof.PerformLinker):
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph(
[x, y], [e.shape])).make_function()
......@@ -218,8 +224,10 @@ class test_Broadcast(unittest.TestCase):
((2, 3, 4, 5), (1, 3, 1, 5)),
((2, 3, 4, 5), (1, 1, 1, 1)),
((), ())]:
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
xv = rand_val(xsh)
......@@ -232,8 +240,10 @@ class test_Broadcast(unittest.TestCase):
# test Elemwise.infer_shape
# the Shape op don't implement c_code!
if isinstance(linker, gof.PerformLinker):
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph(
[x, y], [e.shape])).make_function()
......@@ -267,13 +277,15 @@ class test_Broadcast(unittest.TestCase):
def test_fill(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0])('x')
y = self.ctype('float64', [1, 1])('y')
for linker, op in zip(self.linkers, [self.op, self.cop]):
for linker, op, t, rval in zip(self.linkers, [self.op, self.cop],
[self.type, self.ctype],
[self.rand_val, self.rand_cval]):
x = t(theano.config.floatX, [0, 0])('x')
y = t(theano.config.floatX, [1, 1])('y')
e = op(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
f = linker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((5, 5))
yv = self.rand_cval((1, 1))
xv = rval((5, 5))
yv = rval((1, 1))
f(xv, yv)
assert (xv == yv).all()
......@@ -292,24 +304,28 @@ class test_Broadcast(unittest.TestCase):
def test_weird_strides(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0, 0, 0, 0])('x')
y = self.ctype('float64', [0, 0, 0, 0, 0])('y')
for linker, op in zip(self.linkers, [self.op, self.cop]):
for linker, op, t, rval in zip(self.linkers, [self.op, self.cop],
[self.type, self.ctype],
[self.rand_val, self.rand_cval]):
x = t(theano.config.floatX, [0, 0, 0, 0, 0])('x')
y = t(theano.config.floatX, [0, 0, 0, 0, 0])('y')
e = op(scalar.add)(x, y)
f = linker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((2, 2, 2, 2, 2))
yv = self.rand_cval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2)
xv = rval((2, 2, 2, 2, 2))
yv = rval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2)
zv = xv + yv
assert (f(xv, yv) == zv).all()
def test_same_inputs(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0])('x')
for linker, op in zip(self.linkers, [self.op, self.cop]):
for linker, op, t, rval in zip(self.linkers, [self.op, self.cop],
[self.type, self.ctype],
[self.rand_val, self.rand_cval]):
x = t(theano.config.floatX, [0, 0])('x')
e = op(scalar.add)(x, x)
f = linker().accept(FunctionGraph([x], [e])).make_function()
xv = self.rand_cval((2, 2))
xv = rval((2, 2))
zv = xv + xv
assert (f(xv) == zv).all()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论