提交 4a8fed96 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5923 from nouiz/float16

Don't move ops that don't support float16 to the GPU
...@@ -50,6 +50,7 @@ class ViewOp(gof.Op): ...@@ -50,6 +50,7 @@ class ViewOp(gof.Op):
# the output variable is %(oname)s. # the output variable is %(oname)s.
c_code_and_version = {} c_code_and_version = {}
__props__ = () __props__ = ()
_f16_ok = True
def make_node(self, x): def make_node(self, x):
return gof.Apply(self, [x], [x.type()]) return gof.Apply(self, [x], [x.type()])
...@@ -151,6 +152,7 @@ class DeepCopyOp(gof.Op): ...@@ -151,6 +152,7 @@ class DeepCopyOp(gof.Op):
check_input = False check_input = False
__props__ = () __props__ = ()
_f16_ok = True
def __init__(self): def __init__(self):
pass pass
...@@ -659,6 +661,7 @@ class Rebroadcast(gof.Op): ...@@ -659,6 +661,7 @@ class Rebroadcast(gof.Op):
check_input = False check_input = False
__props__ = ("axis",) __props__ = ("axis",)
_f16_ok = True
def __init__(self, *axis): def __init__(self, *axis):
# Sort them to make sure we merge all possible case. # Sort them to make sure we merge all possible case.
...@@ -820,6 +823,7 @@ class SpecifyShape(gof.Op): ...@@ -820,6 +823,7 @@ class SpecifyShape(gof.Op):
# the output variable is %(oname)s. # the output variable is %(oname)s.
c_code_and_version = {} c_code_and_version = {}
__props__ = () __props__ = ()
_f16_ok = True
def make_node(self, x, shape): def make_node(self, x, shape):
if not isinstance(x, gof.Variable): if not isinstance(x, gof.Variable):
......
...@@ -14,189 +14,6 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')): ...@@ -14,189 +14,6 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')):
os.remove(os.path.join(config.compiledir, 'cutils_ext.so')) os.remove(os.path.join(config.compiledir, 'cutils_ext.so'))
def compile_cutils_code():
    """Build the C source implementing ``inplace_increment`` for cutils_ext.

    The returned string defines, for every numeric numpy dtype that the
    numpy headers may provide (each guarded by ``#if defined(...)`` since
    not all dtypes exist on every platform), a ``<type>_inplace_add``
    kernel, two parallel dispatch tables, and the module-level
    ``inplace_increment(a, index, inc[, inc_or_set])`` entry point that
    adds (or, when ``inc_or_set`` is 0 semantics differ, sets) values at
    the given fancy indexes of ``a`` in place.

    Returns
    -------
    str
        C code, wrapped in ``#if NPY_API_VERSION >= 0x00000008`` so it is
        only compiled against numpy versions exposing the MapIter API.
    """
    # Candidate real dtypes; the #if defined() guards emitted below keep
    # the generated C valid when a dtype is absent from the numpy headers.
    types = ['npy_' + t for t in ['int8', 'int16', 'int32', 'int64', 'int128',
                                  'int256', 'uint8', 'uint16', 'uint32',
                                  'uint64', 'uint128', 'uint256',
                                  'float16', 'float32', 'float64',
                                  'float80', 'float96', 'float128',
                                  'float256']]
    complex_types = ['npy_' + t for t in ['complex32', 'complex64',
                                          'complex128', 'complex160',
                                          'complex192', 'complex512']]

    # Template for one in-place add/set kernel iterating a map iterator
    # (destination) in lockstep with a flat iterator (values).
    inplace_map_template = """
    #if defined(%(typen)s)
    static void %(type)s_inplace_add(PyArrayMapIterObject *mit,
                                     PyArrayIterObject *it, int inc_or_set)
    {
        int index = mit->size;
        while (index--) {
            %(op)s
            PyArray_MapIterNext(mit);
            PyArray_ITER_NEXT(it);
        }
    }
    #endif
    """

    # Scalar update: either accumulate into or overwrite the destination.
    floatadd = ("((%(type)s*)mit->dataptr)[0] = "
                "(inc_or_set ? ((%(type)s*)mit->dataptr)[0] : 0)"
                " + ((%(type)s*)it->dataptr)[0];")
    # Complex update: real and imaginary parts are handled separately.
    complexadd = """
    ((%(type)s*)mit->dataptr)[0].real =
        (inc_or_set ? ((%(type)s*)mit->dataptr)[0].real : 0)
        + ((%(type)s*)it->dataptr)[0].real;
    ((%(type)s*)mit->dataptr)[0].imag =
        (inc_or_set ? ((%(type)s*)mit->dataptr)[0].imag : 0)
        + ((%(type)s*)it->dataptr)[0].imag;
    """

    fns = ''.join([inplace_map_template % {'type': t, 'typen': t.upper(),
                                           'op': floatadd % {'type': t}}
                   for t in types] +
                  [inplace_map_template % {'type': t, 'typen': t.upper(),
                                           'op': complexadd % {'type': t}}
                   for t in complex_types])

    def gen_binop(type, typen):
        # One dispatch-table entry, guarded exactly like its kernel so the
        # two tables below stay index-aligned.
        return """
    #if defined(%(typen)s)
    %(type)s_inplace_add,
    #endif
    """ % dict(type=type, typen=typen)

    fn_array = ("static inplace_map_binop addition_funcs[] = {" +
                ''.join([gen_binop(type=t, typen=t.upper())
                         for t in types + complex_types]) + "NULL};\n")

    def gen_num(typen):
        # One type-number entry, parallel to addition_funcs.  (Previously
        # this passed a spurious ``type=type`` mapping key referencing the
        # ``type`` builtin; only %(typen)s is used by the template.)
        return """
    #if defined(%(typen)s)
    %(typen)s,
    #endif
    """ % dict(typen=typen)

    # Sentinel -1000 terminates the table together with addition_funcs'
    # trailing NULL.
    type_number_array = ("static int type_numbers[] = {" +
                         ''.join([gen_num(typen=t.upper())
                                  for t in types + complex_types]) + "-1000};")

    code = ("""
    #if NPY_API_VERSION >= 0x00000008
    typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
                                      PyArrayIterObject *, int inc_or_set);
    """ + fns + fn_array + type_number_array + """
    static int
    map_increment(PyArrayMapIterObject *mit, PyObject *op,
                  inplace_map_binop add_inplace, int inc_or_set)
    {
        PyArrayObject *arr = NULL;
        PyArrayIterObject *it;
        PyArray_Descr *descr;
        if (mit->ait == NULL) {
            return -1;
        }
        descr = PyArray_DESCR(mit->ait->ao);
        Py_INCREF(descr);
        arr = (PyArrayObject *)PyArray_FromAny(op, descr,
                    0, 0, NPY_ARRAY_FORCECAST, NULL);
        if (arr == NULL) {
            return -1;
        }
        if ((mit->subspace != NULL) && (mit->consec)) {
            PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
            if (arr == NULL) {
                return -1;
            }
        }
        it = (PyArrayIterObject*)
                PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
        if (it == NULL) {
            Py_DECREF(arr);
            return -1;
        }
        (*add_inplace)(mit, it, inc_or_set);
        Py_DECREF(arr);
        Py_DECREF(it);
        return 0;
    }

    static PyObject *
    inplace_increment(PyObject *dummy, PyObject *args)
    {
        PyObject *arg_a = NULL, *index=NULL, *inc=NULL;
        int inc_or_set = 1;
        PyArrayObject *a;
        inplace_map_binop add_inplace = NULL;
        int type_number = -1;
        int i = 0;
        PyArrayMapIterObject * mit;

        if (!PyArg_ParseTuple(args, "OOO|i", &arg_a, &index,
                &inc, &inc_or_set)) {
            return NULL;
        }
        if (!PyArray_Check(arg_a)) {
            PyErr_SetString(PyExc_ValueError,
                            "needs an ndarray as first argument");
            return NULL;
        }
        a = (PyArrayObject *) arg_a;
        if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
            return NULL;
        }
        if (PyArray_NDIM(a) == 0) {
            PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
            return NULL;
        }
        type_number = PyArray_TYPE(a);
        while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
            if (type_number == type_numbers[i]) {
                add_inplace = addition_funcs[i];
                break;
            }
            i++ ;
        }
        if (add_inplace == NULL) {
            PyErr_SetString(PyExc_TypeError, "unsupported type for a");
            return NULL;
        }
        mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
        if (mit == NULL) {
            goto fail;
        }
        if (map_increment(mit, inc, add_inplace, inc_or_set) != 0) {
            goto fail;
        }

        Py_DECREF(mit);
        Py_INCREF(Py_None);
        return Py_None;

    fail:
        Py_XDECREF(mit);
        return NULL;
    }
    #endif
    """)
    return code
def compile_cutils(): def compile_cutils():
""" """
Do just the compilation of cutils_ext. Do just the compilation of cutils_ext.
...@@ -204,7 +21,6 @@ def compile_cutils(): ...@@ -204,7 +21,6 @@ def compile_cutils():
""" """
code = (""" code = ("""
#include <Python.h> #include <Python.h>
#include "numpy/arrayobject.h"
#include "theano_mod_helper.h" #include "theano_mod_helper.h"
extern "C"{ extern "C"{
...@@ -226,18 +42,10 @@ def compile_cutils(): ...@@ -226,18 +42,10 @@ def compile_cutils():
int failure = fn(it); int failure = fn(it);
return Py_BuildValue("i", failure); return Py_BuildValue("i", failure);
}""") }
static PyMethodDef CutilsExtMethods[] = {
code += compile_cutils_code()
code += ("""static PyMethodDef CutilsExtMethods[] = {
{"run_cthunk", run_cthunk, METH_VARARGS|METH_KEYWORDS, {"run_cthunk", run_cthunk, METH_VARARGS|METH_KEYWORDS,
"Run a theano cthunk."}, "Run a theano cthunk."},
#if NPY_API_VERSION >= 0x00000008
{"inplace_increment", inplace_increment,
METH_VARARGS,
"increments a numpy array inplace at the passed indexes."},
#endif
{NULL, NULL, 0, NULL} /* Sentinel */ {NULL, NULL, 0, NULL} /* Sentinel */
};""") };""")
if PY3: if PY3:
...@@ -256,7 +64,6 @@ def compile_cutils(): ...@@ -256,7 +64,6 @@ def compile_cutils():
PyMODINIT_FUNC PyMODINIT_FUNC
PyInit_cutils_ext(void) { PyInit_cutils_ext(void) {
import_array();
return PyModule_Create(&moduledef); return PyModule_Create(&moduledef);
} }
} }
...@@ -266,7 +73,6 @@ def compile_cutils(): ...@@ -266,7 +73,6 @@ def compile_cutils():
PyMODINIT_FUNC PyMODINIT_FUNC
initcutils_ext(void) initcutils_ext(void)
{ {
import_array();
(void) Py_InitModule("cutils_ext", CutilsExtMethods); (void) Py_InitModule("cutils_ext", CutilsExtMethods);
} }
} //extern C } //extern C
......
...@@ -261,7 +261,7 @@ class GpuCholesky(Op): ...@@ -261,7 +261,7 @@ class GpuCholesky(Op):
raise RuntimeError('CUSOLVER is not available and ' raise RuntimeError('CUSOLVER is not available and '
'GpuCholesky Op can not be constructed.') 'GpuCholesky Op can not be constructed.')
if skcuda.__version__ <= '0.5.1': if skcuda.__version__ <= '0.5.1':
warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8') warnings.warn('The GpuCholesky op requires scikit-cuda > 0.5.1 to work with CUDA 8')
if not pygpu_available: if not pygpu_available:
raise RuntimeError('Missing pygpu or triu/tril functions.' raise RuntimeError('Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.') 'Install or update libgpuarray.')
...@@ -382,6 +382,7 @@ class GpuMagmaSVD(COp): ...@@ -382,6 +382,7 @@ class GpuMagmaSVD(COp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
if A.ndim != 2: if A.ndim != 2:
raise LinAlgError("Matrix rank error") raise LinAlgError("Matrix rank error")
assert A.dtype == 'float32'
if self.compute_uv: if self.compute_uv:
return theano.Apply(self, [A], return theano.Apply(self, [A],
[A.type(), [A.type(),
...@@ -476,6 +477,7 @@ class GpuMagmaMatrixInverse(COp): ...@@ -476,6 +477,7 @@ class GpuMagmaMatrixInverse(COp):
def make_node(self, x): def make_node(self, x):
ctx_name = infer_context_name(x) ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
assert x.dtype == 'float32'
if x.ndim != 2: if x.ndim != 2:
raise LinAlgError("Matrix rank error") raise LinAlgError("Matrix rank error")
return theano.Apply(self, [x], [x.type()]) return theano.Apply(self, [x], [x.type()])
......
...@@ -1181,6 +1181,14 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1181,6 +1181,14 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
@op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv]) @op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
@register_opt2([tensor.blas.Gemv], 'fast_compile') @register_opt2([tensor.blas.Gemv], 'fast_compile')
def local_gpua_gemv(op, context_name, inputs, outputs): def local_gpua_gemv(op, context_name, inputs, outputs):
if inputs[0].dtype == 'float16':
# Use the gemm implementation, as cuBLAS gemv doesn't support float16
return gpugemm_no_inplace(inputs[0][:, None],
inputs[1],
inputs[2],
inputs[3][:, None],
inputs[4]).dimshuffle(0)
if inputs[0].dtype not in ['float32', 'float64']: if inputs[0].dtype not in ['float32', 'float64']:
return return
if op.inplace: if op.inplace:
...@@ -1351,6 +1359,8 @@ theano.tensor.nnet.conv2d() ...@@ -1351,6 +1359,8 @@ theano.tensor.nnet.conv2d()
@op_lifter([SparseBlockGemv]) @op_lifter([SparseBlockGemv])
@register_opt2([SparseBlockGemv], 'fast_compile') @register_opt2([SparseBlockGemv], 'fast_compile')
def local_gpua_sparseblockgemv(op, context_name, inputs, outputs): def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
if inputs[0].dtype == 'float16':
return
if op.inplace: if op.inplace:
return gpu_sparse_block_gemv_inplace return gpu_sparse_block_gemv_inplace
else: else:
...@@ -1361,6 +1371,8 @@ def local_gpua_sparseblockgemv(op, context_name, inputs, outputs): ...@@ -1361,6 +1371,8 @@ def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
@op_lifter([SparseBlockOuter]) @op_lifter([SparseBlockOuter])
@register_opt2([SparseBlockOuter], 'fast_compile') @register_opt2([SparseBlockOuter], 'fast_compile')
def local_gpua_sparseblockouter(op, context_name, inputs, outputs): def local_gpua_sparseblockouter(op, context_name, inputs, outputs):
if inputs[0].dtype == 'float16':
return
if op.inplace: if op.inplace:
return gpu_sparse_block_outer_inplace return gpu_sparse_block_outer_inplace
else: else:
...@@ -1998,7 +2010,13 @@ def _scan_type_infer(node): ...@@ -1998,7 +2010,13 @@ def _scan_type_infer(node):
@op_lifter([tensor.MaxAndArgmax]) @op_lifter([tensor.MaxAndArgmax])
@register_opt2([tensor.MaxAndArgmax], 'fast_compile') @register_opt2([tensor.MaxAndArgmax], 'fast_compile')
def local_gpu_maxandargmax(op, context_name, inputs, outputs): def local_gpu_maxandargmax(op, context_name, inputs, outputs):
return GpuMaxAndArgmax(op.get_params(None)) op = GpuMaxAndArgmax(op.get_params(None))
if inputs[0].dtype == "float16":
# For now it is better to copy/cast on the GPU then transfer to the CPU
casted_inputs = inputs[0].astype('float32')
ret = op(casted_inputs)
return [ret[0].astype('float16'), ret[1]]
return op
# solve # solve
...@@ -2008,9 +2026,15 @@ def local_gpu_maxandargmax(op, context_name, inputs, outputs): ...@@ -2008,9 +2026,15 @@ def local_gpu_maxandargmax(op, context_name, inputs, outputs):
def local_gpu_solve(op, context_name, inputs, outputs): def local_gpu_solve(op, context_name, inputs, outputs):
if not cusolver_available: if not cusolver_available:
return return
if inputs[0].dtype not in ['float16', 'float32']:
return
if op.A_structure not in MATRIX_STRUCTURES_SOLVE: if op.A_structure not in MATRIX_STRUCTURES_SOLVE:
return return
return GpuCusolverSolve(A_structure=op.A_structure) op = GpuCusolverSolve(A_structure=op.A_structure)
if inputs[0].dtype == 'float16':
return op(inputs[0].astype('float32'),
inputs[1].astype('float32')).astype('float16')
return op
@register_inplace() @register_inplace()
...@@ -2028,7 +2052,13 @@ def local_inplace_gpu_solve(node): ...@@ -2028,7 +2052,13 @@ def local_inplace_gpu_solve(node):
def local_gpu_cholesky(op, context_name, inputs, outputs): def local_gpu_cholesky(op, context_name, inputs, outputs):
if not cusolver_available: if not cusolver_available:
return return
return GpuCholesky(lower=op.lower, inplace=op.destructive) if inputs[0].dtype not in ['float16', 'float32']:
return
op = GpuCholesky(lower=op.lower, inplace=op.destructive)
if inputs[0].dtype == 'float16':
return op(inputs[0].astype('float32')).astype('float16')
return op
@register_inplace() @register_inplace()
...@@ -2044,7 +2074,12 @@ def local_inplace_cholesky(node): ...@@ -2044,7 +2074,12 @@ def local_inplace_cholesky(node):
def local_gpu_matrix_inverse(op, context_name, inputs, outputs): def local_gpu_matrix_inverse(op, context_name, inputs, outputs):
if not config.magma.enabled: if not config.magma.enabled:
return return
return GpuMagmaMatrixInverse() if inputs[0].dtype not in ['float16', 'float32']:
return
op = GpuMagmaMatrixInverse()
if inputs[0].dtype == 'float16':
return op(inputs[0].astype('float32')).astype('float16')
return op
@register_inplace() @register_inplace()
...@@ -2061,9 +2096,13 @@ def local_inplace_matrix_inverse_inplace(node): ...@@ -2061,9 +2096,13 @@ def local_inplace_matrix_inverse_inplace(node):
def local_gpu_svd(op, context_name, inputs, outputs): def local_gpu_svd(op, context_name, inputs, outputs):
if not config.magma.enabled: if not config.magma.enabled:
return return
return GpuMagmaSVD(full_matrices=op.full_matrices, if inputs[0].dtype not in ['float16', 'float32']:
compute_uv=op.compute_uv) return
op = GpuMagmaSVD(full_matrices=op.full_matrices,
compute_uv=op.compute_uv)
if inputs[0].dtype == 'float16':
return op(inputs[0].astype('float32')).astype('float16')
return op
# Do not register in fast_run or fast_compile. # Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled. # It will be added to fast_run if the GPU is enabled.
......
...@@ -232,7 +232,7 @@ class GpuIncSubtensor(IncSubtensor): ...@@ -232,7 +232,7 @@ class GpuIncSubtensor(IncSubtensor):
if not self.set_instead_of_inc: if not self.set_instead_of_inc:
# sub_x += y # sub_x += y
iadd = get_iadd(node.inputs[0], node.inputs[1]) iadd = get_iadd(node.inputs[0], node.inputs[1])
iadd(sub_x, y, broadcast=False) iadd(sub_x, y)
else: else:
# sub_x[...] = y # sub_x[...] = y
x.__setitem__(cdata, y) x.__setitem__(cdata, y)
...@@ -403,6 +403,8 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): ...@@ -403,6 +403,8 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
""" """
AdvancedSubrensor1 on the GPU. AdvancedSubrensor1 on the GPU.
""" """
_f16_ok = True
def make_node(self, x, ilist): def make_node(self, x, ilist):
ctx_name = infer_context_name(x, ilist) ctx_name = infer_context_name(x, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
...@@ -807,7 +809,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, ...@@ -807,7 +809,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
""" """
ctx_name = infer_context_name(x, y, ilist) ctx_name = infer_context_name(x, y, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
y_ = as_gpuarray_variable(y, ctx_name) y_ = as_gpuarray_variable(y.astype(x.dtype), ctx_name)
ilist_ = as_gpuarray_variable(ilist, ctx_name) ilist_ = as_gpuarray_variable(ilist, ctx_name)
assert x_.type.ndim >= y_.type.ndim assert x_.type.ndim >= y_.type.ndim
...@@ -1088,6 +1090,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) { ...@@ -1088,6 +1090,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
class GpuExtractDiag(Op): class GpuExtractDiag(Op):
__props__ = ("offset", "axis1", "axis2", "view") __props__ = ("offset", "axis1", "axis2", "view")
_f16_ok = True
def __init__(self, offset=0, axis1=0, axis2=1, view=False): def __init__(self, offset=0, axis1=0, axis2=1, view=False):
self.view = view self.view = view
......
...@@ -8,7 +8,7 @@ import theano ...@@ -8,7 +8,7 @@ import theano
from theano import config from theano import config
from theano import tensor from theano import tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot from theano.tensor.blas import gemv, gemv_inplace, gemm_inplace, _dot22, batched_dot
from theano.tensor.tests.test_blas import TestGer, BaseGemv from theano.tensor.tests.test_blas import TestGer, BaseGemv
from .. import gpuarray_shared_constructor from .. import gpuarray_shared_constructor
...@@ -18,7 +18,7 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace, ...@@ -18,7 +18,7 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace, gpugemm_no_inplace, gpugemm_inplace, gpugemm_no_inplace,
gpugemmbatch_no_inplace, gpugemmbatch_no_inplace,
gpuger_inplace, gpuger_no_inplace, gpuger_inplace, gpuger_no_inplace,
GpuGer, gpu_dot22) GpuGer, GpuGemm, gpu_dot22)
GpuGemvTester = makeTester( GpuGemvTester = makeTester(
...@@ -42,6 +42,22 @@ GpuGemvTester = makeTester( ...@@ -42,6 +42,22 @@ GpuGemvTester = makeTester(
def test_float16(): def test_float16():
# gemv (gemm called)
float16_data = [rand(3).astype('float16'),
np.asarray(1, dtype=np.float32),
rand(3, 3).astype('float16'),
rand(3).astype('float16'),
np.asarray(0.5, dtype=np.float32)]
float16_shared = [gpuarray_shared_constructor(val, target=test_ctx_name)
for val in float16_data]
o = gemv(*float16_shared)
f = theano.function([], o, mode=mode_with_gpu)
y, alpha, A, x, beta = float16_data
out = f()
utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
topo = f.maker.fgraph.toposort()
assert any([isinstance(n.op, GpuGemm) for n in topo])
# gemm # gemm
float16_data = [rand(3, 3).astype('float16'), float16_data = [rand(3, 3).astype('float16'),
np.asarray(1, dtype=np.float32), np.asarray(1, dtype=np.float32),
......
...@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor):
assert self.sub == GpuSubtensor assert self.sub == GpuSubtensor
class G_subtensorF16(test_subtensor.T_subtensor):
    """Run the GPU subtensor test suite with float16 data."""

    def shortDescription(self):
        # Report the test name rather than the docstring.
        return None

    def __init__(self, name):
        shared = lambda x, **kwargs: gpuarray_shared_constructor(
            x, target=test_ctx_name, **kwargs)
        test_subtensor.T_subtensor.__init__(
            self, name,
            shared=shared,
            sub=GpuSubtensor,
            inc_sub=GpuIncSubtensor,
            adv_sub1=GpuAdvancedSubtensor1,
            adv_incsub1=GpuAdvancedIncSubtensor1,
            dimshuffle=GpuDimShuffle,
            mode=mode_with_gpu,
            # float16 rather than floatX, to exercise the f16 code paths
            # while avoiding errors with limited devices.
            dtype='float16',
            ignore_topo=(HostFromGpu, GpuFromHost,
                         DeepCopyOp, GpuContiguous))
        # The GPU optimizations cannot run in fast_compile only.
        self.fast_compile = False
        assert self.sub == GpuSubtensor
def test_advinc_subtensor1(): def test_advinc_subtensor1():
# Test the second case in the opt local_gpu_advanced_incsubtensor1 # Test the second case in the opt local_gpu_advanced_incsubtensor1
for shp in [(3, 3), (3, 3, 3)]: for shp in [(3, 3), (3, 3, 3)]:
...@@ -73,7 +100,9 @@ def test_advinc_subtensor1(): ...@@ -73,7 +100,9 @@ def test_advinc_subtensor1():
def test_advinc_subtensor1_dtype(): def test_advinc_subtensor1_dtype():
# Test the mixed dtype case # Test the mixed dtype case
shp = (3, 4) shp = (3, 4)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]: for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64'),
('float16', 'int8'), ('float16', 'float64'),
('float16', 'float16')]:
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1 xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1
yval = np.empty((2,) + shp[1:], dtype=dtype2) yval = np.empty((2,) + shp[1:], dtype=dtype2)
...@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype(): ...@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype():
def test_advinc_subtensor1_vector_scalar(): def test_advinc_subtensor1_vector_scalar():
# Test the case where x is a vector and y a scalar # Test the case where x is a vector and y a scalar
shp = (3,) shp = (3,)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]: for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64'),
('float16', 'int8'), ('float16', 'float64'),
('float16', 'float16')]:
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1 xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1
yval = np.asarray(10, dtype=dtype2) yval = np.asarray(10, dtype=dtype2)
...@@ -105,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar(): ...@@ -105,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar():
name='y') name='y')
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2]) expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu) f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1_dev20) assert sum([isinstance(node.op, (GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedIncSubtensor1))
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy() rep = xval.copy()
...@@ -169,7 +201,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor): ...@@ -169,7 +201,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
sub=GpuAdvancedSubtensor, sub=GpuAdvancedSubtensor,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
dtype='float32', dtype='float32', # floatX?
ignore_topo=(HostFromGpu, GpuFromHost,
DeepCopyOp))
# GPU opt can't run in fast_compile only.
self.fast_compile = False
assert self.sub == GpuAdvancedSubtensor
class G_advancedsubtensorF16(test_subtensor.TestAdvancedSubtensor):
def shortDescription(self):
return None
def __init__(self, name):
test_subtensor.TestAdvancedSubtensor.__init__(
self, name,
shared=gpuarray_shared_constructor,
sub=GpuAdvancedSubtensor,
mode=mode_with_gpu,
# avoid errors with limited devices
dtype='float16', # floatX?
ignore_topo=(HostFromGpu, GpuFromHost, ignore_topo=(HostFromGpu, GpuFromHost,
DeepCopyOp)) DeepCopyOp))
# GPU opt can't run in fast_compile only. # GPU opt can't run in fast_compile only.
...@@ -218,6 +269,17 @@ class test_gpuextractdiag(unittest.TestCase): ...@@ -218,6 +269,17 @@ class test_gpuextractdiag(unittest.TestCase):
GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}), GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}),
np_x.diagonal(offset, axis1, axis2)) np_x.diagonal(offset, axis1, axis2))
def test_tensor_float16(self):
x = tensor.tensor4()
np_x = np.arange(30107).reshape(7, 11, 17, 23).astype('float16')
for offset, axis1, axis2 in [
(1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
(-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
(2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]:
assert np.allclose(
GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}),
np_x.diagonal(offset, axis1, axis2))
class test_gpuallocdiag(unittest.TestCase): class test_gpuallocdiag(unittest.TestCase):
def test_matrix(self): def test_matrix(self):
......
...@@ -7,7 +7,7 @@ import numpy as np ...@@ -7,7 +7,7 @@ import numpy as np
import theano import theano
from theano.compat import PY3 from theano.compat import PY3
from theano import config from theano import config
from theano.compile import DeepCopyOp from theano.compile import DeepCopyOp, Rebroadcast, ViewOp
from theano.misc.pkl_utils import CompatUnpickler from theano.misc.pkl_utils import CompatUnpickler
# Disabled for now # Disabled for now
...@@ -21,16 +21,45 @@ import pygpu ...@@ -21,16 +21,45 @@ import pygpu
def test_deep_copy(): def test_deep_copy():
a = rand_gpuarray(20, dtype='float32') for dtype in ['float16', 'float32']:
g = GpuArrayType(dtype='float32', broadcastable=(False,))('g') a = rand_gpuarray(20, dtype=dtype)
g = GpuArrayType(dtype=dtype, broadcastable=(False,))('g')
f = theano.function([g], g)
assert isinstance(f.maker.fgraph.toposort()[0].op, DeepCopyOp)
res = f(a)
assert GpuArrayType.values_eq(res, a)
def test_view():
for dtype in ['float16', 'float32']:
a = rand_gpuarray(20, dtype=dtype)
g = GpuArrayType(dtype=dtype, broadcastable=(False,))('g')
f = theano.function([g], g) f = theano.function([g], ViewOp()(g))
assert isinstance(f.maker.fgraph.toposort()[0].op, DeepCopyOp) assert isinstance(f.maker.fgraph.toposort()[0].op, ViewOp)
res = f(a) res = f(a)
assert GpuArrayType.values_eq(res, a) assert GpuArrayType.values_eq(res, a)
def test_rebroadcast():
    # Rebroadcast must run on GPU arrays for both float16 and float32.
    for dtype in ('float16', 'float32'):
        val = rand_gpuarray(1, dtype=dtype)
        sym = GpuArrayType(dtype=dtype, broadcastable=(False,))('g')
        fn = theano.function([sym], Rebroadcast((0, True))(sym))
        # The graph should consist of the Rebroadcast op itself.
        assert isinstance(fn.maker.fgraph.toposort()[0].op, Rebroadcast)
        assert GpuArrayType.values_eq(fn(val), val)
def test_values_eq_approx(): def test_values_eq_approx():
...@@ -45,10 +74,11 @@ def test_values_eq_approx(): ...@@ -45,10 +74,11 @@ def test_values_eq_approx():
def test_specify_shape(): def test_specify_shape():
a = rand_gpuarray(20, dtype='float32') for dtype in ['float16', 'float32']:
g = GpuArrayType(dtype='float32', broadcastable=(False,))('g') a = rand_gpuarray(20, dtype=dtype)
f = theano.function([g], theano.tensor.specify_shape(g, [20])) g = GpuArrayType(dtype=dtype, broadcastable=(False,))('g')
f(a) f = theano.function([g], theano.tensor.specify_shape(g, [20]))
f(a)
def test_filter_float(): def test_filter_float():
......
...@@ -1482,7 +1482,11 @@ class numeric_grad(object): ...@@ -1482,7 +1482,11 @@ class numeric_grad(object):
The tuple (abs_err, rel_err) is returned The tuple (abs_err, rel_err) is returned
""" """
abs_err = abs(a - b) abs_err = abs(a - b)
rel_err = abs_err / np.maximum(abs(a) + abs(b), 1e-8) # 1e-8 is to prevent division by zeros.
# Wrapping 1e-8 in [] makes sure that when a and b are float16 the
# constant is not downcast to float16, which would round it to 0 and
# reintroduce the division by zero.
rel_err = abs_err / np.maximum(abs(a) + abs(b), [1e-8])
# The numpy.asarray are needed as if a or b is a sparse matrix # The numpy.asarray are needed as if a or b is a sparse matrix
# this would result in a numpy.matrix and not a numpy.ndarray # this would result in a numpy.matrix and not a numpy.ndarray
# and the behave differently causing problem later. # and the behave differently causing problem later.
......
from __future__ import absolute_import, print_function, division
def inc_code():
    """Build the C support code for AdvancedIncSubtensor1's in-place update.

    The returned string defines, for every numeric numpy dtype that the
    numpy headers may provide (each guarded by ``#if defined(...)``), a
    ``<type>_inplace_add`` kernel, two parallel dispatch tables, and a
    C-level ``inplace_increment(a, index, inc, inc_or_set)`` helper that
    adds (or sets, depending on ``inc_or_set``) values at the given fancy
    indexes of ``a`` in place, returning 0 on success and -1 on error.

    Returns
    -------
    str
        C code suitable for use as an Op's ``c_support_code``.
    """
    # Candidate dtypes; the #if defined() guards emitted below keep the
    # generated C valid when a dtype is absent from the numpy headers.
    types = ['npy_' + t for t in ['int8', 'int16', 'int32', 'int64',
                                  'uint8', 'uint16', 'uint32', 'uint64',
                                  'float16', 'float32', 'float64']]
    complex_types = ['npy_' + t for t in ['complex32', 'complex64',
                                          'complex128']]

    # Template for one in-place add/set kernel iterating a map iterator
    # (destination) in lockstep with a flat iterator (values).
    inplace_map_template = """
    #if defined(%(typen)s)
    static void %(type)s_inplace_add(PyArrayMapIterObject *mit,
                                     PyArrayIterObject *it, int inc_or_set)
    {
        int index = mit->size;
        while (index--) {
            %(op)s
            PyArray_MapIterNext(mit);
            PyArray_ITER_NEXT(it);
        }
    }
    #endif
    """

    # Scalar update: either accumulate into or overwrite the destination.
    floatadd = ("((%(type)s*)mit->dataptr)[0] = "
                "(inc_or_set ? ((%(type)s*)mit->dataptr)[0] : 0)"
                " + ((%(type)s*)it->dataptr)[0];")
    # Complex update: real and imaginary parts are handled separately.
    complexadd = """
    ((%(type)s*)mit->dataptr)[0].real =
        (inc_or_set ? ((%(type)s*)mit->dataptr)[0].real : 0)
        + ((%(type)s*)it->dataptr)[0].real;
    ((%(type)s*)mit->dataptr)[0].imag =
        (inc_or_set ? ((%(type)s*)mit->dataptr)[0].imag : 0)
        + ((%(type)s*)it->dataptr)[0].imag;
    """

    fns = ''.join([inplace_map_template % {'type': t, 'typen': t.upper(),
                                           'op': floatadd % {'type': t}}
                   for t in types] +
                  [inplace_map_template % {'type': t, 'typen': t.upper(),
                                           'op': complexadd % {'type': t}}
                   for t in complex_types])

    def gen_binop(type, typen):
        # One dispatch-table entry, guarded exactly like its kernel so the
        # two tables below stay index-aligned.
        return """
    #if defined(%(typen)s)
    %(type)s_inplace_add,
    #endif
    """ % dict(type=type, typen=typen)

    fn_array = ("static inplace_map_binop addition_funcs[] = {" +
                ''.join([gen_binop(type=t, typen=t.upper())
                         for t in types + complex_types]) + "NULL};\n")

    def gen_num(typen):
        # One type-number entry, parallel to addition_funcs.  (Previously
        # this passed a spurious ``type=type`` mapping key referencing the
        # ``type`` builtin; only %(typen)s is used by the template.)
        return """
    #if defined(%(typen)s)
    %(typen)s,
    #endif
    """ % dict(typen=typen)

    # Sentinel -1000 terminates the table together with addition_funcs'
    # trailing NULL.
    type_number_array = ("static int type_numbers[] = {" +
                         ''.join([gen_num(typen=t.upper())
                                  for t in types + complex_types]) + "-1000};")

    code = ("""
    typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
                                      PyArrayIterObject *, int inc_or_set);
    """ + fns + fn_array + type_number_array + """
    static int
    map_increment(PyArrayMapIterObject *mit, PyArrayObject *op,
                  inplace_map_binop add_inplace, int inc_or_set)
    {
        PyArrayObject *arr = NULL;
        PyArrayIterObject *it;
        PyArray_Descr *descr;
        if (mit->ait == NULL) {
            return -1;
        }
        descr = PyArray_DESCR(mit->ait->ao);
        Py_INCREF(descr);
        arr = (PyArrayObject *)PyArray_FromAny((PyObject *)op, descr,
                    0, 0, NPY_ARRAY_FORCECAST, NULL);
        if (arr == NULL) {
            return -1;
        }
        if ((mit->subspace != NULL) && (mit->consec)) {
            PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
            if (arr == NULL) {
                return -1;
            }
        }
        it = (PyArrayIterObject*)
                PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
        if (it == NULL) {
            Py_DECREF(arr);
            return -1;
        }
        (*add_inplace)(mit, it, inc_or_set);
        Py_DECREF(arr);
        Py_DECREF(it);
        return 0;
    }

    static int
    inplace_increment(PyArrayObject *a, PyObject *index, PyArrayObject *inc,
                      int inc_or_set)
    {
        inplace_map_binop add_inplace = NULL;
        int type_number = -1;
        int i = 0;
        PyArrayMapIterObject * mit;

        if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
            return -1;
        }
        if (PyArray_NDIM(a) == 0) {
            PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
            return -1;
        }
        type_number = PyArray_TYPE(a);
        while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
            if (type_number == type_numbers[i]) {
                add_inplace = addition_funcs[i];
                break;
            }
            i++ ;
        }
        if (add_inplace == NULL) {
            PyErr_SetString(PyExc_TypeError, "unsupported type for a");
            return -1;
        }
        mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
        if (mit == NULL) {
            goto fail;
        }
        if (map_increment(mit, inc, add_inplace, inc_or_set) != 0) {
            goto fail;
        }

        Py_DECREF(mit);
        Py_INCREF(Py_None);
        return 0;

    fail:
        Py_XDECREF(mit);
        return -1;
    }
    """)
    return code
...@@ -22,9 +22,7 @@ from theano.tensor.elemwise import DimShuffle ...@@ -22,9 +22,7 @@ from theano.tensor.elemwise import DimShuffle
from theano.tensor.type_other import NoneConst, SliceType, NoneTypeT, make_slice from theano.tensor.type_other import NoneConst, SliceType, NoneTypeT, make_slice
from theano import config from theano import config
if config.cxx: from .inc_code import inc_code
import theano.gof.cutils # needed to import cutils_ext
from cutils_ext.cutils_ext import inplace_increment
_logger = logging.getLogger("theano.tensor.subtensor") _logger = logging.getLogger("theano.tensor.subtensor")
...@@ -1943,8 +1941,7 @@ class AdvancedIncSubtensor1(Op): ...@@ -1943,8 +1941,7 @@ class AdvancedIncSubtensor1(Op):
NPY_ARRAY_ENSURECOPY, NULL)""" % locals() NPY_ARRAY_ENSURECOPY, NULL)""" % locals()
def c_support_code(self): def c_support_code(self):
from theano.gof.cutils import compile_cutils_code return inc_code()
return compile_cutils_code()
def c_code(self, node, name, input_names, output_names, sub): def c_code(self, node, name, input_names, output_names, sub):
numpy_ver = [int(n) for n in np.__version__.split('.')[:2]] numpy_ver = [int(n) for n in np.__version__.split('.')[:2]]
...@@ -1976,17 +1973,14 @@ class AdvancedIncSubtensor1(Op): ...@@ -1976,17 +1973,14 @@ class AdvancedIncSubtensor1(Op):
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = %(copy_of_x)s; %(out)s = %(copy_of_x)s;
} }
PyObject *arglist = Py_BuildValue("OOOi",%(out)s, %(idx)s, %(y)s, %(inc_or_set)d); if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, %(inc_or_set)d)) {
rval = inplace_increment(NULL, arglist);
Py_XDECREF(arglist);
if (rval == NULL) {
%(fail)s; %(fail)s;
} }
Py_XDECREF(rval); Py_XDECREF(rval);
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (3,) return (4,)
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
# TODO opt to make this inplace # TODO opt to make this inplace
...@@ -2001,36 +1995,10 @@ class AdvancedIncSubtensor1(Op): ...@@ -2001,36 +1995,10 @@ class AdvancedIncSubtensor1(Op):
if self.set_instead_of_inc: if self.set_instead_of_inc:
x[idx] = y x[idx] = y
else: else:
if config.cxx and node.inputs[0].dtype != 'float16': np.add.at(x, idx, y)
increment = inplace_increment
else:
increment = self.inplace_increment1d_slow
increment(x, idx, y)
out[0] = x out[0] = x
def inplace_increment1d_slow(self, x, idx, y):
# If `y` has as many dimensions as `x`, then we want to iterate
# jointly on `x` and `y`. Otherwise, it means `y` should be
# broadcasted to fill all relevant rows of `x`.
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if y.ndim == x.ndim:
if len(y) == 1:
# Allow broadcasting of y[0]
y_0 = y[0]
for i in idx:
x[i] += y_0
else:
assert len(y) == len(idx)
j = 0
for i in idx:
x[i] += y[j]
j += 1
else:
for i in idx:
x[i] += y
def infer_shape(self, node, ishapes): def infer_shape(self, node, ishapes):
x, y, ilist = ishapes x, y, ilist = ishapes
return [x] return [x]
...@@ -2246,14 +2214,8 @@ class AdvancedIncSubtensor(Op): ...@@ -2246,14 +2214,8 @@ class AdvancedIncSubtensor(Op):
if self.set_instead_of_inc: if self.set_instead_of_inc:
out[0][inputs[2:]] = inputs[1] out[0][inputs[2:]] = inputs[1]
elif config.cxx:
inplace_increment(out[0], tuple(inputs[2:]), inputs[1])
else: else:
raise NotImplementedError( np.add.at(out[0], tuple(inputs[2:]), inputs[1])
'Could not import inplace_increment, so advanced '
'indexing is disabled. '
'Please make sure that you have a working C++ compiler '
'and that config.cxx is correctly set.')
def infer_shape(self, node, ishapes): def infer_shape(self, node, ishapes):
return [ishapes[0]] return [ishapes[0]]
......
...@@ -2936,10 +2936,7 @@ def test_local_IncSubtensor_serialize(): ...@@ -2936,10 +2936,7 @@ def test_local_IncSubtensor_serialize():
i = T.vector('i', dtype='int64') i = T.vector('i', dtype='int64')
j = T.vector('j', dtype='int64') j = T.vector('j', dtype='int64')
t = T.scalar('t') t = T.scalar('t')
if theano.tensor.subtensor.inplace_increment: y = (W[i] + W[j] + W[1] + W[i, j]).sum()
y = (W[i] + W[j] + W[1] + W[i, j]).sum()
else:
y = (W[i] + W[j] + W[1]).sum()
cost = T.sqr(t - y) cost = T.sqr(t - y)
dW = theano.grad(cost, W) dW = theano.grad(cost, W)
mode = theano.compile.mode.get_default_mode().excluding('fusion') mode = theano.compile.mode.get_default_mode().excluding('fusion')
......
...@@ -5,7 +5,6 @@ import sys ...@@ -5,7 +5,6 @@ import sys
import unittest import unittest
import numpy as np import numpy as np
from nose.plugins.skip import SkipTest
from nose.tools import assert_equal from nose.tools import assert_equal
from numpy.testing import assert_array_equal from numpy.testing import assert_array_equal
from six import StringIO from six import StringIO
...@@ -524,10 +523,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -524,10 +523,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
gn = theano.grad(t.sum(), n) gn = theano.grad(t.sum(), n)
g = self.function([], gn, op=self.adv_incsub1) g = self.function([], gn, op=self.adv_incsub1)
utt.verify_grad(lambda m: m[[1, 3]], utt.verify_grad(lambda m: m[[1, 3]],
[np.random.rand(5, 5).astype(self.dtype)]) [np.random.rand(5, 5).astype(self.dtype)],
mode=self.mode)
g() g()
utt.verify_grad(lambda m: m[idx], utt.verify_grad(lambda m: m[idx],
[data]) [data], mode=self.mode)
def test_noncontiguous_idx(self): def test_noncontiguous_idx(self):
data = rand(4, 2, 3) data = rand(4, 2, 3)
...@@ -597,17 +597,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -597,17 +597,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.assertTrue(np.allclose(g_00, 2)) self.assertTrue(np.allclose(g_00, 2))
utt.verify_grad(lambda m: m[[1, 3]], utt.verify_grad(lambda m: m[[1, 3]],
[np.random.rand(5, 5).astype(self.dtype)]) [np.random.rand(5, 5).astype(self.dtype)],
mode=self.mode)
def fun(x, y): def fun(x, y):
return advanced_inc_subtensor1(x, y, [1, 3]) return advanced_inc_subtensor1(x, y, [1, 3])
utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype), utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype),
np.random.rand(2, 5).astype(self.dtype)]) np.random.rand(2, 5).astype(self.dtype)],
mode=self.mode)
def fun(x, y): def fun(x, y):
return advanced_set_subtensor1(x, y, [1, 3]) return advanced_set_subtensor1(x, y, [1, 3])
utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype), utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype),
np.random.rand(2, 5).astype(self.dtype)]) np.random.rand(2, 5).astype(self.dtype)],
mode=self.mode)
# test set_subtensor broadcast # test set_subtensor broadcast
self.dtype = 'float32' self.dtype = 'float32'
...@@ -872,12 +875,12 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -872,12 +875,12 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def fct(t): def fct(t):
return theano.tensor.sum(t[idx_]) return theano.tensor.sum(t[idx_])
utt.verify_grad(fct, [data]) utt.verify_grad(fct, [data], mode=self.mode)
# Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad) # Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
def fct2(t): def fct2(t):
return theano.tensor.grad(theano.tensor.sum(t[idx_]), t) return theano.tensor.grad(theano.tensor.sum(t[idx_]), t)
utt.verify_grad(fct2, [data]) utt.verify_grad(fct2, [data], mode=self.mode)
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1 # Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
if not self.fast_compile: if not self.fast_compile:
...@@ -958,7 +961,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -958,7 +961,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# vector # vector
utt.verify_grad( utt.verify_grad(
inc_slice(slice(2, 4, None)), inc_slice(slice(2, 4, None)),
(np.asarray([0, 1, 2, 3, 4, 5.]), np.asarray([9, 9.]),)) (np.asarray([0, 1, 2, 3, 4, 5.]), np.asarray([9, 9.]),),
mode=self.mode)
# matrix # matrix
utt.verify_grad( utt.verify_grad(
...@@ -1498,9 +1502,6 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1498,9 +1502,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
utt.assert_allclose(rval, aval) utt.assert_allclose(rval, aval)
def test_inc_adv_subtensor_w_2vec(self): def test_inc_adv_subtensor_w_2vec(self):
if not config.cxx:
raise SkipTest('config.cxx empty')
subt = self.m[self.ix1, self.ix12] subt = self.m[self.ix1, self.ix12]
a = inc_subtensor(subt, subt) a = inc_subtensor(subt, subt)
...@@ -1519,9 +1520,6 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1519,9 +1520,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
[.5, .3 * 2, .15]]), aval [.5, .3 * 2, .15]]), aval
def test_inc_adv_subtensor_with_broadcasting(self): def test_inc_adv_subtensor_with_broadcasting(self):
if not config.cxx:
raise SkipTest('config.cxx empty')
inc = dscalar() inc = dscalar()
a = inc_subtensor(self.m[self.ix1, self.ix12], inc) a = inc_subtensor(self.m[self.ix1, self.ix12], inc)
g_inc = tensor.grad(a.sum(), inc) g_inc = tensor.grad(a.sum(), inc)
...@@ -1542,9 +1540,6 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1542,9 +1540,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
assert np.allclose(gval, 3.0), gval assert np.allclose(gval, 3.0), gval
def test_inc_adv_subtensor1_with_broadcasting(self): def test_inc_adv_subtensor1_with_broadcasting(self):
if not config.cxx:
raise SkipTest('config.cxx empty')
inc = dscalar() inc = dscalar()
a = inc_subtensor(self.m[self.ix1], inc) a = inc_subtensor(self.m[self.ix1], inc)
g_inc = tensor.grad(a.sum(), inc) g_inc = tensor.grad(a.sum(), inc)
...@@ -1564,9 +1559,6 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1564,9 +1559,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
assert np.allclose(gval, 9.0), gval assert np.allclose(gval, 9.0), gval
def test_inc_adv_subtensor_with_index_broadcasting(self): def test_inc_adv_subtensor_with_index_broadcasting(self):
if not config.cxx:
raise SkipTest('config.cxx empty')
a = inc_subtensor(self.m[self.ix1, self.ix2], 2.1) a = inc_subtensor(self.m[self.ix1, self.ix2], 2.1)
assert a.type == self.m.type, (a.type, self.m.type) assert a.type == self.m.type, (a.type, self.m.type)
...@@ -1640,17 +1632,20 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1640,17 +1632,20 @@ class TestAdvancedSubtensor(unittest.TestCase):
self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor)) self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor))
utt.verify_grad(lambda m: m[[1, 3], [2, 4]], utt.verify_grad(lambda m: m[[1, 3], [2, 4]],
[np.random.rand(5, 5).astype(self.dtype)]) [np.random.rand(5, 5).astype(self.dtype)],
mode=self.mode)
def fun(x, y): def fun(x, y):
return advanced_inc_subtensor(x, y, [1, 3], [2, 4]) return advanced_inc_subtensor(x, y, [1, 3], [2, 4])
utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype), utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype),
np.random.rand(2).astype(self.dtype)]) np.random.rand(2).astype(self.dtype)],
mode=self.mode)
def fun(x, y): def fun(x, y):
return advanced_set_subtensor(x, y, [1, 3], [2, 4]) return advanced_set_subtensor(x, y, [1, 3], [2, 4])
utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype), utt.verify_grad(fun, [np.random.rand(5, 5).astype(self.dtype),
np.random.rand(2).astype(self.dtype)]) np.random.rand(2).astype(self.dtype)],
mode=self.mode)
class TestInferShape(utt.InferShapeTester): class TestInferShape(utt.InferShapeTester):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论