Commit 5c25f307 authored by cocu

Merge branch 'master' into allow_cxx_flag_full_path

Conflicts: theano/gof/cmodule.py
.. _libdoc_cuda_dnn:
================================
:mod:`sandbox.cuda.dnn` -- cuDNN
================================
.. moduleauthor:: LISA
`cuDNN <https://developer.nvidia.com/cuDNN>`_ is an NVIDIA library with
functionality used by deep neural networks. It provides optimized versions
of some operations, like the convolution. cuDNN is not currently
installed with CUDA 6.5, so you must download and install it
yourself.
To install it, decompress the downloaded file and make the ``*.h`` and
``*.so*`` files available to the compilation environment. On Linux,
this can be done by setting the environment variables
``LD_LIBRARY_PATH``, ``LIBRARY_PATH`` and ``CPATH`` to the
uncompressed directory path. Separate multiple directories with ``:``,
as in the ``PATH`` environment variable. Alternatively, you can copy the
``*.h`` files to ``/usr/include`` and the ``*.so*`` files to ``/lib64``.
By default, Theano detects whether it can use cuDNN. If so, it will use
it; if not, Theano optimizations will not introduce cuDNN ops, so
Theano will still work as long as the user did not introduce them manually.
To get an error when Theano cannot use cuDNN, use this Theano flag:
``optimizer_including=cudnn``.
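For a quick availability check from Python, the detection helper can be
called directly (a minimal sketch; it assumes the CUDA backend is usable):

.. code-block:: python

    from theano.sandbox.cuda import dnn

    # dnn_available() caches its result and stores a human-readable
    # explanation in dnn_available.msg.
    if dnn.dnn_available():
        print("cuDNN should work")
    else:
        print("cuDNN unavailable: " + dnn.dnn_available.msg)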
.. note::
Normally you should not call GPU Ops directly, but the CPU interface
currently does not expose all options supported by the cuDNN ops, so
you may need to call them manually.
Functions
=========
.. automodule:: theano.sandbox.cuda.dnn
:members: dnn_conv, dnn_pool
Convolution Ops
===============
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI,
Pooling Ops
===========
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad,
Softmax Ops
===========
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
......@@ -13,6 +13,7 @@
.. toctree::
:maxdepth: 1
var
type
op
dnn
......@@ -123,29 +123,13 @@ def git_version():
git_revision = "unknown-git"
return git_revision
# Python 2.4 compatibility: Python versions 2.6 and later support the new
# exception syntax, but for now we have to resort to exec.
if sys.hexversion >= 0x2070000:
exec("""\
def write_text(filename, text):
with open(filename, 'w') as a:
try:
a.write(text)
except Exception as e:
print(e)
""")
else:
exec("""\
def write_text(filename, text):
a = open(filename, 'w')
try:
try:
a.write(text)
except Exception, e:
print e
finally:
a.close()
""")
def write_text(filename, text):
try:
with open(filename, 'w') as a:
a.write(text)
except Exception as e:
print(e)
def write_version_py(filename=os.path.join('theano', 'generated_version.py')):
......
......@@ -1795,7 +1795,8 @@ class GCC_compiler(object):
return cxxflags
@staticmethod
def try_compile_tmp(src_code, tmp_prefix='', flags=(), try_run=False):
def try_compile_tmp(src_code, tmp_prefix='', flags=(),
try_run=False, output=False):
"""Try to compile (and run) a test program.
This is useful on various occasions, to check if libraries
......@@ -1806,6 +1807,7 @@ class GCC_compiler(object):
If try_run is False, returns the compilation status.
If try_run is True, returns a (compile_status, run_status) pair.
If output is True, we append the stdout and stderr to the returned value.
"""
if not theano.config.cxx:
return False
......@@ -1825,14 +1827,14 @@ class GCC_compiler(object):
os.write(fd, src_code)
os.close(fd)
fd = None
p_ret = call_subprocess_Popen(
out, err, p_ret = output_subprocess_Popen(
[theano.config.cxx, path, '-o', exe_path] + flags)
if p_ret != 0:
compilation_ok = False
elif try_run:
# Try to execute the program
try:
p_ret = call_subprocess_Popen([exe_path])
out, err, p_ret = output_subprocess_Popen([exe_path])
run_ok = (p_ret == 0)
finally:
os.remove(exe_path)
......@@ -1846,13 +1848,18 @@ class GCC_compiler(object):
except OSError, e:
compilation_ok = False
if not try_run:
if not try_run and not output:
return compilation_ok
else:
elif not try_run and output:
return (compilation_ok, out, err)
elif not output:
return (compilation_ok, run_ok)
else:
return (compilation_ok, run_ok, out, err)
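A hedged usage sketch of the extended return contract (the source, flags
and prefix here are illustrative, not part of the commit): with
output=True and try_run=False, the branches above return a
(compile_status, stdout, stderr) triple.

    # Hypothetical probe; any valid C program and flags would do.
    ok, out, err = GCC_compiler.try_compile_tmp(
        "int main(int argc, char** argv) { return 0; }",
        tmp_prefix='doc_probe_', flags=['-O2'],
        try_run=False, output=True)
    if not ok:
        print(err)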
@staticmethod
def try_flags(flag_list):
def try_flags(flag_list, preambule="", body="",
try_run=False, output=False):
'''
Try to compile a dummy file with these flags.
......@@ -1863,13 +1870,16 @@ class GCC_compiler(object):
return False
code = b("""
%(preambule)s
int main(int argc, char** argv)
{
%(body)s
return 0;
}
""")
""" % locals())
return GCC_compiler.try_compile_tmp(code, tmp_prefix='try_flags_',
flags=flag_list, try_run=False)
flags=flag_list, try_run=try_run,
output=output)
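For instance, dnn_available() further below probes for cuDNN through
exactly this interface; a generic sketch (the math-library probe is
illustrative, not from the commit):

    comp, run, out, err = GCC_compiler.try_flags(
        ["-lm"],
        preambule="#include <math.h>",
        body="double d = sqrt(4.0); (void) d;",
        try_run=True, output=True)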
@staticmethod
def compile_str(module_name, src_code, location=None,
......
import os
import theano
from theano import Apply, tensor
from theano import Apply, gof, tensor
from theano.gof import Optimizer
from theano.gof.type import CDataType
from theano.compat import PY3
from theano.sandbox.cuda.type import CudaNdarrayType
......@@ -12,6 +13,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
......@@ -23,9 +25,35 @@ def dnn_available():
dnn_available.msg = "Device not supported by cuDNN"
dnn_available.avail = False
else:
dnn_available.msg = "Can not find the cuDNN library"
dnn_available.avail = theano.gof.cmodule.GCC_compiler.try_flags(
["-l", "cudnn"])
preambule = """
#include <cudnn.h>
#include <stdio.h>
#include <cuda.h>
#include <cudnn_helper.h>
"""
body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
fprintf(stderr, "could not create cuDNN handle: %s",
cudnnGetErrorString(err));
return 1;
}
"""
comp, run, out, err = gof.cmodule.GCC_compiler.try_flags(
["-l", "cudnn", "-I" + os.path.dirname(__file__)],
preambule=preambule, body=body,
try_run=True, output=True)
dnn_available.avail = comp and run
if dnn_available.avail:
dnn_available.msg = "cuDNN should work"
else:
dnn_available.msg = (
"Theano is not able to use cuDNN. We got this error: \n" +
err)
return dnn_available.avail
......@@ -54,14 +82,6 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
""" % dict(var=var, err=err, desc=desc, fail=fail)
def raise_no_dnn():
""" Raise a RuntimeError if cudnn can't be used"""
if not dnn_available():
raise RuntimeError(
"cuDNN optimization was enabled, but cuDNN is not available. " +
dnn_available.msg)
class DnnBase(GpuOp):
"""
Creates a handle for cudnn and pulls in the cudnn libraries and headers.
......@@ -88,7 +108,7 @@ cudnnHandle_t _handle = NULL;
return ["""{
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s",
PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %%s",
cudnnGetErrorString(err));
return %s;
}
......@@ -96,6 +116,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
class GpuDnnConvDesc(GpuOp):
"""This Op builds a convolution descriptor for use in the other
convolution operations.
:param border_mode: 'valid' or 'full'
:param subsample: the subsample factors, a tuple like (dx, dy)
:param conv_mode: 'conv' or 'cross'
"""
__props__ = ('border_mode', 'subsample', 'conv_mode')
def c_headers(self):
......@@ -266,6 +294,9 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
}
""" % dict(var=var, desc=desc, err=err, fail=fail)
def c_set_tensor4d(self, *arg):
return c_set_tensor4d(*arg)
def c_code(self, node, name, inputs, outputs, sub):
desc = inputs[2]
out, = outputs
......@@ -351,6 +382,14 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
class GpuDnnConv(GpuDnnConvBase):
"""
The forward convolution.
:param image: the input image, a 4d tensor
:param kernel: the convolution filters, a 4d tensor
:param descr: the convolution descriptor
"""
conv_inputs = 'input', 'kerns'
conv_output = 'output'
conv_types = 'tensor4d', 'filter', 'tensor4d'
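A hedged usage sketch of the forward convolution (shapes are
illustrative and a GPU-enabled configuration is assumed; the call
mirrors local_conv_dnn further below):

    import theano.tensor as T
    from theano.sandbox.cuda.basic_ops import gpu_contiguous
    from theano.sandbox.cuda.dnn import dnn_conv

    img = T.ftensor4()    # (batch, channels, rows, cols)
    kerns = T.ftensor4()  # (nfilters, channels, krows, kcols)
    out = dnn_conv(gpu_contiguous(img), gpu_contiguous(kerns),
                   border_mode='valid', subsample=(1, 1))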
......@@ -374,6 +413,15 @@ class GpuDnnConv(GpuDnnConvBase):
class GpuDnnConvGradW(GpuDnnConvBase):
"""
The convolution gradient with respect to the weights.
:param image:
:param kernel:
:param descr: the convolution descriptor
"""
conv_inputs = 'input', 'output',
conv_output = 'kerns'
conv_types = 'tensor4d', 'tensor4d', 'filter'
......@@ -382,6 +430,15 @@ class GpuDnnConvGradW(GpuDnnConvBase):
class GpuDnnConvGradI(GpuDnnConvBase):
"""
The convolution gradient with respect to the inputs.
:param image:
:param kernel:
:param descr: the convolution descriptor
"""
conv_inputs = 'kerns', 'output',
conv_output = 'input'
conv_types = 'filter', 'tensor4d', 'tensor4d'
......@@ -415,7 +472,15 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
class GpuDnnPoolDesc(GpuOp):
__props__ = ('mode', 'ws', 'stride')
"""
This Op builds a pooling descriptor for use in the other
pooling operations.
:param ws: the window size, a tuple (wx, wy)
:param stride: the stride, a tuple (dx, dy)
:param mode: 'max' or 'average'
"""
__props__ = ('ws', 'stride', 'mode')
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
......@@ -486,13 +551,19 @@ class GpuDnnPoolDesc(GpuOp):
class GpuDnnPool(DnnBase):
"""
Pooling.
:param img: the image 4d tensor.
:param desc: the pooling descriptor.
"""
__props__ = ()
def make_node(self, img, desc):
img = as_cuda_ndarray_variable(img)
if img.type.ndim != 4:
raise TypeError('img must be 4D tensor')
if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
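A minimal sketch of the dnn_pool wrapper listed in the Functions section
(positional arguments mirror local_pool_dnn further below; a GPU-enabled
configuration is assumed):

    import theano.tensor as T
    from theano.sandbox.cuda.basic_ops import gpu_contiguous
    from theano.sandbox.cuda.dnn import dnn_pool

    img = T.ftensor4()
    # 2x2 max pooling with a matching stride, as the optimizer builds it.
    pooled = dnn_pool(gpu_contiguous(img), (2, 2), (2, 2))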
......@@ -534,10 +605,10 @@ if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); }
out, = outputs
set_in = c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']),
'err' + name, sub['fail'])
'err' + name, sub['fail'])
set_out = c_set_tensor4d(out, "output" + str(sub['struct_id']),
'err' + name, sub['fail'])
'err' + name, sub['fail'])
return """
cudnnStatus_t err%(name)s;
......@@ -612,6 +683,14 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
class GpuDnnPoolGrad(DnnBase):
"""
The pooling gradient.
:param inp: the input of the pooling.
:param inp_grad: the gradient with respect to the output of the pooling,
with the same shape as out.
:param out: the output of the pooling in the forward pass.
:param desc: the pooling descriptor.
"""
__props__ = ()
def make_node(self, inp, inp_grad, out, desc):
......@@ -622,7 +701,7 @@ class GpuDnnPoolGrad(DnnBase):
inp_grad = as_cuda_ndarray_variable(inp_grad)
if inp_grad.type.ndim != 4:
raise TypeError('inp_grad must be 4D tensor')
out = as_cuda_ndarray_variable(out)
if out.type.ndim != 4:
raise TypeError('out must be 4D tensor')
......@@ -685,15 +764,15 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
set_in = "\n".join([
c_set_tensor4d(inp, "input" + str(sub['struct_id']),
'err' + name, sub['fail']),
'err' + name, sub['fail']),
c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']),
'err' + name, sub['fail']),
'err' + name, sub['fail']),
c_set_tensor4d(out, "output" + str(sub['struct_id']),
'err' + name, sub['fail'])
'err' + name, sub['fail'])
])
set_out = c_set_tensor4d(out, "output_grad" + str(sub['struct_id']),
'err' + name, sub['fail'])
'err' + name, sub['fail'])
return """
cudnnStatus_t err%(name)s;
......@@ -735,7 +814,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(output_grad=out_grad, desc=desc, fail=sub['fail'], id=sub['struct_id'],
""" % dict(output_grad=out_grad, desc=desc,
fail=sub['fail'], id=sub['struct_id'],
name=name, set_in=set_in,
set_out=set_out, input=inp, input_grad=inp_grad, output=out,
input_desc="input"+str(sub['struct_id']),
......@@ -773,13 +853,12 @@ class GpuDnnSoftmax(DnnBase):
"""
Op for the cuDNN Softmax.
Parameters''
-tensor_format: Whether the data format is 'bc01' or 'b01c'
-algo: 'fast' or 'accurate' indicating whether computations should be
optimized for speed or accuracy respectively.
-mode: 'instance' or 'channel' indicating whether the softmax should be
computed per image across 'c01' or per spationali location '01' per image
across 'c'.
:param tensor_format: Whether the data format is 'bc01' or 'b01c'
:param algo: 'fast' or 'accurate' indicating whether computations should be
optimized for speed or accuracy respectively.
:param mode: 'instance' or 'channel' indicating whether the softmax should
be computed per image across 'c01' or per spatial location '01' per
image across 'c'.
"""
__props__ = ('tensor_format', 'mode', 'algo')
......@@ -924,11 +1003,14 @@ err%(name)s = cudnnSoftmaxForward(
# We need this since other stuff from opt is not importable.
if cuda_available:
from theano.sandbox.cuda.opt import local_optimizer, gpu_optimizer
from theano.sandbox.cuda.opt import (
local_optimizer, gpu_optimizer, gpu_seqopt)
@register_opt('cudnn')
@local_optimizer([GpuConv])
def local_conv_dnn(node):
raise_no_dnn()
if not dnn_available():
return
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
......@@ -938,11 +1020,11 @@ if cuda_available:
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax])
def local_pool_dnn(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMax):
if node.op.ignore_border:
return
......@@ -950,32 +1032,43 @@ if cuda_available:
ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds)]
gpu_optimizer.register("pool_cudnn", local_pool_dnn, 'cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMaxGrad])
def local_pool_dnn_grad(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMaxGrad):
if node.op.ignore_border:
return
inp, out, inp_grad = node.inputs
ds = node.op.ds
desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
return [GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(inp_grad), gpu_contiguous(out), desc)]
gpu_optimizer.register("pool_cudnn_grad", local_pool_dnn_grad, 'cudnn')
desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
return [GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(inp_grad),
gpu_contiguous(out), desc)]
@register_opt('cudnn')
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
raise_no_dnn()
if not dnn_available():
return
if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
ins = gpu_contiguous(ins)
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(ins)
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
class NoCuDNNRaise(Optimizer):
def apply(self, fgraph):
""" Raise a RuntimeError if cudnn can't be used"""
if not dnn_available():
# Make an assert error as we want Theano to fail, not
# just skip this optimization.
raise AssertionError(
"cuDNN optimization was enabled, but Theano was not able"
" to use it. We got this error: \n" +
dnn_available.msg)
gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
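From user code, this failure check can be triggered by including the
'cudnn' tag in the compilation mode, much as test_dnn_tag does further
below (a sketch; theano.compile.get_default_mode is Theano's standard
mode accessor):

    import theano
    import theano.tensor as T

    x = T.fmatrix()
    mode = theano.compile.get_default_mode().including('cudnn')
    f = theano.function([x], T.nnet.softmax(x), mode=mode)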
......@@ -1163,11 +1163,6 @@ def local_conv_fft_full(node):
return
# Needs to be registered before local_gpu_conv_legacy. Otherwise, it
# will have priority over this optimization. We want, if cudnn is
# available and the GPU supports it, to use it. Otherwise, the gemm
# version should be used. If the users want the legacy convolution,
# they should use the Theano flag to disable the dnn and/or gemm version.
@local_optimizer([GpuConv])
def local_gpu_conv(node):
"""
......@@ -1350,7 +1345,7 @@ conv_groupopt.register("conv_fft_valid", local_conv_fft_valid, 1)
conv_groupopt.register("conv_fft_full", local_conv_fft_full, 1)
# Use cuDNN if available, and keep the 'cudnn' tag so it can be disabled.
conv_groupopt.register('local_gpu_conv', local_gpu_conv, 10,
'fast_compile', 'fast_run', 'dnn')
'fast_compile', 'fast_run', 'cudnn')
conv_groupopt.register('local_conv_gemm', local_conv_gemm, 12,
'fast_compile', 'fast_run')
......
import logging
import unittest
from nose.plugins.skip import SkipTest
import numpy
import theano
from theano.compat.six import StringIO
from theano.gof.python25 import any
import theano.tensor as T
import theano.tests.unittest_tools as utt
......@@ -85,7 +88,7 @@ def test_pooling_opt():
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2)),
mode=mode_with_gpu.including("cudnn"))
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
......@@ -97,3 +100,36 @@ def test_pooling_opt():
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
def test_dnn_tag():
"""
We test that we crash if cuDNN is not available, and that we use it when it is.
"""
x = T.ftensor4()
old = theano.config.on_opt_error
theano.config.on_opt_error = "raise"
sio = StringIO()
handler = logging.StreamHandler(sio)
logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
# Silence original handler when intentionally generating warning messages
logging.getLogger('theano').removeHandler(theano.logging_default_handler)
raised = False
try:
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2)),
mode=mode_with_gpu.including("cudnn"))
except RuntimeError, e:
assert not cuda.dnn.dnn_available()
raised = True
finally:
theano.config.on_opt_error = old
logging.getLogger('theano.compile.tests.test_dnn').removeHandler(handler)
logging.getLogger('theano').addHandler(theano.logging_default_handler)
if not raised:
assert cuda.dnn.dnn_available()
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
......@@ -1504,7 +1504,7 @@ class TrueDiv(BinaryScalarOp):
x = numpy.asarray(x)
y = numpy.asarray(y)
if all(a.dtype in discrete_types for a in (x, y)):
return numpy.array(float(x) / y, dtype=config.floatX)
return numpy.sctypeDict[config.floatX](float(x) / y)
else:
return x / y
......@@ -2166,7 +2166,7 @@ neg = Neg(same_out, name='neg')
class Inv(UnaryScalarOp):
""" multiplicative inverse. Also called reciprocal"""
def impl(self, x):
return 1.0 / x
return numpy.float32(1.0) / x
def grad(self, (x,), (gz,)):
if x.type in complex_types:
......@@ -2180,6 +2180,8 @@ class Inv(UnaryScalarOp):
return -gz / (x * x),
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type in complex_types:
raise NotImplementedError()
return "%(z)s = 1.0 / %(x)s;" % locals()
inv = Inv(upgrade_to_float, name='inv')
......@@ -2190,6 +2192,11 @@ class Log(UnaryScalarOp):
amd_float64 = "amd_vrda_log"
def impl(self, x):
# If x is an int8 or uint8, numpy.log will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log(x, sig='f')
return numpy.log(x)
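A quick illustration of the behaviour this pattern works around (the
dtypes shown are what NumPy's ufunc loop selection gives on builds of
this era):

    import numpy
    x = numpy.arange(1, 5, dtype='int8')
    assert numpy.log(x).dtype == numpy.float16        # unwanted half precision
    assert numpy.log(x, sig='f').dtype == numpy.float32  # forced float32 loop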
def grad(self, (x,), (gz,)):
......@@ -2219,6 +2226,11 @@ class Log2(UnaryScalarOp):
amd_float64 = "amd_vrda_log2"
def impl(self, x):
# If x is an int8 or uint8, numpy.log2 will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log2(x, sig='f')
return numpy.log2(x)
def grad(self, (x,), (gz,)):
......@@ -2245,6 +2257,11 @@ class Log10(UnaryScalarOp):
amd_float64 = "amd_vrda_log10"
def impl(self, x):
# If x is an int8 or uint8, numpy.log10 will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log10(x, sig='f')
return numpy.log10(x)
def grad(self, (x,), (gz,)):
......@@ -2268,6 +2285,11 @@ log10 = Log10(upgrade_to_float, name='log10')
class Log1p(UnaryScalarOp):
""" log(1+x) """
def impl(self, x):
# If x is an int8 or uint8, numpy.log1p will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log1p(x, sig='f')
return numpy.log1p(x)
def grad(self, (x,), (gz,)):
......@@ -2293,6 +2315,11 @@ class Exp(UnaryScalarOp):
amd_float64 = "amd_vrda_exp"
def impl(self, x):
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.exp(x, sig='f')
return numpy.exp(x)
def grad(self, (x, ), (gz, )):
......@@ -2315,6 +2342,11 @@ exp = Exp(upgrade_to_float, name='exp')
class Exp2(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.exp2 will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.exp2(x, sig='f')
return numpy.exp2(x)
def grad(self, (x, ), (gz, )):
......@@ -2337,6 +2369,11 @@ exp2 = Exp2(upgrade_to_float, name='exp2')
class Expm1(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.expm1 will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.expm1(x, sig='f')
return numpy.expm1(x)
def grad(self, (x, ), (gz, )):
......@@ -2382,6 +2419,11 @@ sqr = Sqr(same_out, name='sqr')
class Sqrt(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.sqrt will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.sqrt(x, sig='f')
return numpy.sqrt(x)
def grad(self, (x,), (gz,)):
......@@ -2404,6 +2446,11 @@ sqrt = Sqrt(upgrade_to_float, name='sqrt')
class Deg2Rad(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.deg2rad will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.deg2rad(x, sig='f')
return numpy.deg2rad(x)
def grad(self, (x,), (gz,)):
......@@ -2426,6 +2473,11 @@ deg2rad = Deg2Rad(upgrade_to_float, name='deg2rad')
class Rad2Deg(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.rad2deg will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.rad2deg(x, sig='f')
return numpy.rad2deg(x)
def grad(self, (x,), (gz,)):
......@@ -2451,6 +2503,11 @@ class Cos(UnaryScalarOp):
amd_float64 = "amd_vrda_cos"
def impl(self, x):
# If x is an int8 or uint8, numpy.cos will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.cos(x, sig='f')
return numpy.cos(x)
def grad(self, (x, ), (gz, )):
......@@ -2473,6 +2530,11 @@ cos = Cos(upgrade_to_float, name='cos')
class ArcCos(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.arccos will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.arccos(x, sig='f')
return numpy.arccos(x)
def grad(self, (x,), (gz,)):
......@@ -2498,6 +2560,11 @@ class Sin(UnaryScalarOp):
amd_float64 = "amd_vrda_sin"
def impl(self, x):
# If x is an int8 or uint8, numpy.sin will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.sin(x, sig='f')
return numpy.sin(x)
def grad(self, (x, ), (gz, )):
......@@ -2520,6 +2587,11 @@ sin = Sin(upgrade_to_float, name='sin')
class ArcSin(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.arcsin will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.arcsin(x, sig='f')
return numpy.arcsin(x)
def grad(self, (x,), (gz,)):
......@@ -2542,6 +2614,11 @@ arcsin = ArcSin(upgrade_to_float, name='arcsin')
class Tan(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.tan will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.tan(x, sig='f')
return numpy.tan(x)
def grad(self, (x,), (gz,)):
......@@ -2564,6 +2641,11 @@ tan = Tan(upgrade_to_float, name='tan')
class ArcTan(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.arctan will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.arctan(x, sig='f')
return numpy.arctan(x)
def grad(self, (x,), (gz,)):
......@@ -2586,6 +2668,13 @@ arctan = ArcTan(upgrade_to_float, name='arctan')
class ArcTan2(BinaryScalarOp):
def impl(self, y, x):
# If x and y are int8 or uint8, numpy.arctan2 will compute the result
# in half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
y_dtype = str(getattr(y, 'dtype', ''))
if y_dtype in ('int8', 'uint8'):
return numpy.arctan2(y, x, sig='f')
return numpy.arctan2(y, x)
def grad(self, (y, x), (gz,)):
......@@ -2621,6 +2710,11 @@ class Cosh(UnaryScalarOp):
cosh(x) = (exp(x) + exp(-x)) / 2
"""
def impl(self, x):
# If x is an int8 or uint8, numpy.cosh will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.cosh(x, sig='f')
return numpy.cosh(x)
def grad(self, (x, ), (gz, )):
......@@ -2643,6 +2737,11 @@ cosh = Cosh(upgrade_to_float, name='cosh')
class ArcCosh(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.arccosh will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.arccosh(x, sig='f')
return numpy.arccosh(x)
def grad(self, (x, ), (gz, )):
......@@ -2668,6 +2767,11 @@ class Sinh(UnaryScalarOp):
sinh(x) = (exp(x) - exp(-x)) / 2
"""
def impl(self, x):
# If x is an int8 or uint8, numpy.sinh will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.sinh(x, sig='f')
return numpy.sinh(x)
def grad(self, (x, ), (gz, )):
......@@ -2690,6 +2794,11 @@ sinh = Sinh(upgrade_to_float, name='sinh')
class ArcSinh(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.arcsinh will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.arcsinh(x, sig='f')
return numpy.arcsinh(x)
def grad(self, (x, ), (gz, )):
......@@ -2716,6 +2825,11 @@ class Tanh(UnaryScalarOp):
= (exp(2*x) - 1) / (exp(2*x) + 1)
"""
def impl(self, x):
# If x is an int8 or uint8, numpy.tanh will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.tanh(x, sig='f')
return numpy.tanh(x)
def grad(self, (x, ), (gz, )):
......@@ -2738,6 +2852,11 @@ tanh = Tanh(upgrade_to_float, name='tanh')
class ArcTanh(UnaryScalarOp):
def impl(self, x):
# If x is an int8 or uint8, numpy.arctanh will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.arctanh(x, sig='f')
return numpy.arctanh(x)
def grad(self, (x, ), (gz, )):
......
......@@ -10,6 +10,7 @@ If you do want to rewrite these tests, bear in mind:
"""
import unittest
import numpy as np
import theano
from theano.gof import FunctionGraph
......@@ -20,8 +21,12 @@ from theano.scalar.basic import (floats, float32, float64,
ints, int8, int32, complex64,
ComplexError, IntDiv, TrueDiv,
Composite, add, div_proxy, clip,
and_, eq, neq, invert, mul)
import numpy
and_, eq, neq, invert, mul, Scalar)
from theano.scalar.basic import (
true_div, inv, log, log2, log10, log1p, exp, exp2, expm1, sqrt, deg2rad,
rad2deg, cos, arccos, sin, arcsin, tan, arctan, arctan2, cosh, arccosh,
sinh, arcsinh, tanh, arctanh)
def inputs():
return floats('xyz')
......@@ -75,7 +80,7 @@ class test_ScalarOps(unittest.TestCase):
g3 = theano.gradient.grad(a3, x)
fn3 = gof.DualLinker().accept(FunctionGraph([x], [g3])).make_function()
rng = numpy.random.RandomState(utt.fetch_seed())
rng = np.random.RandomState(utt.fetch_seed())
ntests = 50
for i in xrange(ntests):
......@@ -235,6 +240,128 @@ class test_logical(unittest.TestCase):
self.assertTrue(fn(a,b) == ~a, (a,))
# This class does not inherit from unittest.TestCase, because it would
# interfere with the "yield" mechanism that automatically generates test, see
# http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class
# Therefore, it needs to be named "test_..." or "Test_...", so nose can pick
# it up by name, otherwise the tests would not be executed.
class test_upgrade_to_float(object):
# Test for Ops whose output has to be floating point, even when all
# inputs are ints.
# In particular, when the inputs are int8, the output should be
# at least float32, not float16.
unary_ops_vals = [
(inv, range(-127, 0) + range(1, 127)),
(sqrt, range(0, 128)),
(log, range(1, 128)),
(log2, range(1, 128)),
(log10, range(1, 128)),
(log1p, range(0, 128)),
(exp, range(-127, 89)),
(exp2, range(-127, 89)),
(expm1, range(-127, 89)),
(deg2rad, range(-127, 128)),
(rad2deg, range(-127, 128)),
(cos, range(-127, 128)),
(arccos, range(-1, 2)),
(cosh, range(-89, 90)),
(arccosh, range(1, 128)),
(sin, range(-127, 128)),
(arcsin, range(-1, 2)),
(sinh, range(-89, 90)),
(arcsinh, range(-127, 128)),
(tan, range(-3, 4)),
(arctan, range(-127, 128)),
(tanh, range(-127, 128)),
(arctanh, [0])]
binary_ops_vals = [
(arctan2, range(-127, 128), range(-127, 128))]
@staticmethod
def _test_unary(unary_op, x_range):
xi = int8('xi')
xf = float32('xf')
ei = unary_op(xi)
fi = theano.function([xi], ei)
ef = unary_op(xf)
ff = theano.function([xf], ef)
for x_val in x_range:
outi = fi(x_val)
outf = ff(x_val)
assert outi.dtype == outf.dtype, 'incorrect dtype'
assert np.allclose(outi, outf), 'insufficient precision'
@staticmethod
def _test_binary(binary_op, x_range, y_range):
xi = int8('xi')
yi = int8('yi')
xf = float32('xf')
yf = float32('yf')
ei = binary_op(xi, yi)
fi = theano.function([xi, yi], ei)
ef = binary_op(xf, yf)
ff = theano.function([xf, yf], ef)
for x_val in x_range:
for y_val in y_range:
outi = fi(x_val, y_val)
outf = ff(x_val, y_val)
assert outi.dtype == outf.dtype, 'incorrect dtype'
assert np.allclose(outi, outf), 'insufficient precision'
def test_true_div(self):
# true_div's upcast policy is not exactly "upgrade_to_float",
# so the test is a little bit different
x_range = range(-127, 128)
y_range = range(-127, 0) + range(1, 127)
xi = int8('xi')
yi = int8('yi')
xf = Scalar(theano.config.floatX)('xf')
yf = Scalar(theano.config.floatX)('yf')
ei = true_div(xi, yi)
fi = theano.function([xi, yi], ei)
ef = true_div(xf, yf)
ff = theano.function([xf, yf], ef)
for x_val in x_range:
for y_val in y_range:
outi = fi(x_val, y_val)
outf = ff(x_val, y_val)
assert outi.dtype == outf.dtype, 'incorrect dtype'
assert np.allclose(outi, outf), 'insufficient precision'
def test_unary(self):
# Automatically define all individual unary tests
for unary_op, x_range in self.unary_ops_vals:
test_name = 'test_%s' % unary_op.name
# Make a lambda function so we can name the test; bind the loop
# variables as default arguments so each yielded test keeps its own op.
test = lambda op=unary_op, r=x_range: self._test_unary(op, r)
test.description = test_name
yield test
def test_binary(self):
# Automatically define all individual binary tests
for binary_op, x_range, y_range in self.binary_ops_vals:
test_name = 'test_%s' % binary_op.name
# Make a lambda function so we can name the test; bind the loop
# variables as default arguments so each yielded test keeps its own op.
test = lambda op=binary_op, xr=x_range, yr=y_range: \
self._test_binary(op, xr, yr)
test.description = test_name
yield test
class test_complex_mod(unittest.TestCase):
"""Make sure % fails on complex numbers."""
......
......@@ -1812,7 +1812,7 @@ def round(a, mode="half_away_from_zero"):
raise Exception("round mode %s is not implemented." % mode)
@_scal_elemwise_with_nfunc('around', 1, -1)
@_scal_elemwise_with_nfunc('around', 1, 1)
def round_half_to_even(a):
"""round_half_to_even(a)"""
......@@ -1952,20 +1952,20 @@ def chi2sf(x, k):
#numpy.real(float32) return a view on the inputs.
#@_scal_elemwise_with_nfunc('real', 1, -1)
#@_scal_elemwise_with_nfunc('real', 1, 1)
@_scal_elemwise
def real(z):
"""Return real component of complex-valued tensor `z`"""
_tensor_py_operators.real = property(real)
@_scal_elemwise_with_nfunc('imag', 1, -1)
@_scal_elemwise_with_nfunc('imag', 1, 1)
def imag(z):
"""Return imaginary component of complex-valued tensor `z`"""
_tensor_py_operators.imag = property(imag)
@_scal_elemwise_with_nfunc('angle', 1, -1)
@_scal_elemwise_with_nfunc('angle', 1, 1)
def angle(z):
"""Return polar-coordinate angle of complex-valued tensor `z`"""
......@@ -1975,7 +1975,7 @@ def complex(real, imag):
"""Return complex-valued tensor with `real` and `imag` components"""
@_scal_elemwise_with_nfunc('conj', 1, -1)
@_scal_elemwise_with_nfunc('conj', 1, 1)
def conj(z):
"""Return the complex conjugate of `z`."""
......
......@@ -18,9 +18,10 @@ from theano.tensor import elemwise_cgen as cgen
config = theano.config
# We cannot import discrete_dtypes from tensor.basic yet,
# We cannot import discrete_dtypes or float_dtypes from tensor.basic yet,
# so we redefine them here
discrete_dtypes = map(str, scalar.discrete_types)
float_dtypes = map(str, scalar.float_types)
# tensor depends on elemwise to provide definitions for several ops
......@@ -472,14 +473,11 @@ class Elemwise(OpenMPOp):
the input's storage. (Just like destroymap, but without the lists.)
* nfunc_spec: either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and abs(nout) outputs
(nout < 0 if the numpy function does not provide the option of
providing a numpy array to store the results in). Note that nin
cannot always be inferred from the scalar op's own nin field
because that value is sometimes 0 (meaning a variable number of
inputs), whereas the numpy function may not have varargs.
NOTE: as of now, the sign of the nout field is ignored (some work
needs to be done to resize the destinations when needed).
implements this operation, takes nin inputs and nout outputs.
Note that nin cannot always be inferred from the scalar op's
own nin field because that value is sometimes 0 (meaning a
variable number of inputs), whereas the numpy function may
not have varargs.
"""
if inplace_pattern is None:
inplace_pattern = {}
......@@ -819,43 +817,24 @@ class Elemwise(OpenMPOp):
out_shape.append(max(values))
out_shape = tuple(out_shape)
# Commented as we don't reuse outputs now.
#
# if not self.inplace_pattern:
# for output, storage in izip(node.outputs, output_storage):
# odat = storage[0]
# if odat is not None:
# if odat.shape != out_shape:
# # It is unsafe to try to resize odat,
# # we have to allocate output storage.
# odat = None
# if odat is None:
# odat = numpy.ndarray(out_shape, dtype=output.type.dtype)
# storage[0] = odat
# else:
# for i, (output, storage) in enumerate(
# izip(node.outputs, output_storage)):
# #i is an output idx
# if i in self.inplace_pattern:
# odat = inputs[self.inplace_pattern[i]]
# else:
# odat = storage[0]
# if odat is not None:
# if odat.shape != out_shape:
# # It is unsafe to try to resize odat,
# # we have to allocate output storage.
# odat = None
# if odat is None:
# odat = numpy.ndarray(out_shape,
# dtype=output.type.dtype)
# storage[0] = odat
ufunc_args = inputs # + output_storage
ufunc_args = inputs
ufunc_kwargs = {}
if self.nfunc and len(inputs) == self.nfunc_spec[1]:
ufunc = self.nfunc
nout = self.nfunc_spec[2]
if nout < 0:
nout = -nout
# Numpy ufuncs will sometimes perform operations in
# float16, in particular when the input is int8.
# This is not something that we want, and we do not
# do it in the C code, so we specify that the computation
# should be carried out in the returned dtype.
# This is done via the "sig" kwarg of the ufunc, its value
# should be something like "ff->f", where the characters
# represent the dtype of the inputs and outputs.
out_dtype = node.outputs[0].dtype
if out_dtype in float_dtypes and isinstance(ufunc, numpy.ufunc):
char = numpy.sctype2char(out_dtype)
sig = char * node.nin + '->' + char * node.nout
ufunc_kwargs['sig'] = sig
# Unfortunately, the else case does not allow us to
# directly feed the destination arguments to the nfunc
# since it sometimes requires resizing. Doing this
......@@ -869,7 +848,7 @@ class Elemwise(OpenMPOp):
self.scalar_op.nout))
nout = ufunc.nout
variables = ufunc(*ufunc_args)
variables = ufunc(*ufunc_args, **ufunc_kwargs)
if nout == 1:
variables = [variables]
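For example, for an elemwise exp on an int8 input whose output dtype is
float32 (nin=1, nout=1), the signature built above works out to:

    char = numpy.sctype2char('float32')  # 'f'
    sig = char * 1 + '->' + char * 1     # 'f->f'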
......
......@@ -31,6 +31,11 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return 0.0
if x > 30.0:
return 1.0
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return 1.0 / (1.0 + numpy.exp(-x, sig='f'))
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
......@@ -268,8 +273,11 @@ def hard_sigmoid(x):
Removing the slope and shift does not make it faster.
"""
slope = 0.2
shift = 0.5
# Use the same dtype as determined by "upgrade_to_float",
# and perform computation in that dtype.
out_dtype = scalar.upgrade_to_float(scalar.Scalar(dtype=x.dtype))[0].dtype
slope = tensor.constant(0.2, dtype=out_dtype)
shift = tensor.constant(0.5, dtype=out_dtype)
x = (x * slope) + shift
x = tensor.clip(x, 0, 1)
return x
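A sketch of what upgrade_to_float resolves to here (this mirrors the
contract exercised by test_upgrade_to_float elsewhere in this commit):

    from theano import scalar
    # int8 inputs must upgrade to at least float32, never float16.
    assert scalar.upgrade_to_float(
        scalar.Scalar(dtype='int8'))[0].dtype == 'float32'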
......@@ -300,6 +308,11 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return 0.0
if x > 30.0:
return x
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log1p(numpy.exp(x, sig='f'))
return numpy.log1p(numpy.exp(x))
def impl(self, x):
......
......@@ -16,7 +16,7 @@ from theano.tensor.nnet.sigm import (
register_local_1msigmoid, simplify_mul,
)
from theano.tensor.tests.test_basic import (makeBroadcastTester, rand,
check_floatX,
check_floatX, upcast_int8_nfunc,
_good_broadcast_unary_normal_no_complex)
......@@ -30,8 +30,8 @@ class T_sigmoid(unittest.TestCase):
SigmoidTester = makeBroadcastTester(
op=sigmoid,
expected=lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))),
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal,
name='SigmoidTester',
......@@ -39,8 +39,8 @@ SigmoidTester = makeBroadcastTester(
UltraFastSigmoidTester = makeBroadcastTester(
op=ultra_fast_sigmoid,
expected=lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))),
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal,
name='UltraFastSigmoidTester',
......@@ -49,20 +49,21 @@ UltraFastSigmoidTester = makeBroadcastTester(
HardSigmoidTester = makeBroadcastTester(
op=hard_sigmoid,
expected=lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))),
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal,
name='UltraFastSigmoidTester',
name='HardSigmoidTester',
# This is an approximation of the sigmoid. That is why we raise eps.
eps=1e-1)
SoftplusTester = makeBroadcastTester(
op=softplus,
expected=lambda inputs: check_floatX(
inputs, numpy.log1p(numpy.exp(inputs))),
good=_good_broadcast_unary_normal_no_complex,
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, numpy.log1p(numpy.exp(inputs)))),
good=dict(_good_broadcast_unary_normal_no_complex,
int8=[numpy.arange(-127, 89, dtype='int8')]),
#grad=_grad_broadcast_unary_normal,
name='SoftplusTester',
)
......
......@@ -189,6 +189,50 @@ def safe_make_node(op, *inputs):
return node.owner
def upcast_float16_ufunc(fn):
"""Decorator that enforces computation is not done in float16 by NumPy.
Some ufuncs in NumPy will compute float values on int8 and uint8
in half-precision (float16), which is not enough, and not compatible
with the C code.
:param fn: numpy ufunc
:returns: function similar to fn.__call__, computing the same
value with a minimum floating-point precision of float32
"""
def ret(*args, **kwargs):
out_dtype = numpy.find_common_type(
[a.dtype for a in args], [numpy.float16])
if out_dtype == 'float16':
# Force everything to float32
sig = 'f' * fn.nin + '->' + 'f' * fn.nout
kwargs.update(sig=sig)
return fn(*args, **kwargs)
return ret
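Usage sketch (assumes NumPy would otherwise pick a float16 loop for the
int8 input, as described above):

    exp32 = upcast_float16_ufunc(numpy.exp)
    x = numpy.arange(-5, 5, dtype='int8')
    assert exp32(x).dtype == numpy.float32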
def upcast_int8_nfunc(fn):
"""Decorator that upcasts input of dtype int8 to float32.
This is so that floating-point computation is not carried using
half-precision (float16), as some NumPy functions do.
:param fn: function computing a floating-point value from inputs
:returns: function similar to fn, but upcasting its uint8 and int8
inputs before carrying out the computation.
"""
def ret(*args, **kwargs):
args = list(args)
for i, a in enumerate(args):
if getattr(a, 'dtype', None) in ('int8', 'uint8'):
args[i] = a.astype('float32')
return fn(*args, **kwargs)
return ret
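Usage sketch (the wrapped function sees float32 inputs instead of int8):

    log32 = upcast_int8_nfunc(numpy.log)
    assert log32(numpy.arange(1, 5, dtype='int8')).dtype == numpy.float32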
def makeTester(name, op, expected, checks=None, good=None, bad_build=None,
bad_runtime=None, grad=None, mode=None, grad_rtol=None,
eps=1e-10, skip=False, test_memmap=True, check_name=True):
......@@ -321,7 +365,8 @@ def makeTester(name, op, expected, checks=None, good=None, bad_build=None,
expecteds = self.expected(*inputs)
eps = 1e-10
if any([i.dtype == 'float32' for i in inputs]):
if any([i.dtype in ('float32', 'int8', 'uint8')
for i in inputs]):
eps = 1e-6
eps = numpy.max([eps, _eps])
......@@ -788,6 +833,9 @@ _good_broadcast_div_mod_normal_float_no_complex = dict(
integer=(randint(2, 3), randint_nonzero(2, 3)),
uinteger=(randint(2, 3).astype("uint8"),
randint_nonzero(2, 3).astype("uint8")),
int8=[numpy.tile(numpy.arange(-127, 128, dtype='int8'), [254, 1]).T,
numpy.tile(numpy.array(range(-127, 0) + range(1, 128), dtype='int8'),
[255, 1])],
# This empty2 doesn't work for some tests. I don't remember why
#empty2=(numpy.asarray([0]), numpy.asarray([])),
)
......@@ -853,7 +901,7 @@ def _numpy_true_div(x, y):
TrueDivTester = makeBroadcastTester(
op=tensor.true_div,
expected=_numpy_true_div,
good=_good_broadcast_div_mod_normal_float,
good=_good_broadcast_div_mod_normal_float_no_complex,
grad=_grad_broadcast_div_mod_normal,
grad_rtol=div_grad_rtol,
)
......@@ -864,12 +912,48 @@ TrueDivInplaceTester = makeBroadcastTester(
good=copymod(
_good_broadcast_div_mod_normal_float_inplace,
# The output is now in float, we cannot work inplace on an int.
without=['integer', 'uinteger']),
without=['integer', 'uinteger', 'int8']),
grad=_grad_broadcast_div_mod_normal,
grad_rtol=div_grad_rtol,
inplace=True)
_good_inv = dict(
normal=[5 * rand_nonzero((2, 3))],
integers=[randint_nonzero(2, 3)],
int8=[numpy.array(range(-127, 0) + range(1, 127), dtype='int8')],
complex=[randcomplex_nonzero((2, 3))],
empty=[numpy.asarray([], dtype=config.floatX)])
_good_inv_inplace = copymod(_good_inv, without=['integers', 'int8', 'complex'])
_grad_inv = copymod(_good_inv,
without=['integers', 'int8', 'complex', 'empty'])
_bad_runtime_inv = dict(
float=[numpy.zeros((2, 3))],
integers=[numpy.zeros((2, 3), dtype='int64')],
int8=[numpy.zeros((2, 3), dtype='int8')],
complex=[numpy.zeros((2, 3), dtype='complex128')])
InvTester = makeBroadcastTester(
op=tensor.inv,
expected=lambda x: upcast_int8_nfunc(numpy.true_divide)(numpy.int8(1), x),
good=_good_inv,
bad_runtime=_bad_runtime_inv,
grad=_grad_inv,
grad_rtol=div_grad_rtol)
InvInplaceTester = makeBroadcastTester(
op=inplace.inv_inplace,
expected=lambda x: _numpy_true_div(numpy.int8(1), x),
good=_good_inv_inplace,
bad_runtime=_bad_runtime_inv,
grad=_grad_inv,
grad_rtol=div_grad_rtol,
inplace=True)
CeilIntDivTester = makeBroadcastTester(
op=tensor.ceil_intdiv,
expected=lambda x, y: check_floatX((x, y), (x // y) + ((x % y) != 0)),
......@@ -990,6 +1074,8 @@ _good_broadcast_unary_normal = dict(
normal=[numpy.asarray(rand_ranged(-5, 5, (2, 3)),
dtype=config.floatX)],
integers=[randint_ranged(-5, 5, (2, 3))],
# not using -128 because numpy.allclose would return False
int8=[numpy.arange(-127, 128, dtype='int8')],
corner_case=[corner_case],
complex=[randcomplex(2, 3)],
empty=[numpy.asarray([], dtype=config.floatX)],
......@@ -998,6 +1084,7 @@ _good_broadcast_unary_normal = dict(
_good_broadcast_unary_normal_no_complex = dict(
normal=[numpy.asarray(rand_ranged(-5, 5, (2, 3)), dtype=floatX)],
integers=[randint_ranged(-5, 5, (2, 3))],
int8=[numpy.arange(-127, 128, dtype='int8')],
corner_case=[corner_case],
empty=[numpy.asarray([], dtype=config.floatX)],
)
......@@ -1020,6 +1107,8 @@ _grad_broadcast_unary_0_2_no_complex = dict(
normal=[numpy.asarray(rand_ranged(0, 2, (2, 3)), dtype=floatX)],
)
#inplace ops when the input is integer and the output is float*
# don't have a well defined behavior. We don't test that case.
AbsTester = makeBroadcastTester(op=tensor.abs_,
expected=lambda x: abs(x),
......@@ -1160,112 +1249,123 @@ SqrInplaceTester = makeBroadcastTester(op=inplace.sqr_inplace,
grad=_grad_broadcast_unary_normal,
inplace=True)
ExpTester = makeBroadcastTester(op=tensor.exp,
expected=numpy.exp,
good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal)
ExpInplaceTester = makeBroadcastTester(op=inplace.exp_inplace,
expected=numpy.exp,
good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal,
inplace=True)
def _numpy_exp2_round_int(x):
# Make sure exp2 on an int returns a value that can be correctly casted
# to an int. For instance, numpy.exp2(4) sometimes returns
# 15.999999999999998, we make sure we return 16. instead.
# This is used in Exp2InplaceTester.
out = numpy.exp2(x)
if x.dtype in tensor.discrete_dtypes:
out = numpy.round(out)
return out
ExpTester = makeBroadcastTester(
op=tensor.exp,
expected=upcast_float16_ufunc(numpy.exp),
good=dict(_good_broadcast_unary_normal,
int8=[numpy.arange(-127, 89, dtype='int8')]),
grad=_grad_broadcast_unary_normal)
ExpInplaceTester = makeBroadcastTester(
op=inplace.exp_inplace,
expected=numpy.exp,
good=_good_broadcast_unary_normal_float,
grad=_grad_broadcast_unary_normal,
inplace=True)
Exp2Tester = makeBroadcastTester(op=tensor.exp2,
expected=numpy.exp2,
expected=upcast_float16_ufunc(numpy.exp2),
good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal)
Exp2InplaceTester = makeBroadcastTester(op=inplace.exp2_inplace,
expected=_numpy_exp2_round_int,
good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal,
inplace=True)
Exp2InplaceTester = makeBroadcastTester(
op=inplace.exp2_inplace,
expected=numpy.exp2,
good=_good_broadcast_unary_normal_float,
grad=_grad_broadcast_unary_normal,
inplace=True)
Expm1Tester = makeBroadcastTester(op=tensor.expm1,
expected=numpy.expm1,
good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal)
Expm1InplaceTester = makeBroadcastTester(op=inplace.expm1_inplace,
expected=numpy.expm1,
good=_good_broadcast_unary_normal,
grad=_grad_broadcast_unary_normal,
inplace=True)
Expm1Tester = makeBroadcastTester(
op=tensor.expm1,
expected=upcast_float16_ufunc(numpy.expm1),
good=dict(_good_broadcast_unary_normal,
int8=[numpy.arange(-127, 89, dtype='int8')]),
grad=_grad_broadcast_unary_normal)
Expm1InplaceTester = makeBroadcastTester(
op=inplace.expm1_inplace,
expected=numpy.expm1,
good=_good_broadcast_unary_normal_float,
grad=_grad_broadcast_unary_normal,
inplace=True)
_good_broadcast_unary_positive = dict(
normal=(rand_ranged(0.001, 5, (2, 3)),),
integers=(randint_ranged(1, 5, (2, 3)),),
uint8=[numpy.arange(1, 256, dtype='uint8')],
complex=(randc128_ranged(1, 5, (2, 3)),),
empty=(numpy.asarray([], dtype=config.floatX),),
)
_good_broadcast_unary_positive = dict(normal=(rand_ranged(0.001, 5, (2, 3)),),
integers=(randint_ranged(1, 5, (2, 3)),),
complex=(randc128_ranged(1, 5, (2, 3)),),
empty=(numpy.asarray([], dtype=config.floatX),),
)
_good_broadcast_unary_positive_float = copymod(
_good_broadcast_unary_positive,
without=['integers', 'uint8'])
_grad_broadcast_unary_positive = dict(normal=(rand_ranged(0.001, 5, (2, 3)),),)
LogTester = makeBroadcastTester(op=tensor.log,
expected=numpy.log,
expected=upcast_float16_ufunc(numpy.log),
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive)
LogInplaceTester = makeBroadcastTester(op=inplace.log_inplace,
expected=numpy.log,
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive,
inplace=True)
LogInplaceTester = makeBroadcastTester(
op=inplace.log_inplace,
expected=numpy.log,
good=_good_broadcast_unary_positive_float,
grad=_grad_broadcast_unary_positive,
inplace=True)
Log2Tester = makeBroadcastTester(op=tensor.log2,
expected=numpy.log2,
expected=upcast_float16_ufunc(numpy.log2),
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive)
Log2InplaceTester = makeBroadcastTester(op=inplace.log2_inplace,
expected=numpy.log2,
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive,
inplace=True)
Log2InplaceTester = makeBroadcastTester(
op=inplace.log2_inplace,
expected=numpy.log2,
good=_good_broadcast_unary_positive_float,
grad=_grad_broadcast_unary_positive,
inplace=True)
Log10Tester = makeBroadcastTester(op=tensor.log10,
expected=numpy.log10,
expected=upcast_float16_ufunc(numpy.log10),
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive)
Log10InplaceTester = makeBroadcastTester(op=inplace.log10_inplace,
expected=numpy.log10,
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive,
inplace=True)
Log10InplaceTester = makeBroadcastTester(
op=inplace.log10_inplace,
expected=numpy.log10,
good=_good_broadcast_unary_positive_float,
grad=_grad_broadcast_unary_positive,
inplace=True)
Log1pTester = makeBroadcastTester(op=tensor.log1p,
expected=numpy.log1p,
expected=upcast_float16_ufunc(numpy.log1p),
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive)
Log1pInplaceTester = makeBroadcastTester(op=inplace.log1p_inplace,
expected=numpy.log1p,
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive,
inplace=True)
Log1pInplaceTester = makeBroadcastTester(
op=inplace.log1p_inplace,
expected=numpy.log1p,
good=_good_broadcast_unary_positive_float,
grad=_grad_broadcast_unary_positive,
inplace=True)
SqrtTester = makeBroadcastTester(op=tensor.sqrt,
expected=numpy.sqrt,
expected=upcast_float16_ufunc(numpy.sqrt),
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive)
SqrtInplaceTester = makeBroadcastTester(op=inplace.sqrt_inplace,
expected=numpy.sqrt,
good=_good_broadcast_unary_positive,
grad=_grad_broadcast_unary_positive,
inplace=True)
SqrtInplaceTester = makeBroadcastTester(
op=inplace.sqrt_inplace,
expected=numpy.sqrt,
good=_good_broadcast_unary_positive_float,
grad=_grad_broadcast_unary_positive,
inplace=True)
_good_broadcast_unary_wide = dict(
normal=(rand_ranged(-1000, 1000, (2, 3)),),
integers=(randint_ranged(-1000, 1000, (2, 3)),),
int8=[numpy.arange(-127, 128, dtype='int8')],
complex=(randc128_ranged(-1000, 1000, (2, 3)),),
empty=(numpy.asarray([], dtype=config.floatX),),)
_good_broadcast_unary_wide_float = copymod(
_good_broadcast_unary_wide,
without=['integers', 'int8'])
_grad_broadcast_unary_wide = dict(normal=(rand_ranged(-1000, 1000, (2, 3)),),)
if theano.config.floatX == 'float32':
......@@ -1275,82 +1375,92 @@ else:
Deg2radTester = makeBroadcastTester(
op=tensor.deg2rad,
expected=numpy.deg2rad,
expected=upcast_float16_ufunc(numpy.deg2rad),
good=_good_broadcast_unary_normal_no_complex,
grad=_grad_broadcast_unary_normal_no_complex,
eps=angle_eps)
Deg2radInplaceTester = makeBroadcastTester(
op=inplace.deg2rad_inplace,
expected=numpy.deg2rad,
good=_good_broadcast_unary_normal_no_complex,
good=_good_broadcast_unary_normal_float_no_complex,
grad=_grad_broadcast_unary_normal_no_complex,
inplace=True,
eps=angle_eps)
Rad2degTester = makeBroadcastTester(
op=tensor.rad2deg,
expected=numpy.rad2deg,
expected=upcast_float16_ufunc(numpy.rad2deg),
good=_good_broadcast_unary_normal_no_complex,
grad=_grad_broadcast_unary_normal_no_complex,
eps=angle_eps)
Rad2degInplaceTester = makeBroadcastTester(
op=inplace.rad2deg_inplace,
expected=numpy.rad2deg,
good=_good_broadcast_unary_normal_no_complex,
good=_good_broadcast_unary_normal_float_no_complex,
grad=_grad_broadcast_unary_normal_no_complex,
inplace=True,
eps=angle_eps)
SinTester = makeBroadcastTester(op=tensor.sin,
expected=numpy.sin,
expected=upcast_float16_ufunc(numpy.sin),
good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide)
SinInplaceTester = makeBroadcastTester(op=inplace.sin_inplace,
expected=numpy.sin,
good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide,
inplace=True)
SinInplaceTester = makeBroadcastTester(
op=inplace.sin_inplace,
expected=numpy.sin,
good=_good_broadcast_unary_wide_float,
grad=_grad_broadcast_unary_wide,
inplace=True)
_good_broadcast_unary_arcsin = dict(normal=(rand_ranged(-1, 1, (2, 3)),),
integers=(randint_ranged(-1, 1, (2, 3)),),
complex=(randc128_ranged(-1, 1, (2, 3)),),
empty=(numpy.asarray([], dtype=config.floatX),),)
_good_broadcast_unary_arcsin = dict(
normal=(rand_ranged(-1, 1, (2, 3)),),
integers=(randint_ranged(-1, 1, (2, 3)),),
int8=[numpy.arange(-1, 2, dtype='int8')],
complex=(randc128_ranged(-1, 1, (2, 3)),),
empty=(numpy.asarray([], dtype=config.floatX),),)
_good_broadcast_unary_arcsin_float = copymod(
_good_broadcast_unary_arcsin,
without=['integers', 'int8'])
_grad_broadcast_unary_arcsin = dict(normal=(rand_ranged(-1, 1, (2, 3)),),)
ArcsinTester = makeBroadcastTester(op=tensor.arcsin,
expected=numpy.arcsin,
expected=upcast_float16_ufunc(numpy.arcsin),
good=_good_broadcast_unary_arcsin,
grad=_grad_broadcast_unary_arcsin)
ArcsinInplaceTester = makeBroadcastTester(op=inplace.arcsin_inplace,
expected=numpy.arcsin,
good=_good_broadcast_unary_arcsin,
grad=_grad_broadcast_unary_arcsin,
inplace=True)
ArcsinInplaceTester = makeBroadcastTester(
op=inplace.arcsin_inplace,
expected=numpy.arcsin,
good=_good_broadcast_unary_arcsin_float,
grad=_grad_broadcast_unary_arcsin,
inplace=True)
CosTester = makeBroadcastTester(op=tensor.cos,
expected=numpy.cos,
expected=upcast_float16_ufunc(numpy.cos),
good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide)
CosInplaceTester = makeBroadcastTester(op=inplace.cos_inplace,
expected=numpy.cos,
good=_good_broadcast_unary_wide,
grad=_grad_broadcast_unary_wide,
inplace=True)
CosInplaceTester = makeBroadcastTester(
op=inplace.cos_inplace,
expected=numpy.cos,
good=_good_broadcast_unary_wide_float,
grad=_grad_broadcast_unary_wide,
inplace=True)
ArccosTester = makeBroadcastTester(op=tensor.arccos,
expected=numpy.arccos,
expected=upcast_float16_ufunc(numpy.arccos),
good=_good_broadcast_unary_arcsin,
grad=_grad_broadcast_unary_arcsin)
ArccosInplaceTester = makeBroadcastTester(op=inplace.arccos_inplace,
expected=numpy.arccos,
good=_good_broadcast_unary_arcsin,
grad=_grad_broadcast_unary_arcsin,
inplace=True)
ArccosInplaceTester = makeBroadcastTester(
op=inplace.arccos_inplace,
expected=numpy.arccos,
good=_good_broadcast_unary_arcsin_float,
grad=_grad_broadcast_unary_arcsin,
inplace=True)
_good_broadcast_unary_tan = dict(
normal=(rand_ranged(-3.14, 3.14, (2, 3)),),
shifted=(rand_ranged(3.15, 6.28, (2, 3)),),
integers=(randint_ranged(-3, 3, (2, 3)),),
int8=[numpy.arange(-3, 4, dtype='int8')],
complex=(randc128_ranged(-3.14, 3.14, (2, 3)),),
empty=(numpy.asarray([], dtype=config.floatX),),)
#We do not want to test around the discontinuity.
......@@ -1358,25 +1468,27 @@ _grad_broadcast_unary_tan = dict(normal=(rand_ranged(-1.5, 1.5, (2, 3)),),
shifted=(rand_ranged(1.6, 4.6, (2, 3)),))
TanTester = makeBroadcastTester(op=tensor.tan,
expected=numpy.tan,
expected=upcast_float16_ufunc(numpy.tan),
good=_good_broadcast_unary_tan,
grad=_grad_broadcast_unary_tan)
TanInplaceTester = makeBroadcastTester(op=inplace.tan_inplace,
expected=numpy.tan,
good=_good_broadcast_unary_tan,
grad=_grad_broadcast_unary_tan,
inplace=True)
TanInplaceTester = makeBroadcastTester(
op=inplace.tan_inplace,
expected=numpy.tan,
good=copymod(_good_broadcast_unary_tan, without=['integers', 'int8']),
grad=_grad_broadcast_unary_tan,
inplace=True)
ArctanTester = makeBroadcastTester(op=tensor.arctan,
                                   expected=upcast_float16_ufunc(numpy.arctan),
                                   good=_good_broadcast_unary_wide,
                                   grad=_grad_broadcast_unary_wide)

ArctanInplaceTester = makeBroadcastTester(
    op=inplace.arctan_inplace,
    expected=numpy.arctan,
    good=_good_broadcast_unary_wide_float,
    grad=_grad_broadcast_unary_wide,
    inplace=True)
_good_broadcast_binary_arctan2 = dict(
    same_shapes=(rand(2, 3), rand(2, 3)),
@@ -1385,6 +1497,8 @@
    row=(rand(2, 3), rand(1, 3)),
    column=(rand(2, 3), rand(2, 1)),
    integers=(randint(2, 3), randint(2, 3)),
    int8=[numpy.arange(-127, 128, dtype='int8'),
          numpy.arange(-127, 128, dtype='int8')[:, numpy.newaxis]],
    dtype_mixup_1=(rand(2, 3), randint(2, 3)),
    dtype_mixup_2=(randint(2, 3), rand(2, 3)),
    empty=(numpy.asarray([], dtype=config.floatX),
@@ -1398,100 +1512,110 @@ _grad_broadcast_binary_arctan2 = dict(
    column=(rand(2, 3), rand(2, 1)),
)
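# In the int8 case above, indexing with [:, numpy.newaxis] turns the
# second argument into a column, so the (255,) and (255, 1) arrays
# broadcast to a (255, 255) grid covering every (y, x) sign
# combination over the whole int8 range:
#
#     y = numpy.arange(-127, 128, dtype='int8')
#     x = y[:, numpy.newaxis]
#     assert numpy.arctan2(y, x).shape == (255, 255)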
Arctan2Tester = makeBroadcastTester(
    op=tensor.arctan2,
    expected=upcast_float16_ufunc(numpy.arctan2),
    good=_good_broadcast_binary_arctan2,
    grad=_grad_broadcast_binary_arctan2)

Arctan2InplaceTester = makeBroadcastTester(
    op=inplace.arctan2_inplace,
    expected=numpy.arctan2,
    good=copymod(_good_broadcast_binary_arctan2, without=['integers', 'int8']),
    grad=_grad_broadcast_binary_arctan2,
    inplace=True)

CoshTester = makeBroadcastTester(
    op=tensor.cosh,
    expected=upcast_float16_ufunc(numpy.cosh),
    good=dict(_good_broadcast_unary_normal,
              int8=[numpy.arange(-89, 90, dtype='int8')]),
    grad=_grad_broadcast_unary_normal)

CoshInplaceTester = makeBroadcastTester(
    op=inplace.cosh_inplace,
    expected=numpy.cosh,
    good=_good_broadcast_unary_normal_float,
    grad=_grad_broadcast_unary_normal,
    inplace=True)
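# The int8 range (-89, 90) used for cosh (and for sinh below) is
# presumably chosen so the result stays representable in float32:
# cosh(89) ~ 2.2e38 is still below the float32 maximum (~3.4e38),
# while cosh(90) ~ 6.1e38 overflows to inf:
#
#     assert numpy.isfinite(numpy.cosh(numpy.float32(89)))
#     assert numpy.isinf(numpy.cosh(numpy.float32(90)))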
_good_broadcast_unary_arccosh = dict(
    normal=(rand_ranged(1, 1000, (2, 3)),),
    integers=(randint_ranged(1, 1000, (2, 3)),),
    uint8=[numpy.arange(1, 256, dtype='uint8')],
    complex=(randc128_ranged(1, 1000, (2, 3)),),
    empty=(numpy.asarray([], dtype=config.floatX),),)
_grad_broadcast_unary_arccosh = dict(normal=(rand_ranged(1, 1000, (2, 3)),),)
ArccoshTester = makeBroadcastTester(
    op=tensor.arccosh,
    expected=upcast_float16_ufunc(numpy.arccosh),
    good=_good_broadcast_unary_arccosh,
    grad=_grad_broadcast_unary_arccosh)

ArccoshInplaceTester = makeBroadcastTester(
    op=inplace.arccosh_inplace,
    expected=numpy.arccosh,
    good=copymod(_good_broadcast_unary_arccosh, without=['integers', 'uint8']),
    grad=_grad_broadcast_unary_arccosh,
    inplace=True)

SinhTester = makeBroadcastTester(
    op=tensor.sinh,
    expected=upcast_float16_ufunc(numpy.sinh),
    good=dict(_good_broadcast_unary_normal,
              int8=[numpy.arange(-89, 90, dtype='int8')]),
    grad=_grad_broadcast_unary_normal)

SinhInplaceTester = makeBroadcastTester(
    op=inplace.sinh_inplace,
    expected=numpy.sinh,
    good=_good_broadcast_unary_normal_float,
    grad=_grad_broadcast_unary_normal,
    inplace=True)

ArcsinhTester = makeBroadcastTester(
    op=tensor.arcsinh,
    expected=upcast_float16_ufunc(numpy.arcsinh),
    good=_good_broadcast_unary_normal,
    grad=_grad_broadcast_unary_normal)

ArcsinhInplaceTester = makeBroadcastTester(
    op=inplace.arcsinh_inplace,
    expected=numpy.arcsinh,
    good=_good_broadcast_unary_normal_float,
    grad=_grad_broadcast_unary_normal,
    inplace=True)
TanhTester = makeBroadcastTester(op=tensor.tanh,
                                 expected=upcast_float16_ufunc(numpy.tanh),
                                 good=_good_broadcast_unary_normal,
                                 grad=_grad_broadcast_unary_normal)

TanhInplaceTester = makeBroadcastTester(
    op=inplace.tanh_inplace,
    expected=numpy.tanh,
    good=_good_broadcast_unary_normal_float,
    grad=_grad_broadcast_unary_normal,
    inplace=True)
_eps = 1e-10
_good_broadcast_unary_arctanh = dict(
    normal=(rand_ranged(-1 + _eps, 1 - _eps, (2, 3)),),
    integers=(randint_ranged(-1 + _eps, 1 - _eps, (2, 3)),),
    int8=[numpy.arange(0, 1, dtype='int8')],
    complex=(randc128_ranged(-1 + _eps, 1 - _eps, (2, 3)),),
    empty=(numpy.asarray([], dtype=config.floatX),),)
_grad_broadcast_unary_arctanh = dict(
    normal=(rand_ranged(-1 + _eps, 1 - _eps, (2, 3)),),)
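# _eps keeps the samples strictly inside arctanh's open domain (-1, 1);
# the function diverges at the endpoints:
#
#     numpy.arctanh(1.0)           # inf (pole at +/-1)
#     numpy.arctanh(1.0 - 1e-10)   # ~11.86, still finite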
ArctanhTester = makeBroadcastTester(
    op=tensor.arctanh,
    expected=upcast_float16_ufunc(numpy.arctanh),
    good=_good_broadcast_unary_arctanh,
    grad=_grad_broadcast_unary_arctanh)

ArctanhInplaceTester = makeBroadcastTester(
    op=inplace.arctanh_inplace,
    expected=numpy.arctanh,
    good=copymod(_good_broadcast_unary_arctanh, without=['integers', 'int8']),
    grad=_grad_broadcast_unary_arctanh,
    inplace=True)
# In-place ops whose input is an integer type but whose output is a
# float type do not have well-defined behavior, so we do not test
# that case.
_good_broadcast_unary_normal_no_int_no_complex = _good_broadcast_unary_normal_no_complex.copy()
del _good_broadcast_unary_normal_no_int_no_complex['integers']
_good_broadcast_unary_normal_no_int = _good_broadcast_unary_normal.copy()
del _good_broadcast_unary_normal_no_int['integers']
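# The issue: for an integer input the float result cannot live in the
# input's own buffer, so "in-place" has no consistent meaning there:
#
#     x = numpy.arange(-1, 2, dtype='int8')
#     assert numpy.arcsin(x).dtype != x.dtype   # float16 vs. int8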
# We can't test it if SciPy is not installed!
# Precomputing the result is brittle (it has been broken before!):
# if we make any modification to the random numbers here,
@@ -1528,7 +1652,7 @@ ErfTester = makeBroadcastTester(
ErfInplaceTester = makeBroadcastTester(
    op=inplace.erf_inplace,
    expected=expected_erf,
    good=_good_broadcast_unary_normal_float,
    grad=_grad_broadcast_unary_normal,
    mode=mode_no_scipy,
    eps=2e-10,
@@ -1538,7 +1662,7 @@ ErfInplaceTester = makeBroadcastTester(
ErfcTester = makeBroadcastTester(
    op=tensor.erfc,
    expected=expected_erfc,
    good=_good_broadcast_unary_normal_float_no_complex,
    grad=_grad_broadcast_unary_normal,
    eps=2e-10,
    mode=mode_no_scipy,
@@ -1546,7 +1670,7 @@ ErfcTester = makeBroadcastTester(
ErfcInplaceTester = makeBroadcastTester(
    op=inplace.erfc_inplace,
    expected=expected_erfc,
    good=_good_broadcast_unary_normal_float_no_complex,
    grad=_grad_broadcast_unary_normal,
    eps=2e-10,
    mode=mode_no_scipy,
@@ -1556,7 +1680,7 @@ ErfcInplaceTester = makeBroadcastTester(
ErfinvTester = makeBroadcastTester(
    op=tensor.erfinv,
    expected=expected_erfinv,
    good=_good_broadcast_unary_normal_float_no_complex,
    grad=_grad_broadcast_unary_abs1_no_complex,
    eps=2e-10,
    mode=mode_no_scipy,
@@ -1565,7 +1689,7 @@ ErfinvTester = makeBroadcastTester(
ErfcinvTester = makeBroadcastTester(
    op=tensor.erfcinv,
    expected=expected_erfcinv,
    good=_good_broadcast_unary_normal_float_no_complex,
    grad=_grad_broadcast_unary_0_2_no_complex,
    eps=2e-10,
    mode=mode_no_scipy,