Commit 413f34ac, authored by lamblin

Merge pull request #1389 from nouiz/sigmoid

Sigmoid
......@@ -2,6 +2,7 @@
TODO: implement Images2Neibs.infer_shape() methods
"""
import theano
from theano import Op, Apply
import theano.tensor as T
from theano.gradient import grad_not_implemented
......@@ -111,6 +112,9 @@ class Images2Neibs(Op):
def perform(self, node, inp, out_):
ten4, neib_shape, neib_step = inp
z, = out_
# GpuImages2Neibs should not run this perform in DebugMode
if type(self) != Images2Neibs:
raise theano.gof.utils.MethodNotDefined()
def CEIL_INTDIV(a, b):
if a % b:
......
......@@ -845,10 +845,48 @@ class ScalarOp(Op):
def c_code_cache_version(self):
    """Version tag used to invalidate previously compiled C code."""
    version = (4,)
    return version
def c_code_contiguous(self, node, name, inp, out, sub):
    """Return C code specialized for fully contiguous arrays.

    Elemwise calls this hook when all inputs and outputs are
    c_contiguous, which makes it possible to emit a SIMD version
    of this op.

    The arguments are the same as for ``c_code`` except:
    - ``inp`` and ``out`` must be the C variable names of the
      ndarrays themselves (not of scalar elements);
    - ``node`` must be the Elemwise node, which is needed to know
      the input/output types.

    The base implementation is deliberately unimplemented; callers
    catch ``MethodNotDefined`` and fall back to the generic loop.
    """
    raise theano.gof.utils.MethodNotDefined()
class UnaryScalarOp(ScalarOp):
    """Base class for scalar ops that take exactly one input.

    Subclasses may set ``amd_float32``/``amd_float64`` to the name of
    an amdlibm vector function; when set, ``c_code_contiguous`` emits
    a single SIMD call over the whole (contiguous) array instead of
    the generic elementwise loop.
    """
    nin = 1
    # Names of the amdlibm SIMD functions for each dtype, or None when
    # no vectorized implementation exists for this op.
    amd_float32 = None
    amd_float64 = None

    def c_code_contiguous(self, node, name, inp, out, sub):
        """Return C code applying the amdlibm SIMD function to the array.

        Raises ``MethodNotDefined`` when amdlibm is disabled, when the
        input and output types differ (this fast path cannot broadcast),
        or when no SIMD function is registered for the dtype, so the
        caller falls back to the generic elementwise loop.
        """
        # PEP 3113: unpack the argument sequences explicitly instead of
        # using Python-2-only tuple parameters; call sites are unchanged.
        x, = inp
        z, = out
        if (not theano.config.lib.amdlibm or
                # We compare the dtype AND the broadcast flag,
                # as this function does not broadcast.
                node.inputs[0].type != node.outputs[0].type):
            raise theano.gof.utils.MethodNotDefined()
        dtype = node.inputs[0].dtype
        if dtype == 'float32' and self.amd_float32 is not None:
            dtype = 'float'
            fct = self.amd_float32
        elif dtype == 'float64' and self.amd_float64 is not None:
            dtype = 'double'
            fct = self.amd_float64
        else:
            raise theano.gof.utils.MethodNotDefined()
        return """
        npy_intp n = PyArray_SIZE(%(z)s);
        %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
        %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
        %(fct)s(n, x, z);
        """ % locals()
class BinaryScalarOp(ScalarOp):
# One may define in subclasses the following fields:
......@@ -1615,6 +1653,44 @@ class Pow(BinaryScalarOp):
return (first_part, second_part)
def c_code_contiguous(self, node, name, inp, out, sub):
    """Return SIMD C code for elementwise power via amdlibm.

    Two fast paths are emitted, both float32-only (amdlibm 3.0 has
    no float64 version of these SIMD functions):
    - ``amd_vrsa_powf``: both inputs are full arrays of the output type;
    - ``amd_vrsa_powxf``: the exponent is a broadcasted scalar.

    Raises ``MethodNotDefined`` when amdlibm is disabled or neither
    pattern matches, so the caller falls back to the generic loop.
    """
    # PEP 3113: explicit unpacking instead of Python-2 tuple parameters.
    x, y = inp
    z, = out
    if not theano.config.lib.amdlibm:
        raise theano.gof.utils.MethodNotDefined()
    # We compare the dtype AND the broadcast flag,
    # as this function does not broadcast.
    if (node.inputs[0].type == node.outputs[0].type and
            node.inputs[1].type == node.outputs[0].type and
            # amdlibm 3.0 does not have a float64 version of this
            # SIMD function.
            node.inputs[0].dtype == 'float32'):
        dtype = 'float'
        fct = "amd_vrsa_powf"
        return """
        npy_intp n = PyArray_SIZE(%(z)s);
        %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
        %(dtype)s * y = (%(dtype)s*) PyArray_DATA(%(y)s);
        %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
        %(fct)s(n, x, y, z);
        """ % locals()
    # We compare the dtype and check that we broadcast a scalar.
    elif (node.inputs[0].type == node.outputs[0].type and
            node.inputs[1].dtype == node.outputs[0].dtype and
            all(node.inputs[1].broadcastable) and
            # amdlibm 3.0 does not have a float64 version of this
            # SIMD function.
            node.inputs[0].dtype == 'float32'):
        dtype = 'float'
        fct = "amd_vrsa_powxf"
        return """
        npy_intp n = PyArray_SIZE(%(z)s);
        %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
        %(dtype)s * y = (%(dtype)s*) PyArray_DATA(%(y)s);
        %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
        %(fct)s(n, x, *y, z);
        """ % locals()
    raise theano.gof.utils.MethodNotDefined()
pow = Pow(upcast_out, name='pow')
......@@ -2019,6 +2095,9 @@ inv = Inv(upgrade_to_float, name='inv')
class Log(UnaryScalarOp):
""" log base e """
amd_float32 = "amd_vrsa_logf"
amd_float64 = "amd_vrda_log"
def impl(self, x):
return numpy.log(x)
......@@ -2042,6 +2121,9 @@ log = Log(upgrade_to_float, name='log')
class Log2(UnaryScalarOp):
""" log base 2 """
amd_float32 = "amd_vrsa_log2f"
amd_float64 = "amd_vrda_log2"
def impl(self, x):
return numpy.log2(x)
......@@ -2062,6 +2144,9 @@ log2 = Log2(upgrade_to_float, name='log2')
class Log10(UnaryScalarOp):
""" log base 10 """
amd_float32 = "amd_vrsa_log10f"
amd_float64 = "amd_vrda_log10"
def impl(self, x):
return numpy.log10(x)
......@@ -2100,6 +2185,9 @@ log1p = Log1p(upgrade_to_float, name='log1p')
class Exp(UnaryScalarOp):
amd_float32 = "amd_vrsa_expf"
amd_float64 = "amd_vrda_exp"
def impl(self, x):
return numpy.exp(x)
......@@ -2231,6 +2319,9 @@ rad2deg = Rad2Deg(upgrade_to_float, name='rad2deg')
class Cos(UnaryScalarOp):
amd_float32 = "amd_vrsa_cosf"
amd_float64 = "amd_vrda_cos"
def impl(self, x):
return numpy.cos(x)
......@@ -2269,6 +2360,9 @@ arccos = ArcCos(upgrade_to_float, name='arccos')
class Sin(UnaryScalarOp):
amd_float32 = "amd_vrsa_sinf"
amd_float64 = "amd_vrda_sin"
def impl(self, x):
return numpy.sin(x)
......
......@@ -1529,7 +1529,18 @@ class _tensor_py_operators:
if self._is_nonzero:
return True
else:
raise TypeError("Variable does not support boolean operations.")
raise TypeError(
"Variable does not support boolean operations. This"
"can happen if you do logical operator (<, <=, >, <=,"
"==, !=) between numpy.ndarray and theano tensor"
"variable. Due NumPy implementation before NumPy 1.8,"
"we can't make the python syntax work when the ndarray"
"is on the left, and this end with this error. To work"
"around that, just call"
"theano.tensor.{lt,le,eq,ne,gt,ge}(ndarray, tensor) or"
"use the python syntax with the theano tensor on the"
"left. Or update to NumPy 1.8 or above."
)
# BITWISE
def __invert__(self):
......
......@@ -1079,13 +1079,49 @@ class Elemwise(Op):
%(undefs)s
}
""" % locals()
if all([o.ndim <= 1 for o in node.outputs]):
if nnested:
all_code = [("", "")] * (nnested - 1) + [("", code)] + [""]
else:
all_code = [code]
loop = cgen.make_reordered_loop(
loop = cgen.make_loop(
loop_orders=orders + [range(nnested)] * len(real_onames),
dtypes=(idtypes + list(real_odtypes)),
loop_tasks=all_code,
sub=sub)
else:
loop = cgen.make_reordered_loop(
init_loop_orders=orders + [range(nnested)] * len(real_onames),
olv_index=olv_index,
dtypes=(idtypes + list(real_odtypes)),
inner_task=code,
sub=sub)
# If all inputs and outputs are contiguous
# and the scalar op define optimized code for that case
# use it!
if all([o.ndim >= 1 for o in node.outputs]):
try:
contig = self.scalar_op.c_code_contiguous(
node,
nodename + '_scalar_contig_',
_inames,
onames,
sub)
# PyArray_ISONESEGMENT(arr)
# return true if arr is fortran or c contiguous.
cond = ' && '.join(["PyArray_ISONESEGMENT(%s)" % arr
for arr in _inames + onames])
loop = """
if(%(cond)s){
%(contig)s
}else{
%(loop)s
}
""" % locals()
except theano.gof.utils.MethodNotDefined:
pass
return decl, checks, alloc, loop
def c_code(self, node, nodename, inames, onames, sub):
......@@ -1104,15 +1140,15 @@ class Elemwise(Op):
return support_code
def c_code_cache_version_apply(self, node):
version = [6] # the version corresponding to the c code in this Op
version = [8] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.append(Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......@@ -1525,9 +1561,9 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.append(Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......
......@@ -23,6 +23,9 @@ from theano.tensor import elemwise, opt, NotScalarConstantError
#
class ScalarSigmoid(scalar.UnaryScalarOp):
"""
This is just speed opt. Not for stability.
"""
@staticmethod
def st_impl(x):
if x < -30.0:
......@@ -64,6 +67,44 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return (2,) + v
else:
return v
# This function is disabled as it is slower than the normal code!
def c_code_contiguous_disabled(self, node, name, inp, out, sub):
    """Disabled amdlibm-based contiguous C code for sigmoid.

    Kept for reference: it computes ``sigmoid(x) = 1 / (1 + exp(-x))``
    using the amdlibm vector ``exp``, blocking the work so that the
    data stays in the L1 cache.  Benchmarks showed it to be slower
    than the generic elementwise loop, hence the ``_disabled`` suffix.

    Raises ``MethodNotDefined`` when amdlibm is disabled, when the
    input/output dtypes differ, or when no amdlibm function is
    registered for the dtype.
    """
    x, = inp
    z, = out
    if (not theano.config.lib.amdlibm or
            node.inputs[0].dtype != node.outputs[0].dtype):
        raise theano.gof.utils.MethodNotDefined()
    dtype = node.inputs[0].dtype
    if dtype == 'float32' and self.amd_float32 is not None:
        dtype = 'float'
        fct = "amd_vrsa_expf"
    elif dtype == 'float64' and self.amd_float64 is not None:
        dtype = 'double'
        fct = "amd_vrda_exp"
    else:
        raise theano.gof.utils.MethodNotDefined()
    # NOTE: the original version had an unreachable ``raise`` after this
    # return; it has been removed as dead code.
    return """
    npy_intp n = PyArray_SIZE(%(z)s);
    %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
    %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
    // We block to keep the data in l1
    // normal l1 size = 32k: 32k/2(input + output)/8(nb bytes of double)=2k
    // We stay bellow the 2k limit to let space for
    // This is faster then the not blocking version
    for(int i=0;i<n;i+=2048){
        npy_intp nb = (n-i<2048)?n-i:2048;
        for(int j=0;j<nb;j++){
            z[i+j] = -x[i+j];
        }
        %(fct)s(nb, z+i, z+i);
        for(int j=0;j<nb;j++){
            z[i+j] = 1.0 /(1.0+z[i+j]);
        }
    }
    """ % locals()
# Instantiate the scalar op, then wrap it in an Elemwise so it can be
# applied elementwise to tensor variables.
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论