Commit d0cbf05e authored by Laurent Dinh

Merge branch 'master' of git://github.com/Theano/Theano into scan

......@@ -1438,8 +1438,12 @@ def get_gcc_shared_library_arg():
def std_include_dirs():
return (numpy.distutils.misc_util.get_numpy_include_dirs()
+ [distutils.sysconfig.get_python_inc()])
numpy_inc_dirs = numpy.distutils.misc_util.get_numpy_include_dirs()
py_inc = distutils.sysconfig.get_python_inc()
py_plat_spec_inc = distutils.sysconfig.get_python_inc(plat_specific=True)
python_inc_dirs = ([py_inc] if py_inc == py_plat_spec_inc
else [py_inc, py_plat_spec_inc])
return numpy_inc_dirs + python_inc_dirs
def std_lib_dirs_and_libs():
......@@ -1713,7 +1717,7 @@ class GCC_compiler(object):
continue
mj, mn, patch = [int(vp) for vp in version]
if (((mj, mn) == (4, 6) and patch < 4) or
((mj, mn) == (4, 7) and patch < 3) or
((mj, mn) == (4, 7) and patch <= 3) or
((mj, mn) == (4, 8) and patch < 1)):
new_flags[i] = p.rstrip('-avx')
......
import operator
import sys
import numpy
......@@ -213,20 +214,29 @@ def test_huge_elemwise_fusion():
"""
shape = (2, 3, 4, 5, 6)
ttype = tensor.tensor(dtype='float32', broadcastable=(False,) * len(shape))
vars = [tensor.tanh(ttype) for x in range(7)]
f = pfunc(vars, [vars[0] - vars[1] - vars[2] - vars[3] - vars[4] -
vars[5] - vars[6]], mode=mode_with_gpu)
gpu_ptr_size = theano.sandbox.cuda.opt.get_device_type_sizes()['gpu_ptr_size']
if gpu_ptr_size == 8:
nb_in = 7
len_topo = 10
elif gpu_ptr_size == 4:
nb_in = 8
len_topo = 11
else:
raise Exception("Unexpected value for gpu_ptr_size", gpu_ptr_size)
vars = [tensor.tanh(ttype) for x in range(nb_in)]
f = pfunc(vars, [reduce(operator.sub, vars)], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
#theano.printing.debugprint(f)
#for i, node in enumerate(topo):
# print >> sys.stdout, i, node
assert len(topo) == 10
assert len(topo) == len_topo
assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == 2
assert isinstance(topo[7].op.scalar_op, theano.scalar.basic.Sub)
assert isinstance(topo[8].op.scalar_op, theano.scalar.basic.Composite)
assert isinstance(topo[-3].op.scalar_op, theano.scalar.basic.Sub)
assert isinstance(topo[-2].op.scalar_op, theano.scalar.basic.Composite)
#let debugmode catch errors
gen = lambda: theano._asarray(numpy.random.rand(*shape), dtype='float32')
f(gen(), gen(), gen(), gen(), gen(), gen(), gen())
f(*[gen() for i in range(nb_in)])
# Test the case where we can't put the computation on the gpu! their is too
# many dimensions to the input to have 2 inputs to the op!
......
......@@ -909,7 +909,22 @@ class UnaryScalarOp(ScalarOp):
node.inputs[0].type != node.outputs[0].type):
raise theano.gof.utils.MethodNotDefined()
dtype = node.inputs[0].dtype
dtype = node.inputs[0].type.dtype_specs()[1]
fct_call = self.c_code_contiguous_raw(dtype, 'n', 'x', 'z')
return """
{
npy_intp n = PyArray_SIZE(%(z)s);
%(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
%(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
%(fct_call)s;
}
""" % locals()
def c_code_contiguous_raw(self, dtype, n, i, o):
if not config.lib.amdlibm:
raise theano.gof.utils.MethodNotDefined()
if dtype.startswith('npy_'):
dtype = dtype[4:]
if dtype == 'float32' and self.amd_float32 is not None:
dtype = 'float'
fct = self.amd_float32
......@@ -918,12 +933,7 @@ class UnaryScalarOp(ScalarOp):
fct = self.amd_float64
else:
raise theano.gof.utils.MethodNotDefined()
return """
npy_intp n = PyArray_SIZE(%(z)s);
%(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
%(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
%(fct)s(n, x, z);
""" % locals()
return "%(fct)s(%(n)s, %(i)s, %(o)s)" % locals()
class BinaryScalarOp(ScalarOp):
......
......@@ -173,12 +173,9 @@ SOMEPATH/Canopy_64bit/User/lib/python2.7/site-packages/numpy/distutils/system_in
warnings.warn('Specified path %s is invalid.' % d)
"""
#I'm not able to remove all printed stuff
with_context = warnings.catch_warnings(record=True)
with_context.__enter__()
try:
with warnings.catch_warnings(record=True):
numpy.distutils.system_info.system_info.verbosity = 0
blas_info = numpy.distutils.system_info.get_info("blas_opt")
finally:
with_context.__exit__(None, None, None)
# If we are in a EPD installation, mkl is available
if "EPD" in sys.version:
......
......@@ -95,7 +95,7 @@ class SoftmaxWithBias(gof.Op):
return ['<iostream>', '<cmath>']
@staticmethod
def c_code_template():
def c_code_template(dtype):
# this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
......@@ -107,6 +107,10 @@ class SoftmaxWithBias(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """
npy_intp* Nx = PyArray_DIMS(%(x)s);
npy_intp Sx = 0;
npy_intp Sb = 0;
npy_intp Ssm = 0;
if (PyArray_NDIM(%(x)s) != 2)
{
......@@ -151,6 +155,10 @@ class SoftmaxWithBias(gof.Op):
%(fail)s
}
}
Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
"""
begin_row_loop = """
......@@ -163,9 +171,7 @@ class SoftmaxWithBias(gof.Op):
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s));
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
......@@ -182,6 +188,9 @@ class SoftmaxWithBias(gof.Op):
row_max = (row_ij > row_max) ? row_ij : row_max;
}
"""
inside_row_loop = """
for (j = 0; j < Nx[1]; ++j)
{
dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
......@@ -201,6 +210,42 @@ class SoftmaxWithBias(gof.Op):
"""
# Get the vectorized version of exp if it exist
try:
vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype,
"Nx[1]", "sm_i", "sm_i")
inside_row_loop_contig = """
for (j = 0; j < Nx[1]; ++j)
{
dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
//std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n";
dtype_%%(sm)s sm_ij = row_ij - row_max;
//std::cout << "3 " << j << " " << sm_ij << "\\n";
sm_i[j * Ssm] = sm_ij;
}
%(vec_exp)s;
for (j = 0; j < Nx[1]; ++j)
{
sum += sm_i[j * Ssm];
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm] *= sum_inv;
}
""" % locals()
inside_row_loop = """
if(Ssm == 1){
%(inside_row_loop_contig)s
}else{
%(inside_row_loop)s
}
""" % locals()
except theano.gof.utils.MethodNotDefined:
pass
end_row_loop = """
}
"""
......@@ -210,12 +255,13 @@ class SoftmaxWithBias(gof.Op):
def c_code(self, node, name, inp, out, sub):
x, b = inp
sm, = out
code_template = ''.join(self.c_code_template())
code_template = ''.join(self.c_code_template(
node.inputs[0].type.dtype_specs()[1]))
return code_template % dict(locals(), **sub)
@staticmethod
def c_code_cache_version():
return (6,)
return (8,)
softmax_with_bias = SoftmaxWithBias()
......@@ -384,7 +430,7 @@ class Softmax(gof.Op):
return ['<iostream>', '<cmath>']
@staticmethod
def c_code_template():
def c_code_template(dtype):
# this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
......@@ -396,6 +442,8 @@ class Softmax(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """
npy_intp* Nx = PyArray_DIMS(%(x)s);
npy_intp Sx1 = 0;
npy_intp Ssm1 = 0;
if (PyArray_NDIM(%(x)s) != 2)
{
......@@ -413,7 +461,7 @@ class Softmax(gof.Op):
|| (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
|| (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
{
if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
Py_XDECREF(%(sm)s);
%(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
type_num_%(x)s);
if(!%(sm)s) {
......@@ -422,6 +470,8 @@ class Softmax(gof.Op):
%(fail)s
}
}
Sx1 = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
Ssm1 = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
"""
begin_row_loop = """
......@@ -433,11 +483,6 @@ class Softmax(gof.Op):
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0];
......@@ -445,46 +490,82 @@ class Softmax(gof.Op):
// Get the maximum value of the row
for (j = 1; j < Nx[1]; ++j)
{
dtype_%(sm)s row_ij = x_i[j * Sx] ;
dtype_%(sm)s row_ij = x_i[j * Sx1] ;
//std::cout << "1 " << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
}
"""
inside_row_loop = """
for (j = 0; j < Nx[1]; ++j)
{
dtype_%(sm)s row_ij = x_i[j * Sx] ;
dtype_%(sm)s row_ij = x_i[j * Sx1] ;
//std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n";
dtype_%(sm)s sm_ij = exp(row_ij - row_max);
//std::cout << "3 " << j << " " << sm_ij << "\\n";
sum += sm_ij;
sm_i[j * Ssm] = sm_ij;
sm_i[j * Ssm1] = sm_ij;
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm] *= sum_inv;
sm_i[j * Ssm1] *= sum_inv;
}
"""
# Get the vectorized version of exp if it exist
try:
vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype,
"Nx[1]", "sm_i", "sm_i")
inside_row_loop_contig = """
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] = x_i[j * Sx1] - row_max;
}
%(vec_exp)s;
for (j = 0; j < Nx[1]; ++j)
{
sum += sm_i[j * Ssm1];
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] *= sum_inv;
}
""" % locals()
inside_row_loop = """
if(Ssm1 == 1){
%(inside_row_loop_contig)s
}else{
%(inside_row_loop)s
}
""" % locals()
except theano.gof.utils.MethodNotDefined:
pass
end_row_loop = """
}
"""
return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)
def c_code(self, node, name, inp, out, sub):
x, = inp
sm, = out
code_template = ''.join(self.c_code_template())
code_template = ''.join(self.c_code_template(
node.inputs[0].type.dtype_specs()[1]))
return code_template % dict(locals(), **sub)
@staticmethod
def c_code_cache_version():
return (1,)
return (3,)
softmax = Softmax()
......@@ -863,7 +944,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return ['<iostream>', '<cmath>']
@staticmethod
def c_code_template():
def c_code_template(dtype):
# this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
......@@ -874,7 +955,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
(init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \
SoftmaxWithBias.c_code_template()
SoftmaxWithBias.c_code_template(dtype)
return (init_decl,
"""
if (PyArray_NDIM(%(y_idx)s) != 1)
......@@ -947,7 +1028,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
nll, sm, am = out
y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type
code_template = ''.join(self.c_code_template())
dtype = node.inputs[0].type.dtype_specs()[1]
code_template = ''.join(self.c_code_template(dtype))
return code_template % dict(locals(), **sub)
......
......@@ -587,7 +587,7 @@ class MakeVector(T.Op):
out[0][...] = inputs
def c_code_cache_version(self):
return (1,)
return (2,)
def c_code(self, node, name, inp, out_, sub):
out, = out_
......@@ -604,7 +604,7 @@ class MakeVector(T.Op):
ret = """
npy_intp dims[1];
dims[0] = %(out_shape)s;
if(!%(out)s || PyArray_DIMS(%(out)s)[0] == %(out_shape)s){
if(!%(out)s || PyArray_DIMS(%(out)s)[0] != %(out_shape)s){
Py_XDECREF(%(out)s);
%(out)s = (PyArrayObject*)PyArray_EMPTY(1, dims, %(out_dtype)s, 0);
}
......
......@@ -6736,6 +6736,17 @@ class TestTensorInstanceMethods(unittest.TestCase):
# Test equivalent advanced indexing
assert_array_equal(X[:,indices].eval({X: x}), x[:,indices])
def test_cumsum(self):
    """Symbolic ``cumsum()`` must agree with numpy's ``ndarray.cumsum()``."""
    var, _ = self.vars
    val, _ = self.vals
    assert_array_equal(var.cumsum().eval({var: val}), val.cumsum())
def test_cumprod(self):
    """Symbolic ``cumprod()`` must agree with numpy's ``ndarray.cumprod()``."""
    var, _ = self.vars
    val, _ = self.vals
    assert_array_equal(var.cumprod().eval({var: val}), val.cumprod())
def test_norm():
x = theano.tensor.vector('x')
n = x.norm(2)
......
......@@ -11,6 +11,7 @@ from theano.tensor.utils import hash_from_ndarray
from theano.tensor.type import TensorType
class AsTensorError(TypeError):
"""Raised when as_tensor_variable isn't able to create a
TensorVariable.
......@@ -509,13 +510,11 @@ class _tensor_py_operators:
def sort(self, axis=-1, kind='quicksort', order=None):
"""See `theano.tensor.sort`"""
from theano.tensor.sort import sort
return sort(self, axis, kind, order)
return theano.tensor.sort(self, axis, kind, order)
def argsort(self, axis=-1, kind='quicksort', order=None):
"""See `theano.tensor.argsort`"""
from theano.tensor.sort import argsort
return argsort(self, axis, kind, order)
return theano.tensor.argsort(self, axis, kind, order)
def clip(self, a_min, a_max):
"Clip (limit) the values in an array."
......@@ -529,16 +528,14 @@ class _tensor_py_operators:
def repeat(self, repeats, axis=None):
"""See `theano.tensor.repeat`"""
from theano.tensor.extra_ops import repeat
return repeat(self, repeats, axis)
return theano.tensor.extra_ops.repeat(self, repeats, axis)
def round(self, mode="half_away_from_zero"):
    """Round each element of this tensor.

    Thin delegate; see `theano.tensor.round` for the supported
    rounding modes.
    """
    rounded = theano.tensor.basic.round(self, mode)
    return rounded
def trace(self):
from theano.sandbox.linalg import trace
return trace(self)
return theano.sandbox.linalg.trace(self)
# TO TRUMP NUMPY OPERATORS
__array_priority__ = 1000
......@@ -549,6 +546,12 @@ class _tensor_py_operators:
def zeros_like(model, dtype=None):
    """Return a zero-filled tensor shaped like `model`.

    Thin delegate; see `theano.tensor.basic.zeros_like`.
    """
    zeros = theano.tensor.basic.zeros_like(model, dtype=dtype)
    return zeros
def cumsum(self, axis=None):
    """Cumulative sum along `axis` (flattened when `axis` is None).

    Thin delegate; see `theano.tensor.extra_ops.cumsum`.
    """
    summed = theano.tensor.extra_ops.cumsum(self, axis)
    return summed
def cumprod(self, axis=None):
    """Cumulative product along `axis` (flattened when `axis` is None).

    Thin delegate; see `theano.tensor.extra_ops.cumprod`.
    """
    product = theano.tensor.extra_ops.cumprod(self, axis)
    return product
class TensorVariable(_tensor_py_operators, Variable):
"""Subclass to add the tensor operators to the basic `Variable` class."""
......
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed carefully.
Please finish editing this comment first!
Register or sign in to post a comment