提交 d0cbf05e 作者: Laurent Dinh

Merge branch 'master' of git://github.com/Theano/Theano into scan

...@@ -1438,8 +1438,12 @@ def get_gcc_shared_library_arg(): ...@@ -1438,8 +1438,12 @@ def get_gcc_shared_library_arg():
def std_include_dirs(): def std_include_dirs():
return (numpy.distutils.misc_util.get_numpy_include_dirs() numpy_inc_dirs = numpy.distutils.misc_util.get_numpy_include_dirs()
+ [distutils.sysconfig.get_python_inc()]) py_inc = distutils.sysconfig.get_python_inc()
py_plat_spec_inc = distutils.sysconfig.get_python_inc(plat_specific=True)
python_inc_dirs = ([py_inc] if py_inc == py_plat_spec_inc
else [py_inc, py_plat_spec_inc])
return numpy_inc_dirs + python_inc_dirs
def std_lib_dirs_and_libs(): def std_lib_dirs_and_libs():
...@@ -1713,7 +1717,7 @@ class GCC_compiler(object): ...@@ -1713,7 +1717,7 @@ class GCC_compiler(object):
continue continue
mj, mn, patch = [int(vp) for vp in version] mj, mn, patch = [int(vp) for vp in version]
if (((mj, mn) == (4, 6) and patch < 4) or if (((mj, mn) == (4, 6) and patch < 4) or
((mj, mn) == (4, 7) and patch < 3) or ((mj, mn) == (4, 7) and patch <= 3) or
((mj, mn) == (4, 8) and patch < 1)): ((mj, mn) == (4, 8) and patch < 1)):
new_flags[i] = p.rstrip('-avx') new_flags[i] = p.rstrip('-avx')
......
import operator
import sys import sys
import numpy import numpy
...@@ -213,20 +214,29 @@ def test_huge_elemwise_fusion(): ...@@ -213,20 +214,29 @@ def test_huge_elemwise_fusion():
""" """
shape = (2, 3, 4, 5, 6) shape = (2, 3, 4, 5, 6)
ttype = tensor.tensor(dtype='float32', broadcastable=(False,) * len(shape)) ttype = tensor.tensor(dtype='float32', broadcastable=(False,) * len(shape))
vars = [tensor.tanh(ttype) for x in range(7)] gpu_ptr_size = theano.sandbox.cuda.opt.get_device_type_sizes()['gpu_ptr_size']
f = pfunc(vars, [vars[0] - vars[1] - vars[2] - vars[3] - vars[4] - if gpu_ptr_size == 8:
vars[5] - vars[6]], mode=mode_with_gpu) nb_in = 7
len_topo = 10
elif gpu_ptr_size == 4:
nb_in = 8
len_topo = 11
else:
raise Exception("Unexpected value for gpu_ptr_size", gpu_ptr_size)
vars = [tensor.tanh(ttype) for x in range(nb_in)]
f = pfunc(vars, [reduce(operator.sub, vars)], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
#for i, node in enumerate(topo): #for i, node in enumerate(topo):
# print >> sys.stdout, i, node # print >> sys.stdout, i, node
assert len(topo) == 10 assert len(topo) == len_topo
assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == 2 assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == 2
assert isinstance(topo[7].op.scalar_op, theano.scalar.basic.Sub) assert isinstance(topo[-3].op.scalar_op, theano.scalar.basic.Sub)
assert isinstance(topo[8].op.scalar_op, theano.scalar.basic.Composite) assert isinstance(topo[-2].op.scalar_op, theano.scalar.basic.Composite)
#let debugmode catch errors #let debugmode catch errors
gen = lambda: theano._asarray(numpy.random.rand(*shape), dtype='float32') gen = lambda: theano._asarray(numpy.random.rand(*shape), dtype='float32')
f(gen(), gen(), gen(), gen(), gen(), gen(), gen()) f(*[gen() for i in range(nb_in)])
# Test the case where we can't put the computation on the gpu! their is too # Test the case where we can't put the computation on the gpu! their is too
# many dimensions to the input to have 2 inputs to the op! # many dimensions to the input to have 2 inputs to the op!
......
...@@ -909,7 +909,22 @@ class UnaryScalarOp(ScalarOp): ...@@ -909,7 +909,22 @@ class UnaryScalarOp(ScalarOp):
node.inputs[0].type != node.outputs[0].type): node.inputs[0].type != node.outputs[0].type):
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
dtype = node.inputs[0].dtype dtype = node.inputs[0].type.dtype_specs()[1]
fct_call = self.c_code_contiguous_raw(dtype, 'n', 'x', 'z')
return """
{
npy_intp n = PyArray_SIZE(%(z)s);
%(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
%(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
%(fct_call)s;
}
""" % locals()
def c_code_contiguous_raw(self, dtype, n, i, o):
if not config.lib.amdlibm:
raise theano.gof.utils.MethodNotDefined()
if dtype.startswith('npy_'):
dtype = dtype[4:]
if dtype == 'float32' and self.amd_float32 is not None: if dtype == 'float32' and self.amd_float32 is not None:
dtype = 'float' dtype = 'float'
fct = self.amd_float32 fct = self.amd_float32
...@@ -918,12 +933,7 @@ class UnaryScalarOp(ScalarOp): ...@@ -918,12 +933,7 @@ class UnaryScalarOp(ScalarOp):
fct = self.amd_float64 fct = self.amd_float64
else: else:
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
return """ return "%(fct)s(%(n)s, %(i)s, %(o)s)" % locals()
npy_intp n = PyArray_SIZE(%(z)s);
%(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s);
%(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s);
%(fct)s(n, x, z);
""" % locals()
class BinaryScalarOp(ScalarOp): class BinaryScalarOp(ScalarOp):
......
...@@ -173,12 +173,9 @@ SOMEPATH/Canopy_64bit/User/lib/python2.7/site-packages/numpy/distutils/system_in ...@@ -173,12 +173,9 @@ SOMEPATH/Canopy_64bit/User/lib/python2.7/site-packages/numpy/distutils/system_in
warnings.warn('Specified path %s is invalid.' % d) warnings.warn('Specified path %s is invalid.' % d)
""" """
#I'm not able to remove all printed stuff #I'm not able to remove all printed stuff
with_context = warnings.catch_warnings(record=True) with warnings.catch_warnings(record=True):
with_context.__enter__() numpy.distutils.system_info.system_info.verbosity = 0
try:
blas_info = numpy.distutils.system_info.get_info("blas_opt") blas_info = numpy.distutils.system_info.get_info("blas_opt")
finally:
with_context.__exit__(None, None, None)
# If we are in a EPD installation, mkl is available # If we are in a EPD installation, mkl is available
if "EPD" in sys.version: if "EPD" in sys.version:
......
...@@ -95,7 +95,7 @@ class SoftmaxWithBias(gof.Op): ...@@ -95,7 +95,7 @@ class SoftmaxWithBias(gof.Op):
return ['<iostream>', '<cmath>'] return ['<iostream>', '<cmath>']
@staticmethod @staticmethod
def c_code_template(): def c_code_template(dtype):
# this implementation was lifted from # this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
...@@ -107,6 +107,10 @@ class SoftmaxWithBias(gof.Op): ...@@ -107,6 +107,10 @@ class SoftmaxWithBias(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """ init_decl = """
npy_intp* Nx = PyArray_DIMS(%(x)s); npy_intp* Nx = PyArray_DIMS(%(x)s);
npy_intp Sx = 0;
npy_intp Sb = 0;
npy_intp Ssm = 0;
if (PyArray_NDIM(%(x)s) != 2) if (PyArray_NDIM(%(x)s) != 2)
{ {
...@@ -151,6 +155,10 @@ class SoftmaxWithBias(gof.Op): ...@@ -151,6 +155,10 @@ class SoftmaxWithBias(gof.Op):
%(fail)s %(fail)s
} }
} }
Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
""" """
begin_row_loop = """ begin_row_loop = """
...@@ -163,9 +171,7 @@ class SoftmaxWithBias(gof.Op): ...@@ -163,9 +171,7 @@ class SoftmaxWithBias(gof.Op):
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s)); const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s));
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i); dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s); npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s); npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s); npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
...@@ -182,6 +188,9 @@ class SoftmaxWithBias(gof.Op): ...@@ -182,6 +188,9 @@ class SoftmaxWithBias(gof.Op):
row_max = (row_ij > row_max) ? row_ij : row_max; row_max = (row_ij > row_max) ? row_ij : row_max;
} }
"""
inside_row_loop = """
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb]; dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
...@@ -201,6 +210,42 @@ class SoftmaxWithBias(gof.Op): ...@@ -201,6 +210,42 @@ class SoftmaxWithBias(gof.Op):
""" """
# Get the vectorized version of exp if it exist
try:
vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype,
"Nx[1]", "sm_i", "sm_i")
inside_row_loop_contig = """
for (j = 0; j < Nx[1]; ++j)
{
dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
//std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n";
dtype_%%(sm)s sm_ij = row_ij - row_max;
//std::cout << "3 " << j << " " << sm_ij << "\\n";
sm_i[j * Ssm] = sm_ij;
}
%(vec_exp)s;
for (j = 0; j < Nx[1]; ++j)
{
sum += sm_i[j * Ssm];
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm] *= sum_inv;
}
""" % locals()
inside_row_loop = """
if(Ssm == 1){
%(inside_row_loop_contig)s
}else{
%(inside_row_loop)s
}
""" % locals()
except theano.gof.utils.MethodNotDefined:
pass
end_row_loop = """ end_row_loop = """
} }
""" """
...@@ -210,12 +255,13 @@ class SoftmaxWithBias(gof.Op): ...@@ -210,12 +255,13 @@ class SoftmaxWithBias(gof.Op):
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, b = inp x, b = inp
sm, = out sm, = out
code_template = ''.join(self.c_code_template()) code_template = ''.join(self.c_code_template(
node.inputs[0].type.dtype_specs()[1]))
return code_template % dict(locals(), **sub) return code_template % dict(locals(), **sub)
@staticmethod @staticmethod
def c_code_cache_version(): def c_code_cache_version():
return (6,) return (8,)
softmax_with_bias = SoftmaxWithBias() softmax_with_bias = SoftmaxWithBias()
...@@ -384,7 +430,7 @@ class Softmax(gof.Op): ...@@ -384,7 +430,7 @@ class Softmax(gof.Op):
return ['<iostream>', '<cmath>'] return ['<iostream>', '<cmath>']
@staticmethod @staticmethod
def c_code_template(): def c_code_template(dtype):
# this implementation was lifted from # this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
...@@ -396,6 +442,8 @@ class Softmax(gof.Op): ...@@ -396,6 +442,8 @@ class Softmax(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """ init_decl = """
npy_intp* Nx = PyArray_DIMS(%(x)s); npy_intp* Nx = PyArray_DIMS(%(x)s);
npy_intp Sx1 = 0;
npy_intp Ssm1 = 0;
if (PyArray_NDIM(%(x)s) != 2) if (PyArray_NDIM(%(x)s) != 2)
{ {
...@@ -413,7 +461,7 @@ class Softmax(gof.Op): ...@@ -413,7 +461,7 @@ class Softmax(gof.Op):
|| (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0]) || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
|| (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1])) || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
{ {
if (NULL != %(sm)s) Py_XDECREF(%(sm)s); Py_XDECREF(%(sm)s);
%(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
type_num_%(x)s); type_num_%(x)s);
if(!%(sm)s) { if(!%(sm)s) {
...@@ -422,6 +470,8 @@ class Softmax(gof.Op): ...@@ -422,6 +470,8 @@ class Softmax(gof.Op):
%(fail)s %(fail)s
} }
} }
Sx1 = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
Ssm1 = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
""" """
begin_row_loop = """ begin_row_loop = """
...@@ -433,11 +483,6 @@ class Softmax(gof.Op): ...@@ -433,11 +483,6 @@ class Softmax(gof.Op):
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i); dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0; size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0]; dtype_%(sm)s row_max = x_i[0];
...@@ -445,46 +490,82 @@ class Softmax(gof.Op): ...@@ -445,46 +490,82 @@ class Softmax(gof.Op):
// Get the maximum value of the row // Get the maximum value of the row
for (j = 1; j < Nx[1]; ++j) for (j = 1; j < Nx[1]; ++j)
{ {
dtype_%(sm)s row_ij = x_i[j * Sx] ; dtype_%(sm)s row_ij = x_i[j * Sx1] ;
//std::cout << "1 " << row_ij << "\\n"; //std::cout << "1 " << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j; row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max; row_max = (row_ij > row_max) ? row_ij : row_max;
} }
"""
inside_row_loop = """
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
dtype_%(sm)s row_ij = x_i[j * Sx] ; dtype_%(sm)s row_ij = x_i[j * Sx1] ;
//std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n"; //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n";
dtype_%(sm)s sm_ij = exp(row_ij - row_max); dtype_%(sm)s sm_ij = exp(row_ij - row_max);
//std::cout << "3 " << j << " " << sm_ij << "\\n"; //std::cout << "3 " << j << " " << sm_ij << "\\n";
sum += sm_ij; sum += sm_ij;
sm_i[j * Ssm] = sm_ij; sm_i[j * Ssm1] = sm_ij;
} }
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum; double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
sm_i[j * Ssm] *= sum_inv; sm_i[j * Ssm1] *= sum_inv;
} }
""" """
# Get the vectorized version of exp if it exist
try:
vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype,
"Nx[1]", "sm_i", "sm_i")
inside_row_loop_contig = """
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] = x_i[j * Sx1] - row_max;
}
%(vec_exp)s;
for (j = 0; j < Nx[1]; ++j)
{
sum += sm_i[j * Ssm1];
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] *= sum_inv;
}
""" % locals()
inside_row_loop = """
if(Ssm1 == 1){
%(inside_row_loop_contig)s
}else{
%(inside_row_loop)s
}
""" % locals()
except theano.gof.utils.MethodNotDefined:
pass
end_row_loop = """ end_row_loop = """
} }
""" """
return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, = inp x, = inp
sm, = out sm, = out
code_template = ''.join(self.c_code_template()) code_template = ''.join(self.c_code_template(
node.inputs[0].type.dtype_specs()[1]))
return code_template % dict(locals(), **sub) return code_template % dict(locals(), **sub)
@staticmethod @staticmethod
def c_code_cache_version(): def c_code_cache_version():
return (1,) return (3,)
softmax = Softmax() softmax = Softmax()
...@@ -863,7 +944,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -863,7 +944,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return ['<iostream>', '<cmath>'] return ['<iostream>', '<cmath>']
@staticmethod @staticmethod
def c_code_template(): def c_code_template(dtype):
# this implementation was lifted from # this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
...@@ -874,7 +955,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -874,7 +955,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
(init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \ (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \
SoftmaxWithBias.c_code_template() SoftmaxWithBias.c_code_template(dtype)
return (init_decl, return (init_decl,
""" """
if (PyArray_NDIM(%(y_idx)s) != 1) if (PyArray_NDIM(%(y_idx)s) != 1)
...@@ -947,7 +1028,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -947,7 +1028,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
nll, sm, am = out nll, sm, am = out
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type am_type = y_idx_type
code_template = ''.join(self.c_code_template()) dtype = node.inputs[0].type.dtype_specs()[1]
code_template = ''.join(self.c_code_template(dtype))
return code_template % dict(locals(), **sub) return code_template % dict(locals(), **sub)
......
...@@ -587,7 +587,7 @@ class MakeVector(T.Op): ...@@ -587,7 +587,7 @@ class MakeVector(T.Op):
out[0][...] = inputs out[0][...] = inputs
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
def c_code(self, node, name, inp, out_, sub): def c_code(self, node, name, inp, out_, sub):
out, = out_ out, = out_
...@@ -604,7 +604,7 @@ class MakeVector(T.Op): ...@@ -604,7 +604,7 @@ class MakeVector(T.Op):
ret = """ ret = """
npy_intp dims[1]; npy_intp dims[1];
dims[0] = %(out_shape)s; dims[0] = %(out_shape)s;
if(!%(out)s || PyArray_DIMS(%(out)s)[0] == %(out_shape)s){ if(!%(out)s || PyArray_DIMS(%(out)s)[0] != %(out_shape)s){
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = (PyArrayObject*)PyArray_EMPTY(1, dims, %(out_dtype)s, 0); %(out)s = (PyArrayObject*)PyArray_EMPTY(1, dims, %(out_dtype)s, 0);
} }
......
...@@ -6736,6 +6736,17 @@ class TestTensorInstanceMethods(unittest.TestCase): ...@@ -6736,6 +6736,17 @@ class TestTensorInstanceMethods(unittest.TestCase):
# Test equivalent advanced indexing # Test equivalent advanced indexing
assert_array_equal(X[:,indices].eval({X: x}), x[:,indices]) assert_array_equal(X[:,indices].eval({X: x}), x[:,indices])
def test_cumsum(self):
X, _ = self.vars
x, _ = self.vals
assert_array_equal(X.cumsum().eval({X: x}), x.cumsum())
def test_cumprod(self):
X, _ = self.vars
x, _ = self.vals
assert_array_equal(X.cumprod().eval({X: x}), x.cumprod())
def test_norm(): def test_norm():
x = theano.tensor.vector('x') x = theano.tensor.vector('x')
n = x.norm(2) n = x.norm(2)
......
...@@ -11,6 +11,7 @@ from theano.tensor.utils import hash_from_ndarray ...@@ -11,6 +11,7 @@ from theano.tensor.utils import hash_from_ndarray
from theano.tensor.type import TensorType from theano.tensor.type import TensorType
class AsTensorError(TypeError): class AsTensorError(TypeError):
"""Raised when as_tensor_variable isn't able to create a """Raised when as_tensor_variable isn't able to create a
TensorVariable. TensorVariable.
...@@ -509,13 +510,11 @@ class _tensor_py_operators: ...@@ -509,13 +510,11 @@ class _tensor_py_operators:
def sort(self, axis=-1, kind='quicksort', order=None): def sort(self, axis=-1, kind='quicksort', order=None):
"""See `theano.tensor.sort`""" """See `theano.tensor.sort`"""
from theano.tensor.sort import sort return theano.tensor.sort(self, axis, kind, order)
return sort(self, axis, kind, order)
def argsort(self, axis=-1, kind='quicksort', order=None): def argsort(self, axis=-1, kind='quicksort', order=None):
"""See `theano.tensor.argsort`""" """See `theano.tensor.argsort`"""
from theano.tensor.sort import argsort return theano.tensor.argsort(self, axis, kind, order)
return argsort(self, axis, kind, order)
def clip(self, a_min, a_max): def clip(self, a_min, a_max):
"Clip (limit) the values in an array." "Clip (limit) the values in an array."
...@@ -529,16 +528,14 @@ class _tensor_py_operators: ...@@ -529,16 +528,14 @@ class _tensor_py_operators:
def repeat(self, repeats, axis=None): def repeat(self, repeats, axis=None):
"""See `theano.tensor.repeat`""" """See `theano.tensor.repeat`"""
from theano.tensor.extra_ops import repeat return theano.tensor.extra_ops.repeat(self, repeats, axis)
return repeat(self, repeats, axis)
def round(self, mode="half_away_from_zero"): def round(self, mode="half_away_from_zero"):
"""See `theano.tensor.round`""" """See `theano.tensor.round`"""
return theano.tensor.basic.round(self, mode) return theano.tensor.basic.round(self, mode)
def trace(self): def trace(self):
from theano.sandbox.linalg import trace return theano.sandbox.linalg.trace(self)
return trace(self)
# TO TRUMP NUMPY OPERATORS # TO TRUMP NUMPY OPERATORS
__array_priority__ = 1000 __array_priority__ = 1000
...@@ -549,6 +546,12 @@ class _tensor_py_operators: ...@@ -549,6 +546,12 @@ class _tensor_py_operators:
def zeros_like(model, dtype=None): def zeros_like(model, dtype=None):
return theano.tensor.basic.zeros_like(model, dtype=dtype) return theano.tensor.basic.zeros_like(model, dtype=dtype)
def cumsum(self, axis=None):
return theano.tensor.extra_ops.cumsum(self, axis)
def cumprod(self, axis=None):
return theano.tensor.extra_ops.cumprod(self, axis)
class TensorVariable(_tensor_py_operators, Variable): class TensorVariable(_tensor_py_operators, Variable):
"""Subclass to add the tensor operators to the basic `Variable` class.""" """Subclass to add the tensor operators to the basic `Variable` class."""
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论