提交 85db6f61 authored 作者: lamblin's avatar lamblin

Merge pull request #1442 from nouiz/mixed2

[WIP] Use the new grad interface.
...@@ -124,6 +124,27 @@ Do like in the section "Updating Theano", but use ...@@ -124,6 +124,27 @@ Do like in the section "Updating Theano", but use
.. _install_ubuntu_gpu: .. _install_ubuntu_gpu:
Manual OpenBLAS instruction
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The OpenBLAS package included in Ubuntu is limited to 2 threads. If you
want to use more cores at the same time, you will need to compile it
yourself. The following commands show how to build and install it.
.. code-block:: bash
# Remove the packaged OpenBLAS if you installed it
sudo apt-get remove libopenblas-base
# Download the development version of OpenBLAS
# (GitHub no longer serves the unauthenticated git:// protocol, so use https)
git clone https://github.com/xianyi/OpenBLAS
cd OpenBLAS
make FC=gfortran
sudo make PREFIX=/usr/local/ install
# Make the system BLAS names point at the freshly built library.
# The link targets must be absolute: a relative target is resolved from the
# directory that holds the link (/usr/lib), not from the current directory,
# so it would dangle. Writing into /usr/lib also requires root.
sudo ln -s /usr/local/lib/libopenblas.so /usr/lib/libblas.so
sudo ln -s /usr/local/lib/libopenblas.so.0 /usr/lib/libblas.so.3gf
Contributed GPU instruction Contributed GPU instruction
~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
...@@ -8,6 +8,7 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>" ...@@ -8,6 +8,7 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import __builtin__ import __builtin__
from itertools import izip
import logging import logging
import warnings import warnings
_logger = logging.getLogger('theano.gradient') _logger = logging.getLogger('theano.gradient')
...@@ -17,7 +18,6 @@ np = numpy ...@@ -17,7 +18,6 @@ np = numpy
import theano import theano
from itertools import izip
from theano import gof from theano import gof
from theano.gof import Variable from theano.gof import Variable
from theano.gof.python25 import OrderedDict from theano.gof.python25 import OrderedDict
...@@ -78,10 +78,9 @@ def grad_not_implemented(op, x_pos, x, comment=""): ...@@ -78,10 +78,9 @@ def grad_not_implemented(op, x_pos, x, comment=""):
gradient is not implemented. gradient is not implemented.
""" """
return (NullType( return (NullType((
( "This variable is Null because the grad method for "
"This variable is Null because the grad method for " "input %s (%s) of the %s op is not implemented. %s"
"input %s (%s) of the %s op is not implemented. %s"
) % (x_pos, x, op, comment)))() ) % (x_pos, x, op, comment)))()
...@@ -341,8 +340,8 @@ def Lop(f, wrt, eval_points, consider_constant=None, ...@@ -341,8 +340,8 @@ def Lop(f, wrt, eval_points, consider_constant=None,
known = dict(izip(f, grads)) known = dict(izip(f, grads))
ret = grad(cost=None, known_grads=known, ret = grad(cost=None, known_grads=known,
consider_constant=consider_constant, wrt=wrt, consider_constant=consider_constant, wrt=wrt,
disconnected_inputs=disconnected_inputs) disconnected_inputs=disconnected_inputs)
return format_as(using_list, using_tuple, ret) return format_as(using_list, using_tuple, ret)
...@@ -352,8 +351,8 @@ def Lop(f, wrt, eval_points, consider_constant=None, ...@@ -352,8 +351,8 @@ def Lop(f, wrt, eval_points, consider_constant=None,
######################### #########################
def grad(cost, wrt, consider_constant=None, def grad(cost, wrt, consider_constant=None,
disconnected_inputs='raise', add_names=True, disconnected_inputs='raise', add_names=True,
known_grads=None, return_disconnected='zero'): known_grads=None, return_disconnected='zero'):
""" """
:type cost: Scalar (0-dimensional) Variable. :type cost: Scalar (0-dimensional) Variable.
May optionally be None if known_grads is provided. May optionally be None if known_grads is provided.
...@@ -406,17 +405,16 @@ def grad(cost, wrt, consider_constant=None, ...@@ -406,17 +405,16 @@ def grad(cost, wrt, consider_constant=None,
if cost is not None and isinstance(cost.type, NullType): if cost is not None and isinstance(cost.type, NullType):
raise ValueError("Can't differentiate a NaN cost." raise ValueError("Can't differentiate a NaN cost."
"cost is NaN because " + \ "cost is NaN because " +
cost.type.why_null) cost.type.why_null)
if cost is not None and cost.ndim != 0: if cost is not None and cost.ndim != 0:
raise TypeError("cost must be a scalar.") raise TypeError("cost must be a scalar.")
if isinstance(wrt, set): if isinstance(wrt, set):
raise TypeError("wrt must not be a set. sets have no defined " raise TypeError("wrt must not be a set. sets have no defined "
"iteration order, so we can't return gradients in a matching" "iteration order, so we can't return gradients in a"
" order.") " matching order.")
using_list = isinstance(wrt, list) using_list = isinstance(wrt, list)
using_tuple = isinstance(wrt, tuple) using_tuple = isinstance(wrt, tuple)
...@@ -426,7 +424,7 @@ def grad(cost, wrt, consider_constant=None, ...@@ -426,7 +424,7 @@ def grad(cost, wrt, consider_constant=None,
for elem in wrt: for elem in wrt:
if not isinstance(elem, Variable): if not isinstance(elem, Variable):
raise TypeError("Expected Variable, got " + str(elem) + raise TypeError("Expected Variable, got " + str(elem) +
" of type "+str(type(elem))) " of type " + str(type(elem)))
outputs = [] outputs = []
if cost is not None: if cost is not None:
...@@ -435,7 +433,7 @@ def grad(cost, wrt, consider_constant=None, ...@@ -435,7 +433,7 @@ def grad(cost, wrt, consider_constant=None,
outputs.extend(known_grads.keys()) outputs.extend(known_grads.keys())
var_to_app_to_idx = _populate_var_to_app_to_idx( var_to_app_to_idx = _populate_var_to_app_to_idx(
outputs, wrt, consider_constant) outputs, wrt, consider_constant)
# build a dict mapping var to the gradient of cost with respect to var # build a dict mapping var to the gradient of cost with respect to var
grad_dict = OrderedDict() grad_dict = OrderedDict()
...@@ -452,7 +450,8 @@ def grad(cost, wrt, consider_constant=None, ...@@ -452,7 +450,8 @@ def grad(cost, wrt, consider_constant=None,
# g_cost may be Disconnected or NullType. A creative use of the function, # g_cost may be Disconnected or NullType. A creative use of the function,
# sure, but nonetheless one we can and should support. So before we try # sure, but nonetheless one we can and should support. So before we try
# to cast it make sure it even has a dtype # to cast it make sure it even has a dtype
if hasattr(g_cost.type, 'dtype') and cost.type.dtype not in tensor.discrete_dtypes: if (hasattr(g_cost.type, 'dtype') and
cost.type.dtype not in tensor.discrete_dtypes):
# Here we enforce the constraint that floating point variables have # Here we enforce the constraint that floating point variables have
# the same dtype as their gradient. # the same dtype as their gradient.
g_cost = g_cost.astype(cost.type.dtype) g_cost = g_cost.astype(cost.type.dtype)
...@@ -471,8 +470,8 @@ def grad(cost, wrt, consider_constant=None, ...@@ -471,8 +470,8 @@ def grad(cost, wrt, consider_constant=None,
'Ambiguous whether %s should be made into tensor' 'Ambiguous whether %s should be made into tensor'
' or sparse theano variable' % str(type(g_var))) ' or sparse theano variable' % str(type(g_var)))
if not isinstance(g_var.type, (NullType, DisconnectedType)) and 'float' \ if (not isinstance(g_var.type, (NullType, DisconnectedType)) and
not in str(g_var.type.dtype): 'float' not in str(g_var.type.dtype)):
raise TypeError("Gradients must always be NullType, " raise TypeError("Gradients must always be NullType, "
"DisconnectedType, or continuous, but grad was " "DisconnectedType, or continuous, but grad was "
"given a known_grad of type "+str(g_var.type)) "given a known_grad of type "+str(g_var.type))
...@@ -728,11 +727,13 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant): ...@@ -728,11 +727,13 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
return var_to_app_to_idx return var_to_app_to_idx
class NullTypeGradError(TypeError): class NullTypeGradError(TypeError):
""" """
Raised when grad encounters a NullType. Raised when grad encounters a NullType.
""" """
class DisconnectedInputError(ValueError): class DisconnectedInputError(ValueError):
""" """
Raised when grad is asked to compute the gradient Raised when grad is asked to compute the gradient
...@@ -740,8 +741,9 @@ class DisconnectedInputError(ValueError): ...@@ -740,8 +741,9 @@ class DisconnectedInputError(ValueError):
disconnected_inputs='raise'. disconnected_inputs='raise'.
""" """
def _populate_grad_dict(var_to_app_to_idx, def _populate_grad_dict(var_to_app_to_idx,
grad_dict, wrt, cost_name=None): grad_dict, wrt, cost_name=None):
""" """
Helper function for grad function. Helper function for grad function.
...@@ -783,7 +785,7 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -783,7 +785,7 @@ def _populate_grad_dict(var_to_app_to_idx,
# list of bools indicating if each output is connected to the cost # list of bools indicating if each output is connected to the cost
outputs_connected = [not isinstance(g.type, DisconnectedType) outputs_connected = [not isinstance(g.type, DisconnectedType)
for g in output_grads] for g in output_grads]
connection_pattern = _node_to_pattern(node) connection_pattern = _node_to_pattern(node)
...@@ -840,7 +842,7 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -840,7 +842,7 @@ def _populate_grad_dict(var_to_app_to_idx,
# each destroyed input. # each destroyed input.
try: try:
dinputs = [node.inputs[x[0]] for x in dinputs = [node.inputs[x[0]] for x in
node.op.destroy_map.values()] node.op.destroy_map.values()]
except AttributeError: except AttributeError:
dinputs = [] dinputs = []
...@@ -899,11 +901,11 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -899,11 +901,11 @@ def _populate_grad_dict(var_to_app_to_idx,
if input_grads is None: if input_grads is None:
raise TypeError("%s.grad returned NoneType, " raise TypeError("%s.grad returned NoneType, "
"expected iterable." % str(node.op)) "expected iterable." % str(node.op))
if len(input_grads) != len(inputs): if len(input_grads) != len(inputs):
raise ValueError(("%s returned the wrong number of" +\ raise ValueError(("%s returned the wrong number of" +
" gradient terms.") % str(node.op)) " gradient terms.") % str(node.op))
# must convert to list in case the op returns a tuple # must convert to list in case the op returns a tuple
# we won't be able to post-process out the Nones if it does that # we won't be able to post-process out the Nones if it does that
...@@ -926,7 +928,7 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -926,7 +928,7 @@ def _populate_grad_dict(var_to_app_to_idx,
# used to mean undefined, zero, or disconnected. # used to mean undefined, zero, or disconnected.
# We therefore don't allow it because its usage has become # We therefore don't allow it because its usage has become
# so muddied. # so muddied.
raise TypeError(('%s.grad returned None for' +\ raise TypeError(('%s.grad returned None for' +
' a gradient term, ' ' a gradient term, '
'this is prohibited. Instead of None,' 'this is prohibited. Instead of None,'
'return zeros_like(input), DisconnectedType()(),' 'return zeros_like(input), DisconnectedType()(),'
...@@ -964,7 +966,7 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -964,7 +966,7 @@ def _populate_grad_dict(var_to_app_to_idx,
msg += "verifiably zeros." msg += "verifiably zeros."
msg = msg % (str(node.op), str(term), msg = msg % (str(node.op), str(term),
str(type(term)), i) str(type(term)), i)
if is_zero == 'no': if is_zero == 'no':
msg = "%s.grad returned %s of type %s for input" msg = "%s.grad returned %s of type %s for input"
...@@ -980,8 +982,8 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -980,8 +982,8 @@ def _populate_grad_dict(var_to_app_to_idx,
#Check that op.connection_pattern matches the connectivity #Check that op.connection_pattern matches the connectivity
#logic driving the op.grad method #logic driving the op.grad method
for i, packed in \ for i, packed in enumerate(zip(inputs, input_grads,
enumerate(zip(inputs, input_grads, inputs_connected)): inputs_connected)):
ipt, ig, connected = packed ipt, ig, connected = packed
actually_connected = \ actually_connected = \
not isinstance(ig.type, DisconnectedType) not isinstance(ig.type, DisconnectedType)
...@@ -1027,11 +1029,11 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1027,11 +1029,11 @@ def _populate_grad_dict(var_to_app_to_idx,
if not isinstance(term, gof.Variable): if not isinstance(term, gof.Variable):
raise TypeError("%s.grad returned %s, expected" raise TypeError("%s.grad returned %s, expected"
" Variable instance." % (str(node.op), " Variable instance." % (str(node.op),
type(term))) type(term)))
if isinstance(term.type, NullType): if isinstance(term.type, NullType):
raise NullTypeGradError("tensor.grad " raise NullTypeGradError("tensor.grad "
"encountered a NaN. " +\ "encountered a NaN. " +
term.type.why_null) term.type.why_null)
#Don't try to sum up DisconnectedType placeholders #Don't try to sum up DisconnectedType placeholders
...@@ -1121,9 +1123,9 @@ class numeric_grad(object): ...@@ -1121,9 +1123,9 @@ class numeric_grad(object):
# For now, we use a heuristic that catches very bad gradients, but is not # For now, we use a heuristic that catches very bad gradients, but is not
# perfectly accurate. # perfectly accurate.
type_eps = {'float64': 1e-7, type_eps = {'float64': 1e-7,
'float32': 3e-4, 'float32': 3e-4,
numpy.dtype('float64'): 1e-7, numpy.dtype('float64'): 1e-7,
numpy.dtype('float32'): 3e-4} numpy.dtype('float32'): 3e-4}
def __init__(self, f, pt, eps=None, out_type=None): def __init__(self, f, pt, eps=None, out_type=None):
"""Return the gradient of f at pt. """Return the gradient of f at pt.
...@@ -1243,15 +1245,13 @@ class numeric_grad(object): ...@@ -1243,15 +1245,13 @@ class numeric_grad(object):
""" """
if len(g_pt) != len(self.gf): if len(g_pt) != len(self.gf):
raise ValueError( raise ValueError('argument has wrong number of elements',
'argument has wrong number of elements', len(g_pt))
len(g_pt))
errs = [] errs = []
for i, (a, b) in enumerate(zip(g_pt, self.gf)): for i, (a, b) in enumerate(zip(g_pt, self.gf)):
if a.shape != b.shape: if a.shape != b.shape:
raise ValueError( raise ValueError('argument element %i has wrong shape %s' % (
'argument element %i has wrong shape %s' % ( i, str((a.shape, b.shape))))
i, str((a.shape, b.shape))))
errs.append(numeric_grad.abs_rel_err(a, b)) errs.append(numeric_grad.abs_rel_err(a, b))
return errs return errs
...@@ -1287,7 +1287,7 @@ class numeric_grad(object): ...@@ -1287,7 +1287,7 @@ class numeric_grad(object):
# max over the arrays in g_pt # max over the arrays in g_pt
max_arg = numpy.argmax(errs) max_arg = numpy.argmax(errs)
max_pos = pos[max_arg] max_pos = pos[max_arg]
return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg]) return (max_arg, max_pos, abs_errs[max_arg], rel_errs[max_arg])
def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
...@@ -1336,9 +1336,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1336,9 +1336,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
covers that case as well by using random projections. covers that case as well by using random projections.
""" """
# The import is here to prevent circular import.
from theano import compile, shared from theano import compile, shared
import theano.tensor import theano.tensor
from theano.tensor import as_tensor_variable, cast, TensorType from theano.tensor import as_tensor_variable, TensorType
assert isinstance(pt, (list, tuple)) assert isinstance(pt, (list, tuple))
pt = [numpy.array(p) for p in pt] pt = [numpy.array(p) for p in pt]
...@@ -1368,11 +1369,12 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1368,11 +1369,12 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
def function(inputs, output): def function(inputs, output):
if mode is None: if mode is None:
f = compile.function(inputs, output, accept_inplace=True, f = compile.function(inputs, output, accept_inplace=True,
allow_input_downcast=True, on_unused_input='ignore') allow_input_downcast=True,
on_unused_input='ignore')
else: else:
f = compile.function(inputs, output, accept_inplace=True, f = compile.function(inputs, output, accept_inplace=True,
allow_input_downcast=True, mode=mode, allow_input_downcast=True, mode=mode,
on_unused_input='ignore') on_unused_input='ignore')
return f return f
tensor_pt = [TensorType( tensor_pt = [TensorType(
...@@ -1421,24 +1423,32 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1421,24 +1423,32 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
grad_fn = function(tensor_pt, symbolic_grad) grad_fn = function(tensor_pt, symbolic_grad)
for test_num in xrange(n_tests): for test_num in xrange(n_tests):
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps, out_type) try:
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt],
eps, out_type)
analytic_grad = grad_fn(*[p.copy() for p in pt]) analytic_grad = grad_fn(*[p.copy() for p in pt])
# Since `tensor_pt` is a list, `analytic_grad` should be one too. # Since `tensor_pt` is a list, `analytic_grad` should be one too.
assert isinstance(analytic_grad, list) assert isinstance(analytic_grad, list)
max_arg, max_err_pos, max_abs_err, max_rel_err =\ max_arg, max_err_pos, max_abs_err, max_rel_err = num_grad.max_err(
num_grad.max_err(analytic_grad, abs_tol, rel_tol) analytic_grad, abs_tol, rel_tol)
if max_abs_err > abs_tol and max_rel_err > rel_tol: if max_abs_err > abs_tol and max_rel_err > rel_tol:
raise verify_grad.E_grad(max_arg, max_err_pos, raise verify_grad.E_grad(max_arg, max_err_pos,
max_abs_err, max_rel_err, abs_tol, rel_tol) max_abs_err, max_rel_err,
abs_tol, rel_tol)
# get new random projection for next test # get new random projection for next test
if test_num < n_tests - 1: if test_num < n_tests - 1:
t_r.set_value(random_projection(), borrow=True) t_r.set_value(random_projection(), borrow=True)
except Exception, e:
e.args += ("\nThe error happened with the following inputs:", pt,
"\nThe value of eps is:", eps,
"\nThe out_type is:", out_type)
raise
class GradientError(Exception): class GradientError(Exception):
...@@ -1517,9 +1527,9 @@ def jacobian(expression, wrt, consider_constant=None, ...@@ -1517,9 +1527,9 @@ def jacobian(expression, wrt, consider_constant=None,
rvals = [] rvals = []
for inp in args[2:]: for inp in args[2:]:
rval = grad(expr[idx], rval = grad(expr[idx],
inp, inp,
consider_constant=consider_constant, consider_constant=consider_constant,
disconnected_inputs=disconnected_inputs) disconnected_inputs=disconnected_inputs)
rvals.append(rval) rvals.append(rval)
return rvals return rvals
# Computing the gradients does not affect the random seeds on any random # Computing the gradients does not affect the random seeds on any random
...@@ -1527,8 +1537,8 @@ def jacobian(expression, wrt, consider_constant=None, ...@@ -1527,8 +1537,8 @@ def jacobian(expression, wrt, consider_constant=None,
# just backtracking over old values. (rp Jan 2012 - if anyone has a # just backtracking over old values. (rp Jan 2012 - if anyone has a
# counter example please show me) # counter example please show me)
jacobs, updates = theano.scan(inner_function, jacobs, updates = theano.scan(inner_function,
sequences=arange(expression.shape[0]), sequences=arange(expression.shape[0]),
non_sequences=[expression] + wrt) non_sequences=[expression] + wrt)
assert not updates, \ assert not updates, \
("Scan has returned a list of updates. This should not " ("Scan has returned a list of updates. This should not "
"happen! Report this to theano-users (also include the " "happen! Report this to theano-users (also include the "
...@@ -1537,7 +1547,7 @@ def jacobian(expression, wrt, consider_constant=None, ...@@ -1537,7 +1547,7 @@ def jacobian(expression, wrt, consider_constant=None,
def hessian(cost, wrt, consider_constant=None, def hessian(cost, wrt, consider_constant=None,
disconnected_inputs='raise'): disconnected_inputs='raise'):
""" """
:type cost: Scalar (0-dimensional) Variable. :type cost: Scalar (0-dimensional) Variable.
:type wrt: Vector (1-dimensional tensor) 'Variable' or list of :type wrt: Vector (1-dimensional tensor) 'Variable' or list of
......
...@@ -41,7 +41,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -41,7 +41,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
float * sm_data, int sms0, int sms1, float * sm_data, int sms0, int sms1,
float * am_data, int ams0) float * am_data, int ams0)
{ {
const int row = blockIdx.x; for (int row = blockIdx.x; row < M; row += gridDim.x){
const float * x = x_data + xs0 * row; const float * x = x_data + xs0 * row;
const int y_idx = (int)y_idx_data[row * y_idxs0]; const int y_idx = (int)y_idx_data[row * y_idxs0];
...@@ -83,6 +83,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -83,6 +83,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
+ log(sum); + log(sum);
} }
am_data[row*ams0] = row_max_j; am_data[row*ams0] = row_max_j;
}
} }
""" """
...@@ -168,7 +169,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -168,7 +169,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
} }
} }
{ {
int n_blocks = CudaNdarray_HOST_DIMS(%(sm)s)[0]; int n_blocks = std::min(CudaNdarray_HOST_DIMS(%(x)s)[0],
NUM_VECTOR_OP_BLOCKS);
//TODO: launch more threads per row and do parallel sum and max reductions //TODO: launch more threads per row and do parallel sum and max reductions
int n_threads = 1; int n_threads = 1;
int n_shared_bytes = 0; //n_threads * sizeof(float); int n_shared_bytes = 0; //n_threads * sizeof(float);
...@@ -195,8 +197,11 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -195,8 +197,11 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
if (cudaSuccess != err) if (cudaSuccess != err)
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Cuda error: %(classname)s %(nodename)s: %%s.\\n", "Cuda error: %(classname)s %(nodename)s: %%s.\\n"
cudaGetErrorString(err)); "The kernel was launched with %%d threads,"
" %%d blocks and %%d shared memory\\n",
cudaGetErrorString(err),
n_threads, n_blocks, n_shared_bytes);
// no need to decref output vars the cleanup code will do it // no need to decref output vars the cleanup code will do it
%(fail)s; %(fail)s;
} }
...@@ -206,7 +211,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -206,7 +211,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
#return () #return ()
return (3,) return (4,)
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias() gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
...@@ -235,7 +240,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): ...@@ -235,7 +240,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
#return () #return ()
return (5,) return (6,)
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
...@@ -283,11 +288,12 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): ...@@ -283,11 +288,12 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
} }
} }
{ {
int n_blocks = std::min(CudaNdarray_HOST_DIMS(%(dx)s)[0],
NUM_VECTOR_OP_BLOCKS);
int n_threads = std::min(CudaNdarray_HOST_DIMS(%(dx)s)[1],256);
kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s
<<< <<<n_blocks, n_threads>>>(
CudaNdarray_HOST_DIMS(%(dx)s)[0],
std::min(CudaNdarray_HOST_DIMS(%(dx)s)[1],256)
>>>(
CudaNdarray_HOST_DIMS(%(dx)s)[0], CudaNdarray_HOST_DIMS(%(dx)s)[0],
CudaNdarray_HOST_DIMS(%(dx)s)[1], CudaNdarray_HOST_DIMS(%(dx)s)[1],
...@@ -310,9 +316,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): ...@@ -310,9 +316,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
if( cudaSuccess != err) if( cudaSuccess != err)
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Cuda error: %%s: %%s.\\n", "Cuda error: %%s: %%s.\\n"
"The kernel was launched with %%d threads and"
" %%d blocks\\n",
"kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s", "kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s",
cudaGetErrorString(err)); cudaGetErrorString(err), n_threads, n_blocks);
%(fail)s; %(fail)s;
} }
} }
......
...@@ -25,7 +25,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -25,7 +25,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
This is basic test for GpuCrossentropySoftmaxArgmax1HotWithBias This is basic test for GpuCrossentropySoftmaxArgmax1HotWithBias
We check that we loop when there are too many threads We check that we loop when there are too many threads
TODO: check that we loop when their is too much block(>32*1024)
""" """
...@@ -100,13 +99,16 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx(): ...@@ -100,13 +99,16 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
This is basic test for GpuCrossentropySoftmax1HotWithBiasDx This is basic test for GpuCrossentropySoftmax1HotWithBiasDx
We check that we loop when there are too many threads We check that we loop when there are too many threads
TODO: check that we loop when their is too much block(>32*1024)
""" """
n_in = 1000 n_in = 1000
batch_size = 4097 batch_size = 4097
n_out = 1250 n_out = 1250
if not isinstance(mode_with_gpu, theano.compile.DebugMode):
n_in = 4098
n_out = 4099
# Seed numpy.random with config.unittests.rseed # Seed numpy.random with config.unittests.rseed
utt.seed_rng() utt.seed_rng()
......
...@@ -715,10 +715,9 @@ class ExtractDiag(Op): ...@@ -715,10 +715,9 @@ class ExtractDiag(Op):
implemented our own. """ implemented our own. """
x, = ins x, = ins
z, = outs z, = outs
# zero-dimensional matrices ... # zero-dimensional matrices ...
if x.shape[0] == 0 or x.shape[1] == 0: if x.shape[0] == 0 or x.shape[1] == 0:
z[0] = numpy.zeros(0, dtype=x.dtype) z[0] = node.outputs[0].type.value_zeros((0,))
return return
if x.shape[0] < x.shape[1]: if x.shape[0] < x.shape[1]:
......
...@@ -204,8 +204,8 @@ def test_rop_lop(): ...@@ -204,8 +204,8 @@ def test_rop_lop():
rop_f = function([mx, mv], yv) rop_f = function([mx, mv], yv)
sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(), sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
sequences=tensor.arange(y.shape[0]), sequences=tensor.arange(y.shape[0]),
non_sequences=[y, mx, mv]) non_sequences=[y, mx, mv])
scan_f = function([mx, mv], sy) scan_f = function([mx, mv], sy)
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -561,6 +561,7 @@ class test_Eigh(test_Eig): ...@@ -561,6 +561,7 @@ class test_Eigh(test_Eig):
class test_Eigh_float32(test_Eigh): class test_Eigh_float32(test_Eigh):
dtype = 'float32' dtype = 'float32'
def test_matrix_inverse_solve(): def test_matrix_inverse_solve():
if not imported_scipy: if not imported_scipy:
raise SkipTest("Scipy needed for the Solve op.") raise SkipTest("Scipy needed for the Solve op.")
......
...@@ -17,7 +17,7 @@ from theano.sparse import enable_sparse ...@@ -17,7 +17,7 @@ from theano.sparse import enable_sparse
from theano.gof.python25 import all, any, product from theano.gof.python25 import all, any, product
if enable_sparse == False: if not enable_sparse:
raise SkipTest('Optional package sparse disabled') raise SkipTest('Optional package sparse disabled')
from theano.sparse.basic import _is_dense, _is_sparse, _mtypes from theano.sparse.basic import _is_dense, _is_sparse, _mtypes
...@@ -35,7 +35,7 @@ from theano.sparse import ( ...@@ -35,7 +35,7 @@ from theano.sparse import (
SparseFromDense, SparseFromDense,
Cast, cast, HStack, VStack, AddSSData, add_s_s_data, Cast, cast, HStack, VStack, AddSSData, add_s_s_data,
structured_minimum, structured_maximum, structured_add, structured_minimum, structured_maximum, structured_add,
mul_s_v, structured_add_s_v, mul_s_v, structured_add_s_v,
SamplingDot, sampling_dot, SamplingDot, sampling_dot,
Diag, diag, SquareDiagonal, square_diagonal, Diag, diag, SquareDiagonal, square_diagonal,
EnsureSortedIndices, ensure_sorted_indices, clean, EnsureSortedIndices, ensure_sorted_indices, clean,
...@@ -372,17 +372,17 @@ class SparseInferShapeTester(utt.InferShapeTester): ...@@ -372,17 +372,17 @@ class SparseInferShapeTester(utt.InferShapeTester):
[x, y], [x, y],
[grads[0]], [grads[0]],
[as_sparse_format(random_lil((4, 5), [as_sparse_format(random_lil((4, 5),
config.floatX, 3), format), config.floatX, 3), format),
as_sparse_format(random_lil((5, 3), as_sparse_format(random_lil((5, 3),
config.floatX, 3), format)], config.floatX, 3), format)],
op) op)
self._compile_and_check( self._compile_and_check(
[x, y], [x, y],
[grads[1]], [grads[1]],
[as_sparse_format(random_lil((4, 5), [as_sparse_format(random_lil((4, 5),
config.floatX, 3), format), config.floatX, 3), format),
as_sparse_format(random_lil((5, 3), as_sparse_format(random_lil((5, 3),
config.floatX, 3), format)], config.floatX, 3), format)],
op) op)
def test_dense_from_sparse(self): def test_dense_from_sparse(self):
...@@ -398,8 +398,7 @@ class SparseInferShapeTester(utt.InferShapeTester): ...@@ -398,8 +398,7 @@ class SparseInferShapeTester(utt.InferShapeTester):
self._compile_and_check([x], self._compile_and_check([x],
[csc_from_dense(x)], [csc_from_dense(x)],
[numpy.random.randn(10, 40).astype( [numpy.random.randn(10, 40).astype(
config.floatX)], config.floatX)],
csc_from_dense.__class__) csc_from_dense.__class__)
def test_sparse_from_list(self): def test_sparse_from_list(self):
...@@ -674,27 +673,27 @@ class test_csm_properties(unittest.TestCase): ...@@ -674,27 +673,27 @@ class test_csm_properties(unittest.TestCase):
def test_csm_properties_grad(self): def test_csm_properties_grad(self):
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix} 'csr': sp.csr_matrix}
for format in ['csc', 'csr']: for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']: for dtype in ['float32', 'float64']:
spmat = sp_types[format](random_lil((4, 3), dtype, 3)) spmat = sp_types[format](random_lil((4, 3), dtype, 3))
verify_grad_sparse(lambda *x: CSMProperties()(*x)[0], [spmat], verify_grad_sparse(lambda *x: CSMProperties()(*x)[0], [spmat],
structured=True) structured=True)
verify_grad_sparse(lambda *x: CSMProperties()(*x)[1], [spmat], verify_grad_sparse(lambda *x: CSMProperties()(*x)[1], [spmat],
structured=True) structured=True)
verify_grad_sparse(lambda *x: CSMProperties()(*x)[2], [spmat], verify_grad_sparse(lambda *x: CSMProperties()(*x)[2], [spmat],
structured=True) structured=True)
verify_grad_sparse(lambda *x: CSMProperties()(*x)[2], [spmat], verify_grad_sparse(lambda *x: CSMProperties()(*x)[2], [spmat],
structured=True) structured=True)
def test_csm_properties(self): def test_csm_properties(self):
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix} 'csr': sp.csr_matrix}
for format in ['csc', 'csr']: for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']: for dtype in ['float32', 'float64']:
...@@ -717,7 +716,7 @@ class test_csm(unittest.TestCase): ...@@ -717,7 +716,7 @@ class test_csm(unittest.TestCase):
def test_csm_grad(self): def test_csm_grad(self):
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix} 'csr': sp.csr_matrix}
for format in ['csc', 'csr']: for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']: for dtype in ['float32', 'float64']:
...@@ -732,7 +731,7 @@ class test_csm(unittest.TestCase): ...@@ -732,7 +731,7 @@ class test_csm(unittest.TestCase):
Test support for gradients sparser than the input. Test support for gradients sparser than the input.
""" """
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix} 'csr': sp.csr_matrix}
for format in ['csc', 'csr']: for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']: for dtype in ['float32', 'float64']:
...@@ -742,7 +741,7 @@ class test_csm(unittest.TestCase): ...@@ -742,7 +741,7 @@ class test_csm(unittest.TestCase):
s = tensor.ivector() s = tensor.ivector()
a = as_sparse_variable(sp_types[format](random_lil((4, 3), a = as_sparse_variable(sp_types[format](random_lil((4, 3),
dtype, 1))) dtype, 1)))
f = theano.function([x, y, z, s], f = theano.function([x, y, z, s],
tensor.grad(dense_from_sparse( tensor.grad(dense_from_sparse(
...@@ -751,7 +750,7 @@ class test_csm(unittest.TestCase): ...@@ -751,7 +750,7 @@ class test_csm(unittest.TestCase):
spmat = sp_types[format](random_lil((4, 3), dtype, 3)) spmat = sp_types[format](random_lil((4, 3), dtype, 3))
res = f(spmat.data, spmat.indices, spmat.indptr, res = f(spmat.data, spmat.indices, spmat.indptr,
numpy.asarray(spmat.shape, 'int32')) numpy.asarray(spmat.shape, 'int32'))
assert len(spmat.data) == len(res) assert len(spmat.data) == len(res)
...@@ -760,7 +759,7 @@ class test_csm(unittest.TestCase): ...@@ -760,7 +759,7 @@ class test_csm(unittest.TestCase):
Test support for gradients of unsorted inputs. Test support for gradients of unsorted inputs.
""" """
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix} 'csr': sp.csr_matrix}
for format in ['csr', 'csc', ]: for format in ['csr', 'csc', ]:
for dtype in ['float32', 'float64']: for dtype in ['float32', 'float64']:
...@@ -773,7 +772,7 @@ class test_csm(unittest.TestCase): ...@@ -773,7 +772,7 @@ class test_csm(unittest.TestCase):
[1, 2, 1], [1, 2, 1],
[1, 2, 1], [1, 2, 1],
[1, 2, 1]], [1, 2, 1]],
dtype=dtype)[range(4)] dtype=dtype)[range(4)]
# Make sure it's unsorted # Make sure it's unsorted
assert not a.has_sorted_indices assert not a.has_sorted_indices
a = as_sparse_variable(a) a = as_sparse_variable(a)
...@@ -782,14 +781,15 @@ class test_csm(unittest.TestCase): ...@@ -782,14 +781,15 @@ class test_csm(unittest.TestCase):
dense_from_sparse(a * CSM(format)(x, y, z, s))), x)) dense_from_sparse(a * CSM(format)(x, y, z, s))), x))
spmat = sp_types[format](random_lil((4, 3), dtype, spmat = sp_types[format](random_lil((4, 3), dtype,
12))[range(4)] 12))[range(4)]
assert not spmat.has_sorted_indices assert not spmat.has_sorted_indices
res = f(spmat.data, spmat.indices, spmat.indptr, res = f(spmat.data, spmat.indices, spmat.indptr,
numpy.asarray(spmat.shape, 'int32')) numpy.asarray(spmat.shape, 'int32'))
col1 = sp_types[format]((res, spmat.indices, spmat.indptr), col1 = sp_types[format]((res, spmat.indices, spmat.indptr),
shape=numpy.asarray(spmat.shape, 'int32'))[:, 1].data shape=numpy.asarray(spmat.shape,
'int32'))[:, 1].data
assert numpy.all(col1 == 2) assert numpy.all(col1 == 2)
...@@ -808,7 +808,7 @@ class test_csm(unittest.TestCase): ...@@ -808,7 +808,7 @@ class test_csm(unittest.TestCase):
spmat = sp_types[format](random_lil((4, 3), dtype, 3)) spmat = sp_types[format](random_lil((4, 3), dtype, 3))
res = f(spmat.data, spmat.indices, spmat.indptr, res = f(spmat.data, spmat.indices, spmat.indptr,
numpy.asarray(spmat.shape, 'int32')) numpy.asarray(spmat.shape, 'int32'))
assert numpy.all(res.data == spmat.data) assert numpy.all(res.data == spmat.data)
assert numpy.all(res.indices == spmat.indices) assert numpy.all(res.indices == spmat.indices)
...@@ -909,8 +909,7 @@ class test_structureddot(unittest.TestCase): ...@@ -909,8 +909,7 @@ class test_structureddot(unittest.TestCase):
spmat = sp.csc_matrix(spmat) spmat = sp.csc_matrix(spmat)
images = tensor.Tensor(dtype='float32', images = tensor.Tensor(dtype='float32',
broadcastable=[False, False])( broadcastable=[False, False])('images')
'images')
cscmat = CSC(kerns, spmat.indices[:spmat.size], cscmat = CSC(kerns, spmat.indices[:spmat.size],
spmat.indptr, spmat.shape) spmat.indptr, spmat.shape)
...@@ -931,7 +930,8 @@ class test_structureddot(unittest.TestCase): ...@@ -931,7 +930,8 @@ class test_structureddot(unittest.TestCase):
#print 'type of kernvals = ', kernvals.dtype #print 'type of kernvals = ', kernvals.dtype
bsize = 3 bsize = 3
imvals = 1.0 * numpy.array(numpy.arange(bsize * spmat.shape[1]).\ imvals = 1.0 * numpy.array(numpy.arange(bsize * spmat.shape[1]).\
reshape(bsize, spmat.shape[1]), dtype='float32') reshape(bsize, spmat.shape[1]),
dtype='float32')
outvals = f(kernvals, imvals) outvals = f(kernvals, imvals)
#print outvals #print outvals
...@@ -949,10 +949,10 @@ class test_structureddot(unittest.TestCase): ...@@ -949,10 +949,10 @@ class test_structureddot(unittest.TestCase):
f = theano.function([a, b], theano.Out(d, borrow=True)) f = theano.function([a, b], theano.Out(d, borrow=True))
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
for M, N, K, nnz in [(4, 3, 2, 3), for M, N, K, nnz in [(4, 3, 2, 3),
(40, 30, 20, 3), (40, 30, 20, 3),
(40, 30, 20, 30), (40, 30, 20, 30),
(400, 3000, 200, 6000), (400, 3000, 200, 6000),
]: ]:
a_val = sp_mat[sparse_format_a]( a_val = sp_mat[sparse_format_a](
random_lil((M, N), sparse_dtype, nnz)) random_lil((M, N), sparse_dtype, nnz))
b_val = sp_mat[sparse_format_b]( b_val = sp_mat[sparse_format_b](
...@@ -969,10 +969,10 @@ class test_structureddot(unittest.TestCase): ...@@ -969,10 +969,10 @@ class test_structureddot(unittest.TestCase):
f = theano.function([a, b], theano.Out(d, borrow=True)) f = theano.function([a, b], theano.Out(d, borrow=True))
for M, N, K, nnz in [(4, 3, 2, 3), for M, N, K, nnz in [(4, 3, 2, 3),
(40, 30, 20, 3), (40, 30, 20, 3),
(40, 30, 20, 30), (40, 30, 20, 30),
(400, 3000, 200, 6000), (400, 3000, 200, 6000),
]: ]:
spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz)) spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz))
mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype) mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
theano_times = [] theano_times = []
...@@ -1017,10 +1017,10 @@ class test_structureddot(unittest.TestCase): ...@@ -1017,10 +1017,10 @@ class test_structureddot(unittest.TestCase):
f = theano.function([a, b], d) f = theano.function([a, b], d)
for M, N, K, nnz in [(4, 3, 2, 3), for M, N, K, nnz in [(4, 3, 2, 3),
(40, 30, 20, 3), (40, 30, 20, 3),
(40, 30, 20, 30), (40, 30, 20, 30),
(400, 3000, 200, 6000), (400, 3000, 200, 6000),
]: ]:
spmat = sp.csr_matrix(random_lil((M, N), sparse_dtype, nnz)) spmat = sp.csr_matrix(random_lil((M, N), sparse_dtype, nnz))
mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype) mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
t0 = time.time() t0 = time.time()
...@@ -1062,7 +1062,7 @@ class DotTests(utt.InferShapeTester): ...@@ -1062,7 +1062,7 @@ class DotTests(utt.InferShapeTester):
self.v_10 = numpy.asarray(numpy.random.uniform(-1, 1, 10), self.v_10 = numpy.asarray(numpy.random.uniform(-1, 1, 10),
dtype=theano.config.floatX) dtype=theano.config.floatX)
self.v_100 = numpy.asarray(numpy.random.uniform(-1, 1, 100), self.v_100 = numpy.asarray(numpy.random.uniform(-1, 1, 100),
dtype=theano.config.floatX) dtype=theano.config.floatX)
def test_csr_dense(self): def test_csr_dense(self):
x = theano.sparse.csr_matrix('x') x = theano.sparse.csr_matrix('x')
...@@ -1143,7 +1143,7 @@ class DotTests(utt.InferShapeTester): ...@@ -1143,7 +1143,7 @@ class DotTests(utt.InferShapeTester):
a = sparse.csr_matrix('a', dtype='float32') a = sparse.csr_matrix('a', dtype='float32')
b = cuda.float32_shared_constructor( b = cuda.float32_shared_constructor(
numpy.random.rand(3, 4).astype('float32')) numpy.random.rand(3, 4).astype('float32'))
d = sparse.dot(a, b) d = sparse.dot(a, b)
f = theano.function([a], d) f = theano.function([a], d)
...@@ -1281,7 +1281,7 @@ class UsmmTests(unittest.TestCase): ...@@ -1281,7 +1281,7 @@ class UsmmTests(unittest.TestCase):
for node in topo]) == len(topo) - 5) for node in topo]) == len(topo) - 5)
new_topo = [] new_topo = []
for node in topo: for node in topo:
if not (isinstance(node.op, tensor.Elemwise) and \ if not (isinstance(node.op, tensor.Elemwise) and
isinstance(node.op.scalar_op, isinstance(node.op.scalar_op,
theano.scalar.basic.Cast)): theano.scalar.basic.Cast)):
new_topo.append(node) new_topo.append(node)
...@@ -1360,8 +1360,8 @@ class test_zeros_like(unittest.TestCase): ...@@ -1360,8 +1360,8 @@ class test_zeros_like(unittest.TestCase):
x = theano.sparse.csr_matrix() x = theano.sparse.csr_matrix()
f = theano.function([x], theano.sparse.sp_zeros_like(x)) f = theano.function([x], theano.sparse.sp_zeros_like(x))
vx = scipy.sparse.csr_matrix(numpy.asarray( vx = scipy.sparse.csr_matrix(numpy.asarray(
numpy.random.binomial(1, 0.5, (100, 100)), numpy.random.binomial(1, 0.5, (100, 100)),
dtype=theano.config.floatX)) dtype=theano.config.floatX))
fx = f(vx) fx = f(vx)
...@@ -1571,7 +1571,7 @@ class SpSumTester(utt.InferShapeTester): ...@@ -1571,7 +1571,7 @@ class SpSumTester(utt.InferShapeTester):
shape=(10, 10)) shape=(10, 10))
z = theano.sparse.sp_sum(variable[0], axis=axis) z = theano.sparse.sp_sum(variable[0], axis=axis)
if axis == None: if axis is None:
assert z.type.broadcastable == () assert z.type.broadcastable == ()
else: else:
assert z.type.broadcastable == (False, ) assert z.type.broadcastable == (False, )
...@@ -1951,24 +1951,26 @@ class Test_getitem(unittest.TestCase): ...@@ -1951,24 +1951,26 @@ class Test_getitem(unittest.TestCase):
# the [] shortcut for getitem. # the [] shortcut for getitem.
# x[a:b] is not accepted because we don't have sparse vectors # x[a:b] is not accepted because we don't have sparse vectors
self.assertRaises(NotImplementedError, self.assertRaises(NotImplementedError,
x.__getitem__, (slice(a, b), c)) x.__getitem__, (slice(a, b), c))
# x[a:b:step, c:d] is not accepted because scipy silently drops # x[a:b:step, c:d] is not accepted because scipy silently drops
# the step (!) # the step (!)
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, (slice(a, b, -1), slice(c, d))) x.__getitem__, (slice(a, b, -1), slice(c, d)))
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, (slice(a, b), slice(c, d, 2))) x.__getitem__, (slice(a, b), slice(c, d, 2)))
# Advanced indexing is not supported # Advanced indexing is not supported
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, (tensor.ivector('l'), slice(a, b))) x.__getitem__,
(tensor.ivector('l'), slice(a, b)))
# Indexing with random things is not supported either # Indexing with random things is not supported either
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, slice(tensor.fscalar('f'), None)) x.__getitem__, slice(tensor.fscalar('f'), None))
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, (slice(None), slice([1, 3, 4], None))) x.__getitem__,
(slice(None), slice([1, 3, 4], None)))
def test_GetItemScalar(self): def test_GetItemScalar(self):
sparse_formats = ('csc', 'csr') sparse_formats = ('csc', 'csr')
...@@ -1981,7 +1983,7 @@ class Test_getitem(unittest.TestCase): ...@@ -1981,7 +1983,7 @@ class Test_getitem(unittest.TestCase):
n = 42 n = 42
vx = as_sparse_format(self.rng.binomial(1, 0.5, (97, 100)), vx = as_sparse_format(self.rng.binomial(1, 0.5, (97, 100)),
format).astype(theano.config.floatX) format).astype(theano.config.floatX)
f1 = theano.function([x, a, b], x[a, b]) f1 = theano.function([x, a, b], x[a, b])
r1 = f1(vx, 10, 10) r1 = f1(vx, 10, 10)
...@@ -2248,6 +2250,7 @@ def elemwise_checker(op, expected_f, gap=None, test_dtypes=None, ...@@ -2248,6 +2250,7 @@ def elemwise_checker(op, expected_f, gap=None, test_dtypes=None,
else: else:
self.gap_grad = gap self.gap_grad = gap
# Ensure the test's name is correct. # Ensure the test's name is correct.
utt.seed_rng()
assert eval(self.__class__.__name__) is self.__class__ assert eval(self.__class__.__name__) is self.__class__
def test_op(self): def test_op(self):
...@@ -2449,7 +2452,8 @@ TanTester = elemwise_checker( ...@@ -2449,7 +2452,8 @@ TanTester = elemwise_checker(
# Values are drawn from (-1, 1); the gradient check uses the slightly
# narrower range (-0.99, 0.99).
# NOTE(review): presumably this keeps the gradient check away from +/-1,
# where the derivative of arcsin is unbounded -- confirm against
# elemwise_checker.
ArcsinTester = elemwise_checker(
    op=sparse.arcsin,
    expected_f=numpy.arcsin,
    gap=(-1, 1),
    gap_grad=(-0.99, 0.99))
ArctanTester = elemwise_checker( ArctanTester = elemwise_checker(
sparse.arctan, sparse.arctan,
...@@ -2501,7 +2505,7 @@ FloorTester = elemwise_checker( ...@@ -2501,7 +2505,7 @@ FloorTester = elemwise_checker(
numpy.floor, numpy.floor,
grad_test=False, grad_test=False,
test_dtypes=[m for m in sparse.all_dtypes test_dtypes=[m for m in sparse.all_dtypes
if not m in sparse.complex_dtypes]) if not m in sparse.complex_dtypes])
Log1pTester = elemwise_checker( Log1pTester = elemwise_checker(
sparse.log1p, sparse.log1p,
...@@ -2516,20 +2520,20 @@ Deg2radTester = elemwise_checker( ...@@ -2516,20 +2520,20 @@ Deg2radTester = elemwise_checker(
sparse.deg2rad, sparse.deg2rad,
numpy.deg2rad, numpy.deg2rad,
test_dtypes=[m for m in sparse.all_dtypes test_dtypes=[m for m in sparse.all_dtypes
if not m in sparse.complex_dtypes]) if not m in sparse.complex_dtypes])
# rad2deg is only checked on the dtypes that are not complex.
Rad2degTester = elemwise_checker(
    op=sparse.rad2deg,
    expected_f=numpy.rad2deg,
    test_dtypes=[m for m in sparse.all_dtypes
                 if m not in sparse.complex_dtypes])
# trunc is only checked on the dtypes that are not complex.
TruncTester = elemwise_checker(
    op=sparse.trunc,
    expected_f=numpy.trunc,
    test_dtypes=[m for m in sparse.all_dtypes
                 if m not in sparse.complex_dtypes])
SqrTester = elemwise_checker( SqrTester = elemwise_checker(
...@@ -2548,7 +2552,7 @@ class MulSVTester(unittest.TestCase): ...@@ -2548,7 +2552,7 @@ class MulSVTester(unittest.TestCase):
def test_mul_s_v_grad(self): def test_mul_s_v_grad(self):
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix} 'csr': sp.csr_matrix}
for format in ['csr', 'csc']: for format in ['csr', 'csc']:
for dtype in ['float32', 'float64']: for dtype in ['float32', 'float64']:
...@@ -2556,7 +2560,8 @@ class MulSVTester(unittest.TestCase): ...@@ -2556,7 +2560,8 @@ class MulSVTester(unittest.TestCase):
mat = numpy.asarray(numpy.random.rand(3), dtype=dtype) mat = numpy.asarray(numpy.random.rand(3), dtype=dtype)
theano.sparse.verify_grad_sparse(mul_s_v, theano.sparse.verify_grad_sparse(mul_s_v,
[spmat, mat], structured=True) [spmat, mat],
structured=True)
def test_mul_s_v(self): def test_mul_s_v(self):
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
...@@ -2590,7 +2595,8 @@ class StructuredAddSVTester(unittest.TestCase): ...@@ -2590,7 +2595,8 @@ class StructuredAddSVTester(unittest.TestCase):
mat = numpy.asarray(numpy.random.rand(3), dtype=dtype) mat = numpy.asarray(numpy.random.rand(3), dtype=dtype)
theano.sparse.verify_grad_sparse(structured_add_s_v, theano.sparse.verify_grad_sparse(structured_add_s_v,
[spmat, mat], structured=True) [spmat, mat],
structured=True)
def test_structured_add_s_v(self): def test_structured_add_s_v(self):
sp_types = {'csc': sp.csc_matrix, sp_types = {'csc': sp.csc_matrix,
...@@ -2618,11 +2624,11 @@ class SamplingDotTester(utt.InferShapeTester): ...@@ -2618,11 +2624,11 @@ class SamplingDotTester(utt.InferShapeTester):
x.append(sparse.csr_matrix()) x.append(sparse.csr_matrix())
#unsquare shape #unsquare shape
a = [numpy.array(numpy.random.random_integers(5, size=(4, 3)) - 1, a = [numpy.array(numpy.random.random_integers(5, size=(4, 3)) - 1,
dtype=theano.config.floatX), dtype=theano.config.floatX),
numpy.array(numpy.random.random_integers(5, size=(5, 3)) - 1, numpy.array(numpy.random.random_integers(5, size=(5, 3)) - 1,
dtype=theano.config.floatX), dtype=theano.config.floatX),
numpy.array(numpy.random.random_integers(2, size=(4, 5)) - 1, numpy.array(numpy.random.random_integers(2, size=(4, 5)) - 1,
dtype=theano.config.floatX) dtype=theano.config.floatX)
] ]
a[2] = sp.csr_matrix(a[2]) a[2] = sp.csr_matrix(a[2])
...@@ -2672,7 +2678,7 @@ test_shared_options = theano.tensor.tests.test_sharedvar.makeSharedTester( ...@@ -2672,7 +2678,7 @@ test_shared_options = theano.tensor.tests.test_sharedvar.makeSharedTester(
ref_fct_=lambda a: numpy.asarray((a * 2).todense()), ref_fct_=lambda a: numpy.asarray((a * 2).todense()),
cast_value_=scipy.sparse.csr_matrix, cast_value_=scipy.sparse.csr_matrix,
name='test_shared_options', name='test_shared_options',
) )
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -144,7 +144,17 @@ class ArgSortOp(theano.Op): ...@@ -144,7 +144,17 @@ class ArgSortOp(theano.Op):
def grad(self, inputs, output_grads):
    # Build the gradient entries for the two inputs of argsort.
    #
    # inputs: the pair (inp, axis) given to the op.
    # output_grads: gradients w.r.t. the outputs; not used here.
    #
    # Returns a two-element list: the entry for `inp` is flagged as
    # "not implemented" and the entry for `axis` as "undefined".
    inp, axis = inputs
    # grad_not_implemented takes (op, x_pos, x, comment), so the variable
    # passed for position 0 must be `inp`, not `axis`.
    inp_grad = theano.gradient.grad_not_implemented(
        self, 0, inp,
        "I'm not sure if argsort should have its gradient"
        " implemented or is should be marked as undefined."
        " So I mark it as not implemented for now.")
    axis_grad = theano.gradient.grad_undefined(
        self, 1, axis,
        "argsort is not defined for non-integer axes so"
        " argsort(x, axis+eps) is undefined")
    return [inp_grad, axis_grad]
""" """
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
# R_op can receive None as eval_points. # R_op can receive None as eval_points.
......
...@@ -185,7 +185,9 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile, ...@@ -185,7 +185,9 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile,
subprocess_extra_args.update(dict( subprocess_extra_args.update(dict(
stdout=dummy_out.fileno(), stdout=dummy_out.fileno(),
stderr=dummy_out.fileno())) stderr=dummy_out.fileno()))
t0 = time.time()
subprocess.call(cmd, **subprocess_extra_args) subprocess.call(cmd, **subprocess_extra_args)
t1 = time.time()
# Recover failed test indices from the 'failed' field of the # Recover failed test indices from the 'failed' field of the
# '.noseids' file. We need to do it after each batch because # '.noseids' file. We need to do it after each batch because
# otherwise this field may get erased. We use a set because it # otherwise this field may get erased. We use a set because it
...@@ -193,8 +195,8 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile, ...@@ -193,8 +195,8 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile,
# to avoid duplicates. # to avoid duplicates.
failed = failed.union(cPickle.load(open(noseids_file, 'rb')) failed = failed.union(cPickle.load(open(noseids_file, 'rb'))
['failed']) ['failed'])
print '%s%% done (failed: %s)' % ((test_range[-1] * 100) // print '%s%% done in %.3fs (failed: %s)' % (
n_tests, len(failed)) (test_range[-1] * 100) // n_tests, t1 - t0, len(failed))
# Sort for cosmetic purpose only. # Sort for cosmetic purpose only.
failed = sorted(failed) failed = sorted(failed)
if failed: if failed:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论