Commit 5b6e5783 authored by Pierre-Antoine Manzagol

Merge.

letters = [
    ('b', 'int8'),
    ('w', 'int16'),
    ('i', 'int32'),
    ('l', 'int64'),
    ('d', 'float64'),
    ('f', 'float32'),
    ('c', 'complex64'),
    ('z', 'complex128')]
shapes = [
    ('scalar', ()),
    ('vector', (False,)),
    ('row', (True, False)),
    ('col', (False, True)),
    ('matrix', (False, False)),
    ('tensor3', (False, False, False)),
    ('tensor4', (False, False, False, False))]
hdr = '============ =========== ==== =========== ================================='
print hdr
print 'Constructor  dtype       ndim shape       broadcastable'
print hdr
for letter in letters:
    for shape in shapes:
        suff = ',)' if len(shape[1]) == 1 else ')'
        s = '(' + ','.join('1' if b else '?' for b in shape[1]) + suff
        print '%s%-10s %-10s %-4s %-10s %-20s' % (
            letter[0], shape[0], letter[1], len(shape[1]), s, shape[1])
print hdr
......@@ -10,8 +10,8 @@
.. moduleauthor:: LISA
Theano's strength is in expressing symbolic calculations involving tensors.
There are many types of symbolic expressions for tensors.
They are grouped into the following sections:
.. toctree::
......
......@@ -23,79 +23,8 @@ put random variables in your graph. Theano will allocate a numpy RandomState
object for each such variable, and draw from it as necessary. I'll call this sort of sequence of
random numbers a *random stream*.
Brief example
-------------
Here's a brief example. The setup code is:
.. code-block:: python

    from theano.tensor.shared_randomstreams import RandomStreams
    srng = RandomStreams(seed=234)
    rv_u = srng.uniform((2,2))
    rv_n = srng.normal((2,2))
    f = function([], rv_u, updates=[rv_u.update])
    g = function([], rv_n)  # omitting rv_n.update
    nearly_zeros = function([], rv_u + rv_u - 2 * rv_u, updates=[rv_u.update])
Here, ``rv_u`` represents a random stream of 2x2 matrices of draws from a
uniform distribution. Likewise, ``rv_n`` represents a random stream of 2x2
matrices of draws from a normal distribution. The distributions that are
implemented are defined in :class:`RandomStreams`.
Now let's use these objects. If we call ``f()``, we get random uniform
numbers. Since we are updating the internal state of the random number
generator (via the ``updates`` argument), we get different random numbers
every time.
>>> f_val0 = f()
>>> f_val1 = f() #different numbers from f_val0
When we omit the ``updates`` argument to ``function`` (as in ``g``), the
random number generator state is not affected by calling the returned
function. So, for example, calling ``g`` multiple times will return the same
numbers.
>>> g_val0 = g() # different numbers from f_val0 and f_val1
>>> g_val0 = g() # same numbers as g_val0 !!!
An important remark is that a random variable is drawn at most once during any
single function execution. So the ``nearly_zeros`` function is guaranteed to
return approximately 0 (except for rounding error) even though the ``rv_u``
random variable appears three times in the output expression.
>>> nearly_zeros = function([], rv_u + rv_u - 2 * rv_u, updates=[rv_u.update])
Seeding Streams
---------------
Random variables can be seeded individually or collectively.
You can seed just one random variable by seeding or assigning to the
``.rng.value`` attribute.
>>> rv_u.rng.value.seed(89234) # seeds the generator for rv_u
You can also seed *all* of the random variables allocated by a :class:`RandomStreams`
object with that object's ``seed`` method. This seed will be used to seed a
temporary random number generator, which will in turn generate seeds for each
of the random variables.
>>> srng.seed(902340) # seeds rv_u and rv_n with different seeds each
Sharing Streams between Functions
---------------------------------
As usual for shared variables, the random number generators used for random
variables are common between functions. So our ``nearly_zeros`` function will
update the state of the generators used in function ``f`` above.
For example:
>>> state_after_v0 = rv_u.rng.value.get_state()
>>> nearly_zeros() # this affects rv_u's generator
>>> v1 = f()
>>> rv_u.rng.value.set_state(state_after_v0)
>>> v2 = f() # v2 != v1
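The snapshot-and-rewind mechanics behind ``get_state``/``set_state`` can be illustrated without Theano, using a plain NumPy ``RandomState`` as a stand-in for the generator behind ``rv_u`` (a sketch of the semantics, not Theano's API):

```python
import numpy as np

# Hypothetical stand-in: a NumPy RandomState plays the role of rv_u's
# generator, to show that restoring a saved state replays the same draws.
rng = np.random.RandomState(234)

state_after_v0 = rng.get_state()   # snapshot the generator state
v1 = rng.uniform(size=(2, 2))      # drawing advances the state
rng.set_state(state_after_v0)      # rewind to the snapshot
v2 = rng.uniform(size=(2, 2))      # replays exactly the same draw

assert np.allclose(v1, v2)
```

In the doctest above, ``v2 != v1`` only because ``nearly_zeros()`` advanced the generator *between* the snapshot and ``v1``; the restore itself is deterministic, as this sketch shows.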
For an example of how to use random numbers, see
:ref:`using_random_numbers`.
Reference
......
......@@ -65,7 +65,7 @@ is the type we assign to "0-dimensional arrays (`scalar`) of doubles
``dscalar`` is not a class. Therefore, neither ``x`` nor ``y``
are actually instances of ``dscalar``. They are instances of
:class:`TensorVariable`. ``x`` and ``y``
are, however, assigned the theano Type ``dscalar`` in their ``type``
field, as you can see here:
......
......@@ -289,6 +289,7 @@ careful though, not to allow the expressions introduced by a givens
substitution to be co-dependent, the order of substitution is not defined, so
the substitutions have to work in any order.
.. _using_random_numbers:
Using Random Numbers
====================
......@@ -376,7 +377,7 @@ For example:
>>> state_after_v0 = rv_u.rng.value.get_state()
>>> nearly_zeros() # this affects rv_u's generator
>>> v1 = f()
>>> rv_u.rng.value.set_state(state_after_v0)
>>> v2 = f() # v2 != v1
......
......@@ -5,29 +5,148 @@
Loading and Saving
==================
Python's standard way of saving class instances and reloading them
is the pickle_ mechanism. Many Theano objects can be serialized (and
deserialized) by ``pickle``, however, a limitation of ``pickle`` is that
it does not save the code or data of a class along with the instance of
the class being serialized. As a result, reloading objects created by a
previous version of a class can be really problematic.
Thus, you will want to consider different mechanisms depending on
the amount of time you anticipate between saving and reloading. For
short-term (such as temp files and network transfers), pickling of
the Theano objects or classes is possible. For longer-term (such as
saving models from an experiment) you should not rely on pickled Theano
objects; we recommend loading and saving the underlying shared objects
as you would in the course of any other Python program.
.. _pickle: http://docs.python.org/library/pickle.html
The basics of pickling
======================
The two modules ``pickle`` and ``cPickle`` have the same functionalities, but
``cPickle``, coded in C, is much faster.
>>> import cPickle
You can serialize (or *save*, or *pickle*) objects to a file with
``cPickle.dump``:
>>> f = file('obj.save', 'wb')
>>> cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
>>> f.close()
.. note::

    If you want your saved object to be stored efficiently, don't forget to
    use ``cPickle.HIGHEST_PROTOCOL``; the resulting file can be dozens of
    times smaller than with the default protocol.
.. note::

    Opening your file in binary mode (``'b'``) is required for portability
    (especially between Unix and Windows).
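The effect the first note describes can be seen in a quick sketch (written for Python 3, where ``cPickle`` has been folded into the standard ``pickle`` module):

```python
import pickle

# The highest protocol stores integers in a compact binary form;
# protocol 0 writes them out as ASCII text, which is much larger.
data = list(range(1000))
small = len(pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL))
big = len(pickle.dumps(data, protocol=0))

assert small < big
```

The exact ratio depends on the data; NumPy ndarrays benefit the most from the binary protocols.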
To de-serialize (or *load*, or *unpickle*) a pickled file, use
``cPickle.load``:
>>> f = file('obj.save', 'rb')
>>> loaded_obj = cPickle.load(f)
>>> f.close()
You can pickle several objects into the same file, and load them all (in the
same order):
>>> f = file('objects.save', 'wb')
>>> for obj in [obj1, obj2, obj3]:
...     cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
>>> f.close()
Then:
>>> f = file('objects.save', 'rb')
>>> loaded_objects = []
>>> for i in range(3):
...     loaded_objects.append(cPickle.load(f))
>>> f.close()
For more details about pickle's usage, see
`Python documentation <http://docs.python.org/library/pickle.html#usage>`_.
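The same multi-object round trip, as a self-contained sketch in Python 3 syntax (where ``pickle`` replaces ``cPickle``):

```python
import os
import pickle
import tempfile

obj1, obj2, obj3 = {'a': 1}, [1, 2, 3], 'hello'

# Pickle several objects into one file, in binary mode.
path = os.path.join(tempfile.mkdtemp(), 'objects.save')
with open(path, 'wb') as f:
    for obj in (obj1, obj2, obj3):
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

# Load them back in the same order.
loaded = []
with open(path, 'rb') as f:
    for _ in range(3):
        loaded.append(pickle.load(f))

assert loaded == [obj1, obj2, obj3]
```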
Short-term serialization
========================
If you are confident that the class instance you are serializing will be
deserialized by a compatible version of the code, pickling the whole model is
an adequate solution. That would be the case, for instance, if you are saving
models and reloading them during the same execution of your program, or if
the class you are saving has been stable for a while.
You can control what pickle will save from your object by defining a
`__getstate__
<http://docs.python.org/library/pickle.html#object.__getstate__>`_ method,
and similarly a `__setstate__
<http://docs.python.org/library/pickle.html#object.__setstate__>`_ method.
This is especially useful if, for instance, your model class contains a link
to the data set currently in use, which you probably don't want to pickle
along with every instance of your model.
For instance, you can define functions along the lines of:
.. code-block:: python

    def __getstate__(self):
        state = dict(self.__dict__)
        del state['training_set']
        return state

    def __setstate__(self, d):
        self.__dict__.update(d)
        self.training_set = cPickle.load(file(self.training_set_file, 'rb'))
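A runnable sketch of this pattern, with a hypothetical ``Model`` class in Python 3 syntax (in real code ``__setstate__`` would reload the data set from disk, as above; here it just leaves the slot empty):

```python
import pickle

class Model:
    def __init__(self, weights, training_set):
        self.weights = weights
        self.training_set = training_set   # large; not worth pickling

    def __getstate__(self):
        state = dict(self.__dict__)
        del state['training_set']          # drop it from the pickled state
        return state

    def __setstate__(self, d):
        self.__dict__.update(d)
        self.training_set = None           # real code: reload from disk

m = Model([0.1, 0.2], training_set=list(range(10 ** 6)))
m2 = pickle.loads(pickle.dumps(m, protocol=pickle.HIGHEST_PROTOCOL))

assert m2.weights == [0.1, 0.2]
assert m2.training_set is None
```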
Long-term serialization
=======================
If the implementation of the class you want to save is unstable, for instance
if functions are created or removed or class members are renamed, you should
save and load only the immutable (and necessary) parts of your class.
You can do that by defining ``__getstate__`` and ``__setstate__`` functions
as above, this time listing the attributes you want to save rather than the
ones you don't.
For instance, if the only parameters you want to save are a weight
matrix ``W`` and a bias ``b``, you can define:
.. code-block:: python

    def __getstate__(self):
        return (self.W, self.b)

    def __setstate__(self, (W, b)):
        self.W = W
        self.b = b
If, at some point, ``W`` is renamed to ``weights`` and ``b`` to ``bias``,
older pickled files will still be usable if you update these functions to
reflect the change in name:
.. code-block:: python

    def __getstate__(self):
        return (self.weights, self.bias)

    def __setstate__(self, (W, b)):
        self.weights = W
        self.bias = b
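The same migration idea in Python 3 syntax (tuple parameters in ``def`` were removed in Python 3, so the tuple is unpacked inside ``__setstate__``); the class and attribute names are illustrative:

```python
import pickle

class Model:
    # Pickles store a (W, b) tuple; the attributes are now called
    # ``weights`` and ``bias``, but the tuple layout is unchanged.
    def __init__(self, weights=None, bias=None):
        self.weights = weights
        self.bias = bias

    def __getstate__(self):
        return (self.weights, self.bias)

    def __setstate__(self, state):
        W, b = state                 # same layout as the old pickles
        self.weights = W
        self.bias = b

blob = pickle.dumps(Model([1.0, 2.0], 0.5))
new = pickle.loads(blob)

assert new.weights == [1.0, 2.0]
assert new.bias == 0.5
```

Note that ``__init__`` is not called on unpickling; only ``__setstate__`` runs, which is exactly why the tuple layout is the compatibility contract.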
For more information on advanced use of pickle and its internals, see Python's
pickle_ documentation.
......@@ -38,11 +38,20 @@ details about these building blocks see :ref:`variable`, :ref:`op`,
**Diagram**
.. _tutorial-graphfigure:
.. figure:: apply.png
    :align: center

    Interaction between instances of Apply (blue), Variable (red), Op (green),
    and Type (purple).

.. # COMMENT
    WARNING: hyper-links and ref's seem to break the PDF build when placed
    into this figure caption.
Arrows in this :ref:`figure <tutorial-graphfigure>` represent references to the
Python objects pointed at. The blue
box is an :ref:`apply` node. Red boxes are :ref:`variable` nodes. Green
circles are :ref:`Ops <op>`. Purple boxes are :ref:`Types <type>`.
......
......@@ -149,9 +149,6 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
clone_d.setdefault(old_o, new_o)
return clone_d[a]
#def v_clone(v):
# return _v_clone(v, clone_d)
# initialize the clone_d mapping with the `givens` argument
try:
givens = givens.items() # converts a dictionary to the sort of list that we want.
......@@ -173,8 +170,6 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
for i, iv in zip(inputs, input_variables):
i.variable = iv
#set_of_param_variables = set(input_variables)
# It was decided, as a first step, to prevent shared variables from being
# used as function inputs. Although it is technically possible, it is also not clear
# when/how to use the value of that shared variable (is it a default? ignored?, if the
......@@ -200,10 +195,6 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
update_d[store_into] = update_val
update_expr.append((store_into, update_val))
# computed_list is a list of output variables (which will be extended later)
#computed_list = []
# Elements of "outputs" are here cloned to "cloned_outputs"
if isinstance(outputs, list):
cloned_outputs = []
......@@ -246,7 +237,6 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
shared_inputs.append(v)
i += 1
#updates = update_d #?
for sv in shared_inputs:
if sv in update_d:
si = In(variable=sv, value=sv.container, mutable=True,
......@@ -258,64 +248,6 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
return orig_function(inputs, cloned_outputs, mode,
accept_inplace=accept_inplace, name=name)
if 0:
# Add update values as quantities that must be computed.
# Here, we
# - extend the computed_list
# - replace some update expressions (but update keys remain)
new_updates = {}
for (store_into, update_val) in iter_over_pairs(updates):
if not isinstance(store_into, SharedVariable):
raise TypeError('update target must be a SharedVariable', store_into)
if store_into in new_updates:
raise ValueError('this shared variable already has an update expression',
(store_into, new_updates[store_into]))
update_val = v_clone(store_into.filter_update(update_val))
if update_val.type != store_into.type:
raise TypeError('an update must have the same type as the original shared variable',
(store_into, store_into.type,
update_val, update_val.type))
computed_list.append(update_val)
new_updates[store_into] = update_val
updates = new_updates
# Obtain all inputs we need to compute what we want.
graph_inputs = graph.inputs(computed_list,
blockers=set_of_param_variables)
shared_inputs = [i for i in graph_inputs if isinstance(i, SharedVariable)]
# Add shared variables (from shared_inputs) that were not already present in the list of
# params.
inputs += [In(variable=si, value=si.container, mutable=False)
for si in shared_inputs
if si not in set_of_param_variables]
del shared_inputs
# Iterate over the updates, which are either pairs
# (shared_var, expressionvariable), or a similar dictionary.
# For each shared_variable, find the In instance that we created for it in the inputs list.
# Give that In instance (in_sv) an update expression.
#
# I think we usually want to set these Inputs to be mutable,
# ... are there exceptions?
for (sv, new_val) in iter_over_pairs(updates):
in_sv = None
for in_sv_i in inputs:
if in_sv_i.variable is sv:
assert in_sv is None
in_sv = in_sv_i
if in_sv is None:
# This variable was not used anywhere and thus is not in the input
# list yet.
inputs.append(In(variable=sv, value=sv.container, mutable=True,
update=new_val))
else:
in_sv.update = new_val
in_sv.mutable = True
return orig_function(inputs, cloned_outputs, mode, accept_inplace=accept_inplace,name=name)
def _pfunc_param_to_in(param):
if isinstance(param, Constant):
......@@ -354,22 +286,3 @@ def iter_over_pairs(pairs):
else:
return pairs
#TODO: Make these non-recursive so they can deal with larger graphs
def _a_clone(a, dct):
if a is None:
return None
if a not in dct:
for i in a.inputs:
_v_clone(i, dct)
dct[a] = a.clone_with_new_inputs([dct[i] for i in a.inputs])
for old_o, new_o in zip(a.outputs, dct[a].outputs):
dct.setdefault(old_o, new_o)
return dct[a]
def _v_clone(v, dct):
assert v is not None
if v.owner:
_a_clone(v.owner, dct)
return dct.setdefault(v, v)
......@@ -18,6 +18,7 @@ except ImportError:
import theano.sandbox.cuda as tcn
import cuda_ndarray as cuda
import theano.compile.mode
from theano.tests import unittest_tools as utt
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
......@@ -81,6 +82,41 @@ def test_sum():
assert numpy.allclose(f2(val2),f(val))
def test_reshape():
    a = tcn.CudaNdarrayType((False,))()
    b = tcn.CudaNdarrayType((False, False))()
    c = T.reshape(a, [2, 3])

    # basic
    f = theano.function([a], c)
    fv = f(cuda_ndarray.CudaNdarray(numpy.asarray([0, 1, 2, 3, 4, 5], dtype='float32')))
    assert numpy.all(fv == numpy.asarray([[0, 1, 2], [3, 4, 5]]))

    # test that it works without inplace operations
    a_val = cuda_ndarray.CudaNdarray(numpy.asarray([0, 1, 2, 3, 4, 5], dtype='float32'))
    a_val_copy = cuda_ndarray.CudaNdarray(numpy.asarray([0, 1, 2, 3, 4, 5], dtype='float32'))
    b_val = cuda_ndarray.CudaNdarray(numpy.asarray([[0, 1, 2], [3, 4, 5]], dtype='float32'))

    f_sub = theano.function([a, b], c - b)
    assert numpy.all(f_sub(a_val, b_val) == 0.0)
    assert numpy.all(numpy.asarray(a_val) == numpy.asarray(a_val_copy))

    # test that it works with inplace operations
    a_val = numpy.asarray([0, 1, 2, 3, 4, 5], dtype='float32')
    a_val_copy = numpy.asarray([0, 1, 2, 3, 4, 5], dtype='float32')
    b_val = numpy.asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')

    f_sub = theano.function([a, b], c - b)
    assert numpy.all(f_sub(a_val, b_val) == 0.0)
    assert numpy.all(numpy.asarray(a_val) == numpy.asarray(a_val_copy))

    # verify gradient
    def just_vals(v):
        return T.Reshape(2)(v, numpy.asarray([2, 3], dtype='int32'))
    utt.verify_grad(just_vals, [a_val])
def test_elemwise0():
a = tcn.shared_constructor(numpy.random.rand(4,4), 'a')
......
......@@ -12,6 +12,7 @@ import numpy
import __builtin__
class DownsampleFactorMaxGrad(Op):
    def __init__(self, ds, ignore_border):
        self.ds = tuple(ds)
        self.ignore_border = ignore_border
......@@ -147,10 +148,48 @@ class DownsampleFactorMaxGrad(Op):
return ()
def max_pool2D(input, ds, ignore_border=False):
    """
    Takes as input an N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale. (2,2) will halve the image in
        each dimension.
    :param ignore_border: boolean value. When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool2D requires a dimension >= 2')

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size, 1, height, width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]), img_shape),
                            'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
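The downsampling semantics (max over non-overlapping ``ds[0] x ds[1]`` patches of the last two axes) can be sketched in plain NumPy; this is a hypothetical helper for illustration, not Theano's implementation:

```python
import numpy as np

def max_pool_2d_np(x, ds):
    """Max over non-overlapping ds[0] x ds[1] patches of x's last two axes.
    Partial border patches are dropped (as with ignore_border=True)."""
    h, w = x.shape[-2:]
    out_h, out_w = h // ds[0], w // ds[1]
    # trim any partial border, then expose each patch as its own axes
    x = x[..., :out_h * ds[0], :out_w * ds[1]]
    new_shape = x.shape[:-2] + (out_h, ds[0], out_w, ds[1])
    # reduce over the two within-patch axes
    return x.reshape(new_shape).max(axis=(-3, -1))

img = np.arange(16, dtype='float32').reshape(4, 4)
assert np.array_equal(max_pool_2d_np(img, (2, 2)),
                      np.array([[5., 7.], [13., 15.]]))
```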
class DownsampleFactorMax(Op):
"""
For N-dimensional tensors, consider that the last two dimensions span images.
This Op downsamples these images by a factor ds, by taking the max over non-
overlapping rectangular regions.
"""
@staticmethod
......@@ -192,6 +231,8 @@ class DownsampleFactorMax(Op):
:param ignore_border: if ds doesn't divide imgshape, do we include an extra row/col of
partial downsampling (False) or ignore it (True).
:type ignore_border: bool
TODO: why is poolsize an op parameter here?
"""
self.ds = tuple(ds)
self.ignore_border = ignore_border
......
......@@ -737,6 +737,11 @@ btensor3 = TensorType('int8', (False,)*3)
wtensor3 = TensorType('int16', (False,)*3)
itensor3 = TensorType('int32', (False,)*3)
ltensor3 = TensorType('int64', (False,)*3)
def tensor3(name=None, dtype='float64'):
    type = TensorType(dtype, (False, False, False))
    return type(name)
tensor3s, ftensor3s, dtensor3s, itensor3s, ltensor3s = _multi(tensor3, ftensor3, dtensor3,
itensor3, ltensor3)
ctensor4 = TensorType('complex64', (False,)*4)
ztensor4 = TensorType('complex128', (False,)*4)
......@@ -746,6 +751,11 @@ btensor4 = TensorType('int8', (False,)*4)
wtensor4 = TensorType('int16', (False,)*4)
itensor4 = TensorType('int32', (False,)*4)
ltensor4 = TensorType('int64', (False,)*4)
def tensor4(name=None, dtype='float64'):
    type = TensorType(dtype, (False, False, False, False))
    return type(name)
tensor4s, ftensor4s, dtensor4s, itensor4s, ltensor4s = _multi(tensor4, ftensor4, dtensor4,
itensor4, ltensor4)
class _tensor_py_operators:
#UNARY
......@@ -1666,6 +1676,10 @@ def sum(input, axis = None):
pprint.assign(Sum(), printing.FunctionPrinter('sum'))
@constructor
def prod(input, axis = None):
    """WRITEME"""
    return elemwise.Prod(axis)(input)
@constructor
def mean(input, axis = None):
......
......@@ -933,6 +933,9 @@ class Sum(CAReduce):
int8='int32',
int16='int32',
int32='int64',
uint8='uint32',
uint16='uint32',
uint32='uint64',
).get(idtype, idtype)
def grad(self, (x, ), (gz, )):
......@@ -958,4 +961,38 @@ class Sum(CAReduce):
else:
return "Sum{%s}" % ", ".join(map(str, self.axis))
class Prod(CAReduce):
    """
    Multiplies all the values of a tensor along the specified axis(es).

    Equivalent to CAReduce(scalar.prod, axis=axis), with the
    difference that this defines the gradient of prod wrt its tensor
    input.
    """
    def __init__(self, axis=None):
        CAReduce.__init__(self, scalar.mul, axis)

    def _output_dtype(self, idtype):
        # we want to protect against overflow
        return dict(
            int8='int64',
            int16='int64',
            int32='int64',
            uint8='uint64',
            uint16='uint64',
            uint32='uint64',
        ).get(idtype, idtype)

    def grad(self, (x, ), (gz, )):
        if x.dtype[0:3] in ('int', 'uin'):
            return [None]
        else:
            raise NotImplementedError('Will be implemented shortly')

    def __str__(self):
        if self.axis is None:
            return "Prod"
        else:
            return "Prod{%s}" % ", ".join(map(str, self.axis))
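The gradient that the ``NotImplementedError`` branch defers is presumably the standard result d prod(x)/d x_i = prod(x)/x_i (valid for nonzero inputs); that formula is an assumption here, not taken from this commit. A NumPy finite-difference check of it:

```python
import numpy as np

x = np.array([2.0, 3.0, 5.0])

# analytic gradient of prod(x) wrt each x_i (all x_i nonzero)
analytic = np.prod(x) / x

# central finite differences, one coordinate at a time
eps = 1e-6
numeric = np.array([
    (np.prod(x + eps * e) - np.prod(x - eps * e)) / (2 * eps)
    for e in np.eye(len(x))
])

assert np.allclose(analytic, numeric, rtol=1e-4)
```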