提交 18a962c9 authored 作者: James Bergstra's avatar James Bergstra

merge

......@@ -53,7 +53,10 @@ Scan will return a tuple, containing our result (``result``) and a
dictionary of updates ( empty in this case). Note that the result
is not a matrix, but a 3D tensor containing the value of ``A**k`` for
each step. We want the last value ( after k steps ) so we compile
a function to return just that.
a function to return just that. Note that there is an optimization that,
at compile time, will detect that you are using just the last value of the
result and ensure that scan does not store all the intermediate values
that it computes. So do not worry if A and k are large.
Multiple outputs, several taps values - Recurrent Neural Network with Scan
--------------------------------------------------------------------------
......@@ -208,5 +211,9 @@ Reference
.. automodule:: theano.scan
.. autofunction:: theano.map
.. autofunction:: theano.reduce
.. autofunction:: theano.foldl
.. autofunction:: theano.foldr
.. autofunction:: theano.scan
......@@ -25,6 +25,13 @@ To learn more, check out:
__docformat__ = "restructuredtext en"
# Set a default logger. It is important to do this before importing some other
# theano code, since this code may want to log some messages.
import logging
logging_default_handler = logging.StreamHandler()
logging.getLogger("theano").addHandler(logging_default_handler)
logging.getLogger("theano").setLevel(logging.WARNING)
import configparser, configdefaults
config = configparser.TheanoConfigParser()
......@@ -159,12 +166,3 @@ def dot(l, r):
return rval
###
# Set a default logger
#
import logging
logging_default_handler = logging.StreamHandler()
logging.getLogger("theano").addHandler(logging_default_handler)
logging.getLogger("theano").setLevel(logging.WARNING)
# For a flag of bool type, we consider the strings 'False', 'false' and '0'
# as False, and the strings 'True', 'true' and '1' as True.
# We also accept the bool type with its corresponding value!
# Normally numpy considers only the empty string as False, but this gives the
# impression that it works, while it behaves differently from what people expect.
import os, StringIO, sys
import ConfigParser
import logging
......@@ -221,4 +228,18 @@ def IntParam(default, is_valid=None):
def FloatParam(default, is_valid=None):
    """Build a configuration parameter whose value is coerced to float."""
    # Delegate to TypedParam with ``float`` as the casting filter.
    param = TypedParam(default, float, is_valid)
    return param
def BoolParam(default, is_valid=None):
    """Build a configuration parameter whose value is coerced to bool.

    See the comment at the beginning of this file: the strings 'False',
    'false' and '0' (and the bool False) map to False; the strings
    'True', 'true' and '1' (and the bool True) map to True.
    """
    def booltype(s):
        # Cast an accepted string/bool to the corresponding bool value.
        if s in ['False','false','0', False]:
            return False
        elif s in ['True','true','1', True]:
            return True
        # Previously this fell through and silently returned None for
        # unrecognized values; fail loudly instead (a user-supplied
        # is_valid may accept values booltype cannot interpret).
        raise ValueError('Non-boolean value for a boolean parameter: %s' % (s,))

    def is_valid_bool(s):
        # Valid values are exactly those booltype can interpret.
        return s in ['False', 'false', '0', 'True', 'true', '1', False, True]

    if is_valid is None:
        is_valid = is_valid_bool
    return TypedParam(default, booltype, is_valid)
......@@ -848,9 +848,9 @@ class Mod(BinaryScalarOp):
"""
#raise NotImplementedError("Unlike Python, C's modulo returns negative modulo on negative dividend (to implement)")
t = node.inputs[0].type.upcast(*[ i.type for i in node.inputs[1:]])
if t in int_types:
if t in int_types or t in ['uint8','int8','uint16','int16','uint32','int32','uint64','int64']:
x_mod_y = "%(x)s %% %(y)s"%locals()
elif t in float_types:
elif t in float_types or t in ['float32','float64']:
x_mod_y = "fmod(%(x)s,%(y)s)"%locals()
else:
raise NotImplementedError('type not supported', type)
......
......@@ -28,6 +28,7 @@ __docformat__ = 'restructedtext en'
import theano
from theano.tensor import opt, TensorType
from theano import gof, Apply
from theano.gof import Optimizer, toolbox
from theano.compile import optdb
import theano.tensor.shared_randomstreams as shared_random
from theano.gof.python25 import all
......@@ -122,14 +123,12 @@ def reduce(fn, sequences, outputs_info, non_sequences = [], go_backwards = False
for i,out_info in enumerate(outs_info):
if out_info:
if not type(out_info) == dict:
outs_info[i] = dict(initial = out_info, taps = [-1], store_steps = 1)
outs_info[i] = dict(initial = out_info, return_steps = 1)
else:
# we force to use only the last step
# and store only the alst step
outs_info[i]['taps'] = [-1]
# we tell scan to store only the last step
outs_info[i]['store_steps'] = 1
# NOTE : Maybe some errors can be detected here were we can give
# more meaningfull error messages than in scan RP
# NOTE : Maybe some errors can be detected here and
# we could give more meaningful error messages than in scan ?
return scan(fn, sequences = sequences, outputs_info = outs_info,
non_sequences = non_sequences, go_backwards = go_backwards,
truncate_gradient = 1, mode = mode)
......@@ -276,6 +275,10 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
flag tells scan that the output should be computed in the memory space occupied
by that input sequence. Note that scan will only do this if allowed by the
rest of your computational graph.
* ``return_steps`` how many steps to return from your output. If not given, or
0, scan will return all steps; otherwise it will return only the last ``return_steps``.
Note that if you set this to something other than 0, scan will always be smart
about the amount of memory it allocates for a given input.
If the function applied recursively uses only the
previous value of the output, the initial state should have
......@@ -525,9 +528,8 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
store_steps = [ 0 for i in xrange(n_outs)]
for i in xrange(n_outs):
if outs_info[i].get('store_steps', None):
print 'here'
store_steps[i] = outs_info[i]['store_steps']
if outs_info[i].get('return_steps', None):
store_steps[i] = outs_info[i]['return_steps']
# add shared variable that act as outputs
#
......@@ -632,7 +634,6 @@ class Scan(theano.Op):
if k > n_seqs:
raise ValueError(('Sequences past taps dictionary reffers to '
'an unexisting sequence %d')%k)
#check outputs past taps
for k,v in outs_taps.iteritems():
if k > n_outs:
......@@ -679,6 +680,7 @@ class Scan(theano.Op):
self.inputs = inputs
self.givens = givens
self.outputs = outputs
self.mode = mode
self.truncate_gradient = truncate_gradient
self.go_backwards = go_backwards
self.slice_to_seqs = slice_to_seqs
......@@ -706,6 +708,7 @@ class Scan(theano.Op):
(self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps) and \
(self.inplace_map == other.inplace_map) and \
(self.mode == other.mode) and \
(self.n_seqs == other.n_seqs) and\
(self.inplace == other.inplace) and\
(self.go_backwards == other.go_backwards) and\
......@@ -725,6 +728,7 @@ class Scan(theano.Op):
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash(self.mode) ^\
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.givens) ^ \
......@@ -1048,13 +1052,96 @@ class Scan(theano.Op):
'''
class ScanSpaceOptimizer(Optimizer):
    """ Graph Optimizer that reduces scan memory consumption """
    # For every Scan output whose clients only ever read the last step
    # (through a Subtensor indexing with -1), this optimizer rebuilds the
    # Scan op with a reduced ``store_steps`` so that the full history of
    # intermediate values is not kept in memory.
    def __init__(self):
        Optimizer.__init__(self)

    def add_requirements(self,env):
        # Replacements below go through replace_all_validate, so the env
        # must provide the ReplaceValidate feature.
        env.extend(toolbox.ReplaceValidate())

    def apply(self, env):
        nodelist = list(env.toposort())
        for node in nodelist:
            op = node.op
            # If it is a scan Op
            if isinstance(op, Scan):
                outputs = node.outputs
                store_steps = [0 for x in outputs]
                # check the outputs
                for i,out in enumerate(node.outputs):
                    if op.store_steps[i] == 0 :
                        # if we do not have a range for this output, start
                        # from the largest backward tap this output feeds
                        # back into the loop (at least that many steps
                        # must be kept)
                        req_steps = numpy.max(numpy.abs(op.outs_taps.get(i,1)))
                        # look at all its clients
                        for cl,_dx in out.clients:
                            if type(cl) == str:
                                # if the node is actually an output, then
                                # we need to store the entire thing
                                req_steps = 0
                                break
                            else:
                                if not isinstance(cl.op,
                                        theano.tensor.basic.Subtensor):
                                    # if any of the clients is not a subtensor
                                    # we also need to store the entire thing
                                    req_steps = 0
                                    break
                                else:
                                    # if it is a subtensor, and the first
                                    # dimension index is just -1
                                    if cl.op.idx_list[0] == -1 :
                                        req_steps = numpy.max([1, req_steps])
                                    else:
                                        # or a constant that evaluates to
                                        # -1
                                        try:
                                            idx = opt.get_constant_value(cl.op.idx_list[0])
                                            if idx== -1:
                                                req_steps = numpy.max([1, req_steps])
                                            else:
                                                req_steps = 0
                                                break
                                        # NOTE(review): bare except treats
                                        # any failure as "keep all steps";
                                        # consider catching the specific
                                        # exception get_constant_value
                                        # raises for non-constants.
                                        except:
                                            req_steps = 0
                                            break
                        store_steps[i] = req_steps
                    else:
                        # the op already stores a bounded number of steps
                        # for this output; keep it as is
                        store_steps[i] = op.store_steps[i]
                if numpy.any(store_steps!= op.store_steps):
                    # rebuild the Scan op with the tightened store_steps,
                    # keeping every other op parameter identical
                    new_scan = Scan((op.inputs, op.outputs, op.givens,
                        op.slice_to_seqs),op.n_seqs, op.n_outs,
                        op.inplace_map, op.seqs_taps, op.outs_taps,
                        op.truncate_gradient, op.go_backwards,
                        store_steps, op.mode,op.inplace).make_node(*node.inputs)
                    # we now need to replace the outputs of scan
                    for i,out in enumerate(node.outputs):
                        # if we are dealing with an output for which
                        # we changed the number of stored steps we
                        # also need to get rid of the subtensor
                        if op.store_steps[i] == 0 and store_steps[i] == 1:
                            # get the output of the subtensor variables
                            outSubTens = [ x[0].outputs[0] for x in out.clients ]
                            new_old = [(x,new_scan.outputs[i]) for x in outSubTens]
                            env.replace_all_validate(new_old,reason =
                                    'scan_space_optimizer')
                        else:
                            env.replace_all_validate([(out,
                                new_scan.outputs[i])], reason =
                                'scan_space_optimizer')

# Run with priority 74 as part of the fast_run optimization set.
optdb.register('scanOp_space_optimization', ScanSpaceOptimizer(), 74, 'fast_run')
@gof.local_optimizer([None])
def scan_make_inplace(node):
    """Replace a non-inplace Scan that has a non-empty inplace_map with
    its inplace counterpart; all other op parameters are preserved.

    Returns the new node's outputs, or False if the node is left alone.
    """
    op = node.op
    if isinstance(op, Scan) and (not op.inplace) and (op.inplace_map.keys() != []):
        # NOTE: the pasted diff showed the constructor-argument line twice
        # (once without and once with ``op.mode``); only the updated line
        # that forwards ``op.mode`` is kept, matching the mode plumbing
        # added to Scan.__init__/__eq__/__hash__ in this change.
        return Scan((op.inputs, op.outputs, op.givens, op.slice_to_seqs ) , op.n_seqs,
                    op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps,
                    op.truncate_gradient, op.go_backwards, op.store_steps, op.mode,
                    inplace=True ).make_node(*node.inputs).outputs
    return False
......
......@@ -492,7 +492,13 @@ class ConvOp(Op):
fmo = kshp[0]
if self.imshp is not None and self.kshp is not None:
fmshp = ConvOp.getOutputShape(self.imshp[1:], self.kshp, (self.dx,self.dy), self.out_mode)
imshp = self.imshp
kshp = self.kshp
if self.imshp_logical:
imshp = self.imshp_logical
if self.kshp_logical:
kshp = self.kshp_logical
fmshp = ConvOp.getOutputShape(imshp[1:], kshp, (self.dx,self.dy), self.out_mode)
outshp = (batch_size,fmo) + tuple(fmshp)
return [outshp]
else:
......
......@@ -1258,7 +1258,7 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
def __eq__(self, other):
    # Two ops compare equal iff they are instances of the exact same
    # class and wrap an equal constant value.
    if type(self) != type(other):
        return False
    return self.val == other.val
def __hash__(self):
    # Must stay consistent with __eq__: mix the type hash with the hash
    # of the wrapped constant's payload. The pasted diff showed both the
    # old ``self.val.value`` and the new ``self.val.data`` return lines;
    # only the updated ``.data`` accessor is kept.
    return tensor.hashtype(self) ^ hash(self.val.data)
def __str__(self):
    # Render as ClassName{val}.
    cls_name = self.__class__.__name__
    return '%s{%s}' % (cls_name, self.val)
......
......@@ -624,7 +624,7 @@ def local_upcast_elemwise_constant_inputs(node):
if isinstance(node.op, T.Elemwise):
scalar_op = node.op.scalar_op
#print "aa", scalar_op.output_types_preference
if scalar_op.output_types_preference in (T.scal.upgrade_to_float, T.scal.upcast_out):
if getattr(scalar_op,'output_types_preference',None) in (T.scal.upgrade_to_float, T.scal.upcast_out):
# this is the kind of op that we can screw with the input dtypes by upcasting
# explicitly
#print "HELLO??"
......
......@@ -606,6 +606,7 @@ class T_Scan(unittest.TestCase):
f = theano.function([v,s], result, updates = updates)
rng = numpy.random.RandomState(utt.fetch_seed())
v_v = rng.uniform( size = (5,), low = -5., high = 5.)
print f(v_v,0.)
assert ( numpy.sum(v_v) == f(v_v, 0.) )
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论