merge

6905a821 · James Bergstra · 122f8d1e · 40356219 · 6905a821 · 6905a821
--- a/theano/compile/sandbox/pfunc.py
+++ b/theano/compile/sandbox/pfunc.py
@@ -148,7 +148,7 @@ def _pfunc_param_to_in(param):
                mutable=param.mutable,
                strict=param.strict,
                implicit = param.implicit)
-    raise NotImplementedError()
+    raise NotImplementedError('Unknown parameter type: %s' % type(param))
 def iter_over_pairs(pairs):

--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -583,14 +583,17 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
    :returns: dynamically-imported python module of the compiled code.
    """
-    #TODO: don't to the dlimport in this function
+    #TODO: Do not do the dlimport in this function
    if preargs is None:
      preargs = []
    else:
      preargs = list(preargs)
-    preargs.append('-fPIC')
+    if sys.platform != 'win32':
+        # Under Windows it looks like fPIC is useless. Compiler warning:
+        # '-fPIC ignored for target (all code is position independent)'
+        preargs.append('-fPIC')
    no_opt = False
    include_dirs = std_include_dirs() + include_dirs

--- a/theano/gof/compilelock.py
+++ b/theano/gof/compilelock.py
@@ -3,6 +3,17 @@
 import compiledir
 import os, random, time
+import logging
+_logger=logging.getLogger("theano.gof.compilelock")
+_logger.setLevel(logging.INFO) # INFO will show the "Refreshing lock" messages
+def info(*args):
+    _logger.info(' '.join(str(a) for a in args))
+def debug(*args):
+    _logger.debug(' '.join(str(a) for a in args))
+def warning(*args):
+    _logger.warning(' '.join(str(a) for a in args))
+def error(*args):
+    _logger.error(' '.join(str(a) for a in args))
 # In seconds, time that a process will wait before deciding to override an
 # existing lock. An override only happens when the existing lock is held by
@@ -51,8 +62,9 @@ def get_lock():
            # our lock after their 'timeout_before_override' timeout period.
            now = time.time()
            if now - get_lock.start_time > refresh_every:
-                print 'Refreshing lock'
+                lockpath = os.path.join(get_lock.lock_dir, 'lock')
-                refresh_lock(os.path.join(get_lock.lock_dir, 'lock'))
+                info('Refreshing lock', lockpath)
+                refresh_lock(lockpath)
                get_lock.start_time = now
    get_lock.n_lock += 1
@@ -151,8 +163,9 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1):
                    time_start = time.time()
                    no_display = (verbosity == 0)
                if not no_display:
-                    print 'Waiting for existing lock by %s (I am %s)' % (
+                    info('Waiting for existing lock by %s (I am %s)' % (
-                            read_owner, my_pid)
+                            read_owner, my_pid))
+                    info("To manually release the lock, delete", lock_file)
                    if verbosity <= 1:
                        no_display = True
                time.sleep(random.uniform(min_wait, max_wait))

--- a/theano/sandbox/conv.py
+++ b/theano/sandbox/conv.py
@@ -253,7 +253,8 @@ class ConvOp(Op):
        #The copy make that we return an object with the same stride as the c version.
        #The copy don't affect the performence during our experience as in that case we
        #execute the c version which is much faster.
-        zz = zz[:,:,0::self.dx,0::self.dy].copy()
+        if self.dx>1 or self.dy>1:
+            zz = zz[:,:,0::self.dx,0::self.dy].copy()
        #print 'zz (%s)'%str((self.dx, self.dy)), zz
        z[0]=zz
@@ -438,6 +439,18 @@ using namespace std;
 def convolve2(kerns, kshp, nkern, images, imshp, bsize, step=(1,1),
              bias=None, mode='valid', **d):
+    """
+    param kerns: kernel tensor
+    param kshp:  tuple(kern row, kern wid)
+    param nkern: int, the number of kernels
+    param images:image tensor
+    param imshp: tuple([stack size,] image row, image wid)
+    param bsize: batch size
+    param step:  subsampling to apply to the output tuple(row, wid)
+    param bias:  if True, will add a bias
+    param mode:  'valid' or 'full'
+    return:      tuple(theano graph with the output of ConvOp flattened to 2 dimensions, N.hstack((nkern, convop.outshp)))
+    """
    #TODO: remove the bias argument from this function because convolution has nothing to do with a bias
    # if imshp, is a tuple, images contains one input dimension
@@ -461,7 +474,6 @@ def convolve2(kerns, kshp, nkern, images, imshp, bsize, step=(1,1),
    rval = tensor.flatten(convout, 2)
    return rval, N.hstack((nkern, convop.outshp))
 _conv_op_code_a = """
 const int mode=%(mode)s;
 int typenum=0, typenum_f=0;

--- a/theano/sandbox/test_conv.py
+++ b/theano/sandbox/test_conv.py
@@ -434,13 +434,13 @@ class TestConvOp(unittest.TestCase):
        print '           TEST ConvOp.grad' 
        print '*************************************************'
-        nkern = 4
+        nkern = 3
-        bsize = 3
+        bsize = 2
        types = ["float32", "float64"]
-        kshps = [(3,4)]
+        kshps = [(2,3)]
-        imshps = [(2,8,7)]
+        imshps = [(2,3,4)]
        modes = ['valid', 'full']
-        unroll = [(0,0),(1,1),(1,4),(3,1),(3,4)]
+        unroll = [(0,0),(1,1),(2,3)]
        ssizes = [(1,1),(2,2)]
        for typ in types:
@@ -449,18 +449,16 @@ class TestConvOp(unittest.TestCase):
            for mode in modes:
                for imshp in imshps:
                    visdim = 1 if len(imshp)!=3 else imshp[0]
+                    imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
                    for kshp in kshps:
                        t=numpy.array([imshp[1]-kshp[0],imshp[2]-kshp[1]])
+                        kernvals = N.array(N.random.rand(nkern,visdim,kshp[0],
+                                                         kshp[1]),dtype=kerns.dtype)
                        # 'full' mode should support kernels bigger than the input
                        if mode == 'valid' and (t<0).any():
                            continue
                        for un_b,un_k in unroll:
                                for ss in ssizes:
-                                    imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
-                                    kernvals = N.array(N.random.rand(nkern,visdim,kshp[0],
-                                                             kshp[1]),dtype=kerns.dtype)
                                    print 'test_ConvOpGrad'
                                    print 'mode type:', mode, typ
                                    print 'imshp:', imshp
@@ -472,19 +470,15 @@ class TestConvOp(unittest.TestCase):
                                    print 'nkern:', 4
                                    def test_i(imgs):
-                                        out, outshp = convolve2(kernvals, kshp, nkern, 
+                                        convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
-                                                                imgs, imshp, bsize, 
+                                                        output_mode=mode, unroll_batch=un_b, unroll_kern=un_k)
-                                                                mode=mode, step=ss,
+                                        return convop(imgs, kernvals)
-                                                                unroll_batch=un_b,
-                                                                unroll_kern=un_k)
-                                        return out
                                    def test_k(kerns):
-                                        out, outshp = convolve2(kerns, kshp, nkern, 
+                                        convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
-                                                                imgvals, imshp, bsize, 
+                                                        output_mode=mode, unroll_batch=un_b, unroll_kern=un_k)
-                                                                mode=mode, step=ss,
+                                        return convop(imgvals, kerns)
-                                                                unroll_batch=un_b,
-                                                                unroll_kern=un_k)
-                                        return out
                                    #TODO the tolerance needed to pass is very high for float32(0.17). Is this acceptable? Expected?
                                    utt.verify_grad(test_i, [imgvals],
                                                    cast_to_output_type=True,

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -2891,12 +2891,17 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
    the given tolerance.
    :param op: something that behaves like an Op instance with a single output
-               (can be a python function combining multiple ops)
+               (can be a python function combining multiple ops, but see note below)
    :param pt: the list of numpy.ndarrays to use as inputs to the op
    :param n_tests: number of times to run the test
    :param rng: random number generator from which to draw random samples
    :param eps: stepsize used in the Finite Difference Method (Default None is type-dependent)
    :param tol: relative tolerance used as threshold for gradient comparison
+    :note: WARNING to unit-test writers: if `op` is a function that builds a graph,
+           try to make it a SMALL graph.  Often verify_grad is run in
+           debug mode, which can be very slow if it has to verify a lot
+           of intermediate computations.
    """
    pt = [numpy.array(p) for p in pt]

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -13,8 +13,7 @@ from theano import pprint
 import numpy
 #import scalar_opt
-from theano.compile.debugmode import DebugMode
+from theano import function, compile
-from theano import function
 def inputs(xbc = (0, 0), ybc = (0, 0), zbc = (0, 0)):
@@ -182,6 +181,18 @@ class test_canonize(unittest.TestCase):
        gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
        print pprint(g.outputs[0])
+    def test_elemwise_multiple_inputs_optimisation(self):
+        """
+        verify that the Canonizer merges sequential Elemwise({mul,add})
+        """
+        x, y, z = matrices('xyz')
+        for g,n in [
+            (x+y+z,1),
+            (x*y*z,1),
+            (x*y*(x+y+z),2),            
+            ]:
+            f = compile.function([x,y,z], g, mode=compile.Mode(optimizer='fast_run'))
+            assert(len(f.maker.env.toposort())==n)
 def test_mixeddiv():
    """Test that int division is preserved"""