testgroup / pytensor · Commits

Commit 5d229740
Authored Mar 09, 2010 by Dumitru Erhan

    merge

Parents: 9441e46b, 3591f59b

Showing 9 changed files with 77 additions and 54 deletions
theano/configdefaults.py                             +3   -1
theano/sandbox/cuda/__init__.py                      +10  -4
theano/sandbox/cuda/blas.py                          +6   -3
theano/sandbox/cuda/conv.cu                          +3   -2
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py  +0   -1
theano/sandbox/cuda/tests/test_nnet.py               +41  -34
theano/tensor/basic.py                               +1   -1
theano/tensor/opt.py                                 +11  -6
theano/tensor/signal/downsample.py                   +2   -2
theano/configdefaults.py

@@ -8,9 +8,11 @@ AddConfigVar('floatX',
         EnumStr('float64', 'float32'),
         )

+#gpu mean let the driver select the gpu. Needed in case of gpu in exclusive mode.
+#gpuX mean use the gpu number X.
 AddConfigVar('device',
         "Default device for computations",
-        EnumStr('cpu', *['gpu%i' % i for i in range(4)])
+        EnumStr('cpu', 'gpu', *['gpu%i' % i for i in range(4)])
         )

 # keep the default mode.optimizer==config.optimizer and mode.linker==config.linker!
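Note: a minimal sketch of what the new choice set looks like once the comprehension expands. Only AddConfigVar/EnumStr come from the diff; the validate helper below is a hypothetical stand-in for EnumStr's behavior.

    # Hypothetical sketch: the 'device' choices after this commit.
    # Plain 'gpu' is new; it lets the CUDA driver pick the board, which is
    # needed when boards run in exclusive mode. 'gpuX' still pins board X.
    choices = ['cpu', 'gpu'] + ['gpu%i' % i for i in range(4)]
    assert choices == ['cpu', 'gpu', 'gpu0', 'gpu1', 'gpu2', 'gpu3']

    def validate(value):
        # Stand-in for EnumStr: accept only one of the listed strings.
        if value not in choices:
            raise ValueError('Invalid config value', value)
        return value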
theano/sandbox/cuda/__init__.py

@@ -112,7 +112,9 @@ if cuda_available:
 def use(device):
     global cuda_enabled, enabled_cuda
-    if device.startswith('gpu'):
+    if device == 'gpu':
+        pass
+    elif device.startswith('gpu'):
         device = int(device[3:])
     elif device == 'cpu':
         device = -1

@@ -120,13 +122,17 @@ def use(device):
         raise ValueError("Invalid device identifier", device)
     if use.device_number is None:
         # No successful call to use() has been made yet
-        if device < 0:
+        if device != 'gpu' and device < 0:
             return
         if device in [None, ""]:
             device = 0
         device = int(device)
         try:
-            gpu_init(device)
+            if device != 'gpu':
+                gpu_init(device)
+            else:
+                #warning To let people see that the gpu will be used.
+                _logger.warn("We let the driver select the gpu device to use")
             handle_shared_float32(True)
             use.device_number = device
             cuda_enabled = True
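Note: the device-string handling above is easier to see in isolation. A sketch of just the parsing, using a hypothetical parse_device helper (the real use() goes on to call gpu_init and handle_shared_float32):

    def parse_device(device):
        # 'gpu'  -> None: let the driver choose a board (new in this commit)
        # 'gpuN' -> N:    pin board number N
        # 'cpu'  -> -1:   stay on the host
        if device == 'gpu':
            return None
        elif device.startswith('gpu'):
            return int(device[3:])
        elif device == 'cpu':
            return -1
        raise ValueError("Invalid device identifier", device)

    assert parse_device('gpu') is None
    assert parse_device('gpu2') == 2
    assert parse_device('cpu') == -1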
theano/sandbox/cuda/blas.py

@@ -162,16 +162,19 @@ class GpuConv(Op):
             and self.logical_img_hw == other.logical_img_hw \
             and self.logical_kern_hw == other.logical_kern_hw \
             and self.logical_kern_align_top == other.logical_kern_align_top \
-            and self.version == other.version
+            and self.version == other.version \
+            and self.verbose == other.verbose

     def __hash__(self):
+        # don't use hash(self.version) as hash(-1)==-2 and hash(-2)==-2 in python!
         return hash(type(self)) \
             ^ hash(self.border_mode) \
             ^ hash(self.subsample) \
             ^ hash(self.logical_img_hw) \
             ^ hash(self.logical_kern_hw) \
             ^ hash(self.logical_kern_align_top) \
-            ^ self.version # don't use hash as hash(-1)==-2 and hash(-2)==-2 in python!
+            ^ self.version \
+            ^ self.verbose

     def __str__(self):
         return '%s{%s,%s,%s,%s,%s}' % (self.__class__.__name__,

@@ -200,7 +203,7 @@ class GpuConv(Op):
         return ['cuda_ndarray.cuh','<stdio.h>']

     def c_code_cache_version(self):
-        return (0, 4)
+        return (0, 5)

     def c_support_code_apply(self, node, nodename):
         return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read() + \
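Note: since verbose now participates in __eq__, it must also feed __hash__ so that equal ops hash equally, and the cache version bump (0, 4) to (0, 5) invalidates previously compiled kernels. The comment about hash() refers to a real CPython quirk: -1 is reserved as an error sentinel for C-level hash functions, so hash(-1) is remapped to -2 and collides with hash(-2); XOR-ing self.version directly keeps version=-1 and version=-2 distinct. A quick check:

    # CPython remaps hash(-1) to -2, so it collides with hash(-2):
    assert hash(-1) == hash(-2) == -2
    # XOR-ing the raw integer instead keeps the two versions apart:
    assert (0 ^ -1) != (0 ^ -2)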
theano/sandbox/cuda/conv.cu

Note: both hunks drop a stray trailing backslash. Because the backslash continues the #define, the CONV_ROWS_STACK*_SPECIAL(...) invocation on the following line was absorbed into the macro body instead of being expanded, so the f<<<...>>> launch could run with f unset.

@@ -307,7 +307,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
 #define CONV_ROWS_STACK_SPECIAL(kern_wid) \
     if(!img_contiguous_2d || !kern_contiguous_2d) f = conv_rows_stack<kern_wid, false>;\
-    else f = conv_rows_stack<kern_wid, true>;\
+    else f = conv_rows_stack<kern_wid, true>;
     CONV_ROWS_STACK_SPECIAL(THEANO_KERN_WID);
     f<<< grid, threads, shared_size >>>

@@ -379,7 +379,8 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
     if((!img_contiguous_2d || !kern_contiguous_2d)&&version==9) f = conv_rows_stack2<kern_wid, false,true>;\
     else if(version==9) f = conv_rows_stack2<kern_wid, true,true>;\
     else if(!img_contiguous_2d || !kern_contiguous_2d) f = conv_rows_stack2<kern_wid, false, false>;\
-    else f = conv_rows_stack2<kern_wid, true, false>;\
+    else f = conv_rows_stack2<kern_wid, true, false>;
     CONV_ROWS_STACK2_SPECIAL(THEANO_KERN_WID);
     f<<< grid, threads, shared_size >>>
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py

Note: the unconditional raise that disabled test_valid is removed, presumably because the stray-backslash fix in conv.cu above addresses the crash it guarded against.

@@ -225,7 +225,6 @@ def get_shapes2(scales_img=(1,1), scales_kern=(1,1), subsample=(1,1), img_stride
     return shapes

 def test_valid():
-    raise Exception('One of the modes here causes a segmentation fault!')
     # img shape, kern shape, subsample shape
     shapes = get_basic_shapes()
theano/sandbox/cuda/tests/test_nnet.py

@@ -14,7 +14,7 @@ import numpy
 # Skip test if cuda_ndarray is not available.
 from nose.plugins.skip import SkipTest
 import theano.sandbox.cuda as cuda_ndarray
-if cuda_ndarray.cuda_enabled == False:
+if cuda_ndarray.cuda_available == False:
     raise SkipTest('Optional package cuda disabled')

 import theano.sandbox.cuda as tcn

@@ -23,6 +23,13 @@ import logging
 logging.getLogger('theano.sandbox.cuda.tests.test_nnet').setLevel(logging.INFO)

+def my_rand(*shape):
+    return theano._asarray(numpy.random.rand(*shape), dtype='float32')
+def my_randn(*shape):
+    return theano._asarray(numpy.random.randn(*shape), dtype='float32')
+def my_zeros(*shape):
+    return theano._asarray(numpy.zeros(*shape), dtype='float32')
+
 def get_mode(use_gpu):
     ret = theano.compile.get_default_mode()
     if isinstance(ret, theano.compile.ProfileMode):

@@ -44,15 +51,15 @@ def print_diff_mode(a,b):
 def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_iter=100):

     if use_gpu:
-        w = tcn.shared_constructor(0.01*(numpy.random.rand(n_in,n_hid)-0.5), 'w')
-        b = tcn.shared_constructor(numpy.zeros(n_hid), 'b')
-        v = tcn.shared_constructor(numpy.zeros((n_hid,n_out)), 'c')
-        c = tcn.shared_constructor(numpy.zeros(n_out), 'c')
+        w = tcn.shared_constructor(0.01*(my_rand(n_in,n_hid)-0.5), 'w')
+        b = tcn.shared_constructor(my_zeros(n_hid), 'b')
+        v = tcn.shared_constructor(my_zeros((n_hid,n_out)), 'c')
+        c = tcn.shared_constructor(my_zeros(n_out), 'c')
     else:
-        w = shared(theano._asarray(0.01*(numpy.random.rand(n_in,n_hid)-0.5), dtype='float32'), 'w')
-        b = shared(theano._asarray(numpy.zeros(n_hid), dtype='float32'), 'b')
-        v = shared(theano._asarray(numpy.zeros((n_hid,n_out)), dtype='float32'), 'c')
-        c = shared(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c')
+        w = shared(0.01*(my_rand(n_in,n_hid)-0.5), 'w')
+        b = shared(my_zeros(n_hid), 'b')
+        v = shared(my_zeros((n_hid,n_out)), 'c')
+        c = shared(my_zeros(n_out), 'c')

     x = tensor.fmatrix('x')
     y = tensor.fmatrix('y')

@@ -75,8 +82,8 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_iter=100):
         for i, n in enumerate(train.maker.env.toposort()):
             print i, n

-    xval = theano._asarray(numpy.random.rand(n_batch,n_in), dtype='float32')
-    yval = theano._asarray(numpy.random.rand(n_batch,n_out), dtype='float32')
+    xval = my_rand(n_batch, n_in)
+    yval = my_rand(n_batch, n_out)
     lr = theano._asarray(0.01, dtype='float32')

     t0 = time.time()

@@ -123,10 +130,10 @@ def run_conv_nnet1(use_gpu):
     n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
     n_out = 10

-    w = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w')
-    b = shared_fn(theano._asarray(numpy.zeros((n_kern,)), dtype='float32'), 'b')
-    v = shared_fn(theano._asarray(numpy.zeros((n_hid,n_out)), dtype='float32'), 'c')
-    c = shared_fn(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c')
+    w = shared_fn(0.01*(my_rand(*shape_kern)-0.5), 'w')
+    b = shared_fn(my_zeros((n_kern,)), 'b')
+    v = shared_fn(my_zeros((n_hid,n_out)), 'c')
+    c = shared_fn(my_zeros(n_out), 'c')

     x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
     y = tensor.fmatrix('y')

@@ -152,8 +159,8 @@ def run_conv_nnet1(use_gpu):
 #    for i, n in enumerate(train.maker.env.toposort()):
 #        print i, n

-    xval = theano._asarray(numpy.random.rand(*shape_img), dtype='float32')
-    yval = theano._asarray(numpy.random.rand(n_batch,n_out), dtype='float32')
+    xval = my_rand(*shape_img)
+    yval = my_rand(n_batch, n_out)
     lr = theano._asarray(0.01, dtype='float32')

     for i in xrange(10):

@@ -204,12 +211,12 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
     n_hid = n_kern1 * logical_hid_shape1[0] * logical_hid_shape1[1]
     n_out = 10

-    w0 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w0')
-    b0 = shared_fn(theano._asarray(numpy.zeros((n_kern,)), dtype='float32'), 'b0')
-    w1 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1')
-    b1 = shared_fn(theano._asarray(numpy.zeros((n_kern1,)), dtype='float32'), 'b1')
-    v = shared_fn(theano._asarray(numpy.zeros((n_hid,n_out)), dtype='float32'), 'c')
-    c = shared_fn(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c')
+    w0 = shared_fn(0.01*(my_rand(*shape_kern)-0.5), 'w0')
+    b0 = shared_fn(my_zeros((n_kern,)), 'b0')
+    w1 = shared_fn(0.01*(my_rand(*shape_kern1)-0.5), 'w1')
+    b1 = shared_fn(my_zeros((n_kern1,)), 'b1')
+    v = shared_fn(my_zeros((n_hid,n_out)), 'c')
+    c = shared_fn(my_zeros(n_out), 'c')

     x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
     y = tensor.fmatrix('y')

@@ -238,8 +245,8 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
 #    for i, n in enumerate(train.maker.env.toposort()):
 #        print i, n

-    xval = theano._asarray(numpy.random.rand(*shape_img), dtype='float32')
-    yval = theano._asarray(numpy.random.rand(n_batch,n_out), dtype='float32') #int32 make all 0...
+    xval = my_rand(*shape_img)
+    yval = my_rand(n_batch, n_out) #int32 make all 0...
     lr = theano._asarray(0.01, dtype='float32')

     for i in xrange(n_train):
         rval = train(xval, yval, lr)

@@ -284,12 +291,12 @@ def run_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, n_iter,
     n_out = 10

-    w0 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w0')
-    b0 = shared_fn(theano._asarray(numpy.zeros((n_kern,)), dtype='float32'), 'b0')
-    w1 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1')
-    b1 = shared_fn(theano._asarray(numpy.zeros((n_kern1,)), dtype='float32'), 'b1')
-    v = shared_fn(theano._asarray(0.01*numpy.random.randn(n_hid,n_out), dtype='float32'), 'v')
-    c = shared_fn(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c')
+    w0 = shared_fn(0.01*(my_rand(*shape_kern)-0.5), 'w0')
+    b0 = shared_fn(my_zeros((n_kern,)), 'b0')
+    w1 = shared_fn(0.01*(my_rand(*shape_kern1)-0.5), 'w1')
+    b1 = shared_fn(my_zeros((n_kern1,)), 'b1')
+    v = shared_fn(0.01*my_randn(n_hid,n_out), 'v')
+    c = shared_fn(my_zeros(n_out), 'c')

     print 'ALLOCATING ARCH: w0 shape', w0.value.shape
     print 'ALLOCATING ARCH: w1 shape', w1.value.shape

@@ -330,11 +337,11 @@ def run_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, n_iter,
         for i, n in enumerate(train.maker.env.toposort()):
             print i, n

-    xval = theano._asarray(numpy.random.rand(*shape_img), dtype='float32')
-    yval = theano._asarray(numpy.random.rand(n_batch,n_out), dtype='float32')
+    xval = my_rand(*shape_img)
+    yval = my_rand(n_batch, n_out)
     lr = theano._asarray(0.01, dtype='float32')

-    rvals = numpy.zeros(n_iter)
+    rvals = my_zeros(n_iter)
     t0 = time.time()
     for i in xrange(n_iter):
         rvals[i] = train(xval, yval, lr)[0]
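Note: the new my_rand/my_randn/my_zeros helpers exist because numpy.random.rand, numpy.random.randn, and numpy.zeros return float64 by default, while the cuda backend works in float32; theano._asarray is essentially an asarray with a forced dtype. The same pattern in plain numpy (names from the diff, implementation simplified):

    import numpy

    def my_rand(*shape):
        # rand() yields float64; cast so the float32/GPU path is exercised
        return numpy.asarray(numpy.random.rand(*shape), dtype='float32')

    x = my_rand(60, 1024)
    assert x.dtype == numpy.dtype('float32')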
theano/tensor/basic.py

Note: the removed and added lines render identically in this scrape; the difference is presumably whitespace-only, hidden by the page's hide-whitespace option.

@@ -1064,7 +1064,7 @@ class TensorValue(Value, _tensor_py_operators):
 Tensor = TensorType

 #QUESTION: why are we doing this!?
-elemwise.as_tensor_variable = as_tensor_variable
+elemwise.as_tensor_variable = as_tensor_variable
 elemwise.TensorType = TensorType
 elemwise.TensorVariable = TensorVariable
 elemwise.TensorConstant = TensorConstant
theano/tensor/opt.py

@@ -24,6 +24,8 @@ from theano import compile #to register the optimizer built by this file
 from theano.gof.python25 import any, all
+from theano.gof.opt import Optimizer
+from theano.gof import toolbox, DestroyHandler

 # Utilities

 def out2in(*local_opts):

@@ -395,6 +397,13 @@ class ShapeFeature(object):
         else:
             self.shape_of[r] = tuple([self.unpack(s_i) for s_i in s])

+    def init_r(self, r):
+        if r not in self.shape_of:
+            try:
+                self.set_shape(r, self.shape_tuple(r))
+            except AttributeError:
+                self.set_shape(r, None)
+
     def make_vector_shape(self, r):
         return make_vector(*self.shape_of[r])

     #

@@ -421,11 +430,7 @@ class ShapeFeature(object):
         for i, r in enumerate(node.inputs):
             # make sure we have shapes for the inputs
-            if r not in self.shape_of:
-                try:
-                    self.set_shape(r, self.shape_tuple(r))
-                except AttributeError:
-                    self.set_shape(r, None) # not a TensorType variable
+            self.init_r(r)

         try:
             shape_infer = node.op.infer_shape

@@ -453,7 +458,7 @@ class ShapeFeature(object):
         # TODO:
         #  This tells us that r and new_r must have the same shape
         #  if we didn't know that the shapes are related, now we do.
-
+        self.init_r(new_r)
         # change_input happens in two cases:
         # 1) we are trying to get rid of r, or
         # 2) we are putting things back after a failed transaction.
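Note: init_r is an extract-method refactoring; the duplicated try/except that lazily seeds shape_of now lives in one place, and the new call ensures new_r has an entry before it replaces r. A simplified sketch of the pattern (hypothetical stand-in class, not the real ShapeFeature, which tracks symbolic shapes):

    class ShapeTracker(object):
        # Minimal stand-in for ShapeFeature's lazy shape bookkeeping.
        def __init__(self):
            self.shape_of = {}

        def shape_tuple(self, r):
            # Raises AttributeError when r carries no shape information.
            return tuple(r.type.shape)

        def init_r(self, r):
            # Seed an entry exactly once; None marks "shape unknown"
            # (e.g. not a TensorType variable).
            if r not in self.shape_of:
                try:
                    self.shape_of[r] = self.shape_tuple(r)
                except AttributeError:
                    self.shape_of[r] = None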
theano/tensor/signal/downsample.py

@@ -212,7 +212,7 @@ class DownsampleFactorMax(Op):
         """ % locals()

     def c_code_cache_version(self):
-        return ()
+        return (0, 1)

 class DownsampleFactorMaxGrad(Op):

@@ -349,4 +349,4 @@ class DownsampleFactorMaxGrad(Op):
         """ % locals()

     def c_code_cache_version(self):
-        return ()
+        return (0, 1)
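Note: in Theano, c_code_cache_version identifies an op's generated C code in the compiled-module cache; an empty tuple marks the code as unversioned, so it cannot be reliably reused across sessions, while a concrete tuple such as (0, 1) makes caching effective and bumping it forces a recompile. A hypothetical illustration of how such a tuple could feed a cache key (not Theano's actual cache code):

    def cache_key(op_name, version):
        # Unversioned code must be treated as volatile: no stable key.
        if version == ():
            return None
        return (op_name,) + version

    assert cache_key('DownsampleFactorMax', ()) is None
    assert cache_key('DownsampleFactorMax', (0, 1)) == ('DownsampleFactorMax', 0, 1)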