提交 0f16316b · 作者:James Bergstra

Added some logging.

Several minor changes to test_conv_nnet2_classif to run in DebugMode
上级 24858525
......@@ -7,6 +7,19 @@ from theano import tensor, scalar
from .type import CudaNdarrayType
from .type_support import filter as type_support_filter
import logging

# Per-module logger for theano's CUDA ndarray basic ops.
_logger_name = 'theano_cuda_ndarray.basic_ops'
_logger = logging.getLogger(_logger_name)
_logger.setLevel(logging.DEBUG)
_logger.addHandler(logging.StreamHandler())  # TODO: remove this debugging handler


def _joined(parts):
    # Render every argument with str() and space-separate them.
    return ' '.join(str(p) for p in parts)


def warning(*msg):
    """Emit *msg* parts at WARNING level, prefixed with the logger name."""
    _logger.warning(_logger_name + 'WARNING: ' + _joined(msg))


def info(*msg):
    """Emit *msg* parts at INFO level, prefixed with the logger name."""
    _logger.info(_logger_name + 'INFO: ' + _joined(msg))


def debug(*msg):
    """Emit *msg* parts at DEBUG level, prefixed with the logger name."""
    _logger.debug(_logger_name + 'DEBUG: ' + _joined(msg))
def as_cuda_ndarray_variable(x):
if hasattr(x, '_as_CudaNdarrayVariable'):
return x._as_CudaNdarrayVariable()
......@@ -631,37 +644,6 @@ class GpuReshape(tensor.Reshape):
', should be %i' % (len(shp), self.ndim), shp)
out[0] = x.reshape(tuple(shp))
class GpuDimFlip(Op):
"""This Op implements a very special case of Subtensor, in which some (or all) of the
strides are negated.
This Op should be erased when a proper GpuSubtensor is implemented.
"""
def __init__(self, mask):
Op.__init__(self)
self.mask = mask
def __eq__(self, other):
return type(self) == type(other) and self.mask == other.mask
def __hash__(self):
return hash(type(self)) ^ hash(self.mask)
def __str__(self):
return '%s{%s}' %(self.__class__.__name__, str(self.mask))
def perform(self, node, (x,), (out,)):
z = x.view()
total_dev_data_offset = 0
for i, f in enumerate(self.mask):
if f and z.shape[i] > 1:
dev_data_offset += (z.dim[i] - 1) * z.str[i]
z.str[i] *= -1
z.dev_data += total_dev_data_offset
out[0] = z
class GpuSubtensor(tensor.Subtensor):
def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs)
......@@ -728,6 +710,8 @@ class GpuSubtensor(tensor.Subtensor):
newlen = (stop - start) // stride
offset += x_strides[i] * start
debug('GpuSubtensor slice', i, ': ', start, stop, stride)
debug('GpuSubtensor shape', i, ': ', x_shape[i], newlen)
x._set_shape_i(i, newlen)
x._set_stride(i, x_strides[i] * stride)
......@@ -742,3 +726,35 @@ class GpuShape(tensor.Shape):
return Apply(self, [x], [tensor.lvector()])
gpu_shape = GpuShape()
# Disabled dead code: kept only for reference until a proper GpuSubtensor
# exists (see the docstring below).
if 0:
    class GpuDimFlip(Op):
        """This Op implements a very special case of Subtensor, in which some (or all) of the
        strides are negated.

        This Op should be erased when a proper GpuSubtensor is implemented.
        """
        def __init__(self, mask):
            # mask: one flag per dimension; flagged dimensions are flipped.
            Op.__init__(self)
            self.mask = mask

        def __eq__(self, other):
            # Equal iff same Op class and same flip mask.
            return type(self) == type(other) and self.mask == other.mask

        def __hash__(self):
            # Consistent with __eq__: combine type hash and mask hash.
            return hash(type(self)) ^ hash(self.mask)

        def __str__(self):
            return '%s{%s}' %(self.__class__.__name__, str(self.mask))

        # Python-2 tuple-unpacking parameters (this file predates Python 3).
        def perform(self, node, (x,), (out,)):
            z = x.view()
            total_dev_data_offset = 0
            for i, f in enumerate(self.mask):
                if f and z.shape[i] > 1:
                    # NOTE(review): this accumulates into the undefined name
                    # 'dev_data_offset' (the accumulator above is
                    # 'total_dev_data_offset'), so this branch would raise
                    # NameError if executed -- likely one reason the whole
                    # class is wrapped in 'if 0:'.
                    dev_data_offset += (z.dim[i] - 1) * z.str[i]
                    z.str[i] *= -1
            z.dev_data += total_dev_data_offset
            out[0] = z
......@@ -67,6 +67,7 @@ def local_gpu_elemwise_1(node):
def local_gpu_dimshuffle_0(node):
"""
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
gpu_from_host(dimshuffle) -> gpu_dimshuffle(gpu_from_host)
"""
if isinstance(node.op, tensor.DimShuffle):
input, = node.inputs
......@@ -78,14 +79,6 @@ def local_gpu_dimshuffle_0(node):
return [host_from_gpu(new_op(gpu_from_host(input)))]
else:
return [host_from_gpu(new_op(gpu_from_host(tensor.tensor_copy(input))))]
return False
@register_opt()
@local_optimizer([])
def local_gpu_dimshuffle_1(node):
"""
gpu_from_host(dimshuffle) -> gpu_dimshuffle(gpu_from_host)
"""
if node.op == gpu_from_host:
host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, tensor.DimShuffle):
......
......@@ -190,6 +190,8 @@ def test_conv_nnet2():
def run_conv_nnet2_classif(shared_fn): # pretend we are training LeNet for MNIST
n_train_iter = 2
n_batch = 60
shape_img = (n_batch, 1, 32, 32)
......@@ -205,9 +207,9 @@ def run_conv_nnet2_classif(shared_fn): # pretend we are training LeNet for MNIST
n_out = 10
w0 = shared_fn(numpy.asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w0')
b0 = shared_fn(numpy.asarray(numpy.zeros((n_kern,1,1)), dtype='float32'), 'b0')
b0 = shared_fn(numpy.asarray(numpy.zeros(n_kern), dtype='float32'), 'b0')
w1 = shared_fn(numpy.asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1')
b1 = shared_fn(numpy.asarray(numpy.zeros((n_kern1,1,1)), dtype='float32'), 'b1')
b1 = shared_fn(numpy.asarray(numpy.zeros(n_kern1), dtype='float32'), 'b1')
v = shared_fn(numpy.asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c')
c = shared_fn(numpy.asarray(numpy.zeros(n_out), dtype='float32'), 'c')
......@@ -218,17 +220,19 @@ def run_conv_nnet2_classif(shared_fn): # pretend we are training LeNet for MNIST
conv_op = theano.sandbox.conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)
conv_op1 = theano.sandbox.conv.ConvOp((n_kern,logical_hid_shape[0]/2, logical_hid_shape[1]/2), shape_kern1[2:], n_kern1, n_batch, 1, 1)
hid = tensor.tanh(conv_op(x, w0)+b0)
hid1 = tensor.tanh(conv_op1(hid[:,:,::2,::2], w1) + b1)
hid = tensor.tanh(conv_op(x, w0)+b0.dimshuffle('x', 0, 'x', 'x'))
hid1 = tensor.tanh(conv_op1(hid[:,:,::2,::2], w1) + b1.dimshuffle('x', 0, 'x', 'x'))
hid_flat = hid1.reshape((n_batch, n_hid))
out = tensor.tanh(tensor.dot(hid_flat, v)+c)
loss = tensor.sum(0.5 * (out-y)**2 * lr)
loss = lr * tensor.nnet.crossentropy_categorical_1hot(
tensor.nnet.softmax(tensor.dot(hid_flat, v)+c),
tensor.argmax(y, axis=1))
print 'loss type', loss.type
params = [w0, b0, w1, b1, v, c]
gparams = tensor.grad(loss, params)
mode = theano.compile.ProfileMode()
#mode = theano.compile.ProfileMode()
mode = None
print 'building pfunc ...'
train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
......@@ -240,7 +244,7 @@ def run_conv_nnet2_classif(shared_fn): # pretend we are training LeNet for MNIST
yval = numpy.asarray(numpy.random.rand(n_batch,n_out), dtype='int32')
lr = numpy.asarray(0.01, dtype='float32')
for i in xrange(10):
for i in xrange(n_train_iter):
rval = train(xval, yval, lr)
try:
mode.print_summary()
......@@ -250,8 +254,8 @@ def run_conv_nnet2_classif(shared_fn): # pretend we are training LeNet for MNIST
def test_conv_nnet2_classif():
numpy.random.seed(23456)
rval_cpu = run_conv_nnet2(shared)
rval_gpu = run_conv_nnet2_classif(tcn.shared_constructor)
numpy.random.seed(23456)
rval_gpu = run_conv_nnet2(tcn.shared_constructor)
rval_cpu = run_conv_nnet2_classif(shared)
assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-6)
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论