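Address abergeron's review comments: add the full BSD license headers, stop creating a cuBLAS handle on every validMM call, release the temporary columns buffer, and drop the unused curand include and matplotlib import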

53630ed1 · Arjun Jain · fb660352 · 53630ed1 · 53630ed1 · 53630ed1
--- a/theano/sandbox/cuda/caffe_common.hpp
+++ b/theano/sandbox/cuda/caffe_common.hpp
-// Copyright 2014 BVLC and contributors.
+/*
+Copyright (c) 2014, The Regents of the University of California (Regents)
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 #ifndef CAFFE_COMMON_HPP_
 #define CAFFE_COMMON_HPP_
-//#include <boost/shared_ptr.hpp>
 #include <cublas_v2.h>
 #include <cuda.h>
-#include <curand.h>
 #include <driver_types.h>  // cuda driver types
-//#include <glog/logging.h>
 // CUDA: grid stride looping
 #define CUDA_KERNEL_LOOP(i, n) \

--- a/theano/sandbox/cuda/conv_gemm.cu
+++ b/theano/sandbox/cuda/conv_gemm.cu
-// Copyright 2014 BVLC and contributors.
+/*
+Copyright (c) 2014, The Regents of the University of California (Regents)
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 #undef _GLIBCXX_ATOMIC_BUILTINS
 #include <Python.h>
 #include "cuda_ndarray.cuh"
 #include "caffe_common.hpp"
-// Author: Arjun Jain
 // Kernel for fast unfold+copy
 // (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu)
+// Reference code: https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
 __global__ void im2col_kernel(const int n, const float* data_im,
                              const int height, const int width, const int ksize, const int pad,
                              const int stride, const int height_col, const int width_col,
@@ -51,18 +75,13 @@ void im2col(const float* data_im, const int channels,
+// Author: Arjun Jain
 CudaNdarray* validMM(const CudaNdarray *input, 
 				      CudaNdarray *weight,
 				      CudaNdarray *output) 
 {
-    // TODO: This needs to be done in the singleton!
+  	cublasStatus_t status;
-    // Initialize CUBLAS
-    cublasHandle_t handle;
-  	cublasStatus_t status = cublasCreate(&handle);
-  	if (status != CUBLAS_STATUS_SUCCESS) {
-      		std::cerr << "!!!! CUBLAS initialization error\n";
-	}
    if (input->nd != 4)
    {
@@ -74,7 +93,6 @@ CudaNdarray* validMM(const CudaNdarray *input,
        PyErr_SetString(PyExc_ValueError, "required weight of 4D");
    }
-     // Reference code: https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
     // TODO: stride(dW, dH) and padding as function parameter
     int dH = 1; 
     int dW = 1;
@@ -146,19 +164,13 @@ CudaNdarray* validMM(const CudaNdarray *input,
                output->devdata + elt * op_stride, m
                );
+  	     if (status != CUBLAS_STATUS_SUCCESS) {
-		  cudaError_t err = cudaGetLastError();
+      		std::cerr << "!!!! CUBLAS initialization error\n";
-		  if (err != cudaSuccess) {
-		    printf("error in validMM: %s\n", cudaGetErrorString(err));
 	      }
      }
-    // TODO: How is columns and output deallocated? 
+    Py_DECREF(columns);
-    // device_free(columns->devdata);
-    // TODO: I did not kill the cublas context. If it comes from 
-    // the singleton, we dont need to kill it.
  return output;
 }

--- a/theano/sandbox/cuda/tests/test_conv_gemm.py
+++ b/theano/sandbox/cuda/tests/test_conv_gemm.py
 """
-Tests for GPU convolution
+Tests for Caffe GPU convolution
 """
 import sys
 import time
 import unittest
-import matplotlib.pyplot as plt
 import numpy