Fix wrapper's context deallocations, add ctc_cost_cpu_no_grad

- Remove PyArrayObject pointers from CTC context - Remove activations copy by assuming activations array is C-contiguous - Add missing CTC context deallocations when errors occur - Move allocation of CTC context from heap back to the stack for automatic deallocation - Add separate function name for when gradient computations are disabled, i.e. ctc_cost_cpu_no_grad Signed-off-by: João Victor Tozatti Risso <joaovictor.risso@gmail.com>

Fix wrapper's context deallocations, add ctc_cost_cpu_no_grad
56582372 · João Victor Tozatti Risso · a75255d9 · 56582372
--- a/theano/tensor/nnet/ctc_wrapper.c
+++ b/theano/tensor/nnet/ctc_wrapper.c
@@ -6,9 +6,6 @@ typedef struct ctc_context {
    int * input_lengths;
    int * flat_labels;
    int * label_lengths;
-    PyArrayObject * activations_copy;
-    PyArrayObject * costs;
-    PyArrayObject * gradients;
 } ctc_context_t;
 void ctc_context_init(ctc_context_t * context)
@@ -22,12 +19,9 @@ void ctc_context_init(ctc_context_t * context)
    context->input_lengths = NULL;
    context->flat_labels = NULL;
    context->label_lengths = NULL;
-    context->activations_copy = NULL;
-    context->costs = NULL;
-    context->gradients = NULL;
 }
-void ctc_context_destroy(ctc_context_t * context, int error = 0)
+void ctc_context_destroy(ctc_context_t * context)
 {
    if ( NULL != context->workspace )
        free( context->workspace );
@@ -40,15 +34,6 @@ void ctc_context_destroy(ctc_context_t * context, int error = 0)
    if ( NULL != context->label_lengths )
        free( context->label_lengths );
-    Py_XDECREF( context->activations_copy );
-    // Decrease reference to allocated costs and gradients, if an error occurs
-    if ( error )
-    {
-        Py_XDECREF( context->costs );
-        Py_XDECREF( context->gradients );
-    }
 }
 int ctc_check_result(ctcStatus_t retcode, const char * msg)
@@ -126,29 +111,18 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
                                 PyArrayObject ** out_costs,
                                 PyArrayObject ** out_gradients)
 {
-    ctc_context_t * context = (ctc_context_t *)malloc( sizeof( ctc_context_t ) );
+    ctc_context_t ctc_object;
+    ctc_context_t * context = &ctc_object;
    ctc_context_init( context );
-    npy_float32 * activations = NULL;
+    if ( !PyArray_IS_C_CONTIGUOUS( in_activations ) )
-    if ( PyArray_IS_C_CONTIGUOUS( in_activations ) )
-    {
-        activations = (npy_float32 *) PyArray_DATA( in_activations );
-    }
-    else
    {
-        context->activations_copy = PyArray_GETCONTIGUOUS( in_activations );
+        PyErr_SetString( PyExc_RuntimeError,
-        if ( NULL != context->activations_copy )
+            "activations array must be C-contiguous." );
-        {
-            activations = (npy_float32 *) PyArray_DATA( context->activations_copy );
-        }
-        else
-        {
-            PyErr_Format( PyExc_MemoryError,
-                "Could not create a contiguous copy of activations array." );
        return 1;
    }
-    }
+    npy_float32 * activations = (npy_float32 *) PyArray_DATA( in_activations );
    create_contiguous_input_lengths( in_input_lengths, &(context->input_lengths) );
@@ -165,7 +139,7 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
    if ( ( NULL == context->label_lengths ) || ( NULL == context->flat_labels ) )
    {
        // Destroy previous CTC context before returning exception
-        ctc_context_destroy( context, 1 );
+        ctc_context_destroy( context );
        PyErr_Format( PyExc_MemoryError,
            "Could not allocate storage for labels and their lengths" );
@@ -185,14 +159,11 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
        Py_XDECREF( *out_costs );
        // Allocate new matrix
        *out_costs = (PyArrayObject *) PyArray_ZEROS( 1, &cost_size, NPY_FLOAT32, 0 );
-        // Add a copy of the newly allocated costs, so that we can decrease
-        // its reference (deallocate it) in case of errors inside the wrapper.
-        context->costs = *out_costs;
        if ( NULL == (*out_costs) )
        {
            // Destroy previous CTC context before returning exception
-            ctc_context_destroy( context, 1 );
+            ctc_context_destroy( context );
            PyErr_Format( PyExc_MemoryError,
                "Could not allocate storage for CTC costs" );
@@ -218,15 +189,11 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
            // Allocate new array
            *out_gradients = (PyArrayObject *) PyArray_ZEROS(3, PyArray_DIMS( in_activations ),
                NPY_FLOAT32, 0);
-            // Add a copy of the newly allocated gradients, so that we can
-            // decrease its reference (deallocate it) in case of errors inside
-            // the wrapper.
-            context->gradients = *out_gradients;
            if ( NULL == (*out_gradients) )
            {
                // Destroy previous CTC context before returning exception
-                ctc_context_destroy( context, 1 );
+                ctc_context_destroy( context );
                PyErr_Format( PyExc_MemoryError,
                    "Could not allocate storage for CTC gradients!" );
@@ -245,14 +212,19 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
        "Failed to obtain CTC workspace size!" );
    if ( ctc_error )  // Exception is set by ctc_check_result, return error here
+    {
+        // Destroy previous CTC context before returning exception
+        ctc_context_destroy( context );
        return 1;
+    }
    context->workspace = malloc( cpu_workspace_size );
    if ( NULL == context->workspace )
    {
        // Destroy previous CTC context before returning exception
-        ctc_context_destroy( context, 1 );
+        ctc_context_destroy( context );
        PyErr_Format( PyExc_MemoryError,
            "Failed to allocate memory for CTC workspace!" );
@@ -265,21 +237,21 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
        context->options ), "Failed to compute CTC loss function!" );
    if ( ctc_error )  // Exception is set by ctc_check_result, return error here
+    {
+        ctc_context_destroy( context );
        return 1;
+    }
    ctc_context_destroy( context );
-    free( context );
    return 0;
 }
 /**
 * Wrapper version with gradient computation disabled.
- *
+ */
- * We assume that function overloading (C++) is available to distinguish between
+int APPLY_SPECIFIC(ctc_cost_cpu_no_grad)(PyArrayObject *  in_activations,
- * two versions of the ctc_cost_cpu function.
- **/
-int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
                                         PyArrayObject *  in_labels,
                                         PyArrayObject *  in_input_lengths,
                                         PyArrayObject ** out_costs)
@@ -290,3 +262,4 @@ int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject *  in_activations,
                                        out_costs,
                                        NULL);
 }