提交 61da262e authored 作者: nouiz's avatar nouiz

Merge pull request #649 from abergeron/cvm_gc

Add gc to CVM
...@@ -49,7 +49,7 @@ static int unpack_list_of_ssize_t(PyObject * pylist, Py_ssize_t **dst, Py_ssize_ ...@@ -49,7 +49,7 @@ static int unpack_list_of_ssize_t(PyObject * pylist, Py_ssize_t **dst, Py_ssize_
} }
assert (NULL == *dst); assert (NULL == *dst);
*len = buflen = PyList_Size(pylist); *len = buflen = PyList_Size(pylist);
*dst = buf = (Py_ssize_t*)malloc(buflen * sizeof(Py_ssize_t)); *dst = buf = (Py_ssize_t*)calloc(buflen, sizeof(Py_ssize_t));
assert(buf); assert(buf);
for (int ii = 0; ii < buflen; ++ii) for (int ii = 0; ii < buflen; ++ii)
{ {
...@@ -84,6 +84,8 @@ typedef struct { ...@@ -84,6 +84,8 @@ typedef struct {
int * var_computed; // 1 or 0 for every variable int * var_computed; // 1 or 0 for every variable
PyObject ** var_computed_cells; PyObject ** var_computed_cells;
PyObject ** var_value_cells; PyObject ** var_value_cells;
Py_ssize_t **dependencies; // list of vars dependencies for GC
Py_ssize_t *n_dependencies;
Py_ssize_t n_output_vars; Py_ssize_t n_output_vars;
Py_ssize_t * output_vars; // variables that *must* be evaluated by call Py_ssize_t * output_vars; // variables that *must* be evaluated by call
...@@ -140,6 +142,16 @@ CLazyLinker_dealloc(PyObject* _self) ...@@ -140,6 +142,16 @@ CLazyLinker_dealloc(PyObject* _self)
free(self->node_inputs); free(self->node_inputs);
free(self->node_outputs); free(self->node_outputs);
if (self->dependencies)
{
for (int i = 0; i < self->n_vars; ++i)
{
free(self->dependencies[i]);
}
free(self->dependencies);
free(self->n_dependencies);
}
free(self->var_owner); free(self->var_owner);
free(self->var_has_owner); free(self->var_has_owner);
free(self->var_computed); free(self->var_computed);
...@@ -179,6 +191,8 @@ CLazyLinker_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ...@@ -179,6 +191,8 @@ CLazyLinker_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->var_computed = NULL; self->var_computed = NULL;
self->var_computed_cells = NULL; self->var_computed_cells = NULL;
self->var_value_cells = NULL; self->var_value_cells = NULL;
self->dependencies = NULL;
self->n_dependencies = NULL;
self->n_output_vars = 0; self->n_output_vars = 0;
self->output_vars = NULL; self->output_vars = NULL;
...@@ -234,6 +248,7 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -234,6 +248,7 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
(char*)"node_prereqs", (char*)"node_prereqs",
(char*)"node_output_size", (char*)"node_output_size",
(char*)"update_storage", (char*)"update_storage",
(char*)"dependencies",
NULL}; NULL};
PyObject *compute_map_list=NULL, PyObject *compute_map_list=NULL,
...@@ -248,10 +263,11 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -248,10 +263,11 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
*output_vars=NULL, *output_vars=NULL,
*node_prereqs=NULL, *node_prereqs=NULL,
*node_output_size=NULL, *node_output_size=NULL,
*update_storage=NULL; *update_storage=NULL,
*dependencies=NULL;
assert(!self->nodes); assert(!self->nodes);
if (! PyArg_ParseTupleAndKeywords(args, kwds, "OOOiOOOOOOOOOOOOOOO", kwlist, if (! PyArg_ParseTupleAndKeywords(args, kwds, "OOOiOOOOOOOOOOOOOOOO", kwlist,
&self->nodes, &self->nodes,
&self->thunks, &self->thunks,
&self->pre_call_clear, &self->pre_call_clear,
...@@ -270,7 +286,8 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -270,7 +286,8 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
&output_vars, &output_vars,
&node_prereqs, &node_prereqs,
&node_output_size, &node_output_size,
&update_storage &update_storage,
&dependencies
)) ))
return -1; return -1;
Py_INCREF(self->nodes); Py_INCREF(self->nodes);
...@@ -291,27 +308,17 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -291,27 +308,17 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
// allocated and initialize thunk_cptr_data and thunk_cptr_fn // allocated and initialize thunk_cptr_data and thunk_cptr_fn
if (n_applies) if (n_applies)
{ {
self->thunk_cptr_data = (void**)malloc(n_applies * sizeof(void*)); self->thunk_cptr_data = (void**)calloc(n_applies, sizeof(void*));
self->thunk_cptr_fn = (void**)malloc(n_applies * sizeof(void*)); self->thunk_cptr_fn = (void**)calloc(n_applies, sizeof(void*));
self->is_lazy = (int*)malloc(n_applies * sizeof(int)); self->is_lazy = (int*)calloc(n_applies, sizeof(int));
self->node_prereqs = (Py_ssize_t**)malloc(n_applies*sizeof(Py_ssize_t*)); self->node_prereqs = (Py_ssize_t**)calloc(n_applies, sizeof(Py_ssize_t*));
self->node_n_prereqs = (Py_ssize_t*)malloc(n_applies*sizeof(Py_ssize_t)); self->node_n_prereqs = (Py_ssize_t*)calloc(n_applies, sizeof(Py_ssize_t));
assert(self->node_prereqs); assert(self->node_prereqs);
assert(self->node_n_prereqs); assert(self->node_n_prereqs);
assert(self->is_lazy); assert(self->is_lazy);
assert(self->thunk_cptr_fn); assert(self->thunk_cptr_fn);
assert(self->thunk_cptr_data); assert(self->thunk_cptr_data);
// init these basic arrays
for (int i = 0; i < n_applies; ++i)
{
self->thunk_cptr_data[i] = NULL;
self->thunk_cptr_fn[i] = NULL;
self->is_lazy[i] = 1;
self->node_prereqs[i] = NULL;
self->node_n_prereqs[i] = 0;
}
for (int i = 0; i < n_applies; ++i) for (int i = 0; i < n_applies; ++i)
{ {
PyObject * thunk = PyList_GetItem(self->thunks, i); PyObject * thunk = PyList_GetItem(self->thunks, i);
...@@ -326,11 +333,6 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -326,11 +333,6 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
Py_DECREF(cthunk); Py_DECREF(cthunk);
// cthunk is kept alive by membership in self->thunks // cthunk is kept alive by membership in self->thunks
} }
else
{
self->thunk_cptr_fn[i] = NULL;
self->thunk_cptr_data[i] = NULL;
}
PyObject * el_i = PyList_GetItem(is_lazy, i); PyObject * el_i = PyList_GetItem(is_lazy, i);
self->is_lazy[i] = PyNumber_AsSsize_t(el_i, NULL); self->is_lazy[i] = PyNumber_AsSsize_t(el_i, NULL);
...@@ -359,7 +361,7 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -359,7 +361,7 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
if (PyList_Check(base_input_output_list)) if (PyList_Check(base_input_output_list))
{ {
Py_ssize_t n_inputs_outputs_base = PyList_Size(base_input_output_list); Py_ssize_t n_inputs_outputs_base = PyList_Size(base_input_output_list);
self->node_inputs_outputs_base = (Py_ssize_t*)malloc(n_inputs_outputs_base*sizeof(Py_ssize_t)); self->node_inputs_outputs_base = (Py_ssize_t*)calloc(n_inputs_outputs_base,sizeof(Py_ssize_t));
assert(self->node_inputs_outputs_base); assert(self->node_inputs_outputs_base);
for (int i = 0; i < n_inputs_outputs_base; ++i) for (int i = 0; i < n_inputs_outputs_base; ++i)
{ {
...@@ -368,13 +370,13 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -368,13 +370,13 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
if (PyErr_Occurred()) return -1; if (PyErr_Occurred()) return -1;
self->node_inputs_outputs_base[i] = idx; self->node_inputs_outputs_base[i] = idx;
} }
self->node_n_inputs = (Py_ssize_t*)malloc(n_applies*sizeof(Py_ssize_t)); self->node_n_inputs = (Py_ssize_t*)calloc(n_applies,sizeof(Py_ssize_t));
assert(self->node_n_inputs); assert(self->node_n_inputs);
self->node_n_outputs = (Py_ssize_t*)malloc(n_applies*sizeof(Py_ssize_t)); self->node_n_outputs = (Py_ssize_t*)calloc(n_applies,sizeof(Py_ssize_t));
assert(self->node_n_outputs); assert(self->node_n_outputs);
self->node_inputs = (Py_ssize_t**)malloc(n_applies*sizeof(Py_ssize_t*)); self->node_inputs = (Py_ssize_t**)calloc(n_applies,sizeof(Py_ssize_t*));
assert(self->node_inputs); assert(self->node_inputs);
self->node_outputs = (Py_ssize_t**)malloc(n_applies*sizeof(Py_ssize_t*)); self->node_outputs = (Py_ssize_t**)calloc(n_applies,sizeof(Py_ssize_t*));
assert(self->node_outputs); assert(self->node_outputs);
for (int i = 0; i < n_applies; ++i) for (int i = 0; i < n_applies; ++i)
{ {
...@@ -406,11 +408,11 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -406,11 +408,11 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
// allocation for var_owner // allocation for var_owner
if (PyList_Check(var_owner)) if (PyList_Check(var_owner))
{ {
self->var_owner = (Py_ssize_t*)malloc(self->n_vars*sizeof(Py_ssize_t)); self->var_owner = (Py_ssize_t*)calloc(self->n_vars,sizeof(Py_ssize_t));
self->var_has_owner = (int*)malloc(self->n_vars*sizeof(int)); self->var_has_owner = (int*)calloc(self->n_vars,sizeof(int));
self->var_computed = (int*)malloc(self->n_vars*sizeof(int)); self->var_computed = (int*)calloc(self->n_vars,sizeof(int));
self->var_computed_cells = (PyObject**)malloc(self->n_vars*sizeof(PyObject*)); self->var_computed_cells = (PyObject**)calloc(self->n_vars,sizeof(PyObject*));
self->var_value_cells = (PyObject**)malloc(self->n_vars*sizeof(PyObject*)); self->var_value_cells = (PyObject**)calloc(self->n_vars,sizeof(PyObject*));
for (int i = 0; i < self->n_vars; ++i) for (int i = 0; i < self->n_vars; ++i)
{ {
PyObject * el_i = PyList_GetItem(var_owner, i); PyObject * el_i = PyList_GetItem(var_owner, i);
...@@ -438,6 +440,23 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds) ...@@ -438,6 +440,23 @@ CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
return -1; return -1;
} }
if (dependencies != Py_None)
{
self->dependencies = (Py_ssize_t**)calloc(self->n_vars, sizeof(Py_ssize_t *));
self->n_dependencies = (Py_ssize_t*)calloc(self->n_vars, sizeof(Py_ssize_t));
assert(self->dependencies);
assert(self->n_dependencies);
for (int i = 0; i < self->n_vars; ++i)
{
PyObject *tmp = PyList_GetItem(dependencies, i);
// refcounting - tmp is borrowed
if (unpack_list_of_ssize_t(tmp, &self->dependencies[i], &self->n_dependencies[i],
"dependencies"))
return -1;
}
}
if (unpack_list_of_ssize_t(output_vars, &self->output_vars, &self->n_output_vars, if (unpack_list_of_ssize_t(output_vars, &self->output_vars, &self->n_output_vars,
"output_vars")) "output_vars"))
return -1; return -1;
...@@ -537,191 +556,230 @@ static int c_call(CLazyLinker * self, Py_ssize_t node_idx, int verbose) ...@@ -537,191 +556,230 @@ static int c_call(CLazyLinker * self, Py_ssize_t node_idx, int verbose)
static static
int lazy_rec_eval(CLazyLinker * self, Py_ssize_t var_idx, PyObject*one, PyObject*zero) int lazy_rec_eval(CLazyLinker * self, Py_ssize_t var_idx, PyObject*one, PyObject*zero)
{ {
PyObject *rval = NULL;
int verbose = 0; int verbose = 0;
if (verbose) fprintf(stderr, "lazy_rec computing %i\n", (int)var_idx);
int err = 0; int err = 0;
if (verbose) fprintf(stderr, "lazy_rec computing %i\n", (int)var_idx);
if (self->var_computed[var_idx] || !self->var_has_owner[var_idx]) if (self->var_computed[var_idx] || !self->var_has_owner[var_idx])
return 0;
Py_ssize_t owner_idx = self->var_owner[var_idx];
// STEP 1: compute the pre-requirements of the node
// Includes input nodes for non-lazy ops.
for (int i = 0; i < self->node_n_prereqs[owner_idx]; ++i)
{ {
return 0; Py_ssize_t prereq_idx = self->node_prereqs[owner_idx][i];
if (!self->var_computed[prereq_idx])
{
err = lazy_rec_eval(self, prereq_idx, one, zero);
if (err) return err;
}
assert (self->var_computed[prereq_idx]);
} }
else
{
Py_ssize_t owner_idx = self->var_owner[var_idx];
// STEP 1: compute the pre-requirements of the node // STEP 2: compute the node itself
for (int i = 0; i < self->node_n_prereqs[owner_idx]; ++i) if (self->is_lazy[owner_idx])
{
// update the compute_map cells corresponding to the inputs of this thunk
for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
{ {
Py_ssize_t prereq_idx = self->node_prereqs[owner_idx][i]; int in_idx = self->node_inputs[owner_idx][i];
if (!self->var_computed[prereq_idx]) if (self->var_computed[in_idx])
{ {
err = lazy_rec_eval(self, prereq_idx, one, zero); Py_INCREF(one);
if (err) return err; err = PyList_SetItem(self->var_computed_cells[in_idx], 0, one);
}
else
{
Py_INCREF(zero);
err = PyList_SetItem(self->var_computed_cells[in_idx], 0, zero);
} }
assert (self->var_computed[prereq_idx]); if (err) goto fail;
} }
// STEP 2: compute the node itself rval = pycall(self, owner_idx, verbose);
if (self->is_lazy[owner_idx]) // refcounting - rval is new ref
//TODO: to prevent infinite loops
// - consider check that a thunk does not ask for an input that is already computed
if (rval == NULL)
{ {
// update the compute_map cells corresponding to the inputs of this thunk assert (PyErr_Occurred());
for (int i = 0; i < self->node_n_inputs[owner_idx] && (!err); ++i) err = 1;
goto fail;
}
//update the computed-ness of any output cells
for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
{
int out_idx = self->node_outputs[owner_idx][i];
PyObject * el_i = PyList_GetItem(self->var_computed_cells[out_idx], 0);
Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
if (PyErr_Occurred())
{ {
int in_idx = self->node_inputs[owner_idx][i]; err = -1;
if (self->var_computed[in_idx]) goto pyfail;
{ }
Py_INCREF(one); assert (N==0 || N==1);
err = PyList_SetItem(self->var_computed_cells[in_idx], 0, one); self->var_computed[out_idx] = N;
} }
else if (!self->var_computed[var_idx])
{
/*
* If self is not computed after the call, this means that some
* inputs are needed. Compute the ones on the returned list
* and try to compute the current node again (with recursive call).
* This allows a node to request more nodes more than once before
* finally yielding a result.
*/
if (!PyList_Check(rval))
{
//TODO: More helpful error to help find *which node* made this
// bad thunk
PyErr_SetString(PyExc_TypeError,
"lazy thunk should return a list");
err = 1;
goto pyfail;
}
if (!PyList_Size(rval))
{
PyErr_SetString(PyExc_ValueError,
"lazy thunk returned empty list without computing output");
err = 1;
goto pyfail;
}
for (int i = 0; i < PyList_Size(rval); ++i)
{
PyObject * el_i = PyList_GetItem(rval, i);
Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
if (PyErr_Occurred())
{ {
Py_INCREF(zero); err = 1;
err = PyList_SetItem(self->var_computed_cells[in_idx], 0, zero); goto pyfail;
} }
assert (N <= self->node_n_inputs[owner_idx]);
Py_ssize_t input_idx = self->node_inputs[owner_idx][N];
err = lazy_rec_eval(self, input_idx, one, zero);
if (err) goto pyfail;
} }
if (err)
Py_DECREF(rval);
/*
* We intentionally skip all the end-of-function processing
* (mark outputs, GC) as it will be performed by the call
* that actually manages to compute the result.
*/
return lazy_rec_eval(self, var_idx, one, zero);
}
Py_DECREF(rval);
}
else //owner is not a lazy op. Ensure all intputs are evaluated.
{
// loop over inputs to owner
// call lazy_rec_eval on each one that is not computed.
// if there's an error, pass it up the stack
for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
{
Py_ssize_t input_idx = self->node_inputs[owner_idx][i];
if (!self->var_computed[input_idx])
{ {
set_position_of_error(self, owner_idx); err = lazy_rec_eval(self, input_idx, one, zero);
return err; if (err) return err;
} }
assert (self->var_computed[input_idx]);
}
PyObject * rval = pycall(self, owner_idx, verbose); // call the thunk for this owner.
// refcounting - rval is new ref if (self->thunk_cptr_fn[owner_idx])
//TODO: to prevent infinite loops {
// - consider check that a thunk does not ask for an input that is already computed err = c_call(self, owner_idx, verbose);
if (rval) //call returned normally (no exception) if (err) goto fail;
}
else
{
rval = pycall(self, owner_idx, verbose);
//rval is new ref
if (rval) //pycall returned normally (no exception)
{ {
//update the computed-ness of any output cells if (rval == Py_None)
for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
{ {
int out_idx = self->node_outputs[owner_idx][i]; Py_DECREF(rval); //ignore a return of None
PyObject * el_i = PyList_GetItem(self->var_computed_cells[out_idx], 0);
Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
if (PyErr_Occurred())
{
Py_DECREF(rval);
set_position_of_error(self, owner_idx);
return -1;
}
assert (N==0 || N==1);
self->var_computed[out_idx] = N;
} }
if (!self->var_computed[var_idx]) else if (PyList_Check(rval))
{ {
if (PyList_Check(rval)) PyErr_SetString(PyExc_TypeError,
{ "non-lazy thunk should return None, not list");
if (PyList_Size(rval)) err = 1;
{ goto pyfail;
for (int i = 0; i < PyList_Size(rval) && (!err); ++i) }
{ else // don't know what it returned, but it wasn't right.
PyObject * el_i = PyList_GetItem(rval, i); {
Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError); PyErr_SetObject(PyExc_TypeError, rval);
if (PyErr_Occurred()) err = 1;
{ // We don't release rval since we put it in the error above
err = 1; goto fail;
}
else
{
assert (N <= self->node_n_inputs[owner_idx]);
Py_ssize_t input_idx = self->node_inputs[owner_idx][N];
err = lazy_rec_eval(self, input_idx, one, zero);
}
}
if (!err)
err = lazy_rec_eval(self, var_idx, one, zero);
}
else
{
PyErr_SetString(PyExc_ValueError,
"lazy thunk returned empty list without computing output");
err = 1;
set_position_of_error(self, owner_idx);
}
Py_DECREF(rval);
set_position_of_error(self, owner_idx);
return err;
}
else // don't know what it returned, but it wasn't right.
{
//TODO: More helpful error to help find *which node* made this
// bad thunk
PyErr_SetString(PyExc_TypeError,
"lazy thunk should list");
Py_DECREF(rval);
set_position_of_error(self, owner_idx);
return 1;
}
} }
Py_DECREF(rval);
} }
else // pycall returned NULL (internal error) else // pycall returned NULL (internal error)
{ {
assert (PyErr_Occurred()); err = 1;
set_position_of_error(self, owner_idx); goto fail;
return 1;
} }
} }
else //owner is not a lazy op. Ensure all intputs are evaluated. }
// loop over all outputs and mark them as computed
for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
{
self->var_computed[self->node_outputs[owner_idx][i]] = 1;
}
// Free vars that are not needed anymore
if (self->allow_gc)
{
for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
{ {
// loop over inputs to owner int cleanup = 1;
// call lazy_rec_eval on each one that is not computed. Py_ssize_t i_idx = self->node_inputs[owner_idx][i];
// if there's an error, pass it up the stack if (!self->var_has_owner[i_idx])
for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i) continue;
for (int j = 0; j < self->n_output_vars; ++j)
{ {
Py_ssize_t input_idx = self->node_inputs[owner_idx][i]; if (i_idx == self->output_vars[j])
if (!self->var_computed[input_idx])
{ {
err = lazy_rec_eval(self, input_idx, one, zero); cleanup = 0;
if (err) return err; break;
} }
assert (self->var_computed[input_idx]);
} }
if (!cleanup) continue;
// call the thunk for this owner. for (int j = 0; j < self->n_dependencies[i_idx]; ++j)
if (self->thunk_cptr_fn[owner_idx])
{ {
err = c_call(self, owner_idx, verbose); if (!self->var_computed[self->dependencies[i_idx][j]])
}
else
{
PyObject * rval = pycall(self, owner_idx, verbose);
//rval is new ref
if (rval) //pycall returned normally (no exception)
{
if (rval == Py_None)
{
Py_DECREF(rval); //ignore a return of None
}
else if (PyList_Check(rval))
{
PyErr_SetString(PyExc_TypeError,
"non-lazy thunk should return None, not list");
err=1;
set_position_of_error(self, owner_idx);
Py_DECREF(rval);
}
else // don't know what it returned, but it wasn't right.
{
PyErr_SetObject(PyExc_TypeError, rval);
err=1;
set_position_of_error(self, owner_idx);
}
}
else // pycall returned NULL (internal error)
{ {
err=1; cleanup = 0;
set_position_of_error(self, owner_idx); break;
} }
} }
} if (!cleanup) continue;
// loop over all outputs and mark them as computed Py_INCREF(Py_None);
for (int i = 0; i < self->node_n_outputs[owner_idx] && (!err); ++i) err = PyList_SetItem(self->var_value_cells[i_idx], 0, Py_None);
{ if (err) goto fail;
self->var_computed[self->node_outputs[owner_idx][i]] = 1;
} }
} }
return 0;
pyfail:
Py_DECREF(rval);
fail:
set_position_of_error(self, owner_idx);
return err; return err;
} }
PyObject * PyObject *
...@@ -803,6 +861,17 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds) ...@@ -803,6 +861,17 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
Py_INCREF(tmp); Py_INCREF(tmp);
PyList_SetItem(self->var_value_cells[dst], 0, tmp); PyList_SetItem(self->var_value_cells[dst], 0, tmp);
} }
// Free all intermediate values (outputs and updates have
// already been copied above).
if (self->allow_gc)
{
for (int i = 0; i < self->n_vars; ++i)
{
Py_INCREF(Py_None);
PyList_SetItem(self->var_value_cells[i], 0, Py_None);
}
}
} }
} }
Py_DECREF(one); Py_DECREF(one);
...@@ -886,7 +955,7 @@ static PyTypeObject lazylinker_ext_CLazyLinkerType = { ...@@ -886,7 +955,7 @@ static PyTypeObject lazylinker_ext_CLazyLinkerType = {
static PyObject * get_version(PyObject *dummy, PyObject *args) static PyObject * get_version(PyObject *dummy, PyObject *args)
{ {
PyObject *result = PyFloat_FromDouble(0.13); PyObject *result = PyFloat_FromDouble(0.14);
return result; return result;
} }
......
...@@ -13,7 +13,7 @@ if config.compiledir not in sys.path: ...@@ -13,7 +13,7 @@ if config.compiledir not in sys.path:
sys.path.append(config.compiledir) sys.path.append(config.compiledir)
force_compile = False force_compile = False
version = 0.13 # must match constant returned in function get_version() version = 0.14 # must match constant returned in function get_version()
try: try:
......
...@@ -184,9 +184,8 @@ class Stack(VM): ...@@ -184,9 +184,8 @@ class Stack(VM):
""" """
def __init__(self, nodes, thunks, pre_call_clear, def __init__(self, nodes, thunks, pre_call_clear,
storage_map, compute_map, storage_map, compute_map, env, allow_gc,
env, allow_gc, dependencies=None, callback=None):
callback=None):
super(Stack, self).__init__(nodes, thunks, pre_call_clear) super(Stack, self).__init__(nodes, thunks, pre_call_clear)
self.allow_gc = allow_gc self.allow_gc = allow_gc
...@@ -211,16 +210,11 @@ class Stack(VM): ...@@ -211,16 +210,11 @@ class Stack(VM):
for prereq in ords[node]: for prereq in ords[node]:
node.destroy_dependencies += prereq.outputs node.destroy_dependencies += prereq.outputs
dependencies = self.dependencies = {} self.dependencies = dependencies
for k in storage_map:
dependencies[k] = [] if self.allow_gc and self.dependencies is None:
if k.owner and k.clients: raise ValueError("Must set dependencies when using GC")
ls = []
is_output = 0
for cl in k.clients:
if cl[0] is not 'output':
ls += cl[0].outputs
dependencies[k] += ls
if config.profile: if config.profile:
self.memory_size_map = {"nt8": 1, "t16": 2, "t32": 4, self.memory_size_map = {"nt8": 1, "t16": 2, "t32": 4,
"t64": 8, "128": 16} "t64": 8, "128": 16}
...@@ -454,6 +448,19 @@ class VM_Linker(link.LocalLinker): ...@@ -454,6 +448,19 @@ class VM_Linker(link.LocalLinker):
# admittedly confusing, and it could use some cleaning up. The base # admittedly confusing, and it could use some cleaning up. The base
# Linker object should probably go away completely. # Linker object should probably go away completely.
def compute_gc_dependencies(self, smap):
    """Build the per-variable dependency lists used for garbage collection.

    Every key ``k`` of ``smap`` gets an entry in the returned dict.  For a
    variable that has both an owner and clients, the entry is the
    concatenation of the ``outputs`` of each client node, in client order.
    Client entries whose first element equals the string ``'output'`` are
    skipped.  Every other variable maps to an empty list.

    :param smap: iterable of variables (typically the storage map); each
        variable must expose ``owner`` and ``clients`` attributes, and each
        client is indexable with the consuming node (or ``'output'``) at
        position 0.
    :returns: dict mapping each variable of ``smap`` to a list of variables.
    """
    dependencies = {}
    for k in smap:
        dependencies[k] = []
        if k.owner and k.clients:
            for cl in k.clients:
                # Compare by value: an identity test against a string
                # literal only works when both strings happen to be
                # interned.
                if cl[0] != 'output':
                    dependencies[k] += cl[0].outputs
    return dependencies
def make_vm(self, nodes, thunks, def make_vm(self, nodes, thunks,
input_storage, output_storage, storage_map, input_storage, output_storage, storage_map,
post_thunk_clear, post_thunk_clear,
...@@ -467,10 +474,14 @@ class VM_Linker(link.LocalLinker): ...@@ -467,10 +474,14 @@ class VM_Linker(link.LocalLinker):
if self.callback is not None: if self.callback is not None:
if self.use_cloop: if self.use_cloop:
logger.warn('CLoop does not support callback, using Stack VM.') logger.warn('CLoop does not support callback, using Stack VM.')
deps = None
if self.allow_gc:
deps = self.compute_gc_dependencies(storage_map)
vm = Stack( vm = Stack(
nodes, thunks, pre_call_clear, nodes, thunks, pre_call_clear,
storage_map, compute_map, storage_map, compute_map,
self.env, self.allow_gc, self.env, self.allow_gc,
dependencies=deps,
callback=self.callback) callback=self.callback)
elif self.use_cloop: elif self.use_cloop:
# create a map from nodes to ints and vars to ints # create a map from nodes to ints and vars to ints
...@@ -500,6 +511,14 @@ class VM_Linker(link.LocalLinker): ...@@ -500,6 +511,14 @@ class VM_Linker(link.LocalLinker):
assert type(storage_map_list[0]) is list assert type(storage_map_list[0]) is list
assert type(compute_map_list[0]) is list assert type(compute_map_list[0]) is list
if self.allow_gc:
dependency_map=self.compute_gc_dependencies(storage_map)
dependency_map_list = [
[vars_idx[d] for d in dependency_map[vars_idx_inv[i]]]
for i in xrange(len(vars_idx_inv))]
else:
dependency_map_list = None
# build the pointers to node inputs and offsets # build the pointers to node inputs and offsets
base_input_output_list = [] base_input_output_list = []
node_n_inputs = [] node_n_inputs = []
...@@ -566,6 +585,7 @@ class VM_Linker(link.LocalLinker): ...@@ -566,6 +585,7 @@ class VM_Linker(link.LocalLinker):
node_prereqs=node_prereqs, node_prereqs=node_prereqs,
node_output_size=node_output_size, node_output_size=node_output_size,
update_storage=update_storage, update_storage=update_storage,
dependencies=dependency_map_list,
) )
assert c0 == sys.getrefcount(node_n_inputs) assert c0 == sys.getrefcount(node_n_inputs)
else: else:
...@@ -583,10 +603,14 @@ class VM_Linker(link.LocalLinker): ...@@ -583,10 +603,14 @@ class VM_Linker(link.LocalLinker):
thunks, thunks,
pre_call_clear) pre_call_clear)
else: else:
deps = None
if self.allow_gc:
deps = self.compute_gc_dependencies(storage_map)
vm = Stack( vm = Stack(
nodes, thunks, pre_call_clear, nodes, thunks, pre_call_clear,
storage_map, compute_map, storage_map, compute_map,
self.env, self.allow_gc self.env, self.allow_gc,
dependencies=deps
) )
return vm return vm
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论