merge

aa00203b · Olivier Breuleux · 7d181357 · ab3f5eaf · aa00203b · aa00203b
--- a/_test_compile.py
+++ b/_test_compile.py
@@ -169,194 +169,6 @@ class T_OpFromGraph(unittest.TestCase):
        assert numpy.all(11.0 == fn(xv, yv, zv))
-class T_state(unittest.TestCase):
-    def test_accumulator(self):
-        """Test low-level interface with state."""
-        x = T.scalar('x')
-        s = T.scalar('s')
-        fn, states = program_states(inputs = [x], outputs = [], states = [(s, 0, s+x)])
-        sum = 0
-        for inc in [1, 4, 5,23, -324]:
-            sum += inc
-            fn.run([inc], states)
-            assert sum == states[0].value
-    def test_misc0(self):
-        fn_inc, states_inc = function_states(\
-                inputs = [x], outputs = [], states = [(s, 0, s+x)])
-        fn_inc2, states_inc2 = function_states(\
-                inputs = [x], outputs = [], states = [(s, 0, s+x)])
-        fn_inc_copy = copy.copy(fn_inc) #USE fn copy
-        # run() is like __call__, but requires an explicit state argument
-        fn_inc.run([5], states_inc) #run on own state object
-        fn_inc2.run([3], states_inc) #run on compatible state object
-        assert states_inc[0].value == 8
-        states_inc_copy = copy.copy(states_inc) #USE state copy
-        fn_inc_copy.run([2], states_inc_copy)
-        assert states_inc[0].value == 10   #compatible
-        fn_dec, states_dec = function_states(\
-                inputs = [x], outputs = [], states = [(s, states_inc[0], s-x)])
-        try:
-            fn_inc.run([5], states_dec) # wrong kind of state for given program
-            self.fail("fn accepted an invalid state argument")
-        except SpecificException:
-            raise NotImplementedError() #TODO
-        except Exception:
-            self.fail("fn accepted an invalid state argument")
-    def test_perceptron(self):
-        """Test high-level state interface."""
-        mu0 = numpy.array([1.0,0.0])
-        mu1 = numpy.array([0.0,0.1])
-        si0 = numpy.ones_like(mu0) #unit variance
-        si1 = numpy.ones_like(mu1) #unit variance
-        #implicit internal state
-        label = random.bernoulli(0.5) 
-        #implicit internal state for each DiagGaussian
-        x = label * random.DiagGaussian(mu0, si0) \
-                + (1 - label) * random.DiagGaussian(mu1,si1)
-        w = T.tensor.dvector()
-        b = T.tensor.dscalar()
-        lr = 0.01
-        decision = dot(x,w) + b > 0
-        new_w = w + neq(label, decision) * lr * x
-        new_b = b + neq(label, decision) * (label * (-lr) + (1-label)*lr)
-        init_w = numpy.array([0.0, 0.0])
-        init_b = 0.0
-        io_stream = T.function([], [label, x])
-        perceptron_learn = T.function([x, label], [decision], 
-                state={
-                    'w':(w, init_w, update_w),
-                    'b':(b, init_b, update_b),
-                    'lr':(lr, 0.01)})
-        perceptron_use = T.function([x], [decision],
-                state={
-                    'w':(w, perceptron_learn.shared['w']),
-                    'b':(b, perceptron_learn.shared['b'])})
-        errs = 0
-        for i in xrange(100):
-            il, ix = io_stream()
-            d0 = perceptron_use(ix)
-            d1 = perceptron_learn(ix, il)
-            assert d0 == d1
-            errs += (d0 != d1)
-            print d0
-        print 'errs =', errs 
-    def test_shared(self):
-        """Test shared r/w state."""
-        x = T.scalar('x')
-        s = T.scalar('s')
-        fn_inc, states_inc = function_states(\
-                inputs = [x], outputs = [], states = [(s, 0, s+x)])
-        fn_dec, states_dec = function_states(\
-                inputs = [x], outputs = [], states = [(s, states_inc[0], s-x)])
-        sum = 0
-        for inc in [1, 4, 5,23, -324]:
-            sum += inc
-            fn_inc.run([inc], states_inc)
-            assert sum == states_inc[0].value
-        a = sum
-        for inc in [1, 4, 5,23, -324]:
-            sum -= inc
-            fn_dec(inc)
-        assert sum == 0
-        assert states_inc[0].value == sum
-        for inc in [1, 4, 5,23, -324]:
-            sum -= inc
-            fn_dec(inc)
-        assert sum == -a
-        assert states_inc[0].value == sum
-class T_dict_interface(unittest.TestCase):
-    def test_keyword(self):
-        x = T.scalar('x')
-        y = T.scalar('y')
-        s = T.scalar('s')
-        fn = function(input_kw = {'a':x, 'b':y}, outputs = [], state = {'s':(s, 0, s+x/y)})
-        try:
-            fn(1, 1)
-            self.fail("non-keyword call accepted!")
-        except SpecificException:
-            raise NotImplementedError()
-        except Exception:
-            self.fail("non-keyword call accepted!")
-        try:
-            fn(a=1)
-            self.fail("incomplete call accepted!")
-        except SpecificException:
-            raise NotImplementedError()
-        except Exception:
-            self.fail("incomplete call accepted!")
-        try:
-            fn(a=1, b=1, c=1)
-            self.fail("overcomplete call accepted!")
-        except SpecificException:
-            raise NotImplementedError()
-        except Exception:
-            self.fail("overcomplete call accepted!")
-    def test_aliased_state(self):
-        """Test keyword input and copy."""
-        x = T.scalar('x')
-        y = T.scalar('y')
-        s = T.scalar('s')
-        fn = function(input_kw = {'a':x, 'b':y}, outputs = [], state = {'s':(s, 0, s+x/y)})
-        fn2 = fn.copy()
-        fn3 = fn.copy()
-        fn(a=2, b=5)
-        fn2(a=5, b=2)
-        fn3(b=2, a=5)
-        assert fn.state['s'] == 2.0/5
-        assert fn2.state['s'] == 5.0/2 
-        assert fn3.state['s'] == 5.0/2
-        #fn and fn3 use the same sort of state, so this is OK.
-        fn3.state = fn.state 
-        fn.state['s'] = 0
-        fn(a=1, b=1)   #increment the shared state
-        assert fn3.state['s'] == 1
-        fn3(a=-1, b=1) #decrement the shared state
-        assert fn.state['s'] == 0
 if __name__ == '__main__':
    if 1:

--- a/_test_tensor.py
+++ b/_test_tensor.py
@@ -30,7 +30,7 @@ def make_tester(name, op, expected, checks = {}, good = {}, bad_build = {}, bad_
        grad = good
    _op, _expected, _checks, _good, _bad_build, _bad_runtime, _grad = op, expected, checks, good, bad_build, bad_runtime, grad
    class Checker(unittest.TestCase):
        op = _op
@@ -67,7 +67,7 @@ def make_tester(name, op, expected, checks = {}, good = {}, bad_build = {}, bad_
                    raise type, exc_value, traceback
                expecteds = self.expected(*inputs)
                try:
                    results = f(*inputs)
                except:
@@ -129,7 +129,7 @@ def make_tester(name, op, expected, checks = {}, good = {}, bad_build = {}, bad_
                    results = f(*inputs)
                except:
                    return
                self.fail("Test %s::%s: Successful call on the following bad inputs: %s"
                          % (self.op, testname, inputs))
@@ -148,7 +148,7 @@ def make_tester(name, op, expected, checks = {}, good = {}, bad_build = {}, bad_
    Checker.__name__ = name
    return Checker
 rand = lambda *shape: 2 * numpy.random.rand(*shape) - 1
 randint = lambda *shape: numpy.random.random_integers(-5, 5, shape)
@@ -513,7 +513,7 @@ DotTester = make_tester(name = 'DotTester',
 def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001,
        linker='c&py'):
-    """testcase.failUnless( analytic gradient matches finite-diff gradient) """
+    """testcase.failUnless(analytic gradient matches finite-diff gradient)"""
    pt = [numpy.asarray(p) for p in pt]
    for test_num in xrange(n_tests):
@@ -550,7 +550,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, to
                print op
        grad_fn = function(tensor_pt, symbolic_grad,linker=linker)
        analytic_grad = grad_fn(*pt)
        if not isinstance(analytic_grad, (list, tuple)):
            analytic_grad = [analytic_grad]
@@ -697,7 +697,7 @@ class T_transpose(unittest.TestCase):
        #test aliasing
        tval += 55.0
        self.failUnless(n.data == 1.0)
    def test1(self):
        n = as_tensor(numpy.ones(5))
        t = transpose(n)
@@ -708,7 +708,7 @@ class T_transpose(unittest.TestCase):
        #test aliasing
        tval += 55.0
        self.failUnless(n.data[0] == 1.0)
    def test2(self):
        n = as_tensor(numpy.ones((5,3)))
        t = transpose(n)
@@ -749,7 +749,7 @@ class T_subtensor(unittest.TestCase):
            self.failUnless(e[0] is Subtensor.e_invalid)
            return
        self.fail()
    def test1_err_bounds(self):
        n = as_tensor(numpy.ones(3))
        t = n[7]
@@ -1025,7 +1025,7 @@ class _test_bitwise(unittest.TestCase):
        r = numpy.asarray([0,1,0,1], dtype = 'int8')
        v = fn(l, r)
        self.failUnless(numpy.all(v == (operator.and_(l, r))), (l, r, v))
    def test_inv(self):
        x, y = bvector(), bvector()
        fn = function([x,y], [~x])
@@ -1473,7 +1473,7 @@ class t_gemm(unittest.TestCase):
        A = self.rand(4,5)[:,:4]
        B = self.rand(4,5)[:,:4]
        C = self.rand(4,5)[:,:4]
        def t(z,x,y,a=1.0, b=0.0,l='c|py',dt='float64'):
            z,a,x,y,b = [numpy.asarray(p,dtype=dt) for p in z,a,x,y,b]
            z_orig = z.copy()

--- a/elemwise.py
+++ b/elemwise.py
@@ -47,7 +47,7 @@ class DimShuffle(Op):
       the second of the resulting tensor, etc. If the tensor has
       shape (20, 30, 40), the resulting tensor will have dimensions
       (1, 40, 1, 20, 30). (AxBxC tensor is mapped to 1xCx1xAxB tensor)
      DimShuffle((True, False), [1])
       This op will only work on 2d tensors with the first dimension broadcastable.
@@ -65,7 +65,7 @@ class DimShuffle(Op):
      DimShuffle((False, False), [0, 'x', 1]) -> AxB to Ax1xB
      DimShuffle((False, False), [1, 'x', 0]) -> AxB to Bx1xA
    """
    def __init__(self, input_broadcastable, new_order, inplace = False):
        """
        Usage: DimShuffle(input_broadcastable, new_order, inplace = False)
@@ -128,11 +128,11 @@ class DimShuffle(Op):
                ob.append(True)
            else:
                ob.append(ib[value])
        output = Tensor(dtype = input.type.dtype,
                        broadcastable = ob).make_result()
        return Apply(self, [input], [output])
    def __eq__(self, other):
        # it's probably not necessary to compare input_broadcastable
        return type(self) == type(other) \
@@ -188,7 +188,7 @@ class DimShuffle(Op):
 class Elemwise(Op):
    """
    Generalizes a scalar op to tensors.
    All the inputs must have the same number of dimensions. When the
    Op is performed, for each dimension, each input's size for that
    dimension must be the same. As a special case, it can also be 1
@@ -215,7 +215,7 @@ class Elemwise(Op):
    def __init__(self, scalar_op, inplace_pattern = {}, name = None):
        """
        Usage: Elemwise(scalar_op, inplace_pattern = {})
        * scalar_op: an instance of a subclass of scalar.ScalarOp which works uniquely on
                     scalars
        * inplace_pattern: a dictionary that maps the index of an output to the
@@ -238,7 +238,7 @@ class Elemwise(Op):
        using DimShuffle.
        """
-        inputs = map(as_tensor, inputs)        
+        inputs = map(as_tensor, inputs)
        shadow = self.scalar_op.make_node(*[Scalar(dtype = t.type.dtype)() for t in inputs])
        target_length = max([input.type.ndim for input in inputs])
@@ -254,7 +254,7 @@ class Elemwise(Op):
                args.append(DimShuffle(input.type.broadcastable, ['x']*difference + range(length), inplace = True)(input))
        inputs = args
-#         # Following conditions should always be true? 
+#         # Following conditions should always be true?
 #         try:
 #             assert len(set([len(input.type.broadcastable) for input in inputs])) == 1
 #         except (AssertionError, AttributeError):
@@ -317,7 +317,7 @@ class Elemwise(Op):
                ret.append(None)
                continue
            r = transform(scalar_igrad)
            # list of all the dimensions that are broadcastable for that input so we
            # can sum over them
            # todo: only count dimensions that were effectively broadcasted
@@ -382,7 +382,7 @@ class Elemwise(Op):
        inames = gof.utils.uniq(inames)
        inputs = gof.utils.uniq(node.inputs)
        defines = ""
        undefs = ""
        dmap = dict([(node.outputs[i], [node.inputs[o]]) for i, o in self.inplace_pattern.items()])
@@ -402,7 +402,7 @@ class Elemwise(Op):
            aliased_outputs, aliased_onames = aliased
        else:
            aliased_outputs, aliased_onames = [], []
        orders = [[x and 'x' or i for i, x in enumerate(input.type.broadcastable)] for input in inputs]
        nnested = len(orders[0])
        sub = dict(sub)
@@ -419,7 +419,7 @@ class Elemwise(Op):
            alloc += cgen.make_declare([range(nnested)], [odtype], dict(sub, lv0 = oname))
            alloc += cgen.make_alloc(orders, odtype, sub)
            alloc += cgen.make_checks([range(nnested)], [odtype], dict(sub, lv0 = oname))
        for output, oname in zip(aliased_outputs, aliased_onames):
            iname = inames[inputs.index(dmap[output][0])]
            alloc += """
@@ -454,7 +454,7 @@ class Elemwise(Op):
            all_code = [code]
        loop = cgen.make_loop(orders + [range(nnested)] * len(real_onames), idtypes + list(real_odtypes), all_code, sub)
        return decl, checks, alloc, loop
    def c_code(self, node, name, inames, onames, sub):
        code = "\n".join(self._c_all(node, name, inames, onames, sub))
        return code
@@ -468,7 +468,7 @@ class Elemwise(Op):
 class CAReduce(Op):
    """
    Reduces a scalar operation along the specified axis(es).
    The output will have the same shape as the input minus the reduced
    dimensions. It will contain the result of accumulating all values
    over the reduced dimensions using the specified scalar op.
@@ -506,7 +506,7 @@ class CAReduce(Op):
        else:
            self.axis = axis
        self.ufunc = numpy.frompyfunc(scalar_op.impl, 2, 1)
    def make_node(self, input):
        input = as_tensor(input)
        axis = self.axis
@@ -524,13 +524,13 @@ class CAReduce(Op):
            return hash(self.scalar_op)
        else:
            return hash(self.scalar_op) ^ hash(tuple(self.axis))
    def __str__(self):
        if self.axis is not None:
            return "Reduce{%s}{%s}" % (self.scalar_op, ", ".join(str(x) for x in self.axis))
        else:
            return "Reduce{%s}" % self.scalar_op
    def perform(self, node, (input, ), (output, )):
        axis = self.axis
        if axis is None:
@@ -551,7 +551,7 @@ class CAReduce(Op):
        iname = inames[0]
        oname = onames[0]
        idtype = input.type.dtype_specs()[1]
        odtype = output.type.dtype_specs()[1]
@@ -565,7 +565,7 @@ class CAReduce(Op):
        order1 = [i for i in xrange(input.type.ndim) if i not in axis]
        order = order1 + list(axis)
        nnested = len(order1)
        sub = dict(sub)
@@ -610,10 +610,9 @@ class CAReduce(Op):
                all_code = [("", "")] * nnested + [(task0_decl, "")] + [("", "")] * (len(axis) - 2) + [("", code1), ""]
        else:
            all_code = [task0_decl + code1]
        loop = cgen.make_loop([order, range(nnested) + ['x'] * len(axis)], [idtype, odtype], all_code, sub)
        return decl, checks, alloc, loop
    def c_code(self, node, name, inames, onames, sub):
        code = "\n".join(self._c_all(node, name, inames, onames, sub))
        return code

--- a/gof/cc.py
+++ b/gof/cc.py
@@ -140,7 +140,7 @@ def struct_gen(args, struct_builders, blocks, sub):
     * sub -> dictionary used to template the struct.
       * failure_var -> must contain a variable name to use for
         the failure code.
    In a nutshell, this returns code for a struct that represents
    a function with state. The state's initialization and destruction
    are handled by struct_builders and the actual behavior of the
@@ -173,7 +173,7 @@ def struct_gen(args, struct_builders, blocks, sub):
    storage_incref = "\n".join(["Py_XINCREF(%s);" % arg for arg in args])
    # decrements the storage's refcount in the destructor
    storage_decref = "\n".join(["Py_XDECREF(this->%s);" % arg for arg in args])
    args_names = ", ".join(args)
    args_decl = ", ".join(["PyObject* %s" % arg for arg in args])
@@ -205,7 +205,7 @@ def struct_gen(args, struct_builders, blocks, sub):
        // The failure code is returned to index what code block failed.
        return %(failure_var)s;
        """ % sub
    sub = dict(sub)
    sub.update(locals())
@@ -217,7 +217,7 @@ def struct_gen(args, struct_builders, blocks, sub):
        %(storage_decl)s
        %(struct_decl)s
        %(name)s() {}
        ~%(name)s(void) {
            cleanup();
@@ -321,7 +321,7 @@ def struct_result_codeblocks(result, policies, id, symbol_table, sub):
        to the table.
    sub -> dictionary for use by L{CodeBlock}.
    """
    name = "V%i" % id
    symbol_table[result] = name
    sub = dict(sub)
@@ -340,7 +340,7 @@ def struct_result_codeblocks(result, policies, id, symbol_table, sub):
 class CLinker(link.Linker):
    """
    Creates C code for an env, compiles it and returns callables
    through make_thunk and make_function that make use of the compiled
    code.
@@ -374,7 +374,7 @@ class CLinker(link.Linker):
        self.orphans = list(r for r in self.results if isinstance(r, graph.Value) and r not in self.inputs) #list(env.orphans.difference(self.outputs))
        self.temps = list(set(self.results).difference(self.inputs).difference(self.outputs).difference(self.orphans))
        self.node_order = env.toposort()
    def code_gen(self):
        """
        Generates code for a struct that does the computation of the env and
@@ -395,7 +395,7 @@ class CLinker(link.Linker):
        no_recycling = self.no_recycling
        env = self.env
        consts = []
        symbol = {}
@@ -469,7 +469,7 @@ class CLinker(link.Linker):
            # each Result generates two CodeBlocks, one to declare/initialize/destroy struct variables
            # and the other to declare/extract/cleanup each time the function is run.
            # Typically, only one of the two actually does anything (see all the possible combinations above)
            init_tasks.append((result, 'init', id))
            init_blocks.append(builder)
@@ -479,7 +479,7 @@ class CLinker(link.Linker):
            id += 2
        for node in self.node_order:
            # We populate sub with a mapping from the variable names specified by the op's c_var_names
            # method to the actual variable names that we will use.
 ##            ivnames, ovnames = op.c_var_names()
@@ -506,7 +506,7 @@ class CLinker(link.Linker):
            try: cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)
            except utils.AbstractFunctionError:
                cleanup = ""
            blocks.append(CodeBlock("", behavior, cleanup, sub))
            tasks.append((node, 'code', id))
            id += 1
@@ -515,7 +515,7 @@ class CLinker(link.Linker):
        # must only be passed once because they are mapped to the same name.
        args = []
        args += ["storage_%s" % symbol[result] for result in utils.uniq(self.inputs + self.outputs + self.orphans)]
        struct_code = struct_gen(args, init_blocks, blocks, dict(failure_var = failure_var, name = "<<<<NAME>>>>"))
        # The hash calculated on the code identifies it so weave can cache properly.
@@ -535,12 +535,12 @@ class CLinker(link.Linker):
        self.blocks = blocks
        self.tasks = tasks
        all = self.inputs + self.outputs + self.orphans
        # List of indices that should be ignored when passing the arguments
        # (basically, everything that the previous call to uniq eliminated)
        self.dupidx = [i for i, x in enumerate(all) if all.count(x) > 1 and all.index(x) != i]
        return self.struct_code
    def support_code(self):
        """
        Returns a list of support code strings that are needed by
@@ -580,7 +580,7 @@ class CLinker(link.Linker):
            try: ret += x.c_headers()
            except utils.AbstractFunctionError: pass
        return ret
    def libraries(self):
        """
        Returns a list of libraries that are needed by one
@@ -597,7 +597,7 @@ class CLinker(link.Linker):
    def __compile__(self, input_storage = None, output_storage = None):
        """
        Compiles this linker's env.
        @type input_storage: list or None
        @param input_storage: list of lists of length 1. In order to use
            the thunk returned by __compile__, the inputs must be put in
@@ -633,7 +633,7 @@ class CLinker(link.Linker):
        Compiles this linker's env and returns a function to perform the
        computations, as well as lists of storage cells for both the
        inputs and outputs.
        @type input_storage: list or None
        @param input_storage: list of lists of length 1. In order to use
            the thunk returned by __compile__, the inputs must be put in
@@ -653,7 +653,7 @@ class CLinker(link.Linker):
        """
        cthunk, in_storage, out_storage, error_storage = self.__compile__(input_storage, output_storage)
        return _execute(cthunk, self.init_tasks, self.tasks, error_storage), in_storage, out_storage
    def cthunk_factory(self, error_storage, in_storage, out_storage):
        """
        error_storage -> list of length 3
@@ -669,14 +669,14 @@ class CLinker(link.Linker):
        # check if we already compiled this
        if not getattr(self, 'instantiate', False):
            self.code_gen()
            module_name = self.hash
            # Eliminate duplicate inputs and outputs from the storage that we will pass to instantiate
            out_storage = [x for i, x in enumerate(out_storage) if (i+len(in_storage)) not in self.dupidx]
            in_storage = [x for i, x in enumerate(in_storage) if i not in self.dupidx]
            cthunk = object() # dummy so weave can get the type
            mod = weave.ext_tools.ext_module(module_name)
@@ -739,7 +739,7 @@ class CLinker(link.Linker):
            module = __import__("%s" % (module_name), {}, {}, [module_name])
            self.instantiate = module.instantiate
-        else:            
+        else:
            # Eliminate duplicate inputs and outputs from the storage that we will pass to instantiate
            out_storage = [x for i, x in enumerate(out_storage) if (i+len(in_storage)) not in self.dupidx]
            in_storage = [x for i, x in enumerate(in_storage) if i not in self.dupidx]
@@ -778,7 +778,7 @@ def _execute(cthunk, init_tasks, tasks, error_storage):
            exc_value.__thunk_trace__ = trace # this can be used to retrieve the location the Op was declared
            raise exc_type, exc_value, exc_trace
    return execute
 class OpWiseCLinker(link.LocalLinker):
@@ -798,7 +798,7 @@ class OpWiseCLinker(link.LocalLinker):
    """
    __cache__ = {}
    def __init__(self, fallback_on_perform = True):
        self.env = None
        self.fallback_on_perform = fallback_on_perform
@@ -847,7 +847,7 @@ class OpWiseCLinker(link.LocalLinker):
                            self.__cache__[desc] = cl
                        except:
                            pass
                thunk, node_input_filters, node_output_filters = cl.make_thunk(
                    input_storage = node_input_storage,
                    output_storage = node_output_storage)
@@ -872,7 +872,7 @@ class OpWiseCLinker(link.LocalLinker):
            no_recycling = [storage_map[r] for r in no_recycling if r not in env.inputs]
        f = link.streamline(env, thunks, order, no_recycling = no_recycling, profiler = profiler)
        return f, [link.Filter(input, storage) for input, storage in zip(env.inputs, input_storage)], \
            [link.Filter(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \
            thunks, order
@@ -903,7 +903,7 @@ class DualLinker(link.Linker):
    def __init__(self, checker = _default_checker):
        """
        Initialize a DualLinker.
        The checker argument must be a function that takes two lists
        of length 1. The first one passed will contain the output
        computed by PerformLinker and the second one the output
@@ -938,7 +938,7 @@ class DualLinker(link.Linker):
        env = self.env
        no_recycling = self.no_recycling
        _f, i1, o1, thunks1, order1 = link.PerformLinker().accept(env, no_recycling = no_recycling).make_all(**kwargs)
        _f, i2, o2, thunks2, order2 =      OpWiseCLinker().accept(env, no_recycling = no_recycling).make_all(**kwargs)

--- a/gof/graph.py
+++ b/gof/graph.py
@@ -376,127 +376,6 @@ def clone_get_equiv(i, o, copy_inputs_and_orphans = True):
    return d
-##    Previous version
-#    for input in i:
-#        if copy_inputs_and_orphans:
-#            cpy = input.clone()
-#            cpy.owner = None
-#            cpy.index = None
-#            d[input] = cpy
-#        else:
-#            d[input] = input
-#
-#    def clone_helper(result):
-#        if result in d:
-#            return d[result]
-#        node = result.owner
-#        if node is None: # result is an orphan
-#            if copy_inputs_and_orphans:
-#                cpy = result.clone()
-#                d[result] = cpy
-#            else:
-#                d[result] = result
-#            return d[result]
-#        else:
-#            new_node = node.clone_with_new_inputs([clone_helper(input) for input in node.inputs])
-#            d[node] = new_node
-#            for output, new_output in zip(node.outputs, new_node.outputs):
-#                d[output] = new_output
-#            return d[result]
-#
-#    for output in o:
-#        clone_helper(output)
-#
-#    return d
-# def clone_with_new_inputs(i, o, new_i):
-#     equiv = clone_with_new_inputs_get_equiv(i, o, new_i)
-#     return [equiv[input] for input in i], [equiv[output] for output in o]
-# def clone_with_new_inputs_get_equiv(i, o, new_i, copy_orphans = True):
-#     # note: this does not exactly mirror Apply.clone_with_new_inputs
-#     # here it is possible to give different types to new_i and then
-#     # make_node is called on the ops instead of clone_with_new_inputs
-#     # whenever the type is different.
-#     d = {}
-#     for input, new_input in zip(i, new_i):
-#         d[input] = new_input
-#     def clone_helper(result):
-#         if result in d:
-#             return d[result]
-#         node = result.owner
-#         if node is None: # result is an orphan
-#             if copy_orphans:
-#                 cpy = result.clone()
-#                 d[result] = cpy
-#             else:
-#                 d[result] = result
-#             return d[result]
-#         else:
-#             cloned_inputs = [clone_helper(input) for input in node.inputs]
-#             if any(input != cloned_input for input, cloned_input in zip(node.inputs, cloned_inputs)):
-#                 new_node = node.op.make_node(*cloned_inputs)
-#             else:
-#                 new_node = node.clone_with_new_inputs(cloned_inputs)
-#             d[node] = new_node
-#             for output, new_output in zip(node.outputs, new_node.outputs):
-#                 d[output] = new_output
-#             return d[result]
-#     for output in o:
-#         clone_helper(output)
-#     return d
-def clone_with_equiv(i, o, d, missing_input_policy = 'fail', orphan_policy = 'copy'):
-    def clone_helper(result):
-        if result in d:
-            return d[result]
-        node = result.owner
-        if node is None: # result is an input or an orphan not in d
-            if isinstance(result, Value):
-                if orphan_policy == 'copy':
-                    d[result] = copy(result)
-                elif orphan_policy == 'keep':
-                    d[result] = result
-                else:
-                    raise ValueError("unknown orphan_policy: '%s'" % orphan_policy)
-            else:
-                if missing_input_policy == 'fail':
-                    raise ValueError("missing input: %s" % result)
-                elif missing_input_policy == 'keep':
-                    d[result] = result
-                else:
-                    raise ValueError("unknown missing_input_policy: '%s'" % missing_input_policy)
-            return d[result]
-        else:
-            cloned_inputs = [clone_helper(input) for input in node.inputs]
-            if all(input is cloned_input for input, cloned_input in zip(node.inputs, cloned_inputs)):
-                new_node = node
-            else:
-                new_node = node.clone_with_new_inputs(cloned_inputs, strict = False)
-#             if any(input != cloned_input for input, cloned_input in zip(node.inputs, cloned_inputs)):
-#                 new_node = node.op.make_node(*cloned_inputs)
-#             else:
-#                 new_node = node.clone_with_new_inputs(cloned_inputs)
-            d[node] = new_node
-            for output, new_output in zip(node.outputs, new_node.outputs):
-                d[output] = new_output
-            return d[result]
-    for output in o:
-        clone_helper(output)
-    return [d[input] for input in i], [d[output] for output in o]
 def general_toposort(r_out, deps, debug_print = False):
    """
    @note: deps(i) should behave like a pure function (no funny business with
@@ -561,8 +440,6 @@ def io_toposort(i, o, orderings = {}):
    return [o for o in topo if isinstance(o, Apply)]
 default_leaf_formatter = str
 default_node_formatter = lambda op, argstrings: "%s(%s)" % (op.op,
                                                            ", ".join(argstrings))
@@ -667,3 +544,4 @@ def view_roots(r):
    else:
        return [r]
--- a/tensor.py
+++ b/tensor.py
@@ -191,7 +191,7 @@ class Tensor(Type):
            Py_XDECREF(%(name)s);
        }
        """ % locals()
    def c_sync(self, name, sub):
        return """
        Py_XDECREF(py_%(name)s);
@@ -1026,7 +1026,7 @@ class Dot(Op):
            if nx not in (1,2): raise TypeError('not matrix or vector', x)
            if ny not in (1,2): raise TypeError('not matrix or vector', y)
            if nx == 2 and ny == 2:
                bz = [x.type.broadcastable[0], y.type.broadcastable[1]]
            elif nx == 1 and ny == 2:
@@ -1041,7 +1041,7 @@ class Dot(Op):
        return Apply(self, inputs, outputs)
    def perform(self, node, (x, y), (z, )):
-        z[0] = numpy.dot(x, y)
+        z[0] = numpy.asarray(numpy.dot(x, y))
    def grad(self, (x, y), (gz,)):
        if gz.type.ndim == 0:
            return gz * y, gz * x