提交 e2c4597d authored 作者: abergeron's avatar abergeron

Merge pull request #1839 from nouiz/specify_shape

Specify shape and print op to the new gpu back-end
...@@ -20,9 +20,10 @@ instructions below for detailed installation steps): ...@@ -20,9 +20,10 @@ instructions below for detailed installation steps):
We develop mainly on 64-bit Linux machines. other architectures are We develop mainly on 64-bit Linux machines. other architectures are
not well-tested. not well-tested.
Python_ >= 2.4 Python_ >= 2.6
The development package (``python-dev`` or ``python-devel`` The development package (``python-dev`` or ``python-devel``
on most Linux distributions) is recommended (see just below). on most Linux distributions) is recommended (see just below).
Python 2.4 was supported up to and including the release 0.6.
``g++``, ``python-dev`` ``g++``, ``python-dev``
Not technically required but *highly* recommended, in order to compile Not technically required but *highly* recommended, in order to compile
......
...@@ -3,7 +3,8 @@ from theano.compile.ops import ( ...@@ -3,7 +3,8 @@ from theano.compile.ops import (
Shape, shape, register_shape_c_code, Shape, shape, register_shape_c_code,
Shape_i, register_shape_i_c_code, Shape_i, register_shape_i_c_code,
ViewOp, view_op, register_view_op_c_code, FromFunctionOp, ViewOp, view_op, register_view_op_c_code, FromFunctionOp,
as_op, Rebroadcast, register_rebroadcast_c_code) as_op, Rebroadcast, register_rebroadcast_c_code,
SpecifyShape, specify_shape, register_specify_shape_c_code)
from theano.compile.function_module import * from theano.compile.function_module import *
......
...@@ -18,7 +18,7 @@ def register_view_op_c_code(type, code, version=()): ...@@ -18,7 +18,7 @@ def register_view_op_c_code(type, code, version=()):
:param typ: A Theano type. It must be the Theano class itself and not an :param typ: A Theano type. It must be the Theano class itself and not an
instance of the class. instance of the class.
:param code: C code that deep copies the Theano type 'typ'. :param code: C code that return a view for the Theano type 'typ'.
Use %(iname)s and %(oname)s for the input and output C Use %(iname)s and %(oname)s for the input and output C
variable names respectively. variable names respectively.
:param version: A number indicating the version of the code, for cache. :param version: A number indicating the version of the code, for cache.
...@@ -193,7 +193,8 @@ def register_shape_c_code(type, code, version=()): ...@@ -193,7 +193,8 @@ def register_shape_c_code(type, code, version=()):
:param typ: A Theano type. It must be the Theano class itself and not an :param typ: A Theano type. It must be the Theano class itself and not an
instance of the class. instance of the class.
:param code: C code that deep copies the Theano type 'typ'. :param code: C code that return a vector representing the shape
for the Theano type 'typ'.
Use %(iname)s and %(oname)s for the input and output C Use %(iname)s and %(oname)s for the input and output C
variable names respectively. variable names respectively.
:param version: A number indicating the version of the code, for cache. :param version: A number indicating the version of the code, for cache.
...@@ -250,7 +251,7 @@ class Shape(gof.Op): ...@@ -250,7 +251,7 @@ class Shape(gof.Op):
# the elements of the tensor variable do not participate # the elements of the tensor variable do not participate
# in the computation of the shape, so they are not really # in the computation of the shape, so they are not really
# part of the graph # part of the graph
return [DisconnectedType()()] return [theano.gradient.DisconnectedType()()]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
return [None] return [None]
...@@ -372,7 +373,7 @@ def register_shape_i_c_code(typ, code, version=()): ...@@ -372,7 +373,7 @@ def register_shape_i_c_code(typ, code, version=()):
:param typ: A Theano type. It must be the Theano class itself and not an :param typ: A Theano type. It must be the Theano class itself and not an
instance of the class. instance of the class.
:param code: C code that deep copies the Theano type 'typ'. :param code: C code that get the shape of dimensions %(i)s for the Theano type 'typ'.
Use %(iname)s and %(oname)s for the input and output C Use %(iname)s and %(oname)s for the input and output C
variable names respectively. variable names respectively.
:param version: A number indicating the version of the code, for cache. :param version: A number indicating the version of the code, for cache.
...@@ -479,14 +480,16 @@ def as_op(itypes, otypes, infer_shape=None): ...@@ -479,14 +480,16 @@ def as_op(itypes, otypes, infer_shape=None):
def register_rebroadcast_c_code(typ, code, version=()): def register_rebroadcast_c_code(typ, code, version=()):
""" Tell Rebroadcast how to generate C code for a Theano Type """Tell Rebroadcast how to generate C code for a Theano Type
:param typ: A Theano type. It must be the Theano class itself and not an :param typ: A Theano type. It must be the Theano class itself and not an
instance of the class. instance of the class.
:param code: C code that deep copies the Theano type 'typ'.
Use %(iname)s and %(oname)s for the input and output C :param code: C code that check if the dimensions %(axis) is of
variable names respectively. shape 1 for the Theano type 'typ'. Use %(iname)s and
%(axis)s for the axis that we need to check. %(oname)s for the input and output C variable names
respectively. %(axis)s for the axis that we need to
check. This code is put in a loop for all axis
:param version: A number indicating the version of the code, for cache. :param version: A number indicating the version of the code, for cache.
""" """
Rebroadcast.c_code_and_version[typ] = (code, version) Rebroadcast.c_code_and_version[typ] = (code, version)
...@@ -611,3 +614,143 @@ class Rebroadcast(gof.Op): ...@@ -611,3 +614,143 @@ class Rebroadcast(gof.Op):
version.append((str(t), v)) version.append((str(t), v))
return tuple(version) return tuple(version)
def register_specify_shape_c_code(typ, code, version=(),
                                  c_support_code_apply=None):
    """Tell SpecifyShape how to generate C code for a Theano Type.

    :param typ: A Theano type. It must be the Theano class itself and not an
                instance of the class.
    :param code: C code that checks the shape and returns a view for the
                 Theano type 'typ'.
                 Use %(iname)s and %(oname)s for the input and output C
                 variable names respectively.
                 %(shape)s is the vector of shapes of %(iname)s; the code
                 must also check that its length matches the number of
                 dimensions of the input.
    :param version: A number indicating the version of the code, for cache.
    :param c_support_code_apply: extra support code needed by `code`
                                 (e.g. #include lines).
    """
    # Stored as a 3-tuple; SpecifyShape.c_code/c_support_code_apply and
    # c_code_cache_version unpack (code, version, support_code).
    SpecifyShape.c_code_and_version[typ] = (code, version, c_support_code_apply)
class SpecifyShape(gof.Op):
    """L{Op} that puts into the graph the user-provided shape.

    In the case where this op stays in the final graph, we assert the shape.
    For this the output of this op must be used in the graph. This is not
    the case most of the time if we only take the shape of the output.
    Maybe there are other optimizations that will mess with this.

    @note: Maybe in the future we will never do the assert!
    @note: We currently don't support specifying partial shape information.

    @todo: test this op with sparse and cuda ndarray.
           Do C code for them too.
    """
    # The output is a view of the input (same storage).
    view_map = {0: [0]}

    # Mapping from Type to (code, version, c_support_code_apply) to use.
    # In the C code, the name of the input variable is %(iname)s,
    # the output variable is %(oname)s.
    c_code_and_version = {}

    def __hash__(self):
        # All instances are interchangeable: identity is the class itself.
        return hash(type(self))

    def __eq__(self, other):
        return type(self) == type(other)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, x, shape):
        """Build an Apply node; `shape` must be a 1-d integer vector."""
        if not isinstance(x, gof.Variable):
            x = theano.tensor.as_tensor_variable(x)
        shape = theano.tensor.as_tensor_variable(shape)
        assert shape.ndim == 1
        assert "int" in shape.dtype
        if isinstance(shape, theano.tensor.TensorConstant):
            # Shape known at compile time: validate the ndim immediately.
            assert shape.data.size == x.ndim
        return gof.Apply(self, [x, shape], [x.type()])

    def perform(self, node, inp, out_):
        x, shape = inp
        out, = out_
        assert x.ndim == shape.size
        assert numpy.all(x.shape == shape), ("got shape", x.shape,
                                             "expected", shape)
        # The output aliases the input (see view_map).
        out[0] = x

    def infer_shape(self, node, shapes):
        xshape, sshape = shapes
        new_shape = []
        # Prefer compile-time constants from the shape vector when
        # available; otherwise fall back to the symbolic entry.
        for dim in xrange(node.inputs[0].ndim):
            try:
                s = theano.tensor.get_scalar_constant_value(
                    node.inputs[1][dim])
                s = theano.tensor.as_tensor_variable(s)
                new_shape.append(s)
            except theano.tensor.NotScalarConstantError:
                new_shape.append(node.inputs[1][dim])
        assert len(new_shape) == len(xshape)
        return [new_shape]

    def connection_pattern(self, node):
        # Only the data input participates in the gradient, not the shape.
        return [[True], [False]]

    def grad(self, inp, grads):
        x, s = inp
        gz, = grads
        # Should I set an SpecifyShape on gz? I think so
        # But I don't do it now as we need to make an optimization
        # to remove that op from the graph to don't block other optimization
        # Should I do an optimizer that will remove the SpecifyShape?
        # I think Yes
        # (The original had an unreachable second
        #  `return [specify_shape(gz, s), ...]` here; removed as dead code.)
        return [gz, theano.gradient.DisconnectedType()()]

    def R_op(self, inputs, eval_points):
        if eval_points[0] is None:
            # It means that the this op sits on top of a non-differentiable
            # path
            return [None]
        return self.make_node(eval_points[0], *inputs[1:]).outputs

    def c_support_code_apply(self, node, name):
        itype = node.inputs[0].type.__class__
        if itype in self.c_code_and_version:
            _, _, support_code = self.c_code_and_version[itype]
            if support_code:
                return support_code
        return super(SpecifyShape, self).c_support_code_apply(node, name)

    def c_code(self, node, name, inames, onames, sub):
        iname, shape = inames
        oname, = onames
        fail = sub['fail']
        itype = node.inputs[0].type.__class__
        if itype in self.c_code_and_version:
            code, version, _ = self.c_code_and_version[itype]
            return code % locals()
        # BUG FIX: the original passed `node` twice instead of
        # (node, name), so the fallback raised confusing errors.
        return super(SpecifyShape, self).c_code(node, name,
                                                inames, onames, sub)

    def c_code_cache_version(self):
        version = []
        # If any of the c code is unversionned, we have to return ()
        # Else, we will return a list of (type name, version) pairs.
        for t, (c, v, _) in sorted(self.c_code_and_version.items(),
                                   key=lambda pair: str(pair[0])):
            if not v:
                warnings.warn("Type %s has C code for SpecifyShape, but it "
                              "has no version. You should add a 'version' "
                              "keyword arg when calling "
                              "register_specify_shape_c_code." % t,
                              stacklevel=2)
                return ()
            version.append((str(t), v))
        return tuple(version)


specify_shape = SpecifyShape()
...@@ -22,14 +22,14 @@ from theano.sandbox.gpuarray.type import GpuArrayType ...@@ -22,14 +22,14 @@ from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import ( from theano.sandbox.gpuarray.basic_ops import (
host_from_gpu, gpu_from_host, HostFromGpu, host_from_gpu, gpu_from_host, HostFromGpu,
gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuJoin, gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuJoin,
) )
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
from theano.sandbox.gpuarray.conv import GpuConv from theano.sandbox.gpuarray.conv import GpuConv
from theano.sandbox.gpuarray.nnet import ( from theano.sandbox.gpuarray.nnet import (
GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmaxWithBias, GpuSoftmax GpuSoftmaxWithBias, GpuSoftmax
) )
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar, from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduceCuda) GpuDimShuffle, GpuCAReduceCuda)
from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor, from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
...@@ -220,7 +220,7 @@ def local_gpu_rebroadcast(node): ...@@ -220,7 +220,7 @@ def local_gpu_rebroadcast(node):
@op_lifter([tensor.Flatten]) @op_lifter([tensor.Flatten])
def local_gpuflatten(node): def local_gpuflatten(node):
op = node.op op = node.op
shp =[] shp = []
if op.outdim != 1: if op.outdim != 1:
shp = [node.inputs[0].shape[i] for i in range(op.outdim - 1)] shp = [node.inputs[0].shape[i] for i in range(op.outdim - 1)]
shp += [-1] shp += [-1]
...@@ -282,7 +282,24 @@ def local_gpua_dimshuffle(node): ...@@ -282,7 +282,24 @@ def local_gpua_dimshuffle(node):
@register_opt()
@op_lifter([tensor.SpecifyShape])
def local_gpua_specifyShape(node):
    """Lift SpecifyShape so it checks the shape on the gpu value."""
    tensor_input = node.inputs[0]
    if isinstance(tensor_input.type, GpuArrayType):
        # Already a gpu variable; nothing to lift.
        return
    new_inputs = [gpu_from_host(tensor_input)] + node.inputs[1:]
    return tensor.specify_shape(*new_inputs)
def gpu_print_wrapper(op, cnda):
    """Bridge a gpu ndarray to the original Print op's callback.

    Converts `cnda` to a numpy array and invokes the wrapped op's
    `global_fn`, passing the wrapped op (stored on `op.old_op`) as the
    op argument so the user-visible behavior is unchanged.
    """
    original = op.old_op
    original.global_fn(original, numpy.asarray(cnda))
@register_opt()
@op_lifter([tensor.printing.Print])
def local_gpu_print_op(node):
    """Lift a Print op so it does not force a transfer back to the host.

    Rebuilds the Print op with `gpu_print_wrapper` as its callback, which
    converts the gpu value to numpy before calling the original print
    function (kept on `old_op`).
    """
    x, = node.inputs
    # Assumes x.owner is a host<->gpu transfer node supplied by the lifter,
    # so its single input is the underlying gpu variable -- TODO confirm.
    gpu_x, = x.owner.inputs
    new_op = node.op.__class__(global_fn=gpu_print_wrapper)
    # Keep the original op so the wrapper can forward to its global_fn.
    new_op.old_op = node.op
    return new_op(gpu_x)
@register_opt() @register_opt()
...@@ -322,11 +339,11 @@ def local_gpua_incsubtensor(node): ...@@ -322,11 +339,11 @@ def local_gpua_incsubtensor(node):
@register_opt() @register_opt()
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
def local_gpua_advanced_incsubtensor(node): def local_gpua_advanced_incsubtensor(node):
# This optimization is disabled if cuda is not active # This optimization is disabled if cuda is not active
if pygpu.get_default_context().kind != "cuda": if pygpu.get_default_context().kind != "cuda":
return None return None
x, y = node.inputs[0:2] x, y = node.inputs[0:2]
coords = node.inputs[2:] coords = node.inputs[2:]
set_instead_of_inc = node.op.set_instead_of_inc set_instead_of_inc = node.op.set_instead_of_inc
...@@ -334,13 +351,13 @@ def local_gpua_advanced_incsubtensor(node): ...@@ -334,13 +351,13 @@ def local_gpua_advanced_incsubtensor(node):
device_properties = theano.sandbox.cuda.device_properties device_properties = theano.sandbox.cuda.device_properties
compute_capability = device_properties(active_device_no)['major'] compute_capability = device_properties(active_device_no)['major']
if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2): if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
return GpuAdvancedIncSubtensor1( return GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
else: else:
return GpuAdvancedIncSubtensor1_dev20( return GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
@register_opt() @register_opt()
...@@ -483,15 +500,15 @@ def local_gpu_conv(node): ...@@ -483,15 +500,15 @@ def local_gpu_conv(node):
#print op.kshp, op.imshp[1:3] #print op.kshp, op.imshp[1:3]
#print op.kshp_logical, logical_img_hw #print op.kshp_logical, logical_img_hw
ret = GpuConv(border_mode=op.out_mode, ret = GpuConv(border_mode=op.out_mode,
subsample=(op.dx, op.dy), subsample=(op.dx, op.dy),
logical_img_hw=logical_img_hw, logical_img_hw=logical_img_hw,
logical_kern_hw=op.kshp_logical, logical_kern_hw=op.kshp_logical,
logical_kern_align_top=op.kshp_logical_top_aligned, logical_kern_align_top=op.kshp_logical_top_aligned,
kshp=op.kshp, kshp=op.kshp,
version=op.version, version=op.version,
verbose=op.verbose, verbose=op.verbose,
imshp=op.imshp, imshp=op.imshp,
) )
if op.imshp_logical is not None: if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3] logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]: if logical_img_hw != op.imshp[1:3]:
...@@ -626,8 +643,8 @@ def local_scan_to_gpua(node): ...@@ -626,8 +643,8 @@ def local_scan_to_gpua(node):
_cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, []) _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
info['gpu_hash'] = hash(_cmodule_key) info['gpu_hash'] = hash(_cmodule_key)
nw_op = scan_op.Scan(scan_ins, scan_outs, info, nw_op = scan_op.Scan(scan_ins, scan_outs, info,
typeConstructor=GpuArrayType).make_node(*nw_ins) typeConstructor=GpuArrayType).make_node(*nw_ins)
return nw_op.outputs return nw_op.outputs
optdb.register('gpua_scanOp_make_inplace', optdb.register('gpua_scanOp_make_inplace',
......
...@@ -5,12 +5,15 @@ from theano import tensor ...@@ -5,12 +5,15 @@ from theano import tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import theano.sandbox.gpuarray import theano.sandbox.gpuarray
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import GpuAlloc, GpuReshape, gpu_alloc from theano.sandbox.gpuarray.basic_ops import (
from theano.sandbox.gpuarray.elemwise import GpuCAReduceCuda GpuAlloc, GpuReshape, gpu_alloc, gpu_from_host, host_from_gpu)
from theano.sandbox.gpuarray.elemwise import GpuCAReduceCuda, GpuElemwise
from theano.sandbox.gpuarray.tests.test_basic_ops import ( from theano.sandbox.gpuarray.tests.test_basic_ops import (
rand_gpuarray, mode_with_gpu, mode_without_gpu rand_gpuarray, mode_with_gpu, mode_without_gpu
) )
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests.test_basic import TestSpecifyShape
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
...@@ -108,3 +111,25 @@ def test_rebroadcast(): ...@@ -108,3 +111,25 @@ def test_rebroadcast():
assert isinstance(rebr.inputs[0].type, GpuArrayType) assert isinstance(rebr.inputs[0].type, GpuArrayType)
assert isinstance(rebr.outputs[0].type, GpuArrayType) assert isinstance(rebr.outputs[0].type, GpuArrayType)
class TestSpecifyShape(TestSpecifyShape):
    """Re-run the tensor TestSpecifyShape suite on the gpu back-end.

    Overriding `mode` and `input_type` makes the inherited tests compile
    with the gpu mode and check that the SpecifyShape input was moved to
    a GpuArrayType.
    """
    # Removed a redundant trailing `pass`: the class body already has
    # statements, so it was dead code.
    mode = mode_with_gpu
    input_type = GpuArrayType
def test_print_op():
    """Test that print ops don't block gpu optimization.

    The expected optimized graph is:
        GpuFromHost -> Print -> GpuElemwise{mul} -> HostFromGpu
    """
    b = tensor.fmatrix()
    f = theano.function([b], theano.printing.Print()(b) * 2,
                        mode=mode_with_gpu)
    # Removed a leftover `theano.printing.debugprint(f)` call and stale
    # commented-out trace: tests should not write debug output.
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == gpu_from_host
    assert isinstance(topo[1].op, theano.printing.Print)
    assert isinstance(topo[2].op, GpuElemwise)
    assert topo[3].op == host_from_gpu
    f(numpy.random.random((5, 5)).astype('float32'))
...@@ -339,4 +339,34 @@ theano.compile.register_rebroadcast_c_code( ...@@ -339,4 +339,34 @@ theano.compile.register_rebroadcast_c_code(
%(fail)s %(fail)s
} }
""", """,
version=1) version=1)
# Register GpuArrayType C code for SpecifyShape: check the ndim and every
# dimension of the input against the requested shape vector, then return
# the input itself as a view.
#
# BUG FIX: the two PyErr_Format arguments of the first message were
# swapped (the ndim was printed as the shape-vector length and vice
# versa), and the integer format codes did not match the argument widths
# (npy_intp / dtype values passed to %d varargs). Version bumped 1 -> 2
# so cached modules are regenerated.
theano.compile.register_specify_shape_c_code(
    GpuArrayType,
    """
        if (PyGpuArray_NDIM(%(iname)s) != PyArray_DIMS(%(shape)s)[0]) {
            PyErr_Format(PyExc_AssertionError,
                         "SpecifyShape: vector of shape has %%d elements,"
                         " but the input has %%d dimensions.",
                         (int) PyArray_DIMS(%(shape)s)[0],
                         (int) PyGpuArray_NDIM(%(iname)s));
            %(fail)s;
        }
        for(int i = 0; i < PyGpuArray_NDIM(%(iname)s); i++){
            dtype_%(shape)s shp = ((dtype_%(shape)s*)PyArray_GETPTR1(%(shape)s,
                                                                     i))[0];
            if (PyGpuArray_DIMS(%(iname)s)[i] != shp) {
                PyErr_Format(PyExc_AssertionError,
                             "SpecifyShape: dim %%d of input has shape %%ld,"
                             " expected %%ld.",
                             i, (long) PyGpuArray_DIMS(%(iname)s)[i],
                             (long) shp);
                %(fail)s;
            }
        }
        Py_XDECREF(%(oname)s);
        %(oname)s = %(iname)s;
        Py_XINCREF(%(oname)s);
    """,
    version=2,
    c_support_code_apply='#include <numpy_compat.h>')
...@@ -25,7 +25,8 @@ from theano.gof.python25 import partial, any, all ...@@ -25,7 +25,8 @@ from theano.gof.python25 import partial, any, all
from theano.gof.utils import hashtype from theano.gof.utils import hashtype
from theano import compile, printing from theano import compile, printing
from theano.printing import pprint, min_informative_str from theano.printing import pprint, min_informative_str
from theano.compile import Rebroadcast, Shape, shape #For history #For history
from theano.compile import Rebroadcast, Shape, shape, SpecifyShape, specify_shape
# We use these exceptions as well. # We use these exceptions as well.
...@@ -1164,129 +1165,6 @@ def old_shape(a): ...@@ -1164,129 +1165,6 @@ def old_shape(a):
return va.type.shape return va.type.shape
class SpecifyShape(Op):
    """
    L{Op} that puts into the graph the user-provided shape.

    In the case where this op stays in the final graph, we assert the shape.
    For this the output of this op must be used in the graph. This is not
    the case most of the time if we only take the shape of the output.
    Maybe there are other optimizations that will mess with this.

    @note: Maybe in the future we will never do the assert!
    @note: We currently don't support specifying partial shape information.

    @todo: test this op with sparse and cuda ndarray.
           Do C code for them too.
    """
    # The output is a view of the input (same storage).
    view_map = {0: [0]}

    def __hash__(self):
        # All instances are interchangeable: identity is the class itself.
        return hash(type(self))

    def __eq__(self, other):
        return type(self) == type(other)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, x, shape):
        # `shape` must be a 1-d integer vector.
        if not isinstance(x, Variable):
            x = as_tensor_variable(x)
        shape = as_tensor_variable(shape)
        assert shape.ndim == 1
        assert "int" in shape.dtype
        if isinstance(shape, TensorConstant):
            # Shape known at compile time: validate the ndim immediately.
            assert shape.data.size == x.ndim
        return Apply(self, [x, shape], [x.type()])

    def perform(self, node, inp, out_):
        x, shape = inp
        out, = out_
        assert x.ndim == shape.size
        assert numpy.all(x.shape == shape), ("got shape", x.shape,
                                             "expected", shape)
        # The output aliases the input (see view_map).
        out[0] = x

    def infer_shape(self, node, shapes):
        xshape, sshape = shapes
        new_shape = []
        # Prefer compile-time constants from the shape vector when
        # available; otherwise fall back to the symbolic entry.
        for dim in xrange(node.inputs[0].ndim):
            try:
                s = get_scalar_constant_value(node.inputs[1][dim])
                s = as_tensor_variable(s)
                new_shape.append(s)
            except NotScalarConstantError:
                new_shape.append(node.inputs[1][dim])
        assert len(new_shape) == len(xshape)
        return [new_shape]

    def connection_pattern(self, node):
        # Only the data input participates in the gradient, not the shape.
        return [[True], [False]]

    def grad(self, inp, grads):
        x, s = inp
        gz, = grads
        # Should I set an SpecifyShape on gz? I think so
        # But I don't do it now as we need to make an optimization
        # to remove that op from the graph to don't block other optimization
        # Should I do an optimizer that will remove the SpecifyShape?
        # I think Yes
        return [gz, DisconnectedType()()]
        # NOTE(review): the return below is unreachable dead code.
        return [specify_shape(gz, s), DisconnectedType()()]

    def R_op(self, inputs, eval_points):
        if eval_points[0] is None:
            # It means that the this op sits on top of a non-differentiable
            # path
            return [None]
        return self.make_node(eval_points[0], *inputs[1:]).outputs

    def c_code(self, node, nodename, inp, out, sub):
        if not isinstance(node.inputs[0], TensorVariable):
            # The C code below supports only Tensor.  super.c_code
            # will raise an exception to tell that there is no C code
            # for the other cases.
            return super(SpecifyShape, self).c_code(node, nodename,
                                                    inp, out, sub)
        iname, shape = inp
        oname, = out
        fail = sub['fail']
        # NOTE(review): in the first PyErr_Format below, the two %d
        # arguments look swapped relative to the message text (the ndim
        # is printed as the shape-vector length and vice versa) -- confirm
        # and fix together with a c_code_cache_version bump.
        return """
        if (PyArray_NDIM(%(iname)s) != PyArray_DIMS(%(shape)s)[0]) {
            PyErr_Format(PyExc_AssertionError,
                         "SpecifyShape: vector of shape has %%d elements,"
                         " but the input has %%d dimensions.",
                         PyArray_NDIM(%(iname)s),
                         PyArray_DIMS(%(shape)s)[0]);
            %(fail)s;
        }
        for(int i = 0; i < PyArray_NDIM(%(iname)s); i++){
            dtype_%(shape)s shp = ((dtype_%(shape)s*)PyArray_GETPTR1(%(shape)s,
                                                                     i))[0];
            if (PyArray_DIMS(%(iname)s)[i] != shp) {
                PyErr_Format(PyExc_AssertionError,
                             "SpecifyShape: dim %%d of input has shape %%d,"
                             " expected %%d.",
                             i, PyArray_DIMS(%(iname)s)[i],
                             shp);
                %(fail)s;
            }
        }
        Py_XDECREF(%(oname)s);
        %(oname)s = %(iname)s;
        Py_XINCREF(%(oname)s);
        """ % locals()

    def c_code_cache_version(self):
        return (1,)

specify_shape = SpecifyShape()
class MaxAndArgmax(Op): class MaxAndArgmax(Op):
"""Calculate the max and argmax over a given axis or over all axes. """Calculate the max and argmax over a given axis or over all axes.
""" """
......
...@@ -6178,7 +6178,11 @@ def test_stacklists(): ...@@ -6178,7 +6178,11 @@ def test_stacklists():
x = numpy.ones((4, 4), 'float32') x = numpy.ones((4, 4), 'float32')
assert f(x,x,x,x).shape == (2, 2, 4, 4) assert f(x,x,x,x).shape == (2, 2, 4, 4)
class TestSpecifyShape(unittest.TestCase): class TestSpecifyShape(unittest.TestCase):
mode = None
input_type = TensorType
def shortDescription(self): def shortDescription(self):
return None return None
...@@ -6189,14 +6193,21 @@ class TestSpecifyShape(unittest.TestCase): ...@@ -6189,14 +6193,21 @@ class TestSpecifyShape(unittest.TestCase):
x = vector() x = vector()
xval = numpy.random.rand(2).astype(floatX) xval = numpy.random.rand(2).astype(floatX)
f = theano.function([x], specify_shape(x, [2])) f = theano.function([x], specify_shape(x, [2]), mode=self.mode)
f(xval) f(xval)
xval = numpy.random.rand(3).astype(floatX) xval = numpy.random.rand(3).astype(floatX)
self.assertRaises(AssertionError, f, xval) self.assertRaises(AssertionError, f, xval)
theano.printing.debugprint(f)
assert isinstance([n for n in f.maker.fgraph.toposort()
if isinstance(n.op, SpecifyShape)][0].inputs[0].type,
self.input_type)
x = matrix() x = matrix()
xval = numpy.random.rand(2, 3).astype(floatX) xval = numpy.random.rand(2, 3).astype(floatX)
f = theano.function([x], specify_shape(x, [2, 3])) f = theano.function([x], specify_shape(x, [2, 3]), mode=self.mode)
assert isinstance([n for n in f.maker.fgraph.toposort()
if isinstance(n.op, SpecifyShape)][0].inputs[0].type,
self.input_type)
f(xval) f(xval)
for shape in [(1, 3), (2, 2), (5, 5)]: for shape in [(1, 3), (2, 2), (5, 5)]:
xval = numpy.random.rand(*shape).astype(floatX) xval = numpy.random.rand(*shape).astype(floatX)
...@@ -6212,7 +6223,11 @@ class TestSpecifyShape(unittest.TestCase): ...@@ -6212,7 +6223,11 @@ class TestSpecifyShape(unittest.TestCase):
self.assertRaises(AssertionError, specify_shape, x, []) self.assertRaises(AssertionError, specify_shape, x, [])
self.assertRaises(AssertionError, specify_shape, x, [2, 2]) self.assertRaises(AssertionError, specify_shape, x, [2, 2])
f = theano.function([x, shape_vec], specify_shape(x, shape_vec)) f = theano.function([x, shape_vec], specify_shape(x, shape_vec),
mode=self.mode)
assert isinstance([n for n in f.maker.fgraph.toposort()
if isinstance(n.op, SpecifyShape)][0].inputs[0].type,
self.input_type)
self.assertRaises(AssertionError, f, xval, []) self.assertRaises(AssertionError, f, xval, [])
self.assertRaises(AssertionError, f, xval, [2, 2]) self.assertRaises(AssertionError, f, xval, [2, 2])
...@@ -6222,7 +6237,11 @@ class TestSpecifyShape(unittest.TestCase): ...@@ -6222,7 +6237,11 @@ class TestSpecifyShape(unittest.TestCase):
(1,), (1,),
(2, 3, 4)]: (2, 3, 4)]:
self.assertRaises(AssertionError, specify_shape, x, shape) self.assertRaises(AssertionError, specify_shape, x, shape)
f = theano.function([x, shape_vec], specify_shape(x, shape_vec)) f = theano.function([x, shape_vec], specify_shape(x, shape_vec),
mode=self.mode)
assert isinstance([n for n in f.maker.fgraph.toposort()
if isinstance(n.op, SpecifyShape)][0].inputs[0].type,
self.input_type)
self.assertRaises(AssertionError, f, xval, shape) self.assertRaises(AssertionError, f, xval, shape)
......
...@@ -673,7 +673,6 @@ theano.compile.register_deep_copy_op_c_code( ...@@ -673,7 +673,6 @@ theano.compile.register_deep_copy_op_c_code(
version=2) version=2)
# Register TensorType C code for ViewOp.
theano.compile.register_rebroadcast_c_code( theano.compile.register_rebroadcast_c_code(
TensorType, TensorType,
""" """
...@@ -686,3 +685,33 @@ theano.compile.register_rebroadcast_c_code( ...@@ -686,3 +685,33 @@ theano.compile.register_rebroadcast_c_code(
} }
""", """,
version=1) version=1)
# Register TensorType C code for SpecifyShape: check the ndim and every
# dimension of the input against the requested shape vector, then return
# the input itself as a view.
#
# BUG FIX: the two PyErr_Format arguments of the first message were
# swapped (the ndim was printed as the shape-vector length and vice
# versa), and the integer format codes did not match the argument widths
# (npy_intp / dtype values passed to %d varargs). Version bumped 1 -> 2
# so cached modules are regenerated.
theano.compile.register_specify_shape_c_code(
    TensorType,
    """
        if (PyArray_NDIM(%(iname)s) != PyArray_DIMS(%(shape)s)[0]) {
            PyErr_Format(PyExc_AssertionError,
                         "SpecifyShape: vector of shape has %%d elements,"
                         " but the input has %%d dimensions.",
                         (int) PyArray_DIMS(%(shape)s)[0],
                         PyArray_NDIM(%(iname)s));
            %(fail)s;
        }
        for(int i = 0; i < PyArray_NDIM(%(iname)s); i++){
            dtype_%(shape)s shp = ((dtype_%(shape)s*)PyArray_GETPTR1(%(shape)s,
                                                                     i))[0];
            if (PyArray_DIMS(%(iname)s)[i] != shp) {
                PyErr_Format(PyExc_AssertionError,
                             "SpecifyShape: dim %%d of input has shape %%ld,"
                             " expected %%ld.",
                             i, (long) PyArray_DIMS(%(iname)s)[i],
                             (long) shp);
                %(fail)s;
            }
        }
        Py_XDECREF(%(oname)s);
        %(oname)s = %(iname)s;
        Py_XINCREF(%(oname)s);
    """,
    version=2)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论