Commit c23e936e authored by Frederic

Add Shape_i c code for the new gpu back-end.

Refactor it to make a registry op c code.
Parent 724d0d32
from theano.compile.ops import ( from theano.compile.ops import (
DeepCopyOp, deep_copy_op, register_deep_copy_op_c_code, DeepCopyOp, deep_copy_op, register_deep_copy_op_c_code,
Shape_i, register_shape_i_c_code,
ViewOp, view_op, register_view_op_c_code) ViewOp, view_op, register_view_op_c_code)
from theano.compile.function_module import * from theano.compile.function_module import *
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import copy import copy
import warnings import warnings
#import theano import theano
from theano import gof from theano import gof
...@@ -155,7 +155,7 @@ class DeepCopyOp(gof.Op): ...@@ -155,7 +155,7 @@ class DeepCopyOp(gof.Op):
# Else, we will return a list of (type name, version) pairs. # Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])): for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])):
if not v: if not v:
warnings.warn("Type %s has C code for OutputGuard, but it has " warnings.warn("Type %s has C code for DeepCopyOp, but it has "
"no version. You should add a 'version' keyword arg " "no version. You should add a 'version' keyword arg "
"when calling register_OutputGuard_c_code." % t, "when calling register_OutputGuard_c_code." % t,
stacklevel=2) stacklevel=2)
...@@ -180,6 +180,99 @@ class DeepCopyOp(gof.Op): ...@@ -180,6 +180,99 @@ class DeepCopyOp(gof.Op):
deep_copy_op = DeepCopyOp() deep_copy_op = DeepCopyOp()
class Shape_i(gof.Op):
    """Return the size of one dimension of a variable.

    L{Op} that outputs ``x.shape[i]`` as a 0-d int64 tensor for a fixed
    dimension index ``i`` given at construction time.

    @note: Non-differentiable.
    """
    # Mapping from Type (the class itself, not an instance) to
    # (C code, version).  In the C code, the name of the input variable
    # is %(iname)s, the output variable is %(oname)s and the dimension
    # index is %(i)s.  Populated via register_shape_i_c_code().
    c_code_and_version = {}

    def __init__(self, i):
        # i: index of the dimension whose size this op returns.
        self.i = i

    def __hash__(self):
        return hash(type(self)) ^ self.i

    def __eq__(self, other):
        return type(self) == type(other) and self.i == other.i

    def __str__(self):
        return '%s{%i}' % (self.__class__.__name__, self.i)

    def make_node(self, x):
        # x could be one of a number of types; the only thing we require
        # is that the variable have a .ndim and that the value have a
        # .shape.
        if not isinstance(x, theano.Variable):
            raise TypeError('x must be Variable with ndim attribute', x)
        if x.ndim <= self.i:
            raise TypeError('x has too few dimensions for Shape_i',
                            (x, self.i))
        return theano.Apply(self, [x], [theano.tensor.lscalar()])

    def perform(self, node, inp, out_):
        # Store x.shape[i] in the 0-d int64 output, reusing the output
        # storage when it already exists.
        x, = inp
        out, = out_
        if out[0] is None:
            out[0] = theano._asarray(x.shape[self.i], dtype='int64')
        else:
            out[0][...] = x.shape[self.i]

    def c_code_cache_version(self):
        version = []
        # If any of the c code is unversioned, we have to return ().
        # Else, we will return a list of (type name, version) pairs.
        for t, (c, v) in sorted(self.c_code_and_version.items(),
                                key=lambda pair: str(pair[0])):
            if not v:
                # Was a copy-paste error: the message used to direct users
                # to register_OutputGuard_c_code.
                warnings.warn("Type %s has C code for Shape_i, but it has "
                              "no version. You should add a 'version' "
                              "keyword arg when calling "
                              "register_shape_i_c_code." % t,
                              stacklevel=2)
                return ()
            version.append((str(t), v))
        return tuple(version)

    def c_code(self, node, name, inames, onames, sub):
        iname, = inames
        oname, = onames
        fail = sub['fail']
        i = self.i
        # Look up the registered C code by the *class* of the input's type.
        itype = node.inputs[0].type.__class__
        if itype in self.c_code_and_version:
            code, version = self.c_code_and_version[itype]
            return code % locals()
        # Else, no C code: fall back to the default (raises NotImplemented).
        return super(Shape_i, self).c_code(node, name, inames, onames, sub)

    def infer_shape(self, node, input_shapes):
        # The output is a 0-d (scalar) tensor.
        return [()]

    def grad(self, inp, grads):
        # Non-differentiable: a shape carries no gradient information.
        return [None]
def register_shape_i_c_code(typ, code, version=()):
    """Tell Shape_i how to generate C code for a Theano Type.

    :param typ: A Theano type. It must be the Theano class itself and not
        an instance of the class.

    :param code: C code that gets the shape of dimension %(i)s for the
        Theano type 'typ'.  Use %(iname)s and %(oname)s for the input and
        output C variable names respectively, and %(i)s for the dimension
        index.

    :param version: A number indicating the version of the code, for cache.
    """
    Shape_i.c_code_and_version[typ] = (code, version)
# List of Theano Types that one can add an extra dimension and for which # List of Theano Types that one can add an extra dimension and for which
# Scan can deal with. # Scan can deal with.
expandable_types = () expandable_types = ()
...@@ -438,6 +438,13 @@ theano.compile.register_view_op_c_code( ...@@ -438,6 +438,13 @@ theano.compile.register_view_op_c_code(
""", """,
version=1) version=1)
theano.compile.register_shape_i_c_code(CudaNdarrayType, """
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] =
CudaNdarray_HOST_DIMS(%(iname)s)[%(i)s];
""", version=(0,))
# Register CudaNdarrayType to the DeepCopyOp list of types with c code. # Register CudaNdarrayType to the DeepCopyOp list of types with c code.
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
CudaNdarrayType, CudaNdarrayType,
......
...@@ -336,3 +336,39 @@ def test_gpueye(): ...@@ -336,3 +336,39 @@ def test_gpueye():
# M != N, k = 0 # M != N, k = 0
yield check, dtype, 3, 5 yield check, dtype, 3, 5
yield check, dtype, 5, 3 yield check, dtype, 5, 3
def test_hostfromgpu_shape_i():
    """
    Test that the shape is lifted over hostfromgpu
    """
    # Compile mode: gpu mode plus 'specialize' so that .shape is lowered
    # to Shape_i/MakeVector nodes that the lift optimization can act on.
    m = mode_with_gpu.including('local_dot_to_dot22',
                                'local_dot22_to_dot22scalar', 'specialize')
    a = T.fmatrix('a')
    # ca: a symbolic variable directly of the new gpu back-end's array type.
    ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))()

    av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
    cv = gpuarray.asarray(numpy.random.rand(5, 4),
                          dtype='float32')

    gpu_from_host = theano.sandbox.gpuarray.basic_ops.gpu_from_host
    host_from_gpu = theano.sandbox.gpuarray.basic_ops.host_from_gpu

    # Sanity check: compiling the bare transfer keeps a GpuFromHost node.
    f = theano.function([a], gpu_from_host(a), mode=m)
    assert gpu_from_host in [x.op
                             for x in f.maker.fgraph.toposort()]

    # Taking the shape of gpu_from_host(a): the graph must contain only
    # Shape_i ops on the host input (the transfer is optimized away).
    # NOTE(review): this branch checks T.opt.Shape_i while the branch below
    # checks theano.compile.Shape_i — presumably the same class re-exported
    # after the refactor; confirm and use one spelling.
    f = theano.function([a], gpu_from_host(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    # Same check in the other direction: host_from_gpu is kept when the
    # value itself is requested...
    f = theano.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op
                             for x in f.maker.fgraph.toposort()]

    # ...but only Shape_i/MakeVector remain when just the shape is needed.
    f = theano.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, theano.compile.Shape_i)
    assert isinstance(topo[1].op, theano.compile.Shape_i)
    assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
...@@ -278,6 +278,13 @@ theano.compile.register_view_op_c_code(GpuArrayType, """ ...@@ -278,6 +278,13 @@ theano.compile.register_view_op_c_code(GpuArrayType, """
Py_XINCREF(%(oname)s); Py_XINCREF(%(oname)s);
""", version=(0,)) """, version=(0,))
theano.compile.register_shape_i_c_code(GpuArrayType, """
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] =
%(iname)s->ga.dimensions[%(i)s];
""", version=(0,))
theano.compile.register_deep_copy_op_c_code(GpuArrayType, """ theano.compile.register_deep_copy_op_c_code(GpuArrayType, """
Py_XDECREF(%(oname)s); Py_XDECREF(%(oname)s);
%(oname)s = pygpu_copy(%(iname)s, GA_ANY_ORDER); %(oname)s = pygpu_copy(%(iname)s, GA_ANY_ORDER);
......
...@@ -29,6 +29,7 @@ from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice, ...@@ -29,6 +29,7 @@ from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice,
from theano import scalar from theano import scalar
from theano.tensor import basic as T from theano.tensor import basic as T
from theano import compile # to register the optimizer built by this file from theano import compile # to register the optimizer built by this file
from theano.compile.ops import Shape_i
from theano.gof.python25 import any, all from theano.gof.python25 import any, all
from theano.gof.opt import (Optimizer, pre_constant_merge, from theano.gof.opt import (Optimizer, pre_constant_merge,
...@@ -637,78 +638,6 @@ T.pprint.assign(lambda pstate, r: r.owner and isinstance( ...@@ -637,78 +638,6 @@ T.pprint.assign(lambda pstate, r: r.owner and isinstance(
r.owner.op, MakeVector), MakeVectorPrinter()) r.owner.op, MakeVector), MakeVectorPrinter())
class Shape_i(T.Op):
    """
    L{Op} to return the shape of a matrix.

    Outputs ``x.shape[i]`` as a 0-d int64 tensor for the fixed dimension
    index ``i`` given at construction time.

    @note: Non-differentiable.
    """
    def __init__(self, i):
        # i: index of the dimension whose size this op returns.
        self.i = i

    def __hash__(self):
        return hash(type(self)) ^ self.i

    def __eq__(self, other):
        return type(self) == type(other) and self.i == other.i

    def __str__(self):
        return '%s{%i}' % (self.__class__.__name__, self.i)

    def make_node(self, x):
        # x could be one of a number of types
        # the only thing we require is that the variable have a .ndim,
        # and that the value have a .shape
        if not isinstance(x, T.Variable):
            raise TypeError('x must be Variable with ndim attribute', x)
        if x.ndim <= self.i:
            raise TypeError('x has too few dimensions for Shape_i',
                            (x, self.i))
        return T.Apply(self, [x], [T.lscalar()])

    def perform(self, node, inp, out_):
        # Store x.shape[i] in the 0-d int64 output, reusing the output
        # storage when it already exists.
        x, = inp
        out, = out_
        if out[0] is None:
            out[0] = theano._asarray(x.shape[self.i], dtype='int64')
        else:
            out[0][...] = x.shape[self.i]

    def c_code_cache_version(self):
        return (0, 1)

    def c_code(self, node, name, inp, out_, sub):
        # Hard-coded C code per input type (no registry in this version).
        x, = inp
        out, = out_
        i = self.i
        if isinstance(node.inputs[0].type, T.TensorType):
            return """
            if(!%(out)s)
                %(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
            ((npy_int64*)PyArray_DATA(%(out)s))[0]=PyArray_DIMS(%(x)s)[%(i)s];
            """ % locals()
        elif node.inputs[0].type.__class__.__name__ == "CudaNdarrayType":
            # Compared by class name: don't want to import cuda stuff here.
            return """
            if(!%(out)s)
                %(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
            ((npy_int64*)PyArray_DATA(%(out)s))[0]=
                CudaNdarray_HOST_DIMS(%(x)s)[%(i)s];
            """ % locals()
        else:
            #TODO: if your type is not listed here, make a damn registry of
            #      shape_i ops for various types of variables.
            #      Do not continue this madness.
            return super(Shape_i, self).c_code(node, name, (x,), (out,), sub)

    def infer_shape(self, node, input_shapes):
        # The output is a 0-d (scalar) tensor.
        return [()]

    def grad(self, inp, grads):
        # Non-differentiable: a shape carries no gradient information.
        return [None]
class ShapeFeature(object): class ShapeFeature(object):
"""Graph optimizer for removing all calls to shape() """Graph optimizer for removing all calls to shape()
......
...@@ -611,6 +611,16 @@ theano.compile.register_view_op_c_code( ...@@ -611,6 +611,16 @@ theano.compile.register_view_op_c_code(
""", """,
version=1) version=1)
# Register TensorType C code for ViewOp.
theano.compile.register_shape_i_c_code(
TensorType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""",
version=1)
# Register TensorType C code for DeepCopyOp # Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
TensorType, TensorType,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论