提交 0366c559 作者: Brandon T. Willard 提交者: Brandon T. Willard

Make Op.perform an abstractmethod and provide Op type hints

This change makes `Op.perform` a mandatory (abstract) method. Because quite a few `Op`s have no Python implementation, those `Op`s now extend `_NoPython*Op` classes, whose `Op.perform` simply raises a `NotImplementedError`.
上级 5a1a147d
...@@ -118,6 +118,9 @@ class WeirdBrokenOp(COp): ...@@ -118,6 +118,9 @@ class WeirdBrokenOp(COp):
r = Apply(self, [a_], [a_.type()]) r = Apply(self, [a_], [a_.type()])
return r return r
def perform(*args, **kwargs):
raise NotImplementedError()
def dontuse_perform(self, node, inp, out_): def dontuse_perform(self, node, inp, out_):
(a,) = inp (a,) = inp
(out,) = out_ (out,) = out_
......
...@@ -41,6 +41,9 @@ class IncOneC(COp): ...@@ -41,6 +41,9 @@ class IncOneC(COp):
(z,) = outputs (z,) = outputs
return f"{z} = {x} + 1;" return f"{z} = {x} + 1;"
def perform(self, *args, **kwargs):
raise NotImplementedError()
class TestComputeTestValue: class TestComputeTestValue:
def test_destroy_map(self): def test_destroy_map(self):
......
...@@ -85,6 +85,9 @@ class MyOp(Op): ...@@ -85,6 +85,9 @@ class MyOp(Op):
outputs = [MyVariable(self.name + "_R") for i in range(self.nout)] outputs = [MyVariable(self.name + "_R") for i in range(self.nout)]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
def __str__(self): def __str__(self):
return self.name return self.name
......
...@@ -58,6 +58,9 @@ class MyOp(Op): ...@@ -58,6 +58,9 @@ class MyOp(Op):
outputs = [MyVariable(sum(input.type.thingy for input in inputs))] outputs = [MyVariable(sum(input.type.thingy for input in inputs))]
return Apply(self, list(inputs), outputs) return Apply(self, list(inputs), outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
MyOp = MyOp() MyOp = MyOp()
......
...@@ -60,6 +60,9 @@ class MyOp(Op): ...@@ -60,6 +60,9 @@ class MyOp(Op):
outputs = [MyType(sum([input.type.thingy for input in inputs]))()] outputs = [MyType(sum([input.type.thingy for input in inputs]))()]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
MyOp = MyOp() MyOp = MyOp()
...@@ -104,6 +107,9 @@ counter%(name)s++; ...@@ -104,6 +107,9 @@ counter%(name)s++;
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (1,)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
class TestOp: class TestOp:
...@@ -206,6 +212,9 @@ class TestMakeThunk: ...@@ -206,6 +212,9 @@ class TestMakeThunk:
(z,) = outputs (z,) = outputs
return f"{z} = {x} + 1;" return f"{z} = {x} + 1;"
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
i = scalar.int32("i") i = scalar.int32("i")
o = IncOneC()(i) o = IncOneC()(i)
......
...@@ -48,6 +48,9 @@ class TestNodeFinder: ...@@ -48,6 +48,9 @@ class TestNodeFinder:
def __str__(self): def __str__(self):
return self.name return self.name
def perform(self, *args, **kwargs):
raise NotImplementedError()
sigmoid = MyOp(1, "Sigmoid") sigmoid = MyOp(1, "Sigmoid")
add = MyOp(2, "Add") add = MyOp(2, "Add")
dot = MyOp(2, "Dot") dot = MyOp(2, "Dot")
......
...@@ -39,6 +39,9 @@ Py_INCREF(%(inp)s); ...@@ -39,6 +39,9 @@ Py_INCREF(%(inp)s);
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,) return (0,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
class GetOp(COp): class GetOp(COp):
__props__ = () __props__ = ()
...@@ -65,6 +68,9 @@ Py_INCREF(%(out)s); ...@@ -65,6 +68,9 @@ Py_INCREF(%(out)s);
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,) return (0,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
@pytest.mark.skipif( @pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test." not theano.config.cxx, reason="G++ not available, so we need to skip this test."
...@@ -192,6 +198,9 @@ class MyOpCEnumType(COp): ...@@ -192,6 +198,9 @@ class MyOpCEnumType(COp):
def make_node(self): def make_node(self):
return Apply(self, [], [scalar.uint32()]) return Apply(self, [], [scalar.uint32()])
def perform(self, *args, **kwargs):
raise NotImplementedError()
def c_code_cache_version(self): def c_code_cache_version(self):
return (3,) return (3,)
......
...@@ -4,7 +4,6 @@ import pytest ...@@ -4,7 +4,6 @@ import pytest
import theano import theano
from theano import config, tensor from theano import config, tensor
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gpuarray.basic_ops import CGpuKernelBase from theano.gpuarray.basic_ops import CGpuKernelBase
from theano.gpuarray.type import GpuArrayType, get_context, gpu_context_type from theano.gpuarray.type import GpuArrayType, get_context, gpu_context_type
...@@ -12,11 +11,11 @@ from theano.gradient import grad_undefined ...@@ -12,11 +11,11 @@ from theano.gradient import grad_undefined
from theano.scalar import int32 as int_t from theano.scalar import int32 as int_t
# This is an implementation to test that CGpuKernelBase works and also class GpuEye(CGpuKernelBase):
# to use as an example in the docs. It is not used for user graphs. """Eye for GPU.
class GpuEye(CGpuKernelBase, Op):
""" This is an implementation to test that `CGpuKernelBase` works and also
Eye for GPU. to use as an example in the docs. It is not used for user graphs.
""" """
...@@ -28,9 +27,7 @@ class GpuEye(CGpuKernelBase, Op): ...@@ -28,9 +27,7 @@ class GpuEye(CGpuKernelBase, Op):
dtype = config.floatX dtype = config.floatX
self.dtype = dtype self.dtype = dtype
self.context_name = context_name self.context_name = context_name
CGpuKernelBase.__init__( super().__init__(["c_code/tstgpueye.c"], "APPLY_SPECIFIC(tstgpueye)")
self, ["c_code/tstgpueye.c"], "APPLY_SPECIFIC(tstgpueye)"
)
def get_params(self, node): def get_params(self, node):
pygpu_gpuarray = pytest.importorskip("pygpu.gpuarray") pygpu_gpuarray = pytest.importorskip("pygpu.gpuarray")
......
...@@ -984,6 +984,9 @@ class ApplyDefaultTestOp(Op): ...@@ -984,6 +984,9 @@ class ApplyDefaultTestOp(Op):
x = tt.as_tensor_variable(x) x = tt.as_tensor_variable(x)
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def perform(self, *args, **kwargs):
raise NotImplementedError()
def test_constant(): def test_constant():
int8_vector_type = tt.TensorType(dtype="int8", broadcastable=(False,)) int8_vector_type = tt.TensorType(dtype="int8", broadcastable=(False,))
...@@ -3862,6 +3865,9 @@ class TestGrad: ...@@ -3862,6 +3865,9 @@ class TestGrad:
gz0, gz1 = grads gz0, gz1 = grads
return self.gval0, self.gval1 return self.gval0, self.gval1
def perform(self, *args, **kwargs):
raise NotImplementedError()
def test_1param(self): def test_1param(self):
# grad: Test passing a single variable param # grad: Test passing a single variable param
o = TestGrad.Obj1() o = TestGrad.Obj1()
......
...@@ -38,6 +38,9 @@ class MyOp(Op): ...@@ -38,6 +38,9 @@ class MyOp(Op):
outputs = [MyType()()] outputs = [MyType()()]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
def __str__(self): def __str__(self):
return self.name return self.name
......
...@@ -51,6 +51,9 @@ class TestGradSourcesInputs: ...@@ -51,6 +51,9 @@ class TestGradSourcesInputs:
(x,) = inp (x,) = inp
(gz,) = grads (gz,) = grads
def perform(self, *args, **kwargs):
raise NotImplementedError()
a = retNone().make_node() a = retNone().make_node()
with pytest.raises(TypeError): with pytest.raises(TypeError):
grad_sources_inputs([(a.out, one)], None) grad_sources_inputs([(a.out, one)], None)
...@@ -68,6 +71,9 @@ class TestGradSourcesInputs: ...@@ -68,6 +71,9 @@ class TestGradSourcesInputs:
def grad(self, inputs, grads): def grad(self, inputs, grads):
return [inputs[0].zeros_like()] return [inputs[0].zeros_like()]
def perform(self, *args, **kwargs):
raise NotImplementedError()
i = theano.tensor.vector() i = theano.tensor.vector()
j = theano.tensor.vector() j = theano.tensor.vector()
a1 = retOne().make_node(i) a1 = retOne().make_node(i)
...@@ -91,6 +97,9 @@ class TestGradSourcesInputs: ...@@ -91,6 +97,9 @@ class TestGradSourcesInputs:
def grad(self, inp, grads): def grad(self, inp, grads):
return (gval,) return (gval,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node() a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None) g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval assert g[a1.inputs[0]] is gval
...@@ -112,6 +121,9 @@ class TestGradSourcesInputs: ...@@ -112,6 +121,9 @@ class TestGradSourcesInputs:
gz1, gz2 = grads gz1, gz2 = grads
return (gval,) return (gval,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node() a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None) g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval assert g[a1.inputs[0]] is gval
...@@ -134,6 +146,9 @@ class TestGradSourcesInputs: ...@@ -134,6 +146,9 @@ class TestGradSourcesInputs:
(gz,) = grads (gz,) = grads
return (gval0, gval1) return (gval0, gval1)
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node() a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None) g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval0 assert g[a1.inputs[0]] is gval0
...@@ -155,6 +170,9 @@ class TestGradSourcesInputs: ...@@ -155,6 +170,9 @@ class TestGradSourcesInputs:
def grad(self, inp, grads): def grad(self, inp, grads):
return gval0, gval1 return gval0, gval1
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node() a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None) g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval0 assert g[a1.inputs[0]] is gval0
...@@ -190,6 +208,9 @@ class TestGrad: ...@@ -190,6 +208,9 @@ class TestGrad:
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [theano.gradient.grad_not_implemented(self, 0, inputs[0])] return [theano.gradient.grad_not_implemented(self, 0, inputs[0])]
def perform(self, *args, **kwargs):
raise NotImplementedError()
a = theano.tensor.scalar() a = theano.tensor.scalar()
b = DummyOp()(a) b = DummyOp()(a)
...@@ -208,6 +229,9 @@ class TestGrad: ...@@ -208,6 +229,9 @@ class TestGrad:
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [theano.gradient.grad_undefined(self, 0, inputs[0])] return [theano.gradient.grad_undefined(self, 0, inputs[0])]
def perform(self, *args, **kwargs):
raise NotImplementedError()
a = theano.tensor.scalar() a = theano.tensor.scalar()
b = DummyOp()(a) b = DummyOp()(a)
...@@ -380,6 +404,9 @@ class TestGrad: ...@@ -380,6 +404,9 @@ class TestGrad:
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [inputs[0].zeros_like()] return [inputs[0].zeros_like()]
def perform(self, *args, **kwargs):
raise NotImplementedError()
# Op2 has two inputs, f and g # Op2 has two inputs, f and g
# Its gradient with respect to g is not defined # Its gradient with respect to g is not defined
class Op2(Op): class Op2(Op):
...@@ -391,6 +418,9 @@ class TestGrad: ...@@ -391,6 +418,9 @@ class TestGrad:
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
return [inputs[0].zeros_like(), NullType()()] return [inputs[0].zeros_like(), NullType()()]
def perform(self, *args, **kwargs):
raise NotImplementedError()
x = theano.tensor.vector() x = theano.tensor.vector()
f, g = Op1()(x) f, g = Op1()(x)
cost = Op2()(f, g) cost = Op2()(f, g)
......
...@@ -581,6 +581,9 @@ class IfElseIfElseIf(Op): ...@@ -581,6 +581,9 @@ class IfElseIfElseIf(Op):
thunk.lazy = True thunk.lazy = True
return thunk return thunk
def perform(self, *args, **kwargs):
raise NotImplementedError()
class NotImplementedOpException(Exception): class NotImplementedOpException(Exception):
pass pass
...@@ -597,6 +600,9 @@ class NotImplementedOp(Op): ...@@ -597,6 +600,9 @@ class NotImplementedOp(Op):
thunk.lazy = False thunk.lazy = False
return thunk return thunk
def perform(self, *args, **kwargs):
raise NotImplementedError()
def test_ifelse(): def test_ifelse():
a = tt.scalar() a = tt.scalar()
......
...@@ -10,8 +10,22 @@ import inspect ...@@ -10,8 +10,22 @@ import inspect
import os import os
import re import re
import sys import sys
import typing
import warnings import warnings
from abc import abstractmethod
from typing import (
Any,
Callable,
ClassVar,
Dict,
List,
NoReturn,
Optional,
Pattern,
Set,
Text,
Tuple,
Union,
)
import numpy as np import numpy as np
...@@ -19,7 +33,7 @@ import theano ...@@ -19,7 +33,7 @@ import theano
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.fg import FunctionGraph from theano.gof.fg import FunctionGraph
from theano.gof.graph import Apply, NoParams, Variable from theano.gof.graph import Apply, NoParams, Variable
from theano.gof.params_type import ParamsType from theano.gof.params_type import Params, ParamsType
from theano.gof.utils import ( from theano.gof.utils import (
MetaObject, MetaObject,
MethodNotDefined, MethodNotDefined,
...@@ -30,15 +44,22 @@ from theano.gof.utils import ( ...@@ -30,15 +44,22 @@ from theano.gof.utils import (
from theano.link.c.interface import CLinkerOp from theano.link.c.interface import CLinkerOp
__authors__ = "theano-dev" __authors__ = "theano-dev" "PyMC Developers"
__copyright__ = "(c) 2010, Universite de Montreal" __copyright__ = "(c) 2010, Universite de Montreal"
__license__ = "3-clause BSD License"
__contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
StorageMapType = List[Optional[List[Any]]]
ComputeMapType = List[bool]
OutputStorageType = List[Optional[List[Any]]]
ParamsInputType = Optional[Tuple[Any]]
PerformMethodType = Callable[
[Apply, List[Any], OutputStorageType, ParamsInputType], NoReturn
]
ThunkType = Callable[[PerformMethodType, StorageMapType, ComputeMapType, Apply], Any]
def compute_test_value(node): def compute_test_value(node: Apply):
"""Computes the test value of a node. """Computes the test value of a node.
Parameters Parameters
...@@ -149,7 +170,7 @@ class Op(MetaObject): ...@@ -149,7 +170,7 @@ class Op(MetaObject):
""" """
def make_node(self, *inputs) -> Apply: def make_node(self, *inputs: Variable) -> Apply:
"""Construct an `Apply` node that represent the application of this operation to the given inputs. """Construct an `Apply` node that represent the application of this operation to the given inputs.
This must be implemented by sub-classes. This must be implemented by sub-classes.
...@@ -182,9 +203,7 @@ class Op(MetaObject): ...@@ -182,9 +203,7 @@ class Op(MetaObject):
) )
return Apply(self, inputs, [o() for o in self.otypes]) return Apply(self, inputs, [o() for o in self.otypes])
def __call__( def __call__(self, *inputs: Any, **kwargs) -> Union[Variable, List[Variable]]:
self, *inputs, **kwargs
) -> typing.Union[Variable, typing.List[Variable],]:
"""Construct an `Apply` node using `self.make_node` and return its outputs. """Construct an `Apply` node using `self.make_node` and return its outputs.
This method is just a wrapper around `Op.make_node`. This method is just a wrapper around `Op.make_node`.
...@@ -246,14 +265,16 @@ class Op(MetaObject): ...@@ -246,14 +265,16 @@ class Op(MetaObject):
else: else:
return node.outputs return node.outputs
def __ne__(self, other): def __ne__(self, other: Any) -> bool:
return not (self == other) return not (self == other)
# Convenience so that subclass implementers don't have to import utils # Convenience so that subclass implementers don't have to import utils
# just to self.add_tag_trace # just to self.add_tag_trace
add_tag_trace = staticmethod(add_tag_trace) add_tag_trace = staticmethod(add_tag_trace)
def grad(self, inputs, output_grads): def grad(
self, inputs: List[Variable], output_grads: List[Variable]
) -> List[Variable]:
"""Construct a graph for the gradient with respect to each input variable. """Construct a graph for the gradient with respect to each input variable.
Each returned `Variable` represents the gradient with respect to that Each returned `Variable` represents the gradient with respect to that
...@@ -277,7 +298,12 @@ class Op(MetaObject): ...@@ -277,7 +298,12 @@ class Op(MetaObject):
""" """
raise NotImplementedError() raise NotImplementedError()
def L_op(self, inputs, outputs, output_grads): def L_op(
self,
inputs: List[Variable],
outputs: List[Variable],
output_grads: List[Variable],
) -> List[Variable]:
r"""Construct a graph for the L-operator. r"""Construct a graph for the L-operator.
This method is primarily used by `tensor.Lop` and dispatches to This method is primarily used by `tensor.Lop` and dispatches to
...@@ -298,7 +324,9 @@ class Op(MetaObject): ...@@ -298,7 +324,9 @@ class Op(MetaObject):
""" """
return self.grad(inputs, output_grads) return self.grad(inputs, output_grads)
def R_op(self, inputs, eval_points): def R_op(
self, inputs: List[Variable], eval_points: Union[Variable, List[Variable]]
) -> List[Variable]:
"""Construct a graph for the R-operator. """Construct a graph for the R-operator.
This method is primarily used by tensor.Rop This method is primarily used by tensor.Rop
...@@ -325,10 +353,15 @@ class Op(MetaObject): ...@@ -325,10 +353,15 @@ class Op(MetaObject):
""" """
raise NotImplementedError() raise NotImplementedError()
def perform(self, node, inputs, output_storage, params=None): @abstractmethod
""" def perform(
Required: Calculate the function on the inputs and put the variables in self,
the output storage. Return None. node: Apply,
inputs: List[Variable],
output_storage: OutputStorageType,
params: ParamsInputType = None,
) -> NoReturn:
"""Calculate the function on the inputs and put the variables in the output storage.
Parameters Parameters
---------- ----------
...@@ -358,21 +391,9 @@ class Op(MetaObject): ...@@ -358,21 +391,9 @@ class Op(MetaObject):
A `Op` is free to reuse `output_storage` as it sees fit, or to A `Op` is free to reuse `output_storage` as it sees fit, or to
discard it and allocate new memory. discard it and allocate new memory.
Raises
------
MethodNotDefined
The subclass does not override this method.
""" """
raise MethodNotDefined(
"perform",
type(self),
self.__class__.__name__,
"Did you used Theano flags mode=FAST_COMPILE?"
" You can use optimizer=fast_compile instead.",
)
def do_constant_folding(self, fgraph: FunctionGraph, node: Apply): def do_constant_folding(self, fgraph: FunctionGraph, node: Apply) -> bool:
"""Determine whether or not constant folding should be performed for the given node. """Determine whether or not constant folding should be performed for the given node.
This allows each `Op` to determine if it wants to be constant This allows each `Op` to determine if it wants to be constant
...@@ -393,9 +414,8 @@ class Op(MetaObject): ...@@ -393,9 +414,8 @@ class Op(MetaObject):
""" """
return True return True
# We add a default get_params() implementation which will try to detect params from the op def get_params(self, node: Apply) -> Params:
# if params_type is set to a ParamsType. If not, we raise a MethodNotDefined exception. """Try to detect params from the op if `Op.params_type` is set to a `ParamsType`."""
def get_params(self, node):
if hasattr(self, "params_type") and isinstance(self.params_type, ParamsType): if hasattr(self, "params_type") and isinstance(self.params_type, ParamsType):
wrapper = self.params_type wrapper = self.params_type
if not all(hasattr(self, field) for field in wrapper.fields): if not all(hasattr(self, field) for field in wrapper.fields):
...@@ -410,10 +430,14 @@ class Op(MetaObject): ...@@ -410,10 +430,14 @@ class Op(MetaObject):
return self.params_type.get_params(self) return self.params_type.get_params(self)
raise MethodNotDefined("get_params") raise MethodNotDefined("get_params")
def prepare_node(self, node, storage_map, compute_map, impl): def prepare_node(
""" self,
Make any special modifications that the Op needs before doing node: Apply,
make_thunk(). storage_map: StorageMapType,
compute_map: ComputeMapType,
impl: Optional[Text],
) -> NoReturn:
"""Make any special modifications that the Op needs before doing `Op.make_thunk`.
This can modify the node inplace and should return nothing. This can modify the node inplace and should return nothing.
...@@ -423,9 +447,17 @@ class Op(MetaObject): ...@@ -423,9 +447,17 @@ class Op(MetaObject):
""" """
def make_py_thunk(self, node, storage_map, compute_map, no_recycling, debug=False): def make_py_thunk(
""" self,
Like make_thunk() but only makes python thunks. node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
no_recycling: bool,
debug: bool = False,
) -> ThunkType:
"""Make a Python thunk.
Like `Op.make_thunk` but only makes python thunks.
""" """
node_input_storage = [storage_map[r] for r in node.inputs] node_input_storage = [storage_map[r] for r in node.inputs]
...@@ -467,7 +499,14 @@ class Op(MetaObject): ...@@ -467,7 +499,14 @@ class Op(MetaObject):
rval.lazy = False rval.lazy = False
return rval return rval
def make_thunk(self, node, storage_map, compute_map, no_recycling, impl=None): def make_thunk(
self,
node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
no_recycling: bool,
impl: Optional[Text] = None,
) -> ThunkType:
"""Create a thunk. """Create a thunk.
This function must return a thunk, that is a zero-arguments This function must return a thunk, that is a zero-arguments
...@@ -513,8 +552,18 @@ class Op(MetaObject): ...@@ -513,8 +552,18 @@ class Op(MetaObject):
class COp(Op, CLinkerOp): class COp(Op, CLinkerOp):
"""An `Op` with a C implementation.""" """An `Op` with a C implementation."""
def make_c_thunk(self, node, storage_map, compute_map, no_recycling): def make_c_thunk(
"""Like make_thunk, but will only try to make a C thunk.""" self,
node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
no_recycling: bool,
) -> ThunkType:
"""Create a thunk for a C implementation.
Like `Op.make_thunk`, but will only try to make a C thunk.
"""
# FIXME: Putting the following import on the module level causes an import cycle. # FIXME: Putting the following import on the module level causes an import cycle.
# The conclusion should be that the antire "make_c_thunk" method should be defined # The conclusion should be that the antire "make_c_thunk" method should be defined
# in theano.link.c and dispatched onto the Op! # in theano.link.c and dispatched onto the Op!
...@@ -593,7 +642,7 @@ class COp(Op, CLinkerOp): ...@@ -593,7 +642,7 @@ class COp(Op, CLinkerOp):
) )
def get_test_value(v): def get_test_value(v: Variable) -> Any:
"""Get the test value for `v`. """Get the test value for `v`.
If input `v` is not already a variable, it is turned into one by calling If input `v` is not already a variable, it is turned into one by calling
...@@ -610,7 +659,7 @@ def get_test_value(v): ...@@ -610,7 +659,7 @@ def get_test_value(v):
return v.get_test_value() return v.get_test_value()
def missing_test_message(msg): def missing_test_message(msg: Text) -> NoReturn:
""" """
Displays msg, a message saying that some test_value is missing, Displays msg, a message saying that some test_value is missing,
in the appropriate form based on config.compute_test_value: in the appropriate form based on config.compute_test_value:
...@@ -635,8 +684,9 @@ def missing_test_message(msg): ...@@ -635,8 +684,9 @@ def missing_test_message(msg):
assert action in ["ignore", "off"] assert action in ["ignore", "off"]
def get_test_values(*args): def get_test_values(*args: Variable) -> Union[Any, List[Any]]:
""" """Get test values for multiple `Variable`s.
Intended use: Intended use:
for val_1, ..., val_n in get_debug_values(var_1, ..., var_n): for val_1, ..., val_n in get_debug_values(var_1, ..., var_n):
...@@ -681,7 +731,7 @@ def get_test_values(*args): ...@@ -681,7 +731,7 @@ def get_test_values(*args):
return [tuple(rval)] return [tuple(rval)]
ops_with_inner_function = {} ops_with_inner_function: Dict[Op, Text] = {}
""" """
Registry of Ops that have an inner compiled Theano function. Registry of Ops that have an inner compiled Theano function.
...@@ -711,18 +761,18 @@ class OpenMPOp(COp): ...@@ -711,18 +761,18 @@ class OpenMPOp(COp):
""" """
gxx_support_openmp = None gxx_support_openmp: Optional[bool] = None
""" """
True/False after we tested this. True/False after we tested this.
""" """
def __init__(self, openmp=None): def __init__(self, openmp: Optional[bool] = None):
if openmp is None: if openmp is None:
openmp = config.openmp openmp = config.openmp
self.openmp = openmp self.openmp = openmp
def __setstate__(self, d): def __setstate__(self, d: Dict):
self.__dict__.update(d) self.__dict__.update(d)
# If we unpickle old op # If we unpickle old op
if not hasattr(self, "openmp"): if not hasattr(self, "openmp"):
...@@ -748,9 +798,7 @@ class OpenMPOp(COp): ...@@ -748,9 +798,7 @@ class OpenMPOp(COp):
@staticmethod @staticmethod
def test_gxx_support(): def test_gxx_support():
""" """Check if openMP is supported."""
Check if openMP is supported
"""
from theano.link.c.cmodule import GCC_compiler from theano.link.c.cmodule import GCC_compiler
code = """ code = """
...@@ -769,7 +817,7 @@ int main( int argc, const char* argv[] ) ...@@ -769,7 +817,7 @@ int main( int argc, const char* argv[] )
) )
return default_openmp return default_openmp
def update_self_openmp(self): def update_self_openmp(self) -> NoReturn:
""" """
Make sure self.openmp is not True if there is no support in gxx. Make sure self.openmp is not True if there is no support in gxx.
...@@ -797,21 +845,60 @@ int main( int argc, const char* argv[] ) ...@@ -797,21 +845,60 @@ int main( int argc, const char* argv[] )
self.update_self_openmp() self.update_self_openmp()
def lquote_macro(txt: Text) -> Text:
"""Turn the last line of text into a ``\\``-commented line."""
res = []
spl = txt.split("\n")
for l in spl[:-1]:
res.append(l + " \\")
res.append(spl[-1])
return "\n".join(res)
def get_sub_macros(sub: Dict[Text, Text]) -> Tuple[Text]:
define_macros = []
undef_macros = []
define_macros.append(f"#define FAIL {lquote_macro(sub['fail'])}")
undef_macros.append("#undef FAIL")
if "params" in sub:
define_macros.append(f"#define PARAMS {sub['params']}")
undef_macros.append("#undef PARAMS")
return "\n".join(define_macros), "\n".join(undef_macros)
def get_io_macros(inputs: List[Text], outputs: List[Text]) -> Tuple[List[Text]]:
define_macros = []
undef_macros = []
for i, inp in enumerate(inputs):
define_macros.append(f"#define INPUT_{int(i)} {inp}")
undef_macros.append(f"#undef INPUT_{int(i)}")
for i, out in enumerate(outputs):
define_macros.append(f"#define OUTPUT_{int(i)} {out}")
undef_macros.append(f"#undef OUTPUT_{int(i)}")
return "\n".join(define_macros), "\n".join(undef_macros)
class ExternalCOp(COp): class ExternalCOp(COp):
""" """Class for an `Op` with an external C implementation.
Class to allow an op to have an external C implementation.
An op can use this class by inheriting from it and calling its One can inherit from this class, provide its constructor with a path to
__init__() method, providing it with a path to an external file containing an external C source file and the name of a function within it, and define
the C implementation and the name of the function, in that file, to call an `Op` for said function.
to perform the computations for the op.
""" """
section_re = re.compile(r"^#section ([a-zA-Z0-9_]+)$", re.MULTILINE) section_re: ClassVar[Pattern] = re.compile(
backward_re = re.compile(r"^THEANO_(APPLY|SUPPORT)_CODE_SECTION$", re.MULTILINE) r"^#section ([a-zA-Z0-9_]+)$", re.MULTILINE
)
backward_re: ClassVar[Pattern] = re.compile(
r"^THEANO_(APPLY|SUPPORT)_CODE_SECTION$", re.MULTILINE
)
# This is the set of allowed markers # This is the set of allowed markers
SECTIONS = { SECTIONS: ClassVar[Set[Text]] = {
"init_code", "init_code",
"init_code_apply", "init_code_apply",
"init_code_struct", "init_code_struct",
...@@ -824,11 +911,10 @@ class ExternalCOp(COp): ...@@ -824,11 +911,10 @@ class ExternalCOp(COp):
} }
@classmethod @classmethod
def get_path(cls, f): def get_path(cls, f: Text) -> Text:
""" """Convert a path relative to the location of the class file into an absolute path.
Convert a path relative to the location of the class file into
an aboslute path. Paths that are already absolute are passed Paths that are already absolute are passed through unchanged.
through unchanged.
""" """
if not os.path.isabs(f): if not os.path.isabs(f):
...@@ -837,7 +923,9 @@ class ExternalCOp(COp): ...@@ -837,7 +923,9 @@ class ExternalCOp(COp):
f = os.path.realpath(os.path.join(class_dir, f)) f = os.path.realpath(os.path.join(class_dir, f))
return f return f
def __init__(self, func_files, func_name=None): def __init__(
self, func_files: Union[Text, List[Text]], func_name: Optional[Text] = None
):
""" """
Sections are loaded from files in order with sections in later Sections are loaded from files in order with sections in later
files overriding sections in previous files. files overriding sections in previous files.
...@@ -868,10 +956,8 @@ class ExternalCOp(COp): ...@@ -868,10 +956,8 @@ class ExternalCOp(COp):
"and specify the func_name" "and specify the func_name"
) )
def load_c_code(self, func_files): def load_c_code(self, func_files: List[Text]) -> NoReturn:
""" """Loads the C code to perform the `Op`."""
Loads the c code to perform the Op
"""
func_files = [self.get_path(f) for f in func_files] func_files = [self.get_path(f) for f in func_files]
self.func_codes = [] self.func_codes = []
for func_file in func_files: for func_file in func_files:
...@@ -940,10 +1026,8 @@ class ExternalCOp(COp): ...@@ -940,10 +1026,8 @@ class ExternalCOp(COp):
f"No valid section marker was found in file {func_files[i]}" f"No valid section marker was found in file {func_files[i]}"
) )
def __get_op_params(self): def __get_op_params(self) -> List[Text]:
""" """Construct name, value pairs that will be turned into macros for use within the `Op`'s code.
Returns a list of (name, value) pairs that will be turned into
macros for use within the op code.
The names must be strings that are not a C keyword and the The names must be strings that are not a C keyword and the
values must be strings of literal C representations. values must be strings of literal C representations.
...@@ -1031,10 +1115,12 @@ class ExternalCOp(COp): ...@@ -1031,10 +1115,12 @@ class ExternalCOp(COp):
else: else:
return super().c_cleanup_code_struct(node, name) return super().c_cleanup_code_struct(node, name)
def format_c_function_args(self, inp, out): def format_c_function_args(self, inp: List[Text], out: List[Text]) -> Text:
# Generate an string containing the arguments sent to the external C """Generate a string containing the arguments sent to the external C function.
# function. The argstring will be of format :
# "input0, input1, input2, &output0, &output1" The result will have the format: ``"input0, input1, input2, &output0, &output1"``.
"""
inp = list(inp) inp = list(inp)
numi = getattr(self, "_cop_num_inputs", len(inp)) numi = getattr(self, "_cop_num_inputs", len(inp))
while len(inp) < numi: while len(inp) < numi:
...@@ -1045,7 +1131,10 @@ class ExternalCOp(COp): ...@@ -1045,7 +1131,10 @@ class ExternalCOp(COp):
out.append("NULL") out.append("NULL")
return ", ".join(inp + out) return ", ".join(inp + out)
def get_c_macros(self, node, name, check_input=None): def get_c_macros(
self, node: Apply, name: Text, check_input: Optional[bool] = None
) -> Tuple[Text]:
"Construct a pair of C ``#define`` and ``#undef`` code strings."
define_template = "#define %s %s" define_template = "#define %s %s"
undef_template = "#undef %s" undef_template = "#undef %s"
define_macros = [] define_macros = []
...@@ -1097,37 +1186,6 @@ class ExternalCOp(COp): ...@@ -1097,37 +1186,6 @@ class ExternalCOp(COp):
return "\n".join(define_macros), "\n".join(undef_macros) return "\n".join(define_macros), "\n".join(undef_macros)
def _lquote_macro(self, txt):
    """Make a multi-line string usable as the body of a C macro.

    Every line of ``txt`` except the last one gets a space and a backslash
    appended, so that the C preprocessor treats the next line as a
    continuation.  The lines are then joined back together with newlines.
    """
    # ``str.split`` always yields at least one item, so ``last`` is always bound.
    *continued, last = txt.split("\n")
    quoted = [line + " \\" for line in continued]
    quoted.append(last)
    return "\n".join(quoted)
def get_sub_macros(self, sub):
    """Build the ``FAIL`` and ``PARAMS`` macro text for a ``sub`` mapping.

    ``sub["fail"]`` is always turned into a ``FAIL`` macro, after being passed
    through ``self._lquote_macro``.  A ``PARAMS`` macro is emitted only when
    ``sub`` has a ``"params"`` entry.

    Returns
    -------
    tuple of (str, str)
        The newline-joined ``#define`` lines and the newline-joined
        ``#undef`` lines.
    """
    fail_code = self._lquote_macro(sub["fail"])
    defines = [f"#define FAIL {fail_code}"]
    undefs = ["#undef FAIL"]

    if "params" in sub:
        defines.append(f"#define PARAMS {sub['params']}")
        undefs.append("#undef PARAMS")

    return "\n".join(defines), "\n".join(undefs)
def get_io_macros(self, inputs, outputs):
    """Build ``INPUT_<i>`` and ``OUTPUT_<i>`` macro text.

    Parameters
    ----------
    inputs
        Iterable of values; the i-th one becomes the body of ``INPUT_<i>``.
    outputs
        Iterable of values; the i-th one becomes the body of ``OUTPUT_<i>``.

    Returns
    -------
    tuple of (str, str)
        The newline-joined ``#define`` lines and the newline-joined
        ``#undef`` lines, inputs first and outputs after.
    """
    define_macros = []
    undef_macros = []

    for i, inp in enumerate(inputs):
        define_macros.append(f"#define INPUT_{i} {inp}")
        undef_macros.append(f"#undef INPUT_{i}")

    for i, out in enumerate(outputs):
        # Each OUTPUT macro must expand to its own output value; using the
        # leftover `inp` loop variable would repeat the last input (or fail
        # with a NameError when `inputs` is empty).
        define_macros.append(f"#define OUTPUT_{i} {out}")
        undef_macros.append(f"#undef OUTPUT_{i}")

    # Callers unpack a (defines, undefs) pair from this call, so the pair
    # has to be returned explicitly.
    return "\n".join(define_macros), "\n".join(undef_macros)
def c_init_code_struct(self, node, name, sub): def c_init_code_struct(self, node, name, sub):
""" """
Stitches all the macros and "init_code" together Stitches all the macros and "init_code" together
...@@ -1137,7 +1195,7 @@ class ExternalCOp(COp): ...@@ -1137,7 +1195,7 @@ class ExternalCOp(COp):
op_code = self.code_sections["init_code_struct"] op_code = self.code_sections["init_code_struct"]
def_macros, undef_macros = self.get_c_macros(node, name) def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub) def_sub, undef_sub = get_sub_macros(sub)
return "\n".join( return "\n".join(
["", def_macros, def_sub, op_code, undef_sub, undef_macros] ["", def_macros, def_sub, op_code, undef_sub, undef_macros]
...@@ -1179,8 +1237,8 @@ class ExternalCOp(COp): ...@@ -1179,8 +1237,8 @@ class ExternalCOp(COp):
op_code = self.code_sections["code"] op_code = self.code_sections["code"]
def_macros, undef_macros = self.get_c_macros(node, name) def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub) def_sub, undef_sub = get_sub_macros(sub)
def_io, undef_io = self.get_io_macros(inp, out) def_io, undef_io = get_io_macros(inp, out)
return "\n".join( return "\n".join(
[ [
...@@ -1204,8 +1262,8 @@ class ExternalCOp(COp): ...@@ -1204,8 +1262,8 @@ class ExternalCOp(COp):
op_code = self.code_sections["code_cleanup"] op_code = self.code_sections["code_cleanup"]
def_macros, undef_macros = self.get_c_macros(node, name) def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub) def_sub, undef_sub = get_sub_macros(sub)
def_io, undef_io = self.get_io_macros(inputs, outputs) def_io, undef_io = get_io_macros(inputs, outputs)
return "\n".join( return "\n".join(
[ [
...@@ -1220,3 +1278,38 @@ class ExternalCOp(COp): ...@@ -1220,3 +1278,38 @@ class ExternalCOp(COp):
) )
else: else:
return super().c_code_cleanup(node, name, inputs, outputs, sub) return super().c_code_cleanup(node, name, inputs, outputs, sub)
class _NoPythonOp(Op):
    """Marker base class for an `Op` that has no Python implementation.

    Its `perform` unconditionally raises `NotImplementedError`.

    XXX: Do not use this class; it only exists to track bad implementations
    internally.
    """

    def perform(self, node, inputs, output_storage, params=None):
        message = "No Python implementation is provided by this Op."
        raise NotImplementedError(message)
class _NoPythonCOp(COp):
    """Marker base class for a `COp` that has no Python implementation.

    Its `perform` unconditionally raises `NotImplementedError`.

    XXX: Do not use this class; it only exists to track bad implementations
    internally.
    """

    def perform(self, node, inputs, output_storage, params=None):
        message = "No Python implementation is provided by this COp."
        raise NotImplementedError(message)
class _NoPythonExternalCOp(ExternalCOp):
    """Marker base class for an `ExternalCOp` that has no Python implementation.

    Its `perform` unconditionally raises `NotImplementedError`.

    XXX: Do not use this class; it only exists to track bad implementations
    internally.
    """

    def perform(self, node, inputs, output_storage, params=None):
        message = "No Python implementation is provided by this ExternalCOp."
        raise NotImplementedError(message)
...@@ -9,7 +9,7 @@ import theano ...@@ -9,7 +9,7 @@ import theano
from theano import tensor from theano import tensor
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.graph import Apply, Variable from theano.gof.graph import Apply, Variable
from theano.gof.op import COp, ExternalCOp, Op from theano.gof.op import COp, ExternalCOp, Op, _NoPythonOp
from theano.gof.opt import copy_stack_trace from theano.gof.opt import copy_stack_trace
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gof.type import CType from theano.gof.type import CType
...@@ -493,6 +493,14 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{ ...@@ -493,6 +493,14 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
return (9,) return (9,)
class GpuKernelBaseCOp(GpuKernelBase, COp):
    """A `COp` that also inherits from `GpuKernelBase`.

    Defines no members of its own; it only fixes the base-class combination
    (with `GpuKernelBase` first in the bases) for subclasses to share.
    """
class GpuKernelBaseExternalCOp(GpuKernelBase, ExternalCOp):
    """An `ExternalCOp` that also inherits from `GpuKernelBase`.

    Defines no members of its own; it only fixes the base-class combination
    (with `GpuKernelBase` first in the bases) for subclasses to share.
    """
def forward_string_meth(name): def forward_string_meth(name):
def f(*args): def f(*args):
res = getattr(GpuKernelBase, name)(*args) res = getattr(GpuKernelBase, name)(*args)
...@@ -517,7 +525,7 @@ def get_dtype(s): ...@@ -517,7 +525,7 @@ def get_dtype(s):
return np.dtype(s) return np.dtype(s)
class CGpuKernelBase(ExternalCOp, GpuKernelBase): class CGpuKernelBase(GpuKernelBaseExternalCOp, _NoPythonOp):
""" """
Class to combine GpuKernelBase and ExternalCOp. Class to combine GpuKernelBase and ExternalCOp.
...@@ -1498,7 +1506,7 @@ class GpuJoin(HideC, Join): ...@@ -1498,7 +1506,7 @@ class GpuJoin(HideC, Join):
gpu_join = GpuJoin() gpu_join = GpuJoin()
class GpuSplit(HideC, Split): class GpuSplit(HideC, Split, _NoPythonOp):
""" """
Split for GPU. Split for GPU.
...@@ -1748,7 +1756,7 @@ def profile_printer( ...@@ -1748,7 +1756,7 @@ def profile_printer(
print("", file=file) print("", file=file)
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBaseCOp, _NoPythonOp):
""" """
Eye for GPU. Eye for GPU.
...@@ -1882,7 +1890,7 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off, ...@@ -1882,7 +1890,7 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off,
return (10,) return (10,)
class GpuTri(GpuKernelBase, Op): class GpuTri(GpuKernelBaseCOp, _NoPythonOp):
""" """
Tri for GPU. Tri for GPU.
......
import theano import theano
from theano.compile import optdb from theano.compile import optdb
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import COp from theano.gof.op import _NoPythonCOp
from theano.gof.opt import LocalOptGroup from theano.gof.opt import LocalOptGroup
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.scalar import bool as bool_t from theano.scalar import bool as bool_t
...@@ -27,7 +27,7 @@ except ImportError: ...@@ -27,7 +27,7 @@ except ImportError:
pass pass
class BlasOp(COp): class BlasOp(_NoPythonCOp):
def c_headers(self, **kwargs): def c_headers(self, **kwargs):
return ["<blas_api.h>", "<numpy_compat.h>", "<gpuarray_helper.h>"] return ["<blas_api.h>", "<numpy_compat.h>", "<gpuarray_helper.h>"]
...@@ -412,7 +412,7 @@ class GpuDot22(BlasOp): ...@@ -412,7 +412,7 @@ class GpuDot22(BlasOp):
gpu_dot22 = GpuDot22() gpu_dot22 = GpuDot22()
class GpuGemmBatch(BlasOp): class GpuGemmBatch(BlasOp, _NoPythonCOp):
params_type = ParamsType(inplace=bool_t) params_type = ParamsType(inplace=bool_t)
__props__ = ("inplace",) __props__ = ("inplace",)
_f16_ok = True _f16_ok = True
...@@ -1009,7 +1009,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -1009,7 +1009,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
) )
class GpuCorrMM(BaseGpuCorrMM): class GpuCorrMM(BaseGpuCorrMM, _NoPythonCOp):
""" """
GPU correlation implementation using Matrix Multiplication. GPU correlation implementation using Matrix Multiplication.
...@@ -1129,7 +1129,7 @@ class GpuCorrMM(BaseGpuCorrMM): ...@@ -1129,7 +1129,7 @@ class GpuCorrMM(BaseGpuCorrMM):
return d_bottom, d_weights return d_bottom, d_weights
class GpuCorrMM_gradWeights(BaseGpuCorrMM): class GpuCorrMM_gradWeights(BaseGpuCorrMM, _NoPythonCOp):
""" """
Gradient wrt. filters for `GpuCorrMM`. Gradient wrt. filters for `GpuCorrMM`.
...@@ -1235,7 +1235,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -1235,7 +1235,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
return [[1], [1], [0], [0]] # no connection to height, width return [[1], [1], [0], [0]] # no connection to height, width
class GpuCorrMM_gradInputs(BaseGpuCorrMM): class GpuCorrMM_gradInputs(BaseGpuCorrMM, _NoPythonCOp):
""" """
Gradient wrt. inputs for `GpuCorrMM`. Gradient wrt. inputs for `GpuCorrMM`.
...@@ -1337,7 +1337,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM): ...@@ -1337,7 +1337,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
return [[1], [1], [0], [0]] # no connection to height, width return [[1], [1], [0], [0]] # no connection to height, width
class BaseGpuCorr3dMM(CGpuKernelBase): class BaseGpuCorr3dMM(CGpuKernelBase, _NoPythonCOp):
""" """
Base class for `GpuCorr3dMM`, `GpuCorr3dMM_gradWeights` and Base class for `GpuCorr3dMM`, `GpuCorr3dMM_gradWeights` and
`GpuCorr3dMM_gradInputs`. Cannot be used directly. `GpuCorr3dMM_gradInputs`. Cannot be used directly.
...@@ -1777,7 +1777,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1777,7 +1777,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
) )
class GpuCorr3dMM(BaseGpuCorr3dMM): class GpuCorr3dMM(BaseGpuCorr3dMM, _NoPythonCOp):
""" """
GPU correlation implementation using Matrix Multiplication. GPU correlation implementation using Matrix Multiplication.
...@@ -1881,7 +1881,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM): ...@@ -1881,7 +1881,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
return d_bottom, d_weights return d_bottom, d_weights
class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM): class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM, _NoPythonCOp):
""" """
Gradient wrt. filters for `GpuCorr3dMM`. Gradient wrt. filters for `GpuCorr3dMM`.
...@@ -1970,7 +1970,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM): ...@@ -1970,7 +1970,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
return [[1], [1], [0], [0], [0]] # no connection to height, width, depth return [[1], [1], [0], [0], [0]] # no connection to height, width, depth
class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM, _NoPythonCOp):
""" """
Gradient wrt. inputs for `GpuCorr3dMM`. Gradient wrt. inputs for `GpuCorr3dMM`.
......
...@@ -4,7 +4,7 @@ import numpy as np ...@@ -4,7 +4,7 @@ import numpy as np
from theano import tensor from theano import tensor
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import ExternalCOp from theano.gof.op import _NoPythonExternalCOp
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from theano.scalar import bool as bool_t from theano.scalar import bool as bool_t
...@@ -17,7 +17,7 @@ from .type import gpu_context_type ...@@ -17,7 +17,7 @@ from .type import gpu_context_type
_logger = logging.getLogger("theano.gpuarray.blocksparse") _logger = logging.getLogger("theano.gpuarray.blocksparse")
class GpuSparseBlockGemv(ExternalCOp): class GpuSparseBlockGemv(_NoPythonExternalCOp):
""" """
GPU version of SparseBlockGemv. Check SparseBlockGemv's docstring for more GPU version of SparseBlockGemv. Check SparseBlockGemv's docstring for more
information. information.
...@@ -32,7 +32,7 @@ class GpuSparseBlockGemv(ExternalCOp): ...@@ -32,7 +32,7 @@ class GpuSparseBlockGemv(ExternalCOp):
# NB: DTYPE_INPUT_* is used in C code, so I think we should not set check_input to False. # NB: DTYPE_INPUT_* is used in C code, so I think we should not set check_input to False.
def __init__(self, inplace=False): def __init__(self, inplace=False):
ExternalCOp.__init__(self, "c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)") super().__init__("c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -92,7 +92,7 @@ gpu_sparse_block_gemv = GpuSparseBlockGemv(False) ...@@ -92,7 +92,7 @@ gpu_sparse_block_gemv = GpuSparseBlockGemv(False)
gpu_sparse_block_gemv_inplace = GpuSparseBlockGemv(True) gpu_sparse_block_gemv_inplace = GpuSparseBlockGemv(True)
class GpuSparseBlockOuter(ExternalCOp): class GpuSparseBlockOuter(_NoPythonExternalCOp):
""" """
GPU version of SparseBlockOuter. See SparseBlockOuter's docstring for more GPU version of SparseBlockOuter. See SparseBlockOuter's docstring for more
information. information.
...@@ -106,7 +106,7 @@ class GpuSparseBlockOuter(ExternalCOp): ...@@ -106,7 +106,7 @@ class GpuSparseBlockOuter(ExternalCOp):
params_type = ParamsType(inplace=bool_t, context=gpu_context_type) params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
def __init__(self, inplace=False): def __init__(self, inplace=False):
ExternalCOp.__init__(self, ["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)") super().__init__(["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)")
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
......
...@@ -4,7 +4,7 @@ import sys ...@@ -4,7 +4,7 @@ import sys
import theano.tensor as tt import theano.tensor as tt
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import ExternalCOp from theano.gof.op import _NoPythonExternalCOp
from theano.gof.opt import local_optimizer from theano.gof.opt import local_optimizer
from theano.gpuarray import pygpu from theano.gpuarray import pygpu
from theano.gpuarray.basic_ops import ( from theano.gpuarray.basic_ops import (
...@@ -20,7 +20,7 @@ from theano.tensor.nnet.ctc import ctc_available ...@@ -20,7 +20,7 @@ from theano.tensor.nnet.ctc import ctc_available
from theano.tensor.opt import register_canonicalize from theano.tensor.opt import register_canonicalize
class GpuConnectionistTemporalClassification(ExternalCOp): class GpuConnectionistTemporalClassification(_NoPythonExternalCOp):
""" """
GPU wrapper for Baidu CTC loss function. GPU wrapper for Baidu CTC loss function.
......
...@@ -12,7 +12,7 @@ from theano import tensor ...@@ -12,7 +12,7 @@ from theano import tensor
from theano.compile.ops import shape_i, shape_i_op from theano.compile.ops import shape_i, shape_i_op
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME, config from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME, config
from theano.gof.graph import Apply, Variable from theano.gof.graph import Apply, Variable
from theano.gof.op import COp, ExternalCOp from theano.gof.op import ExternalCOp, _NoPythonCOp, _NoPythonExternalCOp
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gof.type import CDataType, EnumList, Generic from theano.gof.type import CDataType, EnumList, Generic
from theano.gpuarray import cudnn_defs, pygpu from theano.gpuarray import cudnn_defs, pygpu
...@@ -302,7 +302,7 @@ class MakerCDataType(CDataType): ...@@ -302,7 +302,7 @@ class MakerCDataType(CDataType):
return self._get_func()(ptr) return self._get_func()(ptr)
class CDataMaker(COp): class CDataMaker(_NoPythonCOp):
"""This is the equally lame `Op` that accompanies `MakerCDataType`.""" """This is the equally lame `Op` that accompanies `MakerCDataType`."""
__props__ = ("rtype",) __props__ = ("rtype",)
...@@ -350,7 +350,7 @@ def CUDNNDataType(name, freefunc=None): ...@@ -350,7 +350,7 @@ def CUDNNDataType(name, freefunc=None):
) )
class DnnVersion(COp): class DnnVersion(_NoPythonCOp):
__props__ = () __props__ = ()
def c_headers(self, **kwargs): def c_headers(self, **kwargs):
...@@ -460,7 +460,7 @@ def get_precision(precision, inputs, for_grad=False): ...@@ -460,7 +460,7 @@ def get_precision(precision, inputs, for_grad=False):
return precision, common_dtype return precision, common_dtype
class DnnBase(ExternalCOp): class DnnBase(_NoPythonExternalCOp):
""" """
Creates a handle for cudnn and pulls in the cudnn libraries and headers. Creates a handle for cudnn and pulls in the cudnn libraries and headers.
...@@ -496,7 +496,7 @@ class DnnBase(ExternalCOp): ...@@ -496,7 +496,7 @@ class DnnBase(ExternalCOp):
def __init__(self, files=None, c_func=None): def __init__(self, files=None, c_func=None):
if files is None: if files is None:
files = [] files = []
ExternalCOp.__init__(self, ["c_code/dnn_base.c"] + files, c_func) super().__init__(["c_code/dnn_base.c"] + files, c_func)
def c_headers(self, **kwargs): def c_headers(self, **kwargs):
return [ return [
...@@ -535,7 +535,7 @@ class DnnBase(ExternalCOp): ...@@ -535,7 +535,7 @@ class DnnBase(ExternalCOp):
return (super().c_code_cache_version(), version(), 4) return (super().c_code_cache_version(), version(), 4)
class GpuDnnConvDesc(ExternalCOp): class GpuDnnConvDesc(_NoPythonExternalCOp):
""" """
This Op builds a convolution descriptor for use in the other convolution This Op builds a convolution descriptor for use in the other convolution
...@@ -607,7 +607,7 @@ class GpuDnnConvDesc(ExternalCOp): ...@@ -607,7 +607,7 @@ class GpuDnnConvDesc(ExternalCOp):
precision="float32", precision="float32",
num_groups=1, num_groups=1,
): ):
ExternalCOp.__init__(self, ["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)") super().__init__(["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]): if version() < 6000 and any([d != 1 for d in dilation]):
raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.") raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.")
...@@ -756,8 +756,7 @@ class GpuDnnConv(DnnBase): ...@@ -756,8 +756,7 @@ class GpuDnnConv(DnnBase):
) )
def __init__(self, algo=None, inplace=False, num_groups=1): def __init__(self, algo=None, inplace=False, num_groups=1):
DnnBase.__init__( super().__init__(
self,
["c_code/dnn_conv_base.c", "c_code/dnn_fwd.c"], ["c_code/dnn_conv_base.c", "c_code/dnn_fwd.c"],
"APPLY_SPECIFIC(conv_fwd)", "APPLY_SPECIFIC(conv_fwd)",
) )
...@@ -918,8 +917,7 @@ class GpuDnnConvGradW(DnnBase): ...@@ -918,8 +917,7 @@ class GpuDnnConvGradW(DnnBase):
) )
def __init__(self, inplace=False, algo=None, num_groups=1): def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__( super().__init__(
self,
["c_code/dnn_conv_base.c", "c_code/dnn_gw.c"], ["c_code/dnn_conv_base.c", "c_code/dnn_gw.c"],
"APPLY_SPECIFIC(conv_gw)", "APPLY_SPECIFIC(conv_gw)",
) )
...@@ -1088,8 +1086,7 @@ class GpuDnnConvGradI(DnnBase): ...@@ -1088,8 +1086,7 @@ class GpuDnnConvGradI(DnnBase):
) )
def __init__(self, inplace=False, algo=None, num_groups=1): def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__( super().__init__(
self,
["c_code/dnn_conv_base.c", "c_code/dnn_gi.c"], ["c_code/dnn_conv_base.c", "c_code/dnn_gi.c"],
"APPLY_SPECIFIC(conv_gi)", "APPLY_SPECIFIC(conv_gi)",
) )
...@@ -1767,7 +1764,7 @@ def dnn_gradinput3d( ...@@ -1767,7 +1764,7 @@ def dnn_gradinput3d(
) )
class GpuDnnPoolDesc(COp): class GpuDnnPoolDesc(_NoPythonCOp):
""" """
This Op builds a pooling descriptor for use in the other This Op builds a pooling descriptor for use in the other
pooling operations. pooling operations.
...@@ -1911,7 +1908,7 @@ class GpuDnnPoolBase(DnnBase): ...@@ -1911,7 +1908,7 @@ class GpuDnnPoolBase(DnnBase):
params_type = ParamsType(mode=cudnn.cudnnPoolingMode_t, handle=handle_type) params_type = ParamsType(mode=cudnn.cudnnPoolingMode_t, handle=handle_type)
def __init__(self, mode="max"): def __init__(self, mode="max"):
DnnBase.__init__(self, [self.c_file], self.c_function) super().__init__([self.c_file], self.c_function)
if mode == "average": if mode == "average":
mode = "average_inc_pad" mode = "average_inc_pad"
# Supported modes depend on runtime cuDNN version. # Supported modes depend on runtime cuDNN version.
...@@ -2114,7 +2111,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -2114,7 +2111,7 @@ class GpuDnnSoftmaxBase(DnnBase):
) )
def __init__(self, algo, mode): def __init__(self, algo, mode):
DnnBase.__init__(self, [self.file], self.c_func) super().__init__([self.file], self.c_func)
assert cudnn.cudnnSoftmaxAlgorithm_t.has_alias(algo) assert cudnn.cudnnSoftmaxAlgorithm_t.has_alias(algo)
self.algo = algo self.algo = algo
...@@ -2207,7 +2204,7 @@ class GpuDnnReduction(DnnBase): ...@@ -2207,7 +2204,7 @@ class GpuDnnReduction(DnnBase):
) )
def __init__(self, red_op, axis, acc_dtype, dtype, return_indices): def __init__(self, red_op, axis, acc_dtype, dtype, return_indices):
DnnBase.__init__(self, ["c_code/dnn_redux.c"], "APPLY_SPECIFIC(dnn_redux)") super().__init__(["c_code/dnn_redux.c"], "APPLY_SPECIFIC(dnn_redux)")
assert cudnn.cudnnReduceTensorOp_t.has_alias(red_op) assert cudnn.cudnnReduceTensorOp_t.has_alias(red_op)
self.red_op = red_op self.red_op = red_op
assert acc_dtype in ["float16", "float32", "float64"] assert acc_dtype in ["float16", "float32", "float64"]
...@@ -2328,8 +2325,7 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -2328,8 +2325,7 @@ class GpuDnnBatchNorm(DnnBase):
inplace_running_var=False, inplace_running_var=False,
inplace_output=False, inplace_output=False,
): ):
DnnBase.__init__( super().__init__(
self,
["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm.c"], ["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm.c"],
"dnn_batchnorm_op", "dnn_batchnorm_op",
) )
...@@ -2460,8 +2456,7 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -2460,8 +2456,7 @@ class GpuDnnBatchNormInference(DnnBase):
) )
def __init__(self, mode="per-activation", inplace=False): def __init__(self, mode="per-activation", inplace=False):
DnnBase.__init__( super().__init__(
self,
["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm_inf.c"], ["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm_inf.c"],
"dnn_batchnorm_op", "dnn_batchnorm_op",
) )
...@@ -2546,8 +2541,7 @@ class GpuDnnBatchNormGrad(DnnBase): ...@@ -2546,8 +2541,7 @@ class GpuDnnBatchNormGrad(DnnBase):
params_type = ParamsType(mode=cudnn.cudnnBatchNormMode_t, handle=handle_type) params_type = ParamsType(mode=cudnn.cudnnBatchNormMode_t, handle=handle_type)
def __init__(self, mode="per-activation"): def __init__(self, mode="per-activation"):
DnnBase.__init__( super().__init__(
self,
["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm_grad.c"], ["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm_grad.c"],
"dnn_batchnorm_grad", "dnn_batchnorm_grad",
) )
...@@ -2585,7 +2579,7 @@ class GpuDnnDropoutOp(DnnBase): ...@@ -2585,7 +2579,7 @@ class GpuDnnDropoutOp(DnnBase):
__props__ = ("inplace",) __props__ = ("inplace",)
def __init__(self, inplace=False): def __init__(self, inplace=False):
DnnBase.__init__(self, ["c_code/dnn_dropout_fwd.c"], "dnn_dropout_fwd") super().__init__(["c_code/dnn_dropout_fwd.c"], "dnn_dropout_fwd")
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {1: [2]} self.destroy_map = {1: [2]}
...@@ -2605,7 +2599,7 @@ class _DropoutDescriptor(DnnBase): ...@@ -2605,7 +2599,7 @@ class _DropoutDescriptor(DnnBase):
__props__ = ("context_name",) __props__ = ("context_name",)
def __init__(self, context_name): def __init__(self, context_name):
DnnBase.__init__(self, ["c_code/dnn_dropout_desc.c"], "dnn_dropout_desc") super().__init__(["c_code/dnn_dropout_desc.c"], "dnn_dropout_desc")
self.context_name = context_name self.context_name = context_name
def dnn_context(self, node): def dnn_context(self, node):
...@@ -2666,7 +2660,7 @@ class _RNNDescriptor(DnnBase): ...@@ -2666,7 +2660,7 @@ class _RNNDescriptor(DnnBase):
def __init__(self, context_name): def __init__(self, context_name):
if version() < 5005: if version() < 5005:
raise RuntimeError("cudnn RNN require cudnn v5 final or higher.") raise RuntimeError("cudnn RNN require cudnn v5 final or higher.")
DnnBase.__init__(self, ["c_code/dnn_rnn_desc.c"], "dnn_rnn_desc") super().__init__(["c_code/dnn_rnn_desc.c"], "dnn_rnn_desc")
self.context_name = context_name self.context_name = context_name
def dnn_context(self, node): def dnn_context(self, node):
...@@ -2759,7 +2753,7 @@ class _RNNParamSize(DnnBase): ...@@ -2759,7 +2753,7 @@ class _RNNParamSize(DnnBase):
__props__ = ("context_name",) __props__ = ("context_name",)
def __init__(self, context_name): def __init__(self, context_name):
DnnBase.__init__(self, ["c_code/dnn_rnn_paramsize.c"], "dnn_rnn_paramsize") super().__init__(["c_code/dnn_rnn_paramsize.c"], "dnn_rnn_paramsize")
self.context_name = context_name self.context_name = context_name
def dnn_context(self, node): def dnn_context(self, node):
...@@ -2792,7 +2786,7 @@ class _RNNSplitParams(DnnBase): ...@@ -2792,7 +2786,7 @@ class _RNNSplitParams(DnnBase):
__props__ = ("rnn_mode",) __props__ = ("rnn_mode",)
def __init__(self, rnn_mode): def __init__(self, rnn_mode):
DnnBase.__init__(self) super().__init__()
self.rnn_mode = rnn_mode self.rnn_mode = rnn_mode
def make_node(self, w, desc, layer, isize, typecode): def make_node(self, w, desc, layer, isize, typecode):
...@@ -3035,7 +3029,7 @@ class GpuDnnRNNOp(DnnBase): ...@@ -3035,7 +3029,7 @@ class GpuDnnRNNOp(DnnBase):
_cop_num_outputs = 4 _cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode): def __init__(self, rnn_mode, direction_mode):
DnnBase.__init__(self, ["c_code/dnn_rnn_fwd.c"], "dnn_rnn_fwd") super().__init__(["c_code/dnn_rnn_fwd.c"], "dnn_rnn_fwd")
self.rnn_mode = rnn_mode self.rnn_mode = rnn_mode
if direction_mode == "bidirectional": if direction_mode == "bidirectional":
self.num_dirs = 2 self.num_dirs = 2
...@@ -3126,7 +3120,7 @@ class GpuDnnRNNGradInputs(DnnBase): ...@@ -3126,7 +3120,7 @@ class GpuDnnRNNGradInputs(DnnBase):
_cop_num_outputs = 4 _cop_num_outputs = 4
def __init__(self, rnn_mode, grad_h, grad_c): def __init__(self, rnn_mode, grad_h, grad_c):
DnnBase.__init__(self, ["c_code/dnn_rnn_gi.c"], "dnn_rnn_gi") super().__init__(["c_code/dnn_rnn_gi.c"], "dnn_rnn_gi")
self.rnn_mode = rnn_mode self.rnn_mode = rnn_mode
self.grad_h = grad_h self.grad_h = grad_h
self.grad_c = grad_c self.grad_c = grad_c
...@@ -3175,7 +3169,7 @@ class GpuDnnRNNGradWeights(DnnBase): ...@@ -3175,7 +3169,7 @@ class GpuDnnRNNGradWeights(DnnBase):
__props__ = () __props__ = ()
def __init__(self): def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_rnn_gw.c"], "dnn_rnn_gw") super().__init__(["c_code/dnn_rnn_gw.c"], "dnn_rnn_gw")
def make_node(self, desc, x, hx, y, reserve, w): def make_node(self, desc, x, hx, y, reserve, w):
# We trust the callers here # We trust the callers here
...@@ -3579,9 +3573,7 @@ class GpuDnnTransformerGrid(DnnBase): ...@@ -3579,9 +3573,7 @@ class GpuDnnTransformerGrid(DnnBase):
check_input = False check_input = False
def __init__(self): def __init__(self):
DnnBase.__init__( super().__init__(["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)")
self, ["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)"
)
def make_node(self, theta, out_dims): def make_node(self, theta, out_dims):
""" """
...@@ -3640,8 +3632,8 @@ class GpuDnnTransformerSampler(DnnBase): ...@@ -3640,8 +3632,8 @@ class GpuDnnTransformerSampler(DnnBase):
check_input = False check_input = False
def __init__(self): def __init__(self):
DnnBase.__init__( super().__init__(
self, ["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)" ["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)"
) )
def make_node(self, img, grid): def make_node(self, img, grid):
...@@ -3704,7 +3696,7 @@ class GpuDnnTransformerGradI(DnnBase): ...@@ -3704,7 +3696,7 @@ class GpuDnnTransformerGradI(DnnBase):
check_input = False check_input = False
def __init__(self): def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)") super().__init__(["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
def make_node(self, img, grid, dy): def make_node(self, img, grid, dy):
context_name = infer_context_name(img, grid, dy) context_name = infer_context_name(img, grid, dy)
...@@ -3742,7 +3734,7 @@ class GpuDnnTransformerGradT(DnnBase): ...@@ -3742,7 +3734,7 @@ class GpuDnnTransformerGradT(DnnBase):
check_input = False check_input = False
def __init__(self): def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)") super().__init__(["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")
def make_node(self, dgrid): def make_node(self, dgrid):
context_name = infer_context_name(dgrid) context_name = infer_context_name(dgrid)
......
...@@ -5,7 +5,7 @@ import numpy as np ...@@ -5,7 +5,7 @@ import numpy as np
from theano import scalar from theano import scalar
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
from theano.link.c.interface import HideC from theano.link.c.interface import HideC
from theano.scalar import Composite, Scalar from theano.scalar import Composite, Scalar
...@@ -84,7 +84,7 @@ def max_inputs_to_GpuElemwise(node_or_outputs): ...@@ -84,7 +84,7 @@ def max_inputs_to_GpuElemwise(node_or_outputs):
return max_nb_inputs return max_nb_inputs
class GpuElemwise(HideC, Elemwise): class GpuElemwise(_NoPythonOp, HideC, Elemwise):
""" """
Elemwise on the GPU. Elemwise on the GPU.
...@@ -414,9 +414,6 @@ class GpuElemwise(HideC, Elemwise): ...@@ -414,9 +414,6 @@ class GpuElemwise(HideC, Elemwise):
return str(code) return str(code)
# To disable the superclass perform.
perform = Op.perform
# Since we don't have a perform ... # Since we don't have a perform ...
def python_constant_folding(self, node): def python_constant_folding(self, node):
return False return False
...@@ -482,7 +479,7 @@ class GpuDimShuffle(DimShuffle): ...@@ -482,7 +479,7 @@ class GpuDimShuffle(DimShuffle):
storage[0] = res storage[0] = res
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype, _NoPythonOp):
""" """
GpuCAReduceCuda is a Reduction along some dimensions by a scalar op. GpuCAReduceCuda is a Reduction along some dimensions by a scalar op.
...@@ -616,9 +613,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -616,9 +613,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
], ],
) )
def perform(self, node, inp, out, ctx):
Op.perform(self, node, inp, out, ctx)
def supports_c_code(self, inputs): def supports_c_code(self, inputs):
""" """
Returns True if the current op and reduce pattern has functioning C code. Returns True if the current op and reduce pattern has functioning C code.
......
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
from theano.tensor.extra_ops import CumOp from theano.tensor.extra_ops import CumOp
...@@ -11,7 +11,7 @@ except ImportError: ...@@ -11,7 +11,7 @@ except ImportError:
import theano.scalar as scalar import theano.scalar as scalar
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gpuarray.basic_ops import ( from theano.gpuarray.basic_ops import (
GpuKernelBase, GpuKernelBaseCOp,
GpuReshape, GpuReshape,
Kernel, Kernel,
as_gpuarray_variable, as_gpuarray_variable,
...@@ -22,7 +22,7 @@ from theano.gpuarray.opt import op_lifter, register_opt, register_opt2 ...@@ -22,7 +22,7 @@ from theano.gpuarray.opt import op_lifter, register_opt, register_opt2
from theano.gpuarray.type import gpu_context_type from theano.gpuarray.type import gpu_context_type
class GpuCumOp(GpuKernelBase, Op): class GpuCumOp(GpuKernelBaseCOp, _NoPythonOp):
""" """
Parameters Parameters
---------- ----------
...@@ -505,7 +505,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -505,7 +505,7 @@ class GpuCumOp(GpuKernelBase, Op):
# GpuCumsumOp exists only to serve backward compatibility. # GpuCumsumOp exists only to serve backward compatibility.
# Once an object is created, it will be converted to CumOp object. # Once an object is created, it will be converted to CumOp object.
class GpuCumsumOp(GpuKernelBase, Op): class GpuCumsumOp(GpuKernelBaseCOp, _NoPythonOp):
SUPPORTED_NDIMS = 3 SUPPORTED_NDIMS = 3
__props__ = ("axis",) __props__ = ("axis",)
......
...@@ -2,7 +2,7 @@ import numpy as np ...@@ -2,7 +2,7 @@ import numpy as np
import theano.tensor as tt import theano.tensor as tt
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
from theano.gpuarray.basic_ops import ( from theano.gpuarray.basic_ops import (
as_gpuarray_variable, as_gpuarray_variable,
gpu_contiguous, gpu_contiguous,
...@@ -37,7 +37,7 @@ except Exception: ...@@ -37,7 +37,7 @@ except Exception:
skcuda_available = False skcuda_available = False
class CuRFFTOp(Op): class CuRFFTOp(_NoPythonOp):
__props__ = () __props__ = ()
...@@ -168,7 +168,7 @@ class CuRFFTOp(Op): ...@@ -168,7 +168,7 @@ class CuRFFTOp(Op):
curfft_op = CuRFFTOp() curfft_op = CuRFFTOp()
class CuIRFFTOp(Op): class CuIRFFTOp(_NoPythonOp):
__props__ = () __props__ = ()
......
...@@ -11,12 +11,12 @@ except ImportError: ...@@ -11,12 +11,12 @@ except ImportError:
import theano import theano
import theano.sandbox.multinomial import theano.sandbox.multinomial
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
from theano.scalar import as_scalar from theano.scalar import as_scalar
from theano.tensor import NotScalarConstantError, get_scalar_constant_value from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from .basic_ops import ( from .basic_ops import (
GpuKernelBase, GpuKernelBaseCOp,
Kernel, Kernel,
as_gpuarray_variable, as_gpuarray_variable,
gpuarray_helper_inc_dir, gpuarray_helper_inc_dir,
...@@ -28,12 +28,12 @@ from .opt import op_lifter, register_opt, register_opt2 ...@@ -28,12 +28,12 @@ from .opt import op_lifter, register_opt, register_opt2
from .type import GpuArrayType from .type import GpuArrayType
class GPUAMultinomialFromUniform(GpuKernelBase, Op): class GPUAMultinomialFromUniform(GpuKernelBaseCOp, _NoPythonOp):
__props__ = ("odtype",) __props__ = ("odtype",)
_f16_ok = True _f16_ok = True
def __init__(self, odtype): def __init__(self, odtype):
Op.__init__(self) super().__init__(self)
self.odtype = odtype self.odtype = odtype
def get_params(self, node): def get_params(self, node):
...@@ -251,7 +251,7 @@ KERNEL void k_multi_warp_multinomial( ...@@ -251,7 +251,7 @@ KERNEL void k_multi_warp_multinomial(
return (7,) return (7,)
class GPUAChoiceFromUniform(GpuKernelBase, Op): class GPUAChoiceFromUniform(GpuKernelBaseCOp, _NoPythonOp):
""" """
The output is transposed compared to MultinomialWOReplacementFromUniform. The output is transposed compared to MultinomialWOReplacementFromUniform.
We must insert a Transpose op after it. We must insert a Transpose op after it.
...@@ -263,7 +263,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op): ...@@ -263,7 +263,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op):
__props__ = ("odtype", "replace") __props__ = ("odtype", "replace")
def __init__(self, odtype, replace=False): def __init__(self, odtype, replace=False):
Op.__init__(self) super().__init__(self)
self.odtype = odtype self.odtype = odtype
self.replace = replace self.replace = replace
......
import theano.tensor as tt import theano.tensor as tt
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.tensor.nnet.neighbours import Images2Neibs from theano.tensor.nnet.neighbours import Images2Neibs
...@@ -11,7 +11,7 @@ except ImportError: ...@@ -11,7 +11,7 @@ except ImportError:
pass pass
from theano.gpuarray.basic_ops import ( from theano.gpuarray.basic_ops import (
GpuKernelBase, GpuKernelBaseCOp,
Kernel, Kernel,
as_gpuarray_variable, as_gpuarray_variable,
infer_context_name, infer_context_name,
...@@ -19,7 +19,7 @@ from theano.gpuarray.basic_ops import ( ...@@ -19,7 +19,7 @@ from theano.gpuarray.basic_ops import (
from theano.gpuarray.type import GpuArrayType, gpu_context_type from theano.gpuarray.type import GpuArrayType, gpu_context_type
class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): class GpuImages2Neibs(GpuKernelBaseCOp, Images2Neibs, _NoPythonOp):
""" """
Images2Neibs for the GPU. Images2Neibs for the GPU.
...@@ -627,7 +627,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -627,7 +627,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
params=sub["params"], params=sub["params"],
fail=sub["fail"], fail=sub["fail"],
) )
def perform(self, node, inp, out, params):
# Disable the perform method from the CPU version
Op.perform(self, node, inp, out, params)
...@@ -3,7 +3,7 @@ from io import StringIO ...@@ -3,7 +3,7 @@ from io import StringIO
import numpy as np import numpy as np
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
try: try:
...@@ -12,18 +12,18 @@ try: ...@@ -12,18 +12,18 @@ try:
except ImportError: except ImportError:
pass pass
from .basic_ops import ( from theano.gpuarray.basic_ops import (
GpuKernelBase, GpuKernelBaseCOp,
Kernel, Kernel,
as_gpuarray_variable, as_gpuarray_variable,
gpuarray_helper_inc_dir, gpuarray_helper_inc_dir,
infer_context_name, infer_context_name,
) )
from .fp16_help import load_w, work_dtype, write_w from theano.gpuarray.fp16_help import load_w, work_dtype, write_w
from .type import GpuArrayType from theano.gpuarray.type import GpuArrayType
class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBaseCOp, _NoPythonOp):
""" """
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu. Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
...@@ -283,7 +283,7 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = ( ...@@ -283,7 +283,7 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = (
) )
class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBaseCOp, _NoPythonOp):
""" """
Implement CrossentropySoftmax1HotWithBiasDx on the gpu. Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
...@@ -508,7 +508,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -508,7 +508,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx() gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax(GpuKernelBase, Op): class GpuSoftmax(GpuKernelBaseCOp, _NoPythonOp):
""" """
Implement Softmax on the gpu. Implement Softmax on the gpu.
...@@ -804,7 +804,7 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -804,7 +804,7 @@ class GpuSoftmax(GpuKernelBase, Op):
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias(GpuKernelBase, Op): class GpuSoftmaxWithBias(GpuKernelBaseCOp, _NoPythonOp):
""" """
Implement SoftmaxWithBias on the gpu. Implement SoftmaxWithBias on the gpu.
......
...@@ -20,7 +20,7 @@ import theano.tensor ...@@ -20,7 +20,7 @@ import theano.tensor
from theano.compile import optdb from theano.compile import optdb
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.graph import Apply, Variable, is_in_ancestors from theano.gof.graph import Apply, Variable, is_in_ancestors
from theano.gof.op import Op from theano.gof.op import _NoPythonOp
from theano.gof.opt import GlobalOptimizer, local_optimizer from theano.gof.opt import GlobalOptimizer, local_optimizer
from theano.scan.utils import clone from theano.scan.utils import clone
from theano.tensor import TensorType, opt from theano.tensor import TensorType, opt
...@@ -40,7 +40,7 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>" ...@@ -40,7 +40,7 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>"
_logger = logging.getLogger("theano.ifelse") _logger = logging.getLogger("theano.ifelse")
class IfElse(Op): class IfElse(_NoPythonOp):
""" """
Op that provides conditional graph evaluation if used with the CVM/VM Op that provides conditional graph evaluation if used with the CVM/VM
linkers. Note that there exists a helpful function `ifelse` that should linkers. Note that there exists a helpful function `ifelse` that should
......
...@@ -1067,7 +1067,8 @@ class Scan(Op): ...@@ -1067,7 +1067,8 @@ class Scan(Op):
) )
except (ImportError, MissingGXX): except (ImportError, MissingGXX):
p = self.execute p = self.perform
# default arguments are stored in the closure of `rval` # default arguments are stored in the closure of `rval`
# Big ugly hack since we can't get the real value of allow_gc # Big ugly hack since we can't get the real value of allow_gc
...@@ -1246,9 +1247,10 @@ class Scan(Op): ...@@ -1246,9 +1247,10 @@ class Scan(Op):
) )
return list_inputs[offset:] return list_inputs[offset:]
def execute(self, node, args, outs): def perform(self, node, inputs, output_storage, params=None):
""" """Compute the scan operation in Python.
The args are packed like this:
The `inputs` are packed like this:
n_steps n_steps
...@@ -1259,7 +1261,7 @@ class Scan(Op): ...@@ -1259,7 +1261,7 @@ class Scan(Op):
W other inputs w_1, w_2, ... w_W W other inputs w_1, w_2, ... w_W
There are at least 1 + self.n_seqs + self.n_outs inputs, and the There are at least ``1 + self.n_seqs + self.n_outs`` inputs, and the
ones above this number are passed to the scanned function as ones above this number are passed to the scanned function as
non-sequential inputs. non-sequential inputs.
...@@ -1272,7 +1274,7 @@ class Scan(Op): ...@@ -1272,7 +1274,7 @@ class Scan(Op):
# negative flip sequences around, and make n_steps positive # negative flip sequences around, and make n_steps positive
t0_call = time.time() t0_call = time.time()
t_fn = 0 t_fn = 0
n_steps = args[0] n_steps = inputs[0]
seqs = [] seqs = []
if n_steps < 0: if n_steps < 0:
# History, in the past, this was used for backward # History, in the past, this was used for backward
...@@ -1285,7 +1287,7 @@ class Scan(Op): ...@@ -1285,7 +1287,7 @@ class Scan(Op):
"We didn't implemented yet the case where scan do 0 iteration" "We didn't implemented yet the case where scan do 0 iteration"
) )
else: else:
for idx, seq in enumerate(args[1 : self.seqs_arg_offset]): for idx, seq in enumerate(inputs[1 : self.seqs_arg_offset]):
if seq.shape[0] < n_steps: if seq.shape[0] < n_steps:
raise ValueError( raise ValueError(
( (
...@@ -1305,11 +1307,12 @@ class Scan(Op): ...@@ -1305,11 +1307,12 @@ class Scan(Op):
# output # output
store_steps = [ store_steps = [
arg.shape[0] for arg in args[self.seqs_arg_offset : self.shared_arg_offset] arg.shape[0]
for arg in inputs[self.seqs_arg_offset : self.shared_arg_offset]
] ]
store_steps += [ store_steps += [
arg arg
for arg in args[ for arg in inputs[
self.nit_sot_arg_offset : self.nit_sot_arg_offset + self.n_nit_sot self.nit_sot_arg_offset : self.nit_sot_arg_offset + self.n_nit_sot
] ]
] ]
...@@ -1325,31 +1328,32 @@ class Scan(Op): ...@@ -1325,31 +1328,32 @@ class Scan(Op):
if idx in self.destroy_map: if idx in self.destroy_map:
# ^ Case 1. Outputs should be computed inplace of their # ^ Case 1. Outputs should be computed inplace of their
# initial state # initial state
outs[idx][0] = args[self.seqs_arg_offset + idx] output_storage[idx][0] = inputs[self.seqs_arg_offset + idx]
elif ( elif (
outs[idx][0] is not None output_storage[idx][0] is not None
and outs[idx][0].shape[1:] == args[self.seqs_arg_offset + idx].shape[1:] and output_storage[idx][0].shape[1:]
and outs[idx][0].shape[0] >= store_steps[idx] == inputs[self.seqs_arg_offset + idx].shape[1:]
and output_storage[idx][0].shape[0] >= store_steps[idx]
): ):
# Put in the values of the initial state # Put in the values of the initial state
outs[idx][0] = outs[idx][0][: store_steps[idx]] output_storage[idx][0] = output_storage[idx][0][: store_steps[idx]]
if idx > self.n_mit_mot: if idx > self.n_mit_mot:
l = -self.mintaps[idx] l = -self.mintaps[idx]
outs[idx][0][:l] = args[self.seqs_arg_offset + idx][:l] output_storage[idx][0][:l] = inputs[self.seqs_arg_offset + idx][:l]
else: else:
outs[idx][0][:] = args[self.seqs_arg_offset + idx] output_storage[idx][0][:] = inputs[self.seqs_arg_offset + idx]
else: else:
outs[idx][0] = args[self.seqs_arg_offset + idx].copy() output_storage[idx][0] = inputs[self.seqs_arg_offset + idx].copy()
offset = self.nit_sot_arg_offset + self.n_nit_sot offset = self.nit_sot_arg_offset + self.n_nit_sot
other_args = args[offset:] other_args = inputs[offset:]
input_storage = self.fn.input_storage inner_input_storage = self.fn.input_storage
nb_mitmot_in = sum(map(len, self.tap_array[: self.n_mit_mot])) nb_mitmot_in = sum(map(len, self.tap_array[: self.n_mit_mot]))
old_mitmot_input_storage = [None] * nb_mitmot_in old_mitmot_input_storage = [None] * nb_mitmot_in
old_mitmot_input_data = [None] * nb_mitmot_in old_mitmot_input_data = [None] * nb_mitmot_in
output_storage = self.fn.output_storage inner_output_storage = self.fn.output_storage
old_output_storage = [None] * len(output_storage) old_inner_output_storage = [None] * len(inner_output_storage)
old_output_data = [None] * len(output_storage) old_inner_output_data = [None] * len(inner_output_storage)
fn = self.fn.fn fn = self.fn.fn
offset = ( offset = (
self.n_seqs self.n_seqs
...@@ -1357,7 +1361,7 @@ class Scan(Op): ...@@ -1357,7 +1361,7 @@ class Scan(Op):
+ self.n_shared_outs + self.n_shared_outs
) )
for idx in range(len(other_args)): for idx in range(len(other_args)):
input_storage[idx + offset].storage[0] = other_args[idx] inner_input_storage[idx + offset].storage[0] = other_args[idx]
i = 0 i = 0
cond = True cond = True
...@@ -1368,34 +1372,40 @@ class Scan(Op): ...@@ -1368,34 +1372,40 @@ class Scan(Op):
# 3. collect input slices # 3. collect input slices
for idx in range(self.n_seqs): for idx in range(self.n_seqs):
if self.vector_seqs[idx]: if self.vector_seqs[idx]:
input_storage[idx].storage[0] = seqs[idx][i : i + 1].reshape(()) inner_input_storage[idx].storage[0] = seqs[idx][i : i + 1].reshape(
()
)
else: else:
input_storage[idx].storage[0] = seqs[idx][i] inner_input_storage[idx].storage[0] = seqs[idx][i]
offset = self.n_seqs offset = self.n_seqs
for idx in range(self.n_outs): for idx in range(self.n_outs):
if self.vector_outs[idx]: if self.vector_outs[idx]:
for tap in self.tap_array[idx]: for tap in self.tap_array[idx]:
_idx = (pos[idx] + tap) % store_steps[idx] _idx = (pos[idx] + tap) % store_steps[idx]
input_storage[offset].storage[0] = outs[idx][0][ inner_input_storage[offset].storage[0] = output_storage[idx][0][
_idx : _idx + 1 _idx : _idx + 1
].reshape(()) ].reshape(())
offset += 1 offset += 1
else: else:
for tap in self.tap_array[idx]: for tap in self.tap_array[idx]:
_idx = (pos[idx] + tap) % store_steps[idx] _idx = (pos[idx] + tap) % store_steps[idx]
input_storage[offset].storage[0] = outs[idx][0][_idx] inner_input_storage[offset].storage[0] = output_storage[idx][0][
_idx
]
offset += 1 offset += 1
a_offset = self.shared_arg_offset a_offset = self.shared_arg_offset
o_offset = self.n_outs + self.n_nit_sot o_offset = self.n_outs + self.n_nit_sot
if i == 0: if i == 0:
for j in range(self.n_shared_outs): for j in range(self.n_shared_outs):
input_storage[offset].storage[0] = args[a_offset + j] inner_input_storage[offset].storage[0] = inputs[a_offset + j]
offset += 1 offset += 1
else: else:
for j in range(self.n_shared_outs): for j in range(self.n_shared_outs):
input_storage[offset].storage[0] = outs[o_offset + j][0] inner_input_storage[offset].storage[0] = output_storage[
o_offset + j
][0]
offset += 1 offset += 1
# 4. collecting slices where the output should be stored # 4. collecting slices where the output should be stored
...@@ -1404,7 +1414,7 @@ class Scan(Op): ...@@ -1404,7 +1414,7 @@ class Scan(Op):
offset = 0 offset = 0
for idx in range(self.n_mit_mot_outs): for idx in range(self.n_mit_mot_outs):
if not self.mitmots_preallocated[idx]: if not self.mitmots_preallocated[idx]:
output_storage[offset].storage[0] = None inner_output_storage[offset].storage[0] = None
offset += 1 offset += 1
# 4.2. Collect slices for mitsots, sitsots and nitsots # 4.2. Collect slices for mitsots, sitsots and nitsots
...@@ -1414,25 +1424,25 @@ class Scan(Op): ...@@ -1414,25 +1424,25 @@ class Scan(Op):
store_steps[idx + self.n_mit_mot] == 1 store_steps[idx + self.n_mit_mot] == 1
or self.vector_outs[idx + self.n_mit_mot] or self.vector_outs[idx + self.n_mit_mot]
): ):
output_storage[idx + offset].storage[0] = None inner_output_storage[idx + offset].storage[0] = None
else: else:
_pos0 = idx + self.n_mit_mot _pos0 = idx + self.n_mit_mot
output_storage[idx + offset].storage[0] = outs[_pos0][0][ inner_output_storage[idx + offset].storage[0] = output_storage[
pos[_pos0] _pos0
] ][0][pos[_pos0]]
else: else:
for idx in range(self.n_outs + self.n_nit_sot - self.n_mit_mot): for idx in range(self.n_outs + self.n_nit_sot - self.n_mit_mot):
output_storage[idx + offset].storage[0] = None inner_output_storage[idx + offset].storage[0] = None
# 4.3. Collect slices for shared outputs # 4.3. Collect slices for shared outputs
offset += self.n_outs + self.n_nit_sot - self.n_mit_mot offset += self.n_outs + self.n_nit_sot - self.n_mit_mot
for idx in range(self.n_shared_outs): for idx in range(self.n_shared_outs):
output_storage[idx + offset].storage[0] = None inner_output_storage[idx + offset].storage[0] = None
# 4.4. If there is a condition add it to the mix # 4.4. If there is a condition add it to the mix
if self.as_while: if self.as_while:
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None inner_output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, GpuArrays, # 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them # etc) currently in the output_storage to be able to compare them
...@@ -1440,17 +1450,17 @@ class Scan(Op): ...@@ -1440,17 +1450,17 @@ class Scan(Op):
# execution. Also keep pointers to their data to be able to detect # execution. Also keep pointers to their data to be able to detect
# cases where outputs reused the allocated object but alter the # cases where outputs reused the allocated object but alter the
# memory region they refer to. # memory region they refer to.
for idx in range(len(output_storage)): for idx in range(len(inner_output_storage)):
var = output_storage[idx].storage[0] var = inner_output_storage[idx].storage[0]
old_output_storage[idx] = var old_inner_output_storage[idx] = var
if var is None: if var is None:
old_output_data[idx] = None old_inner_output_data[idx] = None
elif self.outs_is_tensor[idx]: elif self.outs_is_tensor[idx]:
old_output_data[idx] = var.data old_inner_output_data[idx] = var.data
else: else:
old_output_data[idx] = var.gpudata old_inner_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, GpuArrays, # 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the # etc) associated with mitmot inputs currently in the
...@@ -1460,7 +1470,7 @@ class Scan(Op): ...@@ -1460,7 +1470,7 @@ class Scan(Op):
# reused the allocated object but alter the memory region they # reused the allocated object but alter the memory region they
# refer to. # refer to.
for idx in range(nb_mitmot_in): for idx in range(nb_mitmot_in):
var = input_storage[idx + self.n_seqs].storage[0] var = inner_input_storage[idx + self.n_seqs].storage[0]
old_mitmot_input_storage[idx] = var old_mitmot_input_storage[idx] = var
if var is None: if var is None:
...@@ -1502,19 +1512,19 @@ class Scan(Op): ...@@ -1502,19 +1512,19 @@ class Scan(Op):
dt_fn = time.time() - t0_fn dt_fn = time.time() - t0_fn
if self.as_while: if self.as_while:
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
cond = output_storage[pdx].storage[0] == 0 cond = inner_output_storage[pdx].storage[0] == 0
# 5.2. By calling fn() directly instead of calling the theano # 5.2. By calling fn() directly instead of calling the theano
# function, it is possible that the updates have not been # function, it is possible that the updates have not been
# performed. Perform the updates if needed. # performed. Perform the updates if needed.
offset_out = len(output_storage) - 1 offset_out = len(inner_output_storage) - 1
if getattr(fn, "need_update_inputs", True): if getattr(fn, "need_update_inputs", True):
# Update the inputs that have an update function # Update the inputs that have an update function
for inp, storage in zip( for inp, storage in zip(
self.fn.maker.expanded_inputs[::-1], self.fn.input_storage[::-1] self.fn.maker.expanded_inputs[::-1], self.fn.input_storage[::-1]
): ):
if inp.update is not None: if inp.update is not None:
storage.data = output_storage[offset_out].data storage.data = inner_output_storage[offset_out].data
offset_out -= 1 offset_out -= 1
t_fn += dt_fn t_fn += dt_fn
...@@ -1532,7 +1542,7 @@ class Scan(Op): ...@@ -1532,7 +1542,7 @@ class Scan(Op):
# Verify whether the input points to the same data as # Verify whether the input points to the same data as
# it did before the execution of the inner function. # it did before the execution of the inner function.
old_var = old_mitmot_input_storage[inp_idx] old_var = old_mitmot_input_storage[inp_idx]
new_var = input_storage[self.n_seqs + inp_idx].storage[0] new_var = inner_input_storage[self.n_seqs + inp_idx].storage[0]
if old_var is new_var: if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx] old_data = old_mitmot_input_data[inp_idx]
if self.inps_is_tensor[self.n_seqs + inp_idx]: if self.inps_is_tensor[self.n_seqs + inp_idx]:
...@@ -1547,14 +1557,16 @@ class Scan(Op): ...@@ -1547,14 +1557,16 @@ class Scan(Op):
# nothing needs to be done. Otherwise, recover the value # nothing needs to be done. Otherwise, recover the value
# and store it in `outs` as usual # and store it in `outs` as usual
if not same_data: if not same_data:
outs[j][0][k + pos[j]] = input_storage[ output_storage[j][0][k + pos[j]] = inner_input_storage[
self.n_seqs + inp_idx self.n_seqs + inp_idx
].storage[0] ].storage[0]
else: else:
# This output tap has not been preallocated, recover # This output tap has not been preallocated, recover
# its value as usual # its value as usual
outs[j][0][k + pos[j]] = output_storage[offset_out].storage[0] output_storage[j][0][k + pos[j]] = inner_output_storage[
offset_out
].storage[0]
offset_out += 1 offset_out += 1
mitmot_out_idx += 1 mitmot_out_idx += 1
...@@ -1570,14 +1582,16 @@ class Scan(Op): ...@@ -1570,14 +1582,16 @@ class Scan(Op):
# Copy the output value to `outs`, if necessary # Copy the output value to `outs`, if necessary
if store_steps[j] == 1 or self.vector_outs[j]: if store_steps[j] == 1 or self.vector_outs[j]:
outs[j][0][pos[j]] = output_storage[offset_out + j].storage[0] output_storage[j][0][pos[j]] = inner_output_storage[
offset_out + j
].storage[0]
else: else:
# Check whether the initialization of the output storage # Check whether the initialization of the output storage
# map for this output has been reused. # map for this output has been reused.
old_var = old_output_storage[offset_out + j] old_var = old_inner_output_storage[offset_out + j]
new_var = output_storage[offset_out + j].storage[0] new_var = inner_output_storage[offset_out + j].storage[0]
if old_var is new_var: if old_var is new_var:
old_data = old_output_data[offset_out + j] old_data = old_inner_output_data[offset_out + j]
if old_data is None: if old_data is None:
output_reused = False output_reused = False
elif self.outs_is_tensor[offset_out + j]: elif self.outs_is_tensor[offset_out + j]:
...@@ -1589,9 +1603,9 @@ class Scan(Op): ...@@ -1589,9 +1603,9 @@ class Scan(Op):
if not output_reused: if not output_reused:
try: try:
outs[j][0][pos[j]] = output_storage[offset_out + j].storage[ output_storage[j][0][pos[j]] = inner_output_storage[
0 offset_out + j
] ].storage[0]
except ValueError as e: except ValueError as e:
if i == 0: if i == 0:
# First iteration, so don't change the # First iteration, so don't change the
...@@ -1614,26 +1628,30 @@ class Scan(Op): ...@@ -1614,26 +1628,30 @@ class Scan(Op):
if i == 0: if i == 0:
jout = j + offset_out jout = j + offset_out
shape = (store_steps[j],) + output_storage[jout].storage[0].shape shape = (store_steps[j],) + inner_output_storage[jout].storage[
dtype = output_storage[jout].storage[0].dtype 0
].shape
dtype = inner_output_storage[jout].storage[0].dtype
if ( if (
outs[j][0] is None output_storage[j][0] is None
or outs[j][0].shape[0] < store_steps[j] or output_storage[j][0].shape[0] < store_steps[j]
or outs[j][0].shape[1:] != shape[1:] or output_storage[j][0].shape[1:] != shape[1:]
or outs[j][0].dtype != dtype or output_storage[j][0].dtype != dtype
): ):
outs[j][0] = node.outputs[j].type.value_zeros(shape) output_storage[j][0] = node.outputs[j].type.value_zeros(shape)
elif outs[j][0].shape[0] != store_steps[j]: elif output_storage[j][0].shape[0] != store_steps[j]:
outs[j][0] = outs[j][0][: store_steps[j]] output_storage[j][0] = output_storage[j][0][: store_steps[j]]
outs[j][0][pos[j]] = output_storage[jout].storage[0] output_storage[j][0][pos[j]] = inner_output_storage[jout].storage[0]
elif store_steps[j] == 1 or self.vector_outs[j]: elif store_steps[j] == 1 or self.vector_outs[j]:
outs[j][0][pos[j]] = output_storage[j + offset_out].storage[0] output_storage[j][0][pos[j]] = inner_output_storage[
j + offset_out
].storage[0]
else: else:
# Check whether the initialization of the output storage map # Check whether the initialization of the output storage map
# for this output has been reused. # for this output has been reused.
old_var = old_output_storage[offset_out + j] old_var = old_inner_output_storage[offset_out + j]
old_data = old_output_data[offset_out + j] old_data = old_inner_output_data[offset_out + j]
new_var = output_storage[offset_out + j].storage[0] new_var = inner_output_storage[offset_out + j].storage[0]
if old_var is new_var: if old_var is new_var:
if old_data is None: if old_data is None:
output_reused = False output_reused = False
...@@ -1645,7 +1663,9 @@ class Scan(Op): ...@@ -1645,7 +1663,9 @@ class Scan(Op):
output_reused = False output_reused = False
if not output_reused: if not output_reused:
outs[j][0][pos[j]] = output_storage[j + offset_out].storage[0] output_storage[j][0][pos[j]] = inner_output_storage[
j + offset_out
].storage[0]
# 5.6 Copy over the values for outputs corresponding to shared # 5.6 Copy over the values for outputs corresponding to shared
# variables # variables
...@@ -1653,7 +1673,7 @@ class Scan(Op): ...@@ -1653,7 +1673,7 @@ class Scan(Op):
end += self.n_shared_outs end += self.n_shared_outs
for j in range(begin, end): for j in range(begin, end):
jout = j + offset_out jout = j + offset_out
outs[j][0] = output_storage[jout].storage[0] output_storage[j][0] = inner_output_storage[jout].storage[0]
pos = [(idx + 1) % store for idx, store in zip(pos, store_steps)] pos = [(idx + 1) % store for idx, store in zip(pos, store_steps)]
i = i + 1 i = i + 1
...@@ -1672,25 +1692,29 @@ class Scan(Op): ...@@ -1672,25 +1692,29 @@ class Scan(Op):
# are read and written. # are read and written.
# This way, there will be no information overwritten # This way, there will be no information overwritten
# before it is read (as it used to happen). # before it is read (as it used to happen).
shape = (pdx,) + outs[idx][0].shape[1:] shape = (pdx,) + output_storage[idx][0].shape[1:]
tmp = node.outputs[idx].type.value_zeros(shape) tmp = node.outputs[idx].type.value_zeros(shape)
tmp[:] = outs[idx][0][:pdx] tmp[:] = output_storage[idx][0][:pdx]
outs[idx][0][: store_steps[idx] - pdx] = outs[idx][0][pdx:] output_storage[idx][0][: store_steps[idx] - pdx] = output_storage[
outs[idx][0][store_steps[idx] - pdx :] = tmp idx
][0][pdx:]
output_storage[idx][0][store_steps[idx] - pdx :] = tmp
del tmp del tmp
else: else:
shape = (store_steps[idx] - pdx,) + outs[idx][0].shape[1:] shape = (store_steps[idx] - pdx,) + output_storage[idx][0].shape[1:]
tmp = node.outputs[idx].type.value_zeros(shape) tmp = node.outputs[idx].type.value_zeros(shape)
tmp[:] = outs[idx][0][pdx:] tmp[:] = output_storage[idx][0][pdx:]
outs[idx][0][store_steps[idx] - pdx :] = outs[idx][0][:pdx] output_storage[idx][0][store_steps[idx] - pdx :] = output_storage[
outs[idx][0][: store_steps[idx] - pdx] = tmp idx
][0][:pdx]
output_storage[idx][0][: store_steps[idx] - pdx] = tmp
del tmp del tmp
# This would normally happen only when doing truncated # This would normally happen only when doing truncated
# backpropagation through time. In such a scenario Scan is # backpropagation through time. In such a scenario Scan is
# expected to return 0 for all entries for which the gradient is # expected to return 0 for all entries for which the gradient is
# not actually computed # not actually computed
elif store_steps[idx] > i - self.mintaps[idx]: elif store_steps[idx] > i - self.mintaps[idx]:
outs[idx][0][i - self.mintaps[idx] :] = 0 output_storage[idx][0][i - self.mintaps[idx] :] = 0
# This is a fix for a bug introduced by while. If you say # This is a fix for a bug introduced by while. If you say
# you want to loop up to a condition, you expect the output # you want to loop up to a condition, you expect the output
# to have that length ( and not the maximal length possible) # to have that length ( and not the maximal length possible)
...@@ -1709,13 +1733,13 @@ class Scan(Op): ...@@ -1709,13 +1733,13 @@ class Scan(Op):
# every output and then do outs[0][:i+maximal_tap], # every output and then do outs[0][:i+maximal_tap],
# which implies I think more computations than this # which implies I think more computations than this
# little trick that I used # little trick that I used
outs[idx][0] = outs[idx][0][: -(n_steps - i)] output_storage[idx][0] = output_storage[idx][0][: -(n_steps - i)]
# We never reuse the input or output storage of the # We never reuse the input or output storage of the
# inner function so we clear it. # inner function so we clear it.
for i_s in input_storage: for i_s in inner_input_storage:
i_s.storage[0] = None i_s.storage[0] = None
for o_s in output_storage: for o_s in inner_output_storage:
o_s.storage[0] = None o_s.storage[0] = None
t_call = time.time() - t0_call t_call = time.time() - t0_call
...@@ -1735,7 +1759,6 @@ class Scan(Op): ...@@ -1735,7 +1759,6 @@ class Scan(Op):
self.t_call = t_call self.t_call = t_call
self.t_fn = t_fn self.t_fn = t_fn
# Infer Shape
def infer_shape(self, fgraph, node, input_shapes): def infer_shape(self, fgraph, node, input_shapes):
# input_shapes correspond to the shapes of node.inputs # input_shapes correspond to the shapes of node.inputs
for inp, inp_shp in zip(node.inputs, input_shapes): for inp, inp_shp in zip(node.inputs, input_shapes):
...@@ -2085,7 +2108,6 @@ class Scan(Op): ...@@ -2085,7 +2108,6 @@ class Scan(Op):
return mappings return mappings
# GRAD FUNCTION
def L_op(self, inputs, outs, dC_douts): def L_op(self, inputs, outs, dC_douts):
if not isinstance(outs, (list, tuple)): if not isinstance(outs, (list, tuple)):
outs = [outs] outs = [outs]
......
...@@ -5,7 +5,7 @@ import theano ...@@ -5,7 +5,7 @@ import theano
from theano import scalar, tensor from theano import scalar, tensor
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import COp from theano.gof.op import COp, _NoPythonCOp
from theano.gof.opt import PatternSub, TopoOptimizer, local_optimizer from theano.gof.opt import PatternSub, TopoOptimizer, local_optimizer
from theano.misc.safe_asarray import _asarray from theano.misc.safe_asarray import _asarray
from theano.sparse import basic as sparse from theano.sparse import basic as sparse
...@@ -78,7 +78,7 @@ theano.compile.optdb.register( ...@@ -78,7 +78,7 @@ theano.compile.optdb.register(
) )
class AddSD_ccode(COp): class AddSD_ccode(_NoPythonCOp):
""" """
Add a sparse and a dense matrix. Add a sparse and a dense matrix.
...@@ -663,7 +663,7 @@ def local_structured_dot(fgraph, node): ...@@ -663,7 +663,7 @@ def local_structured_dot(fgraph, node):
# register_specialize(local_structured_dot) # register_specialize(local_structured_dot)
class UsmmCscDense(COp): class UsmmCscDense(_NoPythonCOp):
""" """
Performs the expression `alpha` * `x` `y` + `z`. Performs the expression `alpha` * `x` `y` + `z`.
...@@ -995,7 +995,7 @@ def local_usmm_csx(fgraph, node): ...@@ -995,7 +995,7 @@ def local_usmm_csx(fgraph, node):
register_specialize(local_usmm_csx, "cxx_only") register_specialize(local_usmm_csx, "cxx_only")
class CSMGradC(COp): class CSMGradC(_NoPythonCOp):
__props__ = () __props__ = ()
...@@ -1138,7 +1138,7 @@ def local_csm_grad_c(fgraph, node): ...@@ -1138,7 +1138,7 @@ def local_csm_grad_c(fgraph, node):
# register_specialize(local_csm_grad_c, 'cxx_only') # register_specialize(local_csm_grad_c, 'cxx_only')
class MulSDCSC(COp): class MulSDCSC(_NoPythonCOp):
""" """
Multiplication of sparse matrix by a broadcasted dense vector Multiplication of sparse matrix by a broadcasted dense vector
element wise. element wise.
...@@ -1181,9 +1181,6 @@ class MulSDCSC(COp): ...@@ -1181,9 +1181,6 @@ class MulSDCSC(COp):
def c_code_cache_version(self): def c_code_cache_version(self):
return (3,) return (3,)
# def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplementedError()
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
( (
...@@ -1275,7 +1272,7 @@ class MulSDCSC(COp): ...@@ -1275,7 +1272,7 @@ class MulSDCSC(COp):
mul_s_d_csc = MulSDCSC() mul_s_d_csc = MulSDCSC()
class MulSDCSR(COp): class MulSDCSR(_NoPythonCOp):
""" """
Multiplication of sparse matrix by a broadcasted dense vector Multiplication of sparse matrix by a broadcasted dense vector
element wise. element wise.
...@@ -1318,9 +1315,6 @@ class MulSDCSR(COp): ...@@ -1318,9 +1315,6 @@ class MulSDCSR(COp):
def c_code_cache_version(self): def c_code_cache_version(self):
return (3,) return (3,)
# def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplemented()
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
( (
...@@ -1463,7 +1457,7 @@ def local_mul_s_d(fgraph, node): ...@@ -1463,7 +1457,7 @@ def local_mul_s_d(fgraph, node):
register_specialize(local_mul_s_d, "cxx_only") register_specialize(local_mul_s_d, "cxx_only")
class MulSVCSR(COp): class MulSVCSR(_NoPythonCOp):
""" """
Multiplication of sparse matrix by a broadcasted dense vector Multiplication of sparse matrix by a broadcasted dense vector
element wise. element wise.
...@@ -1627,7 +1621,7 @@ def local_mul_s_v(fgraph, node): ...@@ -1627,7 +1621,7 @@ def local_mul_s_v(fgraph, node):
register_specialize(local_mul_s_v, "cxx_only") register_specialize(local_mul_s_v, "cxx_only")
class StructuredAddSVCSR(COp): class StructuredAddSVCSR(_NoPythonCOp):
""" """
Structured addition of a sparse matrix and a dense vector. Structured addition of a sparse matrix and a dense vector.
The elements of the vector are only added to the corresponding The elements of the vector are only added to the corresponding
...@@ -1806,7 +1800,7 @@ def local_structured_add_s_v(fgraph, node): ...@@ -1806,7 +1800,7 @@ def local_structured_add_s_v(fgraph, node):
register_specialize(local_structured_add_s_v, "cxx_only") register_specialize(local_structured_add_s_v, "cxx_only")
class SamplingDotCSR(COp): class SamplingDotCSR(_NoPythonCOp):
""" """
Operand optimized for calculating the dot product dot(`x`, `y`.T) = `z` Operand optimized for calculating the dot product dot(`x`, `y`.T) = `z`
when you only want to calculate a subset of `z`. when you only want to calculate a subset of `z`.
......
...@@ -4,7 +4,7 @@ import os ...@@ -4,7 +4,7 @@ import os
import theano import theano
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import OpenMPOp from theano.gof.op import OpenMPOp, _NoPythonOp
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gof.type import EnumList from theano.gof.type import EnumList
from theano.scalar import int8, int64 from theano.scalar import int8, int64
...@@ -18,7 +18,7 @@ from theano.tensor.type import TensorType ...@@ -18,7 +18,7 @@ from theano.tensor.type import TensorType
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class BaseCorrMM(OpenMPOp): class BaseCorrMM(OpenMPOp, _NoPythonOp):
""" """
Base class for `CorrMM`, `CorrMM_gradWeights` and Base class for `CorrMM`, `CorrMM_gradWeights` and
`CorrMM_gradInputs`. Cannot be used directly. `CorrMM_gradInputs`. Cannot be used directly.
......
...@@ -4,7 +4,7 @@ import os ...@@ -4,7 +4,7 @@ import os
import theano import theano
from theano.configdefaults import config from theano.configdefaults import config
from theano.gof.graph import Apply from theano.gof.graph import Apply
from theano.gof.op import OpenMPOp from theano.gof.op import OpenMPOp, _NoPythonOp
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gof.type import EnumList from theano.gof.type import EnumList
from theano.scalar import int64 from theano.scalar import int64
...@@ -18,7 +18,7 @@ from theano.tensor.type import TensorType ...@@ -18,7 +18,7 @@ from theano.tensor.type import TensorType
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class BaseCorr3dMM(OpenMPOp): class BaseCorr3dMM(OpenMPOp, _NoPythonOp):
""" """
Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and
`Corr3dMM_gradInputs`. Cannot be used directly. `Corr3dMM_gradInputs`. Cannot be used directly.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论