提交 0366c559 authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Make Op.perform an abstractmethod and provide Op type hints

This change makes `Op.perform` a mandatory method. Since more than a few `Op`s do not have Python implementations, they've been made to extend `_NoPython*Op` classes that provide an `Op.perform` that simply raises a `NotImplementedError`.
上级 5a1a147d
......@@ -118,6 +118,9 @@ class WeirdBrokenOp(COp):
r = Apply(self, [a_], [a_.type()])
return r
def perform(*args, **kwargs):
raise NotImplementedError()
def dontuse_perform(self, node, inp, out_):
(a,) = inp
(out,) = out_
......
......@@ -41,6 +41,9 @@ class IncOneC(COp):
(z,) = outputs
return f"{z} = {x} + 1;"
def perform(self, *args, **kwargs):
raise NotImplementedError()
class TestComputeTestValue:
def test_destroy_map(self):
......
......@@ -85,6 +85,9 @@ class MyOp(Op):
outputs = [MyVariable(self.name + "_R") for i in range(self.nout)]
return Apply(self, inputs, outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
def __str__(self):
return self.name
......
......@@ -58,6 +58,9 @@ class MyOp(Op):
outputs = [MyVariable(sum(input.type.thingy for input in inputs))]
return Apply(self, list(inputs), outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
MyOp = MyOp()
......
......@@ -60,6 +60,9 @@ class MyOp(Op):
outputs = [MyType(sum([input.type.thingy for input in inputs]))()]
return Apply(self, inputs, outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
MyOp = MyOp()
......@@ -104,6 +107,9 @@ counter%(name)s++;
def c_code_cache_version(self):
return (1,)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
class TestOp:
......@@ -206,6 +212,9 @@ class TestMakeThunk:
(z,) = outputs
return f"{z} = {x} + 1;"
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
i = scalar.int32("i")
o = IncOneC()(i)
......
......@@ -48,6 +48,9 @@ class TestNodeFinder:
def __str__(self):
return self.name
def perform(self, *args, **kwargs):
raise NotImplementedError()
sigmoid = MyOp(1, "Sigmoid")
add = MyOp(2, "Add")
dot = MyOp(2, "Dot")
......
......@@ -39,6 +39,9 @@ Py_INCREF(%(inp)s);
def c_code_cache_version(self):
return (0,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
class GetOp(COp):
__props__ = ()
......@@ -65,6 +68,9 @@ Py_INCREF(%(out)s);
def c_code_cache_version(self):
return (0,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
@pytest.mark.skipif(
not theano.config.cxx, reason="G++ not available, so we need to skip this test."
......@@ -192,6 +198,9 @@ class MyOpCEnumType(COp):
def make_node(self):
return Apply(self, [], [scalar.uint32()])
def perform(self, *args, **kwargs):
raise NotImplementedError()
def c_code_cache_version(self):
return (3,)
......
......@@ -4,7 +4,6 @@ import pytest
import theano
from theano import config, tensor
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.params_type import ParamsType
from theano.gpuarray.basic_ops import CGpuKernelBase
from theano.gpuarray.type import GpuArrayType, get_context, gpu_context_type
......@@ -12,11 +11,11 @@ from theano.gradient import grad_undefined
from theano.scalar import int32 as int_t
# This is an implementation to test that CGpuKernelBase works and also
# to use as an example in the docs. It is not used for user graphs.
class GpuEye(CGpuKernelBase, Op):
"""
Eye for GPU.
class GpuEye(CGpuKernelBase):
"""Eye for GPU.
This is an implementation to test that `CGpuKernelBase` works and also
to use as an example in the docs. It is not used for user graphs.
"""
......@@ -28,9 +27,7 @@ class GpuEye(CGpuKernelBase, Op):
dtype = config.floatX
self.dtype = dtype
self.context_name = context_name
CGpuKernelBase.__init__(
self, ["c_code/tstgpueye.c"], "APPLY_SPECIFIC(tstgpueye)"
)
super().__init__(["c_code/tstgpueye.c"], "APPLY_SPECIFIC(tstgpueye)")
def get_params(self, node):
pygpu_gpuarray = pytest.importorskip("pygpu.gpuarray")
......
......@@ -984,6 +984,9 @@ class ApplyDefaultTestOp(Op):
x = tt.as_tensor_variable(x)
return Apply(self, [x], [x.type()])
def perform(self, *args, **kwargs):
raise NotImplementedError()
def test_constant():
int8_vector_type = tt.TensorType(dtype="int8", broadcastable=(False,))
......@@ -3862,6 +3865,9 @@ class TestGrad:
gz0, gz1 = grads
return self.gval0, self.gval1
def perform(self, *args, **kwargs):
raise NotImplementedError()
def test_1param(self):
# grad: Test passing a single variable param
o = TestGrad.Obj1()
......
......@@ -38,6 +38,9 @@ class MyOp(Op):
outputs = [MyType()()]
return Apply(self, inputs, outputs)
def perform(self, *args, **kwargs):
raise NotImplementedError("No Python implementation available.")
def __str__(self):
return self.name
......
......@@ -51,6 +51,9 @@ class TestGradSourcesInputs:
(x,) = inp
(gz,) = grads
def perform(self, *args, **kwargs):
raise NotImplementedError()
a = retNone().make_node()
with pytest.raises(TypeError):
grad_sources_inputs([(a.out, one)], None)
......@@ -68,6 +71,9 @@ class TestGradSourcesInputs:
def grad(self, inputs, grads):
return [inputs[0].zeros_like()]
def perform(self, *args, **kwargs):
raise NotImplementedError()
i = theano.tensor.vector()
j = theano.tensor.vector()
a1 = retOne().make_node(i)
......@@ -91,6 +97,9 @@ class TestGradSourcesInputs:
def grad(self, inp, grads):
return (gval,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval
......@@ -112,6 +121,9 @@ class TestGradSourcesInputs:
gz1, gz2 = grads
return (gval,)
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval
......@@ -134,6 +146,9 @@ class TestGradSourcesInputs:
(gz,) = grads
return (gval0, gval1)
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval0
......@@ -155,6 +170,9 @@ class TestGradSourcesInputs:
def grad(self, inp, grads):
return gval0, gval1
def perform(self, *args, **kwargs):
raise NotImplementedError()
a1 = TestOp().make_node()
g = grad_sources_inputs([(a1.outputs[0], one)], None)
assert g[a1.inputs[0]] is gval0
......@@ -190,6 +208,9 @@ class TestGrad:
def grad(self, inputs, output_grads):
return [theano.gradient.grad_not_implemented(self, 0, inputs[0])]
def perform(self, *args, **kwargs):
raise NotImplementedError()
a = theano.tensor.scalar()
b = DummyOp()(a)
......@@ -208,6 +229,9 @@ class TestGrad:
def grad(self, inputs, output_grads):
return [theano.gradient.grad_undefined(self, 0, inputs[0])]
def perform(self, *args, **kwargs):
raise NotImplementedError()
a = theano.tensor.scalar()
b = DummyOp()(a)
......@@ -380,6 +404,9 @@ class TestGrad:
def grad(self, inputs, output_grads):
return [inputs[0].zeros_like()]
def perform(self, *args, **kwargs):
raise NotImplementedError()
# Op2 has two inputs, f and g
# Its gradient with respect to g is not defined
class Op2(Op):
......@@ -391,6 +418,9 @@ class TestGrad:
def grad(self, inputs, output_grads):
return [inputs[0].zeros_like(), NullType()()]
def perform(self, *args, **kwargs):
raise NotImplementedError()
x = theano.tensor.vector()
f, g = Op1()(x)
cost = Op2()(f, g)
......
......@@ -581,6 +581,9 @@ class IfElseIfElseIf(Op):
thunk.lazy = True
return thunk
def perform(self, *args, **kwargs):
raise NotImplementedError()
class NotImplementedOpException(Exception):
pass
......@@ -597,6 +600,9 @@ class NotImplementedOp(Op):
thunk.lazy = False
return thunk
def perform(self, *args, **kwargs):
raise NotImplementedError()
def test_ifelse():
a = tt.scalar()
......
......@@ -10,8 +10,22 @@ import inspect
import os
import re
import sys
import typing
import warnings
from abc import abstractmethod
from typing import (
Any,
Callable,
ClassVar,
Dict,
List,
NoReturn,
Optional,
Pattern,
Set,
Text,
Tuple,
Union,
)
import numpy as np
......@@ -19,7 +33,7 @@ import theano
from theano.configdefaults import config
from theano.gof.fg import FunctionGraph
from theano.gof.graph import Apply, NoParams, Variable
from theano.gof.params_type import ParamsType
from theano.gof.params_type import Params, ParamsType
from theano.gof.utils import (
MetaObject,
MethodNotDefined,
......@@ -30,15 +44,22 @@ from theano.gof.utils import (
from theano.link.c.interface import CLinkerOp
__authors__ = "theano-dev"
# NOTE: the previous form used implicit string concatenation
# ("theano-dev" "PyMC Developers"), which produced the run-together
# value "theano-devPyMC Developers"; use an explicit separator.
__authors__ = "theano-dev, PyMC Developers"
__copyright__ = "(c) 2010, Universite de Montreal"
__license__ = "3-clause BSD License"
__contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en"
# Type aliases shared by the thunk/perform machinery below.
StorageMapType = List[Optional[List[Any]]]
ComputeMapType = List[bool]
OutputStorageType = List[Optional[List[Any]]]
# A params tuple can have any arity, so the tuple type must be variadic
# (``Tuple[Any]`` would mean "a 1-tuple").
ParamsInputType = Optional[Tuple[Any, ...]]
# ``Op.perform`` writes results into ``output_storage`` and returns ``None``.
# ``NoReturn`` would (incorrectly) declare that it never returns normally,
# i.e. that it always raises.
PerformMethodType = Callable[
    [Apply, List[Any], OutputStorageType, ParamsInputType], None
]
ThunkType = Callable[[PerformMethodType, StorageMapType, ComputeMapType, Apply], Any]
def compute_test_value(node):
def compute_test_value(node: Apply):
"""Computes the test value of a node.
Parameters
......@@ -149,7 +170,7 @@ class Op(MetaObject):
"""
def make_node(self, *inputs) -> Apply:
def make_node(self, *inputs: Variable) -> Apply:
"""Construct an `Apply` node that represent the application of this operation to the given inputs.
This must be implemented by sub-classes.
......@@ -182,9 +203,7 @@ class Op(MetaObject):
)
return Apply(self, inputs, [o() for o in self.otypes])
def __call__(
self, *inputs, **kwargs
) -> typing.Union[Variable, typing.List[Variable],]:
def __call__(self, *inputs: Any, **kwargs) -> Union[Variable, List[Variable]]:
"""Construct an `Apply` node using `self.make_node` and return its outputs.
This method is just a wrapper around `Op.make_node`.
......@@ -246,14 +265,16 @@ class Op(MetaObject):
else:
return node.outputs
def __ne__(self, other):
def __ne__(self, other: Any) -> bool:
return not (self == other)
# Convenience so that subclass implementers don't have to import utils
# just to self.add_tag_trace
add_tag_trace = staticmethod(add_tag_trace)
def grad(self, inputs, output_grads):
def grad(
self, inputs: List[Variable], output_grads: List[Variable]
) -> List[Variable]:
"""Construct a graph for the gradient with respect to each input variable.
Each returned `Variable` represents the gradient with respect to that
......@@ -277,7 +298,12 @@ class Op(MetaObject):
"""
raise NotImplementedError()
def L_op(self, inputs, outputs, output_grads):
def L_op(
self,
inputs: List[Variable],
outputs: List[Variable],
output_grads: List[Variable],
) -> List[Variable]:
r"""Construct a graph for the L-operator.
This method is primarily used by `tensor.Lop` and dispatches to
......@@ -298,7 +324,9 @@ class Op(MetaObject):
"""
return self.grad(inputs, output_grads)
def R_op(self, inputs, eval_points):
def R_op(
self, inputs: List[Variable], eval_points: Union[Variable, List[Variable]]
) -> List[Variable]:
"""Construct a graph for the R-operator.
This method is primarily used by tensor.Rop
......@@ -325,10 +353,15 @@ class Op(MetaObject):
"""
raise NotImplementedError()
def perform(self, node, inputs, output_storage, params=None):
"""
Required: Calculate the function on the inputs and put the variables in
the output storage. Return None.
@abstractmethod
def perform(
self,
node: Apply,
inputs: List[Variable],
output_storage: OutputStorageType,
params: ParamsInputType = None,
) -> NoReturn:
"""Calculate the function on the inputs and put the variables in the output storage.
Parameters
----------
......@@ -358,21 +391,9 @@ class Op(MetaObject):
A `Op` is free to reuse `output_storage` as it sees fit, or to
discard it and allocate new memory.
Raises
------
MethodNotDefined
The subclass does not override this method.
"""
raise MethodNotDefined(
"perform",
type(self),
self.__class__.__name__,
"Did you used Theano flags mode=FAST_COMPILE?"
" You can use optimizer=fast_compile instead.",
)
def do_constant_folding(self, fgraph: FunctionGraph, node: Apply):
def do_constant_folding(self, fgraph: FunctionGraph, node: Apply) -> bool:
"""Determine whether or not constant folding should be performed for the given node.
This allows each `Op` to determine if it wants to be constant
......@@ -393,9 +414,8 @@ class Op(MetaObject):
"""
return True
# We add a default get_params() implementation which will try to detect params from the op
# if params_type is set to a ParamsType. If not, we raise a MethodNotDefined exception.
def get_params(self, node):
def get_params(self, node: Apply) -> Params:
"""Try to detect params from the op if `Op.params_type` is set to a `ParamsType`."""
if hasattr(self, "params_type") and isinstance(self.params_type, ParamsType):
wrapper = self.params_type
if not all(hasattr(self, field) for field in wrapper.fields):
......@@ -410,10 +430,14 @@ class Op(MetaObject):
return self.params_type.get_params(self)
raise MethodNotDefined("get_params")
def prepare_node(self, node, storage_map, compute_map, impl):
"""
Make any special modifications that the Op needs before doing
make_thunk().
def prepare_node(
self,
node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
impl: Optional[Text],
) -> NoReturn:
"""Make any special modifications that the Op needs before doing `Op.make_thunk`.
This can modify the node inplace and should return nothing.
......@@ -423,9 +447,17 @@ class Op(MetaObject):
"""
def make_py_thunk(self, node, storage_map, compute_map, no_recycling, debug=False):
"""
Like make_thunk() but only makes python thunks.
def make_py_thunk(
self,
node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
no_recycling: bool,
debug: bool = False,
) -> ThunkType:
"""Make a Python thunk.
Like `Op.make_thunk` but only makes python thunks.
"""
node_input_storage = [storage_map[r] for r in node.inputs]
......@@ -467,7 +499,14 @@ class Op(MetaObject):
rval.lazy = False
return rval
def make_thunk(self, node, storage_map, compute_map, no_recycling, impl=None):
def make_thunk(
self,
node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
no_recycling: bool,
impl: Optional[Text] = None,
) -> ThunkType:
"""Create a thunk.
This function must return a thunk, that is a zero-arguments
......@@ -513,8 +552,18 @@ class Op(MetaObject):
class COp(Op, CLinkerOp):
"""An `Op` with a C implementation."""
def make_c_thunk(self, node, storage_map, compute_map, no_recycling):
"""Like make_thunk, but will only try to make a C thunk."""
def make_c_thunk(
self,
node: Apply,
storage_map: StorageMapType,
compute_map: ComputeMapType,
no_recycling: bool,
) -> ThunkType:
"""Create a thunk for a C implementation.
Like `Op.make_thunk`, but will only try to make a C thunk.
"""
# FIXME: Putting the following import on the module level causes an import cycle.
# The conclusion should be that the entire "make_c_thunk" method should be defined
# in theano.link.c and dispatched onto the Op!
......@@ -593,7 +642,7 @@ class COp(Op, CLinkerOp):
)
def get_test_value(v):
def get_test_value(v: Variable) -> Any:
"""Get the test value for `v`.
If input `v` is not already a variable, it is turned into one by calling
......@@ -610,7 +659,7 @@ def get_test_value(v):
return v.get_test_value()
def missing_test_message(msg):
def missing_test_message(msg: Text) -> NoReturn:
"""
Displays msg, a message saying that some test_value is missing,
in the appropriate form based on config.compute_test_value:
......@@ -635,8 +684,9 @@ def missing_test_message(msg):
assert action in ["ignore", "off"]
def get_test_values(*args):
"""
def get_test_values(*args: Variable) -> Union[Any, List[Any]]:
"""Get test values for multiple `Variable`s.
Intended use:
for val_1, ..., val_n in get_debug_values(var_1, ..., var_n):
......@@ -681,7 +731,7 @@ def get_test_values(*args):
return [tuple(rval)]
ops_with_inner_function = {}
ops_with_inner_function: Dict[Op, Text] = {}
"""
Registry of Ops that have an inner compiled Theano function.
......@@ -711,18 +761,18 @@ class OpenMPOp(COp):
"""
gxx_support_openmp = None
gxx_support_openmp: Optional[bool] = None
"""
True/False after we tested this.
"""
def __init__(self, openmp=None):
def __init__(self, openmp: Optional[bool] = None):
if openmp is None:
openmp = config.openmp
self.openmp = openmp
def __setstate__(self, d):
def __setstate__(self, d: Dict):
self.__dict__.update(d)
# If we unpickle old op
if not hasattr(self, "openmp"):
......@@ -748,9 +798,7 @@ class OpenMPOp(COp):
@staticmethod
def test_gxx_support():
"""
Check if openMP is supported
"""
"""Check if openMP is supported."""
from theano.link.c.cmodule import GCC_compiler
code = """
......@@ -769,7 +817,7 @@ int main( int argc, const char* argv[] )
)
return default_openmp
def update_self_openmp(self):
def update_self_openmp(self) -> NoReturn:
"""
Make sure self.openmp is not True if there is no support in gxx.
......@@ -797,21 +845,60 @@ int main( int argc, const char* argv[] )
self.update_self_openmp()
def lquote_macro(txt: Text) -> Text:
    """Append a ``\\`` line-continuation to every line of `txt` except the last.

    This makes a multi-line snippet usable as the body of a C ``#define``.
    """
    lines = txt.split("\n")
    continued = [line + " \\" for line in lines[:-1]]
    return "\n".join(continued + lines[-1:])
def get_sub_macros(sub: Dict[Text, Text]) -> Tuple[Text, Text]:
    """Construct C ``#define``/``#undef`` strings for the entries in `sub`.

    Parameters
    ----------
    sub
        A substitution dictionary; must contain a ``"fail"`` entry and may
        contain a ``"params"`` entry.

    Returns
    -------
    A pair of strings: the newline-joined ``#define`` lines and the
    newline-joined ``#undef`` lines.

    """
    # ``fail`` is multi-line C code, so it needs line continuations to be
    # usable inside a macro definition.
    define_macros = [f"#define FAIL {lquote_macro(sub['fail'])}"]
    undef_macros = ["#undef FAIL"]
    if "params" in sub:
        define_macros.append(f"#define PARAMS {sub['params']}")
        undef_macros.append("#undef PARAMS")
    return "\n".join(define_macros), "\n".join(undef_macros)
def get_io_macros(inputs: List[Text], outputs: List[Text]) -> Tuple[Text, Text]:
    """Construct C ``#define``/``#undef`` strings for input/output variable names.

    Parameters
    ----------
    inputs
        C names of the input variables, in positional order.
    outputs
        C names of the output variables, in positional order.

    Returns
    -------
    A pair of strings: the newline-joined ``#define`` lines
    (``INPUT_<i>``/``OUTPUT_<i>``) and the newline-joined ``#undef`` lines.

    """
    define_macros = []
    undef_macros = []
    # `enumerate` indices are already ints, so no conversion is needed.
    for i, inp in enumerate(inputs):
        define_macros.append(f"#define INPUT_{i} {inp}")
        undef_macros.append(f"#undef INPUT_{i}")
    for i, out in enumerate(outputs):
        define_macros.append(f"#define OUTPUT_{i} {out}")
        undef_macros.append(f"#undef OUTPUT_{i}")
    return "\n".join(define_macros), "\n".join(undef_macros)
class ExternalCOp(COp):
"""
Class to allow an op to have an external C implementation.
"""Class for an `Op` with an external C implementation.
An op can use this class by inheriting from it and calling its
__init__() method, providing it with a path to an external file containing
the C implementation and the name of the function, in that file, to call
to perform the computations for the op.
One can inherit from this class, provide its constructor with a path to
an external C source file and the name of a function within it, and define
an `Op` for said function.
"""
section_re = re.compile(r"^#section ([a-zA-Z0-9_]+)$", re.MULTILINE)
backward_re = re.compile(r"^THEANO_(APPLY|SUPPORT)_CODE_SECTION$", re.MULTILINE)
section_re: ClassVar[Pattern] = re.compile(
r"^#section ([a-zA-Z0-9_]+)$", re.MULTILINE
)
backward_re: ClassVar[Pattern] = re.compile(
r"^THEANO_(APPLY|SUPPORT)_CODE_SECTION$", re.MULTILINE
)
# This is the set of allowed markers
SECTIONS = {
SECTIONS: ClassVar[Set[Text]] = {
"init_code",
"init_code_apply",
"init_code_struct",
......@@ -824,11 +911,10 @@ class ExternalCOp(COp):
}
@classmethod
def get_path(cls, f):
"""
Convert a path relative to the location of the class file into
an aboslute path. Paths that are already absolute are passed
through unchanged.
def get_path(cls, f: Text) -> Text:
"""Convert a path relative to the location of the class file into an absolute path.
Paths that are already absolute are passed through unchanged.
"""
if not os.path.isabs(f):
......@@ -837,7 +923,9 @@ class ExternalCOp(COp):
f = os.path.realpath(os.path.join(class_dir, f))
return f
def __init__(self, func_files, func_name=None):
def __init__(
self, func_files: Union[Text, List[Text]], func_name: Optional[Text] = None
):
"""
Sections are loaded from files in order with sections in later
files overriding sections in previous files.
......@@ -868,10 +956,8 @@ class ExternalCOp(COp):
"and specify the func_name"
)
def load_c_code(self, func_files):
"""
Loads the c code to perform the Op
"""
def load_c_code(self, func_files: List[Text]) -> NoReturn:
"""Loads the C code to perform the `Op`."""
func_files = [self.get_path(f) for f in func_files]
self.func_codes = []
for func_file in func_files:
......@@ -940,10 +1026,8 @@ class ExternalCOp(COp):
f"No valid section marker was found in file {func_files[i]}"
)
def __get_op_params(self):
"""
Returns a list of (name, value) pairs that will be turned into
macros for use within the op code.
def __get_op_params(self) -> List[Text]:
"""Construct name, value pairs that will be turned into macros for use within the `Op`'s code.
The names must be strings that are not a C keyword and the
values must be strings of literal C representations.
......@@ -1031,10 +1115,12 @@ class ExternalCOp(COp):
else:
return super().c_cleanup_code_struct(node, name)
def format_c_function_args(self, inp, out):
# Generate an string containing the arguments sent to the external C
# function. The argstring will be of format :
# "input0, input1, input2, &output0, &output1"
def format_c_function_args(self, inp: List[Text], out: List[Text]) -> Text:
"""Generate a string containing the arguments sent to the external C function.
The result will have the format: ``"input0, input1, input2, &output0, &output1"``.
"""
inp = list(inp)
numi = getattr(self, "_cop_num_inputs", len(inp))
while len(inp) < numi:
......@@ -1045,7 +1131,10 @@ class ExternalCOp(COp):
out.append("NULL")
return ", ".join(inp + out)
def get_c_macros(self, node, name, check_input=None):
def get_c_macros(
self, node: Apply, name: Text, check_input: Optional[bool] = None
) -> Tuple[Text]:
"Construct a pair of C ``#define`` and ``#undef`` code strings."
define_template = "#define %s %s"
undef_template = "#undef %s"
define_macros = []
......@@ -1097,37 +1186,6 @@ class ExternalCOp(COp):
return "\n".join(define_macros), "\n".join(undef_macros)
def _lquote_macro(self, txt):
res = []
spl = txt.split("\n")
for l in spl[:-1]:
res.append(l + " \\")
res.append(spl[-1])
return "\n".join(res)
def get_sub_macros(self, sub):
define_macros = []
undef_macros = []
define_macros.append(f"#define FAIL {self._lquote_macro(sub['fail'])}")
undef_macros.append("#undef FAIL")
if "params" in sub:
define_macros.append(f"#define PARAMS {sub['params']}")
undef_macros.append("#undef PARAMS")
return "\n".join(define_macros), "\n".join(undef_macros)
def get_io_macros(self, inputs, outputs):
define_macros = []
undef_macros = []
for i, inp in enumerate(inputs):
define_macros.append(f"#define INPUT_{int(i)} {inp}")
undef_macros.append(f"#undef INPUT_{int(i)}")
for i, out in enumerate(outputs):
define_macros.append(f"#define OUTPUT_{int(i)} {inp}")
undef_macros.append(f"#undef OUTPUT_{int(i)}")
def c_init_code_struct(self, node, name, sub):
"""
Stitches all the macros and "init_code" together
......@@ -1137,7 +1195,7 @@ class ExternalCOp(COp):
op_code = self.code_sections["init_code_struct"]
def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub)
def_sub, undef_sub = get_sub_macros(sub)
return "\n".join(
["", def_macros, def_sub, op_code, undef_sub, undef_macros]
......@@ -1179,8 +1237,8 @@ class ExternalCOp(COp):
op_code = self.code_sections["code"]
def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub)
def_io, undef_io = self.get_io_macros(inp, out)
def_sub, undef_sub = get_sub_macros(sub)
def_io, undef_io = get_io_macros(inp, out)
return "\n".join(
[
......@@ -1204,8 +1262,8 @@ class ExternalCOp(COp):
op_code = self.code_sections["code_cleanup"]
def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub)
def_io, undef_io = self.get_io_macros(inputs, outputs)
def_sub, undef_sub = get_sub_macros(sub)
def_io, undef_io = get_io_macros(inputs, outputs)
return "\n".join(
[
......@@ -1220,3 +1278,38 @@ class ExternalCOp(COp):
)
else:
return super().c_code_cleanup(node, name, inputs, outputs, sub)
class _NoPythonOp(Op):
    """Marker base for an `Op` that lacks a Python implementation.

    XXX: Do not use this class; it's only for tracking bad implementations internally.

    """

    def perform(self, node, inputs, output_storage, params=None):
        # Satisfies the abstract-method requirement without providing a
        # real Python implementation.
        msg = "No Python implementation is provided by this Op."
        raise NotImplementedError(msg)
class _NoPythonCOp(COp):
    """Marker base for a `COp` that lacks a Python implementation.

    XXX: Do not use this class; it's only for tracking bad implementations internally.

    """

    def perform(self, node, inputs, output_storage, params=None):
        # Satisfies the abstract-method requirement without providing a
        # real Python implementation.
        msg = "No Python implementation is provided by this COp."
        raise NotImplementedError(msg)
class _NoPythonExternalCOp(ExternalCOp):
    """Marker base for an `ExternalCOp` that lacks a Python implementation.

    XXX: Do not use this class; it's only for tracking bad implementations internally.

    """

    def perform(self, node, inputs, output_storage, params=None):
        # Satisfies the abstract-method requirement without providing a
        # real Python implementation.
        msg = "No Python implementation is provided by this ExternalCOp."
        raise NotImplementedError(msg)
......@@ -9,7 +9,7 @@ import theano
from theano import tensor
from theano.configdefaults import config
from theano.gof.graph import Apply, Variable
from theano.gof.op import COp, ExternalCOp, Op
from theano.gof.op import COp, ExternalCOp, Op, _NoPythonOp
from theano.gof.opt import copy_stack_trace
from theano.gof.params_type import ParamsType
from theano.gof.type import CType
......@@ -493,6 +493,14 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
return (9,)
class GpuKernelBaseCOp(GpuKernelBase, COp):
    """Combine `GpuKernelBase` with the `COp` interface."""

    pass
class GpuKernelBaseExternalCOp(GpuKernelBase, ExternalCOp):
    """Combine `GpuKernelBase` with the `ExternalCOp` interface."""

    pass
def forward_string_meth(name):
def f(*args):
res = getattr(GpuKernelBase, name)(*args)
......@@ -517,7 +525,7 @@ def get_dtype(s):
return np.dtype(s)
class CGpuKernelBase(ExternalCOp, GpuKernelBase):
class CGpuKernelBase(GpuKernelBaseExternalCOp, _NoPythonOp):
"""
Class to combine GpuKernelBase and ExternalCOp.
......@@ -1498,7 +1506,7 @@ class GpuJoin(HideC, Join):
gpu_join = GpuJoin()
class GpuSplit(HideC, Split):
class GpuSplit(HideC, Split, _NoPythonOp):
"""
Split for GPU.
......@@ -1748,7 +1756,7 @@ def profile_printer(
print("", file=file)
class GpuEye(GpuKernelBase, Op):
class GpuEye(GpuKernelBaseCOp, _NoPythonOp):
"""
Eye for GPU.
......@@ -1882,7 +1890,7 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off,
return (10,)
class GpuTri(GpuKernelBase, Op):
class GpuTri(GpuKernelBaseCOp, _NoPythonOp):
"""
Tri for GPU.
......
import theano
from theano.compile import optdb
from theano.gof.graph import Apply
from theano.gof.op import COp
from theano.gof.op import _NoPythonCOp
from theano.gof.opt import LocalOptGroup
from theano.gof.params_type import ParamsType
from theano.scalar import bool as bool_t
......@@ -27,7 +27,7 @@ except ImportError:
pass
class BlasOp(COp):
class BlasOp(_NoPythonCOp):
def c_headers(self, **kwargs):
return ["<blas_api.h>", "<numpy_compat.h>", "<gpuarray_helper.h>"]
......@@ -412,7 +412,7 @@ class GpuDot22(BlasOp):
gpu_dot22 = GpuDot22()
class GpuGemmBatch(BlasOp):
class GpuGemmBatch(BlasOp, _NoPythonCOp):
params_type = ParamsType(inplace=bool_t)
__props__ = ("inplace",)
_f16_ok = True
......@@ -1009,7 +1009,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
)
class GpuCorrMM(BaseGpuCorrMM):
class GpuCorrMM(BaseGpuCorrMM, _NoPythonCOp):
"""
GPU correlation implementation using Matrix Multiplication.
......@@ -1129,7 +1129,7 @@ class GpuCorrMM(BaseGpuCorrMM):
return d_bottom, d_weights
class GpuCorrMM_gradWeights(BaseGpuCorrMM):
class GpuCorrMM_gradWeights(BaseGpuCorrMM, _NoPythonCOp):
"""
Gradient wrt. filters for `GpuCorrMM`.
......@@ -1235,7 +1235,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
return [[1], [1], [0], [0]] # no connection to height, width
class GpuCorrMM_gradInputs(BaseGpuCorrMM):
class GpuCorrMM_gradInputs(BaseGpuCorrMM, _NoPythonCOp):
"""
Gradient wrt. inputs for `GpuCorrMM`.
......@@ -1337,7 +1337,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
return [[1], [1], [0], [0]] # no connection to height, width
class BaseGpuCorr3dMM(CGpuKernelBase):
class BaseGpuCorr3dMM(CGpuKernelBase, _NoPythonCOp):
"""
Base class for `GpuCorr3dMM`, `GpuCorr3dMM_gradWeights` and
`GpuCorr3dMM_gradInputs`. Cannot be used directly.
......@@ -1777,7 +1777,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
)
class GpuCorr3dMM(BaseGpuCorr3dMM):
class GpuCorr3dMM(BaseGpuCorr3dMM, _NoPythonCOp):
"""
GPU correlation implementation using Matrix Multiplication.
......@@ -1881,7 +1881,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
return d_bottom, d_weights
class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM, _NoPythonCOp):
"""
Gradient wrt. filters for `GpuCorr3dMM`.
......@@ -1970,7 +1970,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
return [[1], [1], [0], [0], [0]] # no connection to height, width, depth
class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM, _NoPythonCOp):
"""
Gradient wrt. inputs for `GpuCorr3dMM`.
......
......@@ -4,7 +4,7 @@ import numpy as np
from theano import tensor
from theano.gof.graph import Apply
from theano.gof.op import ExternalCOp
from theano.gof.op import _NoPythonExternalCOp
from theano.gof.params_type import ParamsType
from theano.gradient import grad_undefined
from theano.scalar import bool as bool_t
......@@ -17,7 +17,7 @@ from .type import gpu_context_type
_logger = logging.getLogger("theano.gpuarray.blocksparse")
class GpuSparseBlockGemv(ExternalCOp):
class GpuSparseBlockGemv(_NoPythonExternalCOp):
"""
GPU version of SparseBlockGemv. Check SparseBlockGemv's docstring for more
information.
......@@ -32,7 +32,7 @@ class GpuSparseBlockGemv(ExternalCOp):
# NB: DTYPE_INPUT_* is used in C code, so I think we should not set check_input to False.
def __init__(self, inplace=False):
ExternalCOp.__init__(self, "c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
super().__init__("c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
......@@ -92,7 +92,7 @@ gpu_sparse_block_gemv = GpuSparseBlockGemv(False)
gpu_sparse_block_gemv_inplace = GpuSparseBlockGemv(True)
class GpuSparseBlockOuter(ExternalCOp):
class GpuSparseBlockOuter(_NoPythonExternalCOp):
"""
GPU version of SparseBlockOuter. See SparseBlockOuter's docstring for more
information.
......@@ -106,7 +106,7 @@ class GpuSparseBlockOuter(ExternalCOp):
params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
def __init__(self, inplace=False):
ExternalCOp.__init__(self, ["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)")
super().__init__(["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)")
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
......
......@@ -4,7 +4,7 @@ import sys
import theano.tensor as tt
from theano.configdefaults import config
from theano.gof.graph import Apply
from theano.gof.op import ExternalCOp
from theano.gof.op import _NoPythonExternalCOp
from theano.gof.opt import local_optimizer
from theano.gpuarray import pygpu
from theano.gpuarray.basic_ops import (
......@@ -20,7 +20,7 @@ from theano.tensor.nnet.ctc import ctc_available
from theano.tensor.opt import register_canonicalize
class GpuConnectionistTemporalClassification(ExternalCOp):
class GpuConnectionistTemporalClassification(_NoPythonExternalCOp):
"""
GPU wrapper for Baidu CTC loss function.
......
......@@ -12,7 +12,7 @@ from theano import tensor
from theano.compile.ops import shape_i, shape_i_op
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME, config
from theano.gof.graph import Apply, Variable
from theano.gof.op import COp, ExternalCOp
from theano.gof.op import ExternalCOp, _NoPythonCOp, _NoPythonExternalCOp
from theano.gof.params_type import ParamsType
from theano.gof.type import CDataType, EnumList, Generic
from theano.gpuarray import cudnn_defs, pygpu
......@@ -302,7 +302,7 @@ class MakerCDataType(CDataType):
return self._get_func()(ptr)
class CDataMaker(COp):
class CDataMaker(_NoPythonCOp):
"""This is the equally lame `Op` that accompanies `MakerCDataType`."""
__props__ = ("rtype",)
......@@ -350,7 +350,7 @@ def CUDNNDataType(name, freefunc=None):
)
class DnnVersion(COp):
class DnnVersion(_NoPythonCOp):
__props__ = ()
def c_headers(self, **kwargs):
......@@ -460,7 +460,7 @@ def get_precision(precision, inputs, for_grad=False):
return precision, common_dtype
class DnnBase(ExternalCOp):
class DnnBase(_NoPythonExternalCOp):
"""
Creates a handle for cudnn and pulls in the cudnn libraries and headers.
......@@ -496,7 +496,7 @@ class DnnBase(ExternalCOp):
def __init__(self, files=None, c_func=None):
if files is None:
files = []
ExternalCOp.__init__(self, ["c_code/dnn_base.c"] + files, c_func)
super().__init__(["c_code/dnn_base.c"] + files, c_func)
def c_headers(self, **kwargs):
return [
......@@ -535,7 +535,7 @@ class DnnBase(ExternalCOp):
return (super().c_code_cache_version(), version(), 4)
class GpuDnnConvDesc(ExternalCOp):
class GpuDnnConvDesc(_NoPythonExternalCOp):
"""
This Op builds a convolution descriptor for use in the other convolution
......@@ -607,7 +607,7 @@ class GpuDnnConvDesc(ExternalCOp):
precision="float32",
num_groups=1,
):
ExternalCOp.__init__(self, ["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
super().__init__(["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]):
raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.")
......@@ -756,8 +756,7 @@ class GpuDnnConv(DnnBase):
)
def __init__(self, algo=None, inplace=False, num_groups=1):
DnnBase.__init__(
self,
super().__init__(
["c_code/dnn_conv_base.c", "c_code/dnn_fwd.c"],
"APPLY_SPECIFIC(conv_fwd)",
)
......@@ -918,8 +917,7 @@ class GpuDnnConvGradW(DnnBase):
)
def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__(
self,
super().__init__(
["c_code/dnn_conv_base.c", "c_code/dnn_gw.c"],
"APPLY_SPECIFIC(conv_gw)",
)
......@@ -1088,8 +1086,7 @@ class GpuDnnConvGradI(DnnBase):
)
def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__(
self,
super().__init__(
["c_code/dnn_conv_base.c", "c_code/dnn_gi.c"],
"APPLY_SPECIFIC(conv_gi)",
)
......@@ -1767,7 +1764,7 @@ def dnn_gradinput3d(
)
class GpuDnnPoolDesc(COp):
class GpuDnnPoolDesc(_NoPythonCOp):
"""
This Op builds a pooling descriptor for use in the other
pooling operations.
......@@ -1911,7 +1908,7 @@ class GpuDnnPoolBase(DnnBase):
params_type = ParamsType(mode=cudnn.cudnnPoolingMode_t, handle=handle_type)
def __init__(self, mode="max"):
DnnBase.__init__(self, [self.c_file], self.c_function)
super().__init__([self.c_file], self.c_function)
if mode == "average":
mode = "average_inc_pad"
# Supported modes depend on runtime cuDNN version.
......@@ -2114,7 +2111,7 @@ class GpuDnnSoftmaxBase(DnnBase):
)
def __init__(self, algo, mode):
DnnBase.__init__(self, [self.file], self.c_func)
super().__init__([self.file], self.c_func)
assert cudnn.cudnnSoftmaxAlgorithm_t.has_alias(algo)
self.algo = algo
......@@ -2207,7 +2204,7 @@ class GpuDnnReduction(DnnBase):
)
def __init__(self, red_op, axis, acc_dtype, dtype, return_indices):
DnnBase.__init__(self, ["c_code/dnn_redux.c"], "APPLY_SPECIFIC(dnn_redux)")
super().__init__(["c_code/dnn_redux.c"], "APPLY_SPECIFIC(dnn_redux)")
assert cudnn.cudnnReduceTensorOp_t.has_alias(red_op)
self.red_op = red_op
assert acc_dtype in ["float16", "float32", "float64"]
......@@ -2328,8 +2325,7 @@ class GpuDnnBatchNorm(DnnBase):
inplace_running_var=False,
inplace_output=False,
):
DnnBase.__init__(
self,
super().__init__(
["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm.c"],
"dnn_batchnorm_op",
)
......@@ -2460,8 +2456,7 @@ class GpuDnnBatchNormInference(DnnBase):
)
def __init__(self, mode="per-activation", inplace=False):
DnnBase.__init__(
self,
super().__init__(
["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm_inf.c"],
"dnn_batchnorm_op",
)
......@@ -2546,8 +2541,7 @@ class GpuDnnBatchNormGrad(DnnBase):
params_type = ParamsType(mode=cudnn.cudnnBatchNormMode_t, handle=handle_type)
def __init__(self, mode="per-activation"):
DnnBase.__init__(
self,
super().__init__(
["c_code/dnn_batchnorm_base.c", "c_code/dnn_batchnorm_grad.c"],
"dnn_batchnorm_grad",
)
......@@ -2585,7 +2579,7 @@ class GpuDnnDropoutOp(DnnBase):
__props__ = ("inplace",)
def __init__(self, inplace=False):
DnnBase.__init__(self, ["c_code/dnn_dropout_fwd.c"], "dnn_dropout_fwd")
super().__init__(["c_code/dnn_dropout_fwd.c"], "dnn_dropout_fwd")
self.inplace = inplace
if self.inplace:
self.destroy_map = {1: [2]}
......@@ -2605,7 +2599,7 @@ class _DropoutDescriptor(DnnBase):
__props__ = ("context_name",)
def __init__(self, context_name):
DnnBase.__init__(self, ["c_code/dnn_dropout_desc.c"], "dnn_dropout_desc")
super().__init__(["c_code/dnn_dropout_desc.c"], "dnn_dropout_desc")
self.context_name = context_name
def dnn_context(self, node):
......@@ -2666,7 +2660,7 @@ class _RNNDescriptor(DnnBase):
def __init__(self, context_name):
if version() < 5005:
raise RuntimeError("cudnn RNN require cudnn v5 final or higher.")
DnnBase.__init__(self, ["c_code/dnn_rnn_desc.c"], "dnn_rnn_desc")
super().__init__(["c_code/dnn_rnn_desc.c"], "dnn_rnn_desc")
self.context_name = context_name
def dnn_context(self, node):
......@@ -2759,7 +2753,7 @@ class _RNNParamSize(DnnBase):
__props__ = ("context_name",)
def __init__(self, context_name):
DnnBase.__init__(self, ["c_code/dnn_rnn_paramsize.c"], "dnn_rnn_paramsize")
super().__init__(["c_code/dnn_rnn_paramsize.c"], "dnn_rnn_paramsize")
self.context_name = context_name
def dnn_context(self, node):
......@@ -2792,7 +2786,7 @@ class _RNNSplitParams(DnnBase):
__props__ = ("rnn_mode",)
def __init__(self, rnn_mode):
DnnBase.__init__(self)
super().__init__()
self.rnn_mode = rnn_mode
def make_node(self, w, desc, layer, isize, typecode):
......@@ -3035,7 +3029,7 @@ class GpuDnnRNNOp(DnnBase):
_cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode):
DnnBase.__init__(self, ["c_code/dnn_rnn_fwd.c"], "dnn_rnn_fwd")
super().__init__(["c_code/dnn_rnn_fwd.c"], "dnn_rnn_fwd")
self.rnn_mode = rnn_mode
if direction_mode == "bidirectional":
self.num_dirs = 2
......@@ -3126,7 +3120,7 @@ class GpuDnnRNNGradInputs(DnnBase):
_cop_num_outputs = 4
def __init__(self, rnn_mode, grad_h, grad_c):
DnnBase.__init__(self, ["c_code/dnn_rnn_gi.c"], "dnn_rnn_gi")
super().__init__(["c_code/dnn_rnn_gi.c"], "dnn_rnn_gi")
self.rnn_mode = rnn_mode
self.grad_h = grad_h
self.grad_c = grad_c
......@@ -3175,7 +3169,7 @@ class GpuDnnRNNGradWeights(DnnBase):
__props__ = ()
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_rnn_gw.c"], "dnn_rnn_gw")
super().__init__(["c_code/dnn_rnn_gw.c"], "dnn_rnn_gw")
def make_node(self, desc, x, hx, y, reserve, w):
# We trust the callers here
......@@ -3579,9 +3573,7 @@ class GpuDnnTransformerGrid(DnnBase):
check_input = False
def __init__(self):
DnnBase.__init__(
self, ["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)"
)
super().__init__(["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)")
def make_node(self, theta, out_dims):
"""
......@@ -3640,8 +3632,8 @@ class GpuDnnTransformerSampler(DnnBase):
check_input = False
def __init__(self):
DnnBase.__init__(
self, ["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)"
super().__init__(
["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)"
)
def make_node(self, img, grid):
......@@ -3704,7 +3696,7 @@ class GpuDnnTransformerGradI(DnnBase):
check_input = False
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
super().__init__(["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
def make_node(self, img, grid, dy):
context_name = infer_context_name(img, grid, dy)
......@@ -3742,7 +3734,7 @@ class GpuDnnTransformerGradT(DnnBase):
check_input = False
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")
super().__init__(["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")
def make_node(self, dgrid):
context_name = infer_context_name(dgrid)
......
......@@ -5,7 +5,7 @@ import numpy as np
from theano import scalar
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
from theano.gof.utils import MethodNotDefined
from theano.link.c.interface import HideC
from theano.scalar import Composite, Scalar
......@@ -84,7 +84,7 @@ def max_inputs_to_GpuElemwise(node_or_outputs):
return max_nb_inputs
class GpuElemwise(HideC, Elemwise):
class GpuElemwise(_NoPythonOp, HideC, Elemwise):
"""
Elemwise on the GPU.
......@@ -414,9 +414,6 @@ class GpuElemwise(HideC, Elemwise):
return str(code)
# To disable the superclass perform.
perform = Op.perform
# Since we don't have a perform ...
def python_constant_folding(self, node):
return False
......@@ -482,7 +479,7 @@ class GpuDimShuffle(DimShuffle):
storage[0] = res
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype, _NoPythonOp):
"""
GpuCAReduceCuda is a Reduction along some dimensions by a scalar op.
......@@ -616,9 +613,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
],
)
def perform(self, node, inp, out, ctx):
Op.perform(self, node, inp, out, ctx)
def supports_c_code(self, inputs):
"""
Returns True if the current op and reduce pattern has functioning C code.
......
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
from theano.tensor.extra_ops import CumOp
......@@ -11,7 +11,7 @@ except ImportError:
import theano.scalar as scalar
from theano.gof.params_type import ParamsType
from theano.gpuarray.basic_ops import (
GpuKernelBase,
GpuKernelBaseCOp,
GpuReshape,
Kernel,
as_gpuarray_variable,
......@@ -22,7 +22,7 @@ from theano.gpuarray.opt import op_lifter, register_opt, register_opt2
from theano.gpuarray.type import gpu_context_type
class GpuCumOp(GpuKernelBase, Op):
class GpuCumOp(GpuKernelBaseCOp, _NoPythonOp):
"""
Parameters
----------
......@@ -505,7 +505,7 @@ class GpuCumOp(GpuKernelBase, Op):
# GpuCumsumOp exists only to serve backward compatibility.
# Once an object is created, it will be converted to CumOp object.
class GpuCumsumOp(GpuKernelBase, Op):
class GpuCumsumOp(GpuKernelBaseCOp, _NoPythonOp):
SUPPORTED_NDIMS = 3
__props__ = ("axis",)
......
......@@ -2,7 +2,7 @@ import numpy as np
import theano.tensor as tt
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
from theano.gpuarray.basic_ops import (
as_gpuarray_variable,
gpu_contiguous,
......@@ -37,7 +37,7 @@ except Exception:
skcuda_available = False
class CuRFFTOp(Op):
class CuRFFTOp(_NoPythonOp):
__props__ = ()
......@@ -168,7 +168,7 @@ class CuRFFTOp(Op):
curfft_op = CuRFFTOp()
class CuIRFFTOp(Op):
class CuIRFFTOp(_NoPythonOp):
__props__ = ()
......
......@@ -11,12 +11,12 @@ except ImportError:
import theano
import theano.sandbox.multinomial
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
from theano.scalar import as_scalar
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from .basic_ops import (
GpuKernelBase,
GpuKernelBaseCOp,
Kernel,
as_gpuarray_variable,
gpuarray_helper_inc_dir,
......@@ -28,12 +28,12 @@ from .opt import op_lifter, register_opt, register_opt2
from .type import GpuArrayType
class GPUAMultinomialFromUniform(GpuKernelBase, Op):
class GPUAMultinomialFromUniform(GpuKernelBaseCOp, _NoPythonOp):
__props__ = ("odtype",)
_f16_ok = True
def __init__(self, odtype):
Op.__init__(self)
super().__init__(self)
self.odtype = odtype
def get_params(self, node):
......@@ -251,7 +251,7 @@ KERNEL void k_multi_warp_multinomial(
return (7,)
class GPUAChoiceFromUniform(GpuKernelBase, Op):
class GPUAChoiceFromUniform(GpuKernelBaseCOp, _NoPythonOp):
"""
The output is transposed compared to MultinomialWOReplacementFromUniform.
We must insert a Transpose op after it.
......@@ -263,7 +263,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op):
__props__ = ("odtype", "replace")
def __init__(self, odtype, replace=False):
Op.__init__(self)
super().__init__(self)
self.odtype = odtype
self.replace = replace
......
import theano.tensor as tt
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
from theano.gof.params_type import ParamsType
from theano.tensor.nnet.neighbours import Images2Neibs
......@@ -11,7 +11,7 @@ except ImportError:
pass
from theano.gpuarray.basic_ops import (
GpuKernelBase,
GpuKernelBaseCOp,
Kernel,
as_gpuarray_variable,
infer_context_name,
......@@ -19,7 +19,7 @@ from theano.gpuarray.basic_ops import (
from theano.gpuarray.type import GpuArrayType, gpu_context_type
class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
class GpuImages2Neibs(GpuKernelBaseCOp, Images2Neibs, _NoPythonOp):
"""
Images2Neibs for the GPU.
......@@ -627,7 +627,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
params=sub["params"],
fail=sub["fail"],
)
def perform(self, node, inp, out, params):
# Disable the perform method from the CPU version
Op.perform(self, node, inp, out, params)
......@@ -3,7 +3,7 @@ from io import StringIO
import numpy as np
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
try:
......@@ -12,18 +12,18 @@ try:
except ImportError:
pass
from .basic_ops import (
GpuKernelBase,
from theano.gpuarray.basic_ops import (
GpuKernelBaseCOp,
Kernel,
as_gpuarray_variable,
gpuarray_helper_inc_dir,
infer_context_name,
)
from .fp16_help import load_w, work_dtype, write_w
from .type import GpuArrayType
from theano.gpuarray.fp16_help import load_w, work_dtype, write_w
from theano.gpuarray.type import GpuArrayType
class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBaseCOp, _NoPythonOp):
"""
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
......@@ -283,7 +283,7 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = (
)
class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBaseCOp, _NoPythonOp):
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
......@@ -508,7 +508,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax(GpuKernelBase, Op):
class GpuSoftmax(GpuKernelBaseCOp, _NoPythonOp):
"""
Implement Softmax on the gpu.
......@@ -804,7 +804,7 @@ class GpuSoftmax(GpuKernelBase, Op):
gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias(GpuKernelBase, Op):
class GpuSoftmaxWithBias(GpuKernelBaseCOp, _NoPythonOp):
"""
Implement SoftmaxWithBias on the gpu.
......
......@@ -20,7 +20,7 @@ import theano.tensor
from theano.compile import optdb
from theano.configdefaults import config
from theano.gof.graph import Apply, Variable, is_in_ancestors
from theano.gof.op import Op
from theano.gof.op import _NoPythonOp
from theano.gof.opt import GlobalOptimizer, local_optimizer
from theano.scan.utils import clone
from theano.tensor import TensorType, opt
......@@ -40,7 +40,7 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>"
_logger = logging.getLogger("theano.ifelse")
class IfElse(Op):
class IfElse(_NoPythonOp):
"""
Op that provides conditional graph evaluation if used with the CVM/VM
linkers. Note that there exist a helpful function `ifelse` that should
......
......@@ -1067,7 +1067,8 @@ class Scan(Op):
)
except (ImportError, MissingGXX):
p = self.execute
p = self.perform
# default arguments are stored in the closure of `rval`
# Big ugly hack since we can't get the real value of allow_gc
......@@ -1246,9 +1247,10 @@ class Scan(Op):
)
return list_inputs[offset:]
def execute(self, node, args, outs):
"""
The args are packed like this:
def perform(self, node, inputs, output_storage, params=None):
"""Compute the scan operation in Python.
The `inputs` are packed like this:
n_steps
......@@ -1259,7 +1261,7 @@ class Scan(Op):
W other inputs w_1, w_2, ... w_W
There are at least 1 + self.n_seqs + self.n_outs inputs, and the
There are at least ``1 + self.n_seqs + self.n_outs`` inputs, and the
ones above this number are passed to the scanned function as
non-sequential inputs.
......@@ -1272,7 +1274,7 @@ class Scan(Op):
# negative flip sequences around, and make n_steps positive
t0_call = time.time()
t_fn = 0
n_steps = args[0]
n_steps = inputs[0]
seqs = []
if n_steps < 0:
# History, in the past, this was used for backward
......@@ -1285,7 +1287,7 @@ class Scan(Op):
"We didn't implemented yet the case where scan do 0 iteration"
)
else:
for idx, seq in enumerate(args[1 : self.seqs_arg_offset]):
for idx, seq in enumerate(inputs[1 : self.seqs_arg_offset]):
if seq.shape[0] < n_steps:
raise ValueError(
(
......@@ -1305,11 +1307,12 @@ class Scan(Op):
# output
store_steps = [
arg.shape[0] for arg in args[self.seqs_arg_offset : self.shared_arg_offset]
arg.shape[0]
for arg in inputs[self.seqs_arg_offset : self.shared_arg_offset]
]
store_steps += [
arg
for arg in args[
for arg in inputs[
self.nit_sot_arg_offset : self.nit_sot_arg_offset + self.n_nit_sot
]
]
......@@ -1325,31 +1328,32 @@ class Scan(Op):
if idx in self.destroy_map:
# ^ Case 1. Outputs should be computed inplace of their
# initial state
outs[idx][0] = args[self.seqs_arg_offset + idx]
output_storage[idx][0] = inputs[self.seqs_arg_offset + idx]
elif (
outs[idx][0] is not None
and outs[idx][0].shape[1:] == args[self.seqs_arg_offset + idx].shape[1:]
and outs[idx][0].shape[0] >= store_steps[idx]
output_storage[idx][0] is not None
and output_storage[idx][0].shape[1:]
== inputs[self.seqs_arg_offset + idx].shape[1:]
and output_storage[idx][0].shape[0] >= store_steps[idx]
):
# Put in the values of the initial state
outs[idx][0] = outs[idx][0][: store_steps[idx]]
output_storage[idx][0] = output_storage[idx][0][: store_steps[idx]]
if idx > self.n_mit_mot:
l = -self.mintaps[idx]
outs[idx][0][:l] = args[self.seqs_arg_offset + idx][:l]
output_storage[idx][0][:l] = inputs[self.seqs_arg_offset + idx][:l]
else:
outs[idx][0][:] = args[self.seqs_arg_offset + idx]
output_storage[idx][0][:] = inputs[self.seqs_arg_offset + idx]
else:
outs[idx][0] = args[self.seqs_arg_offset + idx].copy()
output_storage[idx][0] = inputs[self.seqs_arg_offset + idx].copy()
offset = self.nit_sot_arg_offset + self.n_nit_sot
other_args = args[offset:]
input_storage = self.fn.input_storage
other_args = inputs[offset:]
inner_input_storage = self.fn.input_storage
nb_mitmot_in = sum(map(len, self.tap_array[: self.n_mit_mot]))
old_mitmot_input_storage = [None] * nb_mitmot_in
old_mitmot_input_data = [None] * nb_mitmot_in
output_storage = self.fn.output_storage
old_output_storage = [None] * len(output_storage)
old_output_data = [None] * len(output_storage)
inner_output_storage = self.fn.output_storage
old_inner_output_storage = [None] * len(inner_output_storage)
old_inner_output_data = [None] * len(inner_output_storage)
fn = self.fn.fn
offset = (
self.n_seqs
......@@ -1357,7 +1361,7 @@ class Scan(Op):
+ self.n_shared_outs
)
for idx in range(len(other_args)):
input_storage[idx + offset].storage[0] = other_args[idx]
inner_input_storage[idx + offset].storage[0] = other_args[idx]
i = 0
cond = True
......@@ -1368,34 +1372,40 @@ class Scan(Op):
# 3. collect input slices
for idx in range(self.n_seqs):
if self.vector_seqs[idx]:
input_storage[idx].storage[0] = seqs[idx][i : i + 1].reshape(())
inner_input_storage[idx].storage[0] = seqs[idx][i : i + 1].reshape(
()
)
else:
input_storage[idx].storage[0] = seqs[idx][i]
inner_input_storage[idx].storage[0] = seqs[idx][i]
offset = self.n_seqs
for idx in range(self.n_outs):
if self.vector_outs[idx]:
for tap in self.tap_array[idx]:
_idx = (pos[idx] + tap) % store_steps[idx]
input_storage[offset].storage[0] = outs[idx][0][
inner_input_storage[offset].storage[0] = output_storage[idx][0][
_idx : _idx + 1
].reshape(())
offset += 1
else:
for tap in self.tap_array[idx]:
_idx = (pos[idx] + tap) % store_steps[idx]
input_storage[offset].storage[0] = outs[idx][0][_idx]
inner_input_storage[offset].storage[0] = output_storage[idx][0][
_idx
]
offset += 1
a_offset = self.shared_arg_offset
o_offset = self.n_outs + self.n_nit_sot
if i == 0:
for j in range(self.n_shared_outs):
input_storage[offset].storage[0] = args[a_offset + j]
inner_input_storage[offset].storage[0] = inputs[a_offset + j]
offset += 1
else:
for j in range(self.n_shared_outs):
input_storage[offset].storage[0] = outs[o_offset + j][0]
inner_input_storage[offset].storage[0] = output_storage[
o_offset + j
][0]
offset += 1
# 4. collecting slices where the output should be stored
......@@ -1404,7 +1414,7 @@ class Scan(Op):
offset = 0
for idx in range(self.n_mit_mot_outs):
if not self.mitmots_preallocated[idx]:
output_storage[offset].storage[0] = None
inner_output_storage[offset].storage[0] = None
offset += 1
# 4.2. Collect slices for mitsots, sitsots and nitsots
......@@ -1414,25 +1424,25 @@ class Scan(Op):
store_steps[idx + self.n_mit_mot] == 1
or self.vector_outs[idx + self.n_mit_mot]
):
output_storage[idx + offset].storage[0] = None
inner_output_storage[idx + offset].storage[0] = None
else:
_pos0 = idx + self.n_mit_mot
output_storage[idx + offset].storage[0] = outs[_pos0][0][
pos[_pos0]
]
inner_output_storage[idx + offset].storage[0] = output_storage[
_pos0
][0][pos[_pos0]]
else:
for idx in range(self.n_outs + self.n_nit_sot - self.n_mit_mot):
output_storage[idx + offset].storage[0] = None
inner_output_storage[idx + offset].storage[0] = None
# 4.3. Collect slices for shared outputs
offset += self.n_outs + self.n_nit_sot - self.n_mit_mot
for idx in range(self.n_shared_outs):
output_storage[idx + offset].storage[0] = None
inner_output_storage[idx + offset].storage[0] = None
# 4.4. If there is a condition add it to the mix
if self.as_while:
pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None
inner_output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) currently in the output_storage to be able to compare them
......@@ -1440,17 +1450,17 @@ class Scan(Op):
# execution. Also keep pointers to their data to be able to detect
# cases where outputs reused the allocated object but alter the
# memory region they refer to.
for idx in range(len(output_storage)):
for idx in range(len(inner_output_storage)):
var = output_storage[idx].storage[0]
old_output_storage[idx] = var
var = inner_output_storage[idx].storage[0]
old_inner_output_storage[idx] = var
if var is None:
old_output_data[idx] = None
old_inner_output_data[idx] = None
elif self.outs_is_tensor[idx]:
old_output_data[idx] = var.data
old_inner_output_data[idx] = var.data
else:
old_output_data[idx] = var.gpudata
old_inner_output_data[idx] = var.gpudata
# 4.6. Keep a reference to the variables (ndarrays, GpuArrays,
# etc) associated with mitmot inputs currently in the
......@@ -1460,7 +1470,7 @@ class Scan(Op):
# reused the allocated object but alter the memory region they
# refer to.
for idx in range(nb_mitmot_in):
var = input_storage[idx + self.n_seqs].storage[0]
var = inner_input_storage[idx + self.n_seqs].storage[0]
old_mitmot_input_storage[idx] = var
if var is None:
......@@ -1502,19 +1512,19 @@ class Scan(Op):
dt_fn = time.time() - t0_fn
if self.as_while:
pdx = offset + self.n_shared_outs
cond = output_storage[pdx].storage[0] == 0
cond = inner_output_storage[pdx].storage[0] == 0
# 5.2. By calling fn() directly instead of calling the theano
# function, it is possible that the updates have not been
# performed. Perform the updates if needed.
offset_out = len(output_storage) - 1
offset_out = len(inner_output_storage) - 1
if getattr(fn, "need_update_inputs", True):
# Update the inputs that have an update function
for inp, storage in zip(
self.fn.maker.expanded_inputs[::-1], self.fn.input_storage[::-1]
):
if inp.update is not None:
storage.data = output_storage[offset_out].data
storage.data = inner_output_storage[offset_out].data
offset_out -= 1
t_fn += dt_fn
......@@ -1532,7 +1542,7 @@ class Scan(Op):
# Verify whether the input points to the same data as
# it did before the execution of the inner function.
old_var = old_mitmot_input_storage[inp_idx]
new_var = input_storage[self.n_seqs + inp_idx].storage[0]
new_var = inner_input_storage[self.n_seqs + inp_idx].storage[0]
if old_var is new_var:
old_data = old_mitmot_input_data[inp_idx]
if self.inps_is_tensor[self.n_seqs + inp_idx]:
......@@ -1547,14 +1557,16 @@ class Scan(Op):
# nothing needs to be done. Otherwise, recover the
# and store it in `outs` as usual
if not same_data:
outs[j][0][k + pos[j]] = input_storage[
output_storage[j][0][k + pos[j]] = inner_input_storage[
self.n_seqs + inp_idx
].storage[0]
else:
# This output tap has not been preallocated, recover
# its value as usual
outs[j][0][k + pos[j]] = output_storage[offset_out].storage[0]
output_storage[j][0][k + pos[j]] = inner_output_storage[
offset_out
].storage[0]
offset_out += 1
mitmot_out_idx += 1
......@@ -1570,14 +1582,16 @@ class Scan(Op):
# Copy the output value to `outs`, if necessary
if store_steps[j] == 1 or self.vector_outs[j]:
outs[j][0][pos[j]] = output_storage[offset_out + j].storage[0]
output_storage[j][0][pos[j]] = inner_output_storage[
offset_out + j
].storage[0]
else:
# Check whether the initialization of the output storage
# map for this output has been reused.
old_var = old_output_storage[offset_out + j]
new_var = output_storage[offset_out + j].storage[0]
old_var = old_inner_output_storage[offset_out + j]
new_var = inner_output_storage[offset_out + j].storage[0]
if old_var is new_var:
old_data = old_output_data[offset_out + j]
old_data = old_inner_output_data[offset_out + j]
if old_data is None:
output_reused = False
elif self.outs_is_tensor[offset_out + j]:
......@@ -1589,9 +1603,9 @@ class Scan(Op):
if not output_reused:
try:
outs[j][0][pos[j]] = output_storage[offset_out + j].storage[
0
]
output_storage[j][0][pos[j]] = inner_output_storage[
offset_out + j
].storage[0]
except ValueError as e:
if i == 0:
# First iteration, so don't change the
......@@ -1614,26 +1628,30 @@ class Scan(Op):
if i == 0:
jout = j + offset_out
shape = (store_steps[j],) + output_storage[jout].storage[0].shape
dtype = output_storage[jout].storage[0].dtype
shape = (store_steps[j],) + inner_output_storage[jout].storage[
0
].shape
dtype = inner_output_storage[jout].storage[0].dtype
if (
outs[j][0] is None
or outs[j][0].shape[0] < store_steps[j]
or outs[j][0].shape[1:] != shape[1:]
or outs[j][0].dtype != dtype
output_storage[j][0] is None
or output_storage[j][0].shape[0] < store_steps[j]
or output_storage[j][0].shape[1:] != shape[1:]
or output_storage[j][0].dtype != dtype
):
outs[j][0] = node.outputs[j].type.value_zeros(shape)
elif outs[j][0].shape[0] != store_steps[j]:
outs[j][0] = outs[j][0][: store_steps[j]]
outs[j][0][pos[j]] = output_storage[jout].storage[0]
output_storage[j][0] = node.outputs[j].type.value_zeros(shape)
elif output_storage[j][0].shape[0] != store_steps[j]:
output_storage[j][0] = output_storage[j][0][: store_steps[j]]
output_storage[j][0][pos[j]] = inner_output_storage[jout].storage[0]
elif store_steps[j] == 1 or self.vector_outs[j]:
outs[j][0][pos[j]] = output_storage[j + offset_out].storage[0]
output_storage[j][0][pos[j]] = inner_output_storage[
j + offset_out
].storage[0]
else:
# Check whether the initialization of the output storage map
# for this output has been reused.
old_var = old_output_storage[offset_out + j]
old_data = old_output_data[offset_out + j]
new_var = output_storage[offset_out + j].storage[0]
old_var = old_inner_output_storage[offset_out + j]
old_data = old_inner_output_data[offset_out + j]
new_var = inner_output_storage[offset_out + j].storage[0]
if old_var is new_var:
if old_data is None:
output_reused = False
......@@ -1645,7 +1663,9 @@ class Scan(Op):
output_reused = False
if not output_reused:
outs[j][0][pos[j]] = output_storage[j + offset_out].storage[0]
output_storage[j][0][pos[j]] = inner_output_storage[
j + offset_out
].storage[0]
# 5.6 Copy over the values for outputs corresponding to shared
# variables
......@@ -1653,7 +1673,7 @@ class Scan(Op):
end += self.n_shared_outs
for j in range(begin, end):
jout = j + offset_out
outs[j][0] = output_storage[jout].storage[0]
output_storage[j][0] = inner_output_storage[jout].storage[0]
pos = [(idx + 1) % store for idx, store in zip(pos, store_steps)]
i = i + 1
......@@ -1672,25 +1692,29 @@ class Scan(Op):
# are read and written.
# This way, there will be no information overwritten
# before it is read (as it used to happen).
shape = (pdx,) + outs[idx][0].shape[1:]
shape = (pdx,) + output_storage[idx][0].shape[1:]
tmp = node.outputs[idx].type.value_zeros(shape)
tmp[:] = outs[idx][0][:pdx]
outs[idx][0][: store_steps[idx] - pdx] = outs[idx][0][pdx:]
outs[idx][0][store_steps[idx] - pdx :] = tmp
tmp[:] = output_storage[idx][0][:pdx]
output_storage[idx][0][: store_steps[idx] - pdx] = output_storage[
idx
][0][pdx:]
output_storage[idx][0][store_steps[idx] - pdx :] = tmp
del tmp
else:
shape = (store_steps[idx] - pdx,) + outs[idx][0].shape[1:]
shape = (store_steps[idx] - pdx,) + output_storage[idx][0].shape[1:]
tmp = node.outputs[idx].type.value_zeros(shape)
tmp[:] = outs[idx][0][pdx:]
outs[idx][0][store_steps[idx] - pdx :] = outs[idx][0][:pdx]
outs[idx][0][: store_steps[idx] - pdx] = tmp
tmp[:] = output_storage[idx][0][pdx:]
output_storage[idx][0][store_steps[idx] - pdx :] = output_storage[
idx
][0][:pdx]
output_storage[idx][0][: store_steps[idx] - pdx] = tmp
del tmp
# This would normally happen only when doing truncated
# backpropagation through time. In such a scenarion Scan is
# expected to return 0 for all entries for which the gradient is
# not actually computed
elif store_steps[idx] > i - self.mintaps[idx]:
outs[idx][0][i - self.mintaps[idx] :] = 0
output_storage[idx][0][i - self.mintaps[idx] :] = 0
# This is a fix for a bug introduced by while. If you say
# you want to loop up to a condition, you expect the output
# to have that length ( and not the maximal length possible)
......@@ -1709,13 +1733,13 @@ class Scan(Op):
# every output and then do outs[0][:i+maximal_tap],
# which implies I think more computations then this
# little trick that I used
outs[idx][0] = outs[idx][0][: -(n_steps - i)]
output_storage[idx][0] = output_storage[idx][0][: -(n_steps - i)]
# We never reuse the input or output storage of the
# inner function so we clear it.
for i_s in input_storage:
for i_s in inner_input_storage:
i_s.storage[0] = None
for o_s in output_storage:
for o_s in inner_output_storage:
o_s.storage[0] = None
t_call = time.time() - t0_call
......@@ -1735,7 +1759,6 @@ class Scan(Op):
self.t_call = t_call
self.t_fn = t_fn
# Infer Shape
def infer_shape(self, fgraph, node, input_shapes):
# input_shapes correspond to the shapes of node.inputs
for inp, inp_shp in zip(node.inputs, input_shapes):
......@@ -2085,7 +2108,6 @@ class Scan(Op):
return mappings
# GRAD FUNCTION
def L_op(self, inputs, outs, dC_douts):
if not isinstance(outs, (list, tuple)):
outs = [outs]
......
......@@ -5,7 +5,7 @@ import theano
from theano import scalar, tensor
from theano.configdefaults import config
from theano.gof.graph import Apply
from theano.gof.op import COp
from theano.gof.op import COp, _NoPythonCOp
from theano.gof.opt import PatternSub, TopoOptimizer, local_optimizer
from theano.misc.safe_asarray import _asarray
from theano.sparse import basic as sparse
......@@ -78,7 +78,7 @@ theano.compile.optdb.register(
)
class AddSD_ccode(COp):
class AddSD_ccode(_NoPythonCOp):
"""
Add a sparse and a dense matrix.
......@@ -663,7 +663,7 @@ def local_structured_dot(fgraph, node):
# register_specialize(local_structured_dot)
class UsmmCscDense(COp):
class UsmmCscDense(_NoPythonCOp):
"""
Performs the expression is `alpha` * `x` `y` + `z`.
......@@ -995,7 +995,7 @@ def local_usmm_csx(fgraph, node):
register_specialize(local_usmm_csx, "cxx_only")
class CSMGradC(COp):
class CSMGradC(_NoPythonCOp):
__props__ = ()
......@@ -1138,7 +1138,7 @@ def local_csm_grad_c(fgraph, node):
# register_specialize(local_csm_grad_c, 'cxx_only')
class MulSDCSC(COp):
class MulSDCSC(_NoPythonCOp):
"""
Multiplication of sparse matrix by a broadcasted dense vector
element wise.
......@@ -1181,9 +1181,6 @@ class MulSDCSC(COp):
def c_code_cache_version(self):
return (3,)
# def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplementedError()
def c_code(self, node, name, inputs, outputs, sub):
(
......@@ -1275,7 +1272,7 @@ class MulSDCSC(COp):
mul_s_d_csc = MulSDCSC()
class MulSDCSR(COp):
class MulSDCSR(_NoPythonCOp):
"""
Multiplication of sparse matrix by a broadcasted dense vector
element wise.
......@@ -1318,9 +1315,6 @@ class MulSDCSR(COp):
def c_code_cache_version(self):
return (3,)
# def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplemented()
def c_code(self, node, name, inputs, outputs, sub):
(
......@@ -1463,7 +1457,7 @@ def local_mul_s_d(fgraph, node):
register_specialize(local_mul_s_d, "cxx_only")
class MulSVCSR(COp):
class MulSVCSR(_NoPythonCOp):
"""
Multiplication of sparse matrix by a broadcasted dense vector
element wise.
......@@ -1627,7 +1621,7 @@ def local_mul_s_v(fgraph, node):
register_specialize(local_mul_s_v, "cxx_only")
class StructuredAddSVCSR(COp):
class StructuredAddSVCSR(_NoPythonCOp):
"""
Structured addition of a sparse matrix and a dense vector.
The elements of the vector are only added to the corresponding
......@@ -1806,7 +1800,7 @@ def local_structured_add_s_v(fgraph, node):
register_specialize(local_structured_add_s_v, "cxx_only")
class SamplingDotCSR(COp):
class SamplingDotCSR(_NoPythonCOp):
"""
Operand optimized for calculating the dot product dot(`x`, `y`.T) = `z`
when you only want to calculate a subset of `z`.
......
......@@ -4,7 +4,7 @@ import os
import theano
from theano.configdefaults import config
from theano.gof.graph import Apply
from theano.gof.op import OpenMPOp
from theano.gof.op import OpenMPOp, _NoPythonOp
from theano.gof.params_type import ParamsType
from theano.gof.type import EnumList
from theano.scalar import int8, int64
......@@ -18,7 +18,7 @@ from theano.tensor.type import TensorType
_logger = logging.getLogger(__name__)
class BaseCorrMM(OpenMPOp):
class BaseCorrMM(OpenMPOp, _NoPythonOp):
"""
Base class for `CorrMM`, `CorrMM_gradWeights` and
`CorrMM_gradInputs`. Cannot be used directly.
......
......@@ -4,7 +4,7 @@ import os
import theano
from theano.configdefaults import config
from theano.gof.graph import Apply
from theano.gof.op import OpenMPOp
from theano.gof.op import OpenMPOp, _NoPythonOp
from theano.gof.params_type import ParamsType
from theano.gof.type import EnumList
from theano.scalar import int64
......@@ -18,7 +18,7 @@ from theano.tensor.type import TensorType
_logger = logging.getLogger(__name__)
class BaseCorr3dMM(OpenMPOp):
class BaseCorr3dMM(OpenMPOp, _NoPythonOp):
"""
Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and
`Corr3dMM_gradInputs`. Cannot be used directly.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论