Commit b8e79c60, authored by nouiz

Merge pull request #989 from pascanur/c_viewop

C version of view op
import ops
from ops import (
DeepCopyOp, deep_copy_op, register_deep_copy_op_c_code,
ViewOp, view_op, register_view_op_c_code)
import function_module import function_module
from function_module import * from function_module import *
......
...@@ -27,6 +27,7 @@ from theano.compile.function_module import (FunctionMaker, ...@@ -27,6 +27,7 @@ from theano.compile.function_module import (FunctionMaker,
Supervisor, Supervisor,
std_fgraph) std_fgraph)
from theano.compile.mode import Mode, register_mode from theano.compile.mode import Mode, register_mode
from theano.compile.ops import OutputGuard
AddConfigVar('DebugMode.patience', AddConfigVar('DebugMode.patience',
"Optimize graph this many times to detect inconsistency", "Optimize graph this many times to detect inconsistency",
...@@ -715,7 +716,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, ...@@ -715,7 +716,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
actually_inplace_outputs.append(node.outputs[oo]) actually_inplace_outputs.append(node.outputs[oo])
if warn_input_not_reused and destroyed_res_list: if warn_input_not_reused and destroyed_res_list:
if isinstance(node.op, theano.compile.mode.OutputGuard): if isinstance(node.op, OutputGuard):
# The point of OutputGuard is to be declared as destructive # The point of OutputGuard is to be declared as destructive
# while not destroying anything # while not destroying anything
continue continue
...@@ -738,7 +739,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes, ...@@ -738,7 +739,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
# the version of numpy! # the version of numpy!
if getattr(out_var, 'size', 2) <= 1: if getattr(out_var, 'size', 2) <= 1:
continue continue
if isinstance(node.op, theano.compile.mode.OutputGuard): if isinstance(node.op, OutputGuard):
# This class is not in the final graph. # This class is not in the final graph.
continue continue
if not _may_share_memory(out_var, in_var): if not _may_share_memory(out_var, in_var):
......
...@@ -17,6 +17,7 @@ from theano import gof ...@@ -17,6 +17,7 @@ from theano import gof
from theano.gof.python25 import partial from theano.gof.python25 import partial
import mode as mode_module import mode as mode_module
from io import In, SymbolicInput, SymbolicInputKit, SymbolicOutput from io import In, SymbolicInput, SymbolicInputKit, SymbolicOutput
from theano.compile.ops import deep_copy_op, view_op
import logging import logging
_logger = logging.getLogger('theano.compile.function_module') _logger = logging.getLogger('theano.compile.function_module')
...@@ -159,101 +160,6 @@ class AliasedMemoryError(Exception): ...@@ -159,101 +160,6 @@ class AliasedMemoryError(Exception):
### Function ### Function
### ###
def register_DeepCopyOp_c_code(typ, code):
    """Register per-Type C code for DeepCopyOp.

    :param typ: a Theano Type class (the class itself, not an instance).

    :param code: C code that deep copies the Theano type 'typ'.
        Use %(iname)s and %(oname)s for the input and output C
        variable names respectively.
    """
    DeepCopyOp.c_codes[typ] = code
class DeepCopyOp(theano.gof.Op):
    """Op that returns a deep copy of its input.

    C code implementations for non-TensorType inputs are registered
    per-Type through ``register_DeepCopyOp_c_code`` into ``c_codes``.
    """
    # Mapping: Theano Type class -> C code template.  The templates use
    # %(iname)s / %(oname)s for the input / output variable names.
    c_codes = {}

    def __init__(self):
        pass

    def __str__(self):
        return self.__class__.__name__

    def __hash__(self):
        return hash(type(self))

    def __eq__(self, other):
        return type(self) == type(other)

    def make_node(self, x):
        return theano.gof.Apply(self, [x], [x.type()])

    def perform(self, node, args, outs):
        if hasattr(args[0], 'copy'):
            # when args[0] is an ndarray of 0 dimensions,
            # copy.deepcopy returns a numpy.dtype and not an ndarray.
            # So when the args have a copy attribute we use it,
            # as it does not have this problem.
            outs[0][0] = args[0].copy()
        else:
            outs[0][0] = copy.deepcopy(args[0])

    def c_code_cache_version(self):
        # BUG FIX: "(1)" is just the int 1 (parentheses do not make a
        # tuple); cache versions must be tuples.
        return (1,)

    def c_code(self, node, name, inames, onames, sub):
        iname = inames[0]
        oname = onames[0]
        fail = sub['fail']
        if isinstance(node.inputs[0].type, theano.tensor.TensorType):
            return """
        Py_XDECREF(%(oname)s);
        %(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,NPY_ANYORDER);
        if (!%(oname)s)
        {
            PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!");
            %(fail)s;
        }
        """ % locals()
        elif node.inputs[0].type.__class__ in self.c_codes:
            return self.c_codes[node.inputs[0].type.__class__] % locals()
        else:
            # BUG FIX: the original was missing "return", so this path
            # silently returned None instead of delegating to the base
            # class (which raises NotImplementedError / utils.MethodNotDefined).
            return super(DeepCopyOp, self).c_code(
                node, name, inames, onames, sub)
class ViewOp(theano.gof.Op):
    """Identity op whose output is declared as a view of its input."""

    def __init__(self):
        # Output 0 is a view of input 0.
        self.view_map = {0: [0]}

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, x):
        return theano.gof.Apply(self, [x], [x.type()])

    def perform(self, node, args, outs):
        # Pass the input through unchanged.
        outs[0][0] = args[0]

    def infer_shape(self, node, input_shapes):
        # The output has exactly the shape of the input.
        return input_shapes

    def grad(self, args, g_outs):
        # Identity: gradients flow through unchanged.
        return g_outs
# Module-level singleton instances.  The ops compare equal and hash by
# type, so one shared instance per Op class is sufficient.
deep_copy_op = DeepCopyOp()
view_op = ViewOp()
DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries
class Function(object): class Function(object):
......
"""WRITEME """WRITEME
""" """
import logging import logging
import warnings
from textwrap import dedent
import numpy import numpy
...@@ -8,6 +10,7 @@ import theano ...@@ -8,6 +10,7 @@ import theano
from theano import gof from theano import gof
import theano.gof.vm import theano.gof.vm
from theano.configparser import config, AddConfigVar, StrParam from theano.configparser import config, AddConfigVar, StrParam
from theano.compile.ops import register_view_op_c_code, _output_guard
_logger = logging.getLogger('theano.compile.mode') _logger = logging.getLogger('theano.compile.mode')
...@@ -114,74 +117,6 @@ def register_optimizer(name, opt): ...@@ -114,74 +117,6 @@ def register_optimizer(name, opt):
predefined_optimizers[name] = opt predefined_optimizers[name] = opt
def register_OutputGuard_c_code(type):
    """Declare that OutputGuard can emit C code for the given Theano Type.

    :param type: a Theano Type class (the class itself, not an instance).
    """
    OutputGuard.c_code_types.append(type)
class OutputGuard(gof.Op):
    """
    Internal-only op; only the AddDestroyHandler optimizer inserts it.

    It is declared as destructive while it actually returns a view:
    this prevents destruction of the output variables of a Theano
    function.  Theano has another mechanism that should already prevent
    this, but OutputGuard adds a safeguard in case some optimization
    running before the add_destroy_handler phase bypasses that
    mechanism with in-place optimizations.

    TODO: find a current full explanation.
    """
    destroy_map = {0: [0]}
    view_map = {0: [0]}

    # Theano Type classes (backed by Python objects) for which the
    # generic INCREF/DECREF C code below may be emitted.
    c_code_types = []

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, x):
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inp, out):
        value, = inp
        storage, = out
        storage[0] = value

    def c_code(self, node, nodename, inp, out, sub):
        x, = inp
        z, = out
        if isinstance(node.inputs[0].type, theano.scalar.Scalar):
            # Scalars are C objects on the stack,
            # and should not be inc/decrefed.
            return """
        %(z)s = %(x)s;
        """ % locals()
        elif isinstance(node.inputs[0].type, tuple(self.c_code_types)):
            # These are Python object types.
            return """
        Py_XDECREF(%(z)s);
        %(z)s = %(x)s;
        Py_XINCREF(%(z)s);
        """ % locals()
        # Else, no C code for you
        return super(OutputGuard, self).c_code(node, nodename, inp, out, sub)

    def c_code_cache_version(self):
        return (2,)


_output_guard = OutputGuard()
class AddDestroyHandler(gof.Optimizer): class AddDestroyHandler(gof.Optimizer):
"""This optimizer performs two important functions: """This optimizer performs two important functions:
...@@ -448,3 +383,17 @@ def register_mode(name, mode): ...@@ -448,3 +383,17 @@ def register_mode(name, mode):
if name in predefined_modes: if name in predefined_modes:
raise ValueError('Mode name already taken: %s' % name) raise ValueError('Mode name already taken: %s' % name)
predefined_modes[name] = mode predefined_modes[name] = mode
def register_OutputGuard_c_code(type):
    """Deprecated alias kept for backward compatibility.

    Registers the generic INCREF/DECREF view C code for `type` by
    calling ``register_view_op_c_code``.

    :param type: a Theano Type class (the class itself, not an instance).
    """
    # BUG FIX: the original message dropped the word "Use", producing
    # the ungrammatical "is deprecated, theano.compile... instead."
    warnings.warn("register_OutputGuard_c_code(type) is deprecated. Use "
                  "theano.compile.register_view_op_c_code(type, code) "
                  "instead.",
                  stacklevel=2)
    register_view_op_c_code(
        type,
        dedent("""
        Py_XDECREF(%(oname)s);
        %(oname)s = %(iname)s;
        Py_XINCREF(%(oname)s);
        """))
"""This file contain auxiliary Ops, used during the compilation phase."""
import copy
import warnings
#import theano
from theano import gof
def register_view_op_c_code(type, code, version=()):
    """ Tell ViewOp how to generate C code for a Theano Type

    :param type: A Theano type. It must be the Theano class itself and not an
                 instance of the class.

    :param code: C code that returns a view of the Theano type 'type'.
                 Use %(iname)s and %(oname)s for the input and output C
                 variable names respectively.

    :param version: A number indicating the version of the code, for cache.
    """
    ViewOp.c_code_and_version[type] = (code, version)
class ViewOp(gof.Op):
    """
    Returns an inplace view of the input. Used internally by Theano.

    C implementations for specific Types are registered through
    ``register_view_op_c_code`` into ``c_code_and_version``.
    """
    # Output 0 is a view of input 0.
    view_map = {0: [0]}

    # Mapping from Type to C code (and version) to use.
    # In the C code, the name of the input variable is %(iname)s,
    # the output variable is %(oname)s.
    c_code_and_version = {}

    def make_node(self, x):
        return gof.Apply(self, [x], [x.type()])

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def perform(self, node, inp, out):
        x, = inp
        z, = out
        z[0] = x

    def __str__(self):
        return '%s' % self.__class__.__name__

    def c_code(self, node, nodename, inp, out, sub):
        iname, = inp
        oname, = out
        fail = sub['fail']
        itype = node.inputs[0].type.__class__
        if itype in self.c_code_and_version:
            code, version = self.c_code_and_version[itype]
            return code % locals()

        # Else, no C code
        return super(ViewOp, self).c_code(node, nodename, inp, out, sub)

    def c_code_cache_version(self):
        version = []
        # If any of the c code is unversionned, we have to return ()
        # Else, we will return a list of (type name, version) pairs.
        for t, (c, v) in sorted(self.c_code_and_version.items()):
            if not v:
                # BUG FIX: the message named register_deep_copy_op_c_code,
                # but ViewOp C code is registered via
                # register_view_op_c_code.
                warnings.warn("Type %s has C code for ViewOp, but it has "
                              "no version. You should add a 'version' "
                              "keyword arg when calling "
                              "register_view_op_c_code." % t,
                              stacklevel=2)
                return ()
            version.append((str(t), v))
        return tuple(version)

    def infer_shape(self, node, input_shapes):
        return input_shapes

    def grad(self, args, g_outs):
        return g_outs


view_op = ViewOp()
class OutputGuard(ViewOp):
    """
    This op is used only internally by Theano.

    Only the AddDestroyHandler optimizer tries to insert them in the graph.

    This Op is declared as destructive while it is not destroying
    anything. It returns a view. This is used to prevent destruction of
    the output variables of a Theano function.

    There is a mechanism in Theano that should prevent this, but the use
    of OutputGuard adds a safeguard: it may be possible for some optimization
    run before the add_destroy_handler phase to bypass this mechanism, by
    making in-place optimizations.

    TODO: find a current full explanation.
    """
    # Declared destructive on input 0 (in addition to the view_map
    # inherited from ViewOp), even though it only ever returns a view.
    destroy_map = {0: [0]}


# Singleton used by the AddDestroyHandler optimizer.
_output_guard = OutputGuard()
def register_deep_copy_op_c_code(typ, code, version=()):
    """Register per-Type C code for DeepCopyOp.

    :param typ: a Theano Type class (the class itself, not an instance).

    :param code: C code that deep copies the Theano type 'typ'.
        Use %(iname)s and %(oname)s for the input and output C
        variable names respectively.

    :param version: a number indicating the version of the code, used by
        the C code cache.
    """
    DeepCopyOp.c_code_and_version[typ] = (code, version)
class DeepCopyOp(gof.Op):
    """Op that returns a deep copy of its input.

    C implementations for specific Types are registered through
    ``register_deep_copy_op_c_code`` into ``c_code_and_version``.
    """
    # Mapping from Type to C code (and version) to use.
    # In the C code, the name of the input variable is %(iname)s,
    # the output variable is %(oname)s.
    c_code_and_version = {}

    def __init__(self):
        pass

    def __str__(self):
        return self.__class__.__name__

    def __hash__(self):
        return hash(type(self))

    def __eq__(self, other):
        return type(self) == type(other)

    def make_node(self, x):
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, args, outs):
        if hasattr(args[0], 'copy'):
            # when args[0] is an ndarray of 0 dimensions,
            # copy.deepcopy returns a numpy.dtype and not an ndarray.
            # So when the args have a copy attribute we use it,
            # as it does not have this problem.
            outs[0][0] = args[0].copy()
        else:
            outs[0][0] = copy.deepcopy(args[0])

    def c_code_cache_version(self):
        version = []
        # If any of the c code is unversionned, we have to return ()
        # Else, we will return a list of (type name, version) pairs.
        for t, (c, v) in sorted(self.c_code_and_version.items()):
            if not v:
                # BUG FIX: the message said "OutputGuard" and
                # register_OutputGuard_c_code; this is DeepCopyOp, whose
                # C code is registered via register_deep_copy_op_c_code.
                warnings.warn("Type %s has C code for DeepCopyOp, but it "
                              "has no version. You should add a 'version' "
                              "keyword arg when calling "
                              "register_deep_copy_op_c_code." % t,
                              stacklevel=2)
                return ()
            version.append((str(t), v))
        return tuple(version)

    def c_code(self, node, name, inames, onames, sub):
        iname, = inames
        oname, = onames
        fail = sub['fail']
        itype = node.inputs[0].type.__class__
        if itype in self.c_code_and_version:
            code, version = self.c_code_and_version[itype]
            return code % locals()

        # Else, no C code
        return super(DeepCopyOp, self).c_code(node, name, inames, onames, sub)


deep_copy_op = DeepCopyOp()
import numpy
import unittest
from nose.plugins.skip import SkipTest
import theano
# Compilation modes used by the tests below: the default optimizing
# mode, and the same mode with GPU optimizations enabled.
mode_with_opt = theano.compile.mode.get_default_mode()
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def test_viewop_gpu():
    """Check that ViewOp passes data through unchanged on the GPU."""
    from theano.sandbox import cuda
    # Idiom fix: truth-test the flag instead of comparing "== False".
    if not cuda.cuda_available:
        raise SkipTest('Optional package cuda disabled')
    _x = theano.tensor.fvector('x')
    x = cuda.gpu_from_host(_x)
    _out = theano.compile.ViewOp()(x)
    out = cuda.host_from_gpu(_out)
    f = theano.function([x],
                        out,
                        mode=mode_with_gpu)
    data = numpy.array([1, 2, 3], dtype='float32')
    # The view must round-trip host -> gpu -> host unchanged.
    assert numpy.allclose(f(data), data)
...@@ -412,12 +412,20 @@ class CudaNdarrayType(Type): ...@@ -412,12 +412,20 @@ class CudaNdarrayType(Type):
return [] return []
# Register CudaNdarrayType to the OutputGuard list of known types # Register C code for ViewOp on CudaNdarrayType
# to have OutputGuard generate C code for this type. theano.compile.register_view_op_c_code(
theano.compile.mode.register_OutputGuard_c_code(CudaNdarrayType) CudaNdarrayType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = %(iname)s;
Py_XINCREF(%(oname)s);
""",
version=1)
# Register CudaNdarrayType to the DeepCopyOp list of types with c code. # Register CudaNdarrayType to the DeepCopyOp list of types with c code.
theano.compile.function_module.register_DeepCopyOp_c_code(CudaNdarrayType, """ theano.compile.register_deep_copy_op_c_code(
CudaNdarrayType,
"""
Py_XDECREF(%(oname)s); Py_XDECREF(%(oname)s);
%(oname)s = (CudaNdarray*)CudaNdarray_Copy(%(iname)s); %(oname)s = (CudaNdarray*)CudaNdarray_Copy(%(iname)s);
...@@ -427,7 +435,8 @@ theano.compile.function_module.register_DeepCopyOp_c_code(CudaNdarrayType, """ ...@@ -427,7 +435,8 @@ theano.compile.function_module.register_DeepCopyOp_c_code(CudaNdarrayType, """
PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!"); PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!");
%(fail)s; %(fail)s;
} }
""") """,
version=1)
# THIS WORKS But CudaNdarray instances don't compare equal to one # THIS WORKS But CudaNdarray instances don't compare equal to one
......
...@@ -421,6 +421,14 @@ class Scalar(Type): ...@@ -421,6 +421,14 @@ class Scalar(Type):
return (4,) # explicit T given in specialization of operator= return (4,) # explicit T given in specialization of operator=
# lines. This makes it compile with open64 # lines. This makes it compile with open64
# Register C code for ViewOp on Scalars.
theano.compile.register_view_op_c_code(
Scalar,
"""
%(oname)s = %(iname)s;
""",
1)
int8 = Scalar('int8') int8 = Scalar('int8')
int16 = Scalar('int16') int16 = Scalar('int16')
......
...@@ -23,8 +23,7 @@ from theano import gof ...@@ -23,8 +23,7 @@ from theano import gof
from theano.gof.python25 import maxsize from theano.gof.python25 import maxsize
from theano.gof.opt import Optimizer from theano.gof.opt import Optimizer
from theano.gof import toolbox, DestroyHandler, InconsistencyError from theano.gof import toolbox, DestroyHandler, InconsistencyError
from theano.compile import optdb from theano.compile import deep_copy_op, optdb
from theano.compile.function_module import deep_copy_op
import scan_op import scan_op
import scan_utils import scan_utils
......
...@@ -147,6 +147,12 @@ class SparseType(gof.Type): ...@@ -147,6 +147,12 @@ class SparseType(gof.Type):
def is_valid_value(self, a): def is_valid_value(self, a):
return scipy.sparse.issparse(a) and (a.format == self.format) return scipy.sparse.issparse(a) and (a.format == self.format)
# Register CudaNdarrayType to the OutputGuard list of known types # Register SparseType's C code for ViewOp.
# to have OutputGuard generate C code for this type. theano.compile.register_view_op_c_code(
theano.compile.mode.register_OutputGuard_c_code(SparseType) SparseType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = %(iname)s;
Py_XINCREF(%(oname)s);
""",
1)
...@@ -1054,9 +1054,31 @@ class TensorType(Type): ...@@ -1054,9 +1054,31 @@ class TensorType(Type):
return numpy.zeros(shape, dtype=self.dtype) return numpy.zeros(shape, dtype=self.dtype)
# Register CudaNdarrayType to the OutputGuard list of known types # Register TensorType C code for ViewOp.
# to have OutputGuard generate C code for this type. theano.compile.register_view_op_c_code(
theano.compile.mode.register_OutputGuard_c_code(TensorType) TensorType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = %(iname)s;
Py_XINCREF(%(oname)s);
""",
version=1)
# Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code(
TensorType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,NPY_ANYORDER);
if (!%(oname)s)
{
PyErr_SetString(PyExc_ValueError, "DeepCopyOp: the copy failed!");
%(fail)s;
}
""",
version=1)
# Easy constructors # Easy constructors
......
...@@ -56,9 +56,15 @@ class RandomStateType(gof.Type): ...@@ -56,9 +56,15 @@ class RandomStateType(gof.Type):
return False return False
return True return True
# Register CudaNdarrayType to the OutputGuard list of known types # Register RandomStateType's C code for ViewOp.
# to have OutputGuard generate C code for this type. theano.compile.register_view_op_c_code(
theano.compile.mode.register_OutputGuard_c_code(RandomStateType) RandomStateType,
"""
Py_XDECREF(%(oname)s);
%(oname)s = %(iname)s;
Py_XINCREF(%(oname)s);
""",
1)
random_state_type = RandomStateType() random_state_type = RandomStateType()
......
...@@ -19,6 +19,7 @@ from numpy.testing.noseclasses import KnownFailureTest ...@@ -19,6 +19,7 @@ from numpy.testing.noseclasses import KnownFailureTest
import theano import theano
from theano import compile, config, function, gof, tensor, shared from theano import compile, config, function, gof, tensor, shared
from theano.compile import DeepCopyOp
from theano.compile.mode import get_default_mode from theano.compile.mode import get_default_mode
from theano.gof.python25 import any, all, combinations from theano.gof.python25 import any, all, combinations
from theano.tensor import (_shared, wvector, bvector, autocast_float_as, from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
...@@ -1747,8 +1748,7 @@ class TestAlloc(unittest.TestCase): ...@@ -1747,8 +1748,7 @@ class TestAlloc(unittest.TestCase):
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert numpy.sum([isinstance(node.op, alloc) assert numpy.sum([isinstance(node.op, alloc)
for node in topo]) == 1 for node in topo]) == 1
assert not isinstance(topo[0].op, assert not isinstance(topo[0].op, DeepCopyOp)
theano.compile.function_module.DeepCopyOp)
def test_eye(): def test_eye():
...@@ -1794,8 +1794,7 @@ def test_identity(): ...@@ -1794,8 +1794,7 @@ def test_identity():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 1 assert len(topo) == 1
if theano.config.mode != 'FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
assert isinstance(topo[0].op, theano.compile. assert isinstance(topo[0].op, DeepCopyOp)
function_module.DeepCopyOp)
for dtype in ALL_DTYPES: for dtype in ALL_DTYPES:
yield check, dtype yield check, dtype
...@@ -2502,7 +2501,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2502,7 +2501,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
adv_incsub1=tensor.AdvancedIncSubtensor1, adv_incsub1=tensor.AdvancedIncSubtensor1,
mode=None, mode=None,
dtype=theano.config.floatX, dtype=theano.config.floatX,
ignore_topo=(theano.compile.function_module.DeepCopyOp)): ignore_topo=DeepCopyOp):
self.shared = shared self.shared = shared
self.sub = sub self.sub = sub
self.inc_sub = inc_sub self.inc_sub = inc_sub
...@@ -3551,7 +3550,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3551,7 +3550,7 @@ class T_Join_and_Split(unittest.TestCase):
mode=self.mode.including('local_join_1')) mode=self.mode.including('local_join_1'))
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, theano.compile.DeepCopyOp) assert isinstance(topo[0].op, DeepCopyOp)
def test_join_vector(self): def test_join_vector(self):
a = self.shared(numpy.array([1, 2, 3], dtype=self.floatX)) a = self.shared(numpy.array([1, 2, 3], dtype=self.floatX))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论