提交 884ed6be authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #4747 from nouiz/mixed_opt

Small mixed changes.
...@@ -15,7 +15,10 @@ Looking for an idea for a first contribution? Check `github issue ...@@ -15,7 +15,10 @@ Looking for an idea for a first contribution? Check `github issue
with a label ``easy fix``. They are good starters. It is recommended with a label ``easy fix``. They are good starters. It is recommended
that you write on the issue that you want to work on it. This helps make that you write on the issue that you want to work on it. This helps make
sure it is up to date and that nobody else is working on it. Also, sure it is up to date and that nobody else is working on it. Also,
we can sometimes provide more information about it. we can sometimes provide more information about it. There is also
the label `NeedSomeoneToFinish
<https://github.com/Theano/Theano/labels/NeedSomeoneToFinish>`_ that is
interesting to check. The difficulty level of those issues varies.
Resources Resources
========= =========
......
...@@ -25,8 +25,12 @@ except ImportError: ...@@ -25,8 +25,12 @@ except ImportError:
try: try:
# fall back on pydot if necessary # fall back on pydot if necessary
import pydot as pd import pydot as pd
if hasattr(pd, 'find_graphviz'):
if pd.find_graphviz(): if pd.find_graphviz():
pydot_imported = True pydot_imported = True
else:
pd.Dot.create(pd.Dot())
pydot_imported = True
except ImportError: except ImportError:
pass # tests should not fail on optional dependency pass # tests should not fail on optional dependency
......
...@@ -32,7 +32,7 @@ from .elemwise import GpuElemwise ...@@ -32,7 +32,7 @@ from .elemwise import GpuElemwise
# These don't exist in gpuarray # These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt, conv_groupopt, from .opt import (gpu_seqopt, register_opt,
op_lifter, register_opt2) op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty from .opt_util import alpha_merge, output_merge, inplace_allocempty
...@@ -1472,18 +1472,31 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -1472,18 +1472,31 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, @register_opt('fast_compile', 'conv_dnn', 'cudnn')
AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d])
def local_abstractconv_cudnn(node): def local_abstractconv_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
conv_groupopt.register('local_abstractconv_cudnn',
local_abstractconv_cudnn, 20, @register_opt('fast_compile', 'conv_dnn', 'cudnn')
'fast_compile', 'fast_run', @local_optimizer([AbstractConv2d_gradWeights])
'gpuarray', 'conv_dnn', 'cudnn') def local_abstractconv_gw_cudnn(node):
ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType):
return
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradInputs])
def local_abstractconv_gi_cudnn(node):
ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType):
return
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@inplace_allocempty(GpuDnnConv, 2) @inplace_allocempty(GpuDnnConv, 2)
......
...@@ -51,13 +51,15 @@ class GpuElemwise(HideC, Elemwise): ...@@ -51,13 +51,15 @@ class GpuElemwise(HideC, Elemwise):
def make_node(self, *inputs): def make_node(self, *inputs):
ctx_name = infer_context_name(*inputs) ctx_name = infer_context_name(*inputs)
res = Elemwise.make_node(self, *inputs) inputs = [as_gpuarray_variable(i, ctx_name) for i in inputs]
outputs = [GpuArrayType(broadcastable=o.type.broadcastable, out_info = Elemwise.get_output_info(self, GpuDimShuffle, *inputs)
inputs = out_info[2]
outputs = [GpuArrayType(broadcastable=br,
context_name=ctx_name, context_name=ctx_name,
dtype=o.type.dtype)() for o in res.outputs] dtype=dtype)() for dtype, br in
zip(out_info[0], out_info[1])]
if len(outputs) > 1: if len(outputs) > 1:
raise NotImplementedError() raise NotImplementedError()
inputs = [as_gpuarray_variable(i, ctx_name) for i in inputs]
node = Apply(self, inputs, outputs) node = Apply(self, inputs, outputs)
# Try to generate the kernel to catch SupportCodeErrors # Try to generate the kernel to catch SupportCodeErrors
......
...@@ -14,7 +14,6 @@ from theano.compile.ops import shape_i ...@@ -14,7 +14,6 @@ from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer, from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
SequenceDB, Optimizer, DB, toolbox, graph) SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import NavigatorOptimizer from theano.gof.opt import NavigatorOptimizer
from theano.gof.optdb import LocalGroupDB
from theano.ifelse import IfElse from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
...@@ -79,10 +78,6 @@ class GraphToGPUDB(DB): ...@@ -79,10 +78,6 @@ class GraphToGPUDB(DB):
gpu_seqopt = SequenceDB() gpu_seqopt = SequenceDB()
# Don't register this right now
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"
gpu_seqopt.register('gpuarray_graph_optimization', GraphToGPUDB(), -0.5, gpu_seqopt.register('gpuarray_graph_optimization', GraphToGPUDB(), -0.5,
'fast_compile', 'fast_run', 'gpuarray') 'fast_compile', 'fast_run', 'gpuarray')
...@@ -1297,9 +1292,6 @@ def local_gpua_lift_abstractconv2d_graph(op, context_name, inputs, outputs): ...@@ -1297,9 +1292,6 @@ def local_gpua_lift_abstractconv2d_graph(op, context_name, inputs, outputs):
context_name=context_name) context_name=context_name)
return [op(*inps)] return [op(*inps)]
# Register this here so that it goes after the abstract lifting
register_opt('fast_compile')(conv_groupopt)
@register_opt("low_memory") @register_opt("low_memory")
@local_optimizer([GpuCAReduceCuda]) @local_optimizer([GpuCAReduceCuda])
......
...@@ -35,10 +35,14 @@ except ImportError: ...@@ -35,10 +35,14 @@ except ImportError:
try: try:
# fall back on pydot if necessary # fall back on pydot if necessary
import pydot as pd import pydot as pd
if hasattr(pd, 'find_graphviz'):
if pd.find_graphviz(): if pd.find_graphviz():
pydot_imported = True pydot_imported = True
else: else:
pydot_imported_msg = "pydot can't find graphviz" pydot_imported_msg = "pydot can't find graphviz"
else:
pd.Dot.create(pd.Dot())
pydot_imported = True
except ImportError: except ImportError:
# tests should not fail on optional dependency # tests should not fail on optional dependency
pydot_imported_msg = "Install the python package pydot or pydot-ng." pydot_imported_msg = "Install the python package pydot or pydot-ng."
......
...@@ -544,13 +544,11 @@ second dimension ...@@ -544,13 +544,11 @@ second dimension
self.scalar_op.nout) self.scalar_op.nout)
self._rehash() self._rehash()
def make_node(self, *inputs): def get_output_info(self, dim_shuffle, *inputs):
""" """Return the outputs dtype and broadcastable pattern and the
If the inputs have different number of dimensions, their shape dimshuffled inputs.
is left-completed to the greatest number of dimensions with 1s
using DimShuffle.
""" """
inputs = list(map(as_tensor_variable, inputs))
shadow = self.scalar_op.make_node( shadow = self.scalar_op.make_node(
*[get_scalar_type(dtype=i.type.dtype).make_variable() *[get_scalar_type(dtype=i.type.dtype).make_variable()
for i in inputs]) for i in inputs])
...@@ -565,7 +563,7 @@ second dimension ...@@ -565,7 +563,7 @@ second dimension
args.append(input) args.append(input)
else: else:
# TODO: use LComplete instead # TODO: use LComplete instead
args.append(DimShuffle( args.append(dim_shuffle(
input.type.broadcastable, input.type.broadcastable,
['x'] * difference + list(range(length)), ['x'] * difference + list(range(length)),
inplace=False)(input)) inplace=False)(input))
...@@ -601,7 +599,18 @@ second dimension ...@@ -601,7 +599,18 @@ second dimension
raise TypeError(( raise TypeError((
"Cannot do an inplace operation on incompatible data types.", "Cannot do an inplace operation on incompatible data types.",
([i.type.dtype for i in inputs], out_dtypes, inplace_pattern))) ([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)))
assert len(out_dtypes) == len(out_broadcastables)
return out_dtypes, out_broadcastables, inputs
def make_node(self, *inputs):
"""
If the inputs have different number of dimensions, their shape
is left-completed to the greatest number of dimensions with 1s
using DimShuffle.
"""
inputs = list(map(as_tensor_variable, inputs))
out_dtypes, out_broadcastables, inputs = self.get_output_info(
DimShuffle, *inputs)
outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)() outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)()
for dtype, broadcastable in izip(out_dtypes, for dtype, broadcastable in izip(out_dtypes,
out_broadcastables)] out_broadcastables)]
......
...@@ -685,6 +685,14 @@ class lstsq(Op): ...@@ -685,6 +685,14 @@ class lstsq(Op):
def matrix_power(M, n): def matrix_power(M, n):
"""
Raise a square matrix to the (integer) power n.
Parameters
----------
M : Tensor variable
n : Python int
"""
result = 1 result = 1
for i in xrange(n): for i in xrange(n):
result = theano.dot(result, M) result = theano.dot(result, M)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论