提交 514c7de4 authored 作者: carriepl's avatar carriepl

Merge pull request #3205 from nouiz/mixed2

Mixed stuff
...@@ -31,18 +31,19 @@ install: ...@@ -31,18 +31,19 @@ install:
- pip install . --no-deps - pip install . --no-deps
# command to run tests # command to run tests
env:
- PART="sandbox sparse scalar tensor/nnet scan_module"
- PART="tensor/tests/test_basic.py tensor/signal compile gof misc tests compat"
- PART="-e test_basic.py tensor/tests"
matrix: matrix:
fast_finish: true
include: include:
- python: "3.3" - python: "3.3"
env: PART="." THEANO_FLAGS="mode=FAST_COMPILE" env: PART="." THEANO_FLAGS="mode=FAST_COMPILE"
- python: "2.6" - python: "2.6"
env: PART="." THEANO_FLAGS="mode=FAST_COMPILE,floatX=float32" env: PART="." THEANO_FLAGS="mode=FAST_COMPILE,floatX=float32"
env:
- PART="sandbox sparse scalar tensor/nnet scan_module"
- PART="tensor/tests/test_basic.py tensor/signal compile gof misc tests compat"
- PART="-e test_basic.py tensor/tests"
script: script:
- export THEANO_FLAGS=$THEANO_FLAGS,warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc.cxxflags=-pipe - export THEANO_FLAGS=$THEANO_FLAGS,warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc.cxxflags=-pipe
- python --version - python --version
......
...@@ -40,6 +40,12 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None, ...@@ -40,6 +40,12 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None,
that, you can set to replace shared variables values by zeros by that, you can set to replace shared variables values by zeros by
calling set_value(...) on them before calling `function_dump`. calling set_value(...) on them before calling `function_dump`.
To load such a dump and do the compilation:
>>> import cPickle, theano
>>> d=cPickle.load(open("func_dump.bin", "rb"))
>>> f=theano.function(**d)
""" """
assert isinstance(filename, string_types) assert isinstance(filename, string_types)
d = dict(inputs=inputs, outputs=outputs, mode=mode, updates=updates, d = dict(inputs=inputs, outputs=outputs, mode=mode, updates=updates,
......
...@@ -263,7 +263,7 @@ class Variable(Node): ...@@ -263,7 +263,7 @@ class Variable(Node):
- `TensorVariable` subclass of Variable that represents a numpy.ndarray object - `TensorVariable` subclass of Variable that represents a numpy.ndarray object
- `SharedTensorVariable` Shared version of TensorVariable - `TensorSharedVariable` Shared version of TensorVariable
- `SparseVariable` subclass of Variable that represents a scipy.sparse.{csc,csr}_matrix object - `SparseVariable` subclass of Variable that represents a scipy.sparse.{csc,csr}_matrix object
......
...@@ -1803,6 +1803,12 @@ class GpuConv(GpuOp): ...@@ -1803,6 +1803,12 @@ class GpuConv(GpuOp):
self.max_threads_dim0 = None self.max_threads_dim0 = None
if not hasattr(self, "direction_hint"): if not hasattr(self, "direction_hint"):
self.direction_hint = None self.direction_hint = None
if not hasattr(self, "nkern"):
self.nkern = None
if not hasattr(self, "bsize"):
self.bsize = None
if not hasattr(self, "fft_opt"):
self.fft_opt = True
def __hash__(self): def __hash__(self):
# don't use hash(self.version) as hash(-1)==-2 and # don't use hash(self.version) as hash(-1)==-2 and
......
...@@ -231,6 +231,9 @@ class GpuDnnConvDesc(GpuOp): ...@@ -231,6 +231,9 @@ class GpuDnnConvDesc(GpuOp):
def c_compiler(self): def c_compiler(self):
return NVCC_compiler return NVCC_compiler
def do_constant_folding(self, node):
return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'): def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'):
if isinstance(border_mode, int): if isinstance(border_mode, int):
border_mode = (border_mode, border_mode) border_mode = (border_mode, border_mode)
......
...@@ -42,13 +42,30 @@ def test_dnn_conv_desc_merge(): ...@@ -42,13 +42,30 @@ def test_dnn_conv_desc_merge():
# CDataType is not DeepCopyable so this will crash if we don't use # CDataType is not DeepCopyable so this will crash if we don't use
# borrow=True # borrow=True
f = theano.function([], [theano.Out(desc1, borrow=True), f = theano.function([], [theano.Out(desc1, borrow=True),
theano.Out(desc2, borrow=True)]) theano.Out(desc2, borrow=True)],
mode=mode_with_gpu)
d1, d2 = f() d1, d2 = f()
# This will be the case if they are merged, which would be bad. # This will be the case if they are merged, which would be bad.
assert d1 != d2 assert d1 != d2
desc1v2 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
conv_mode='conv')(img_shp, kern_shp)
f = theano.function([], [theano.Out(desc1, borrow=True),
theano.Out(desc1v2, borrow=True)],
mode=mode_with_gpu)
assert len([n for n in f.maker.fgraph.apply_nodes
if isinstance(n.op, dnn.GpuDnnConvDesc)]) == 1
# CDataType values don't compare equal even if they represent the same
# object, so we can't use DebugMode with them.
if theano.config.mode not in ["DebugMode", "DEBUG_MODE"]:
d1, d2 = f()
# They won't be equal if they aren't merged.
assert d1 == d2
def test_dnn_conv_merge(): def test_dnn_conv_merge():
"""This test that we merge correctly multiple dnn_conv. """This test that we merge correctly multiple dnn_conv.
......
...@@ -27,15 +27,23 @@ class GpuConv(gof.Op): ...@@ -27,15 +27,23 @@ class GpuConv(gof.Op):
logical_kern_hw=None, logical_kern_hw=None,
logical_kern_align_top=True, logical_kern_align_top=True,
version=-1, version=-1,
direction_hint=None,
verbose=0, verbose=0,
kshp=None, kshp=None,
imshp=None, imshp=None,
max_threads_dim0=None): max_threads_dim0=None,
nkern=None,
bsize=None,
fft_opt=True):
""" """
:param version: each version of c_code implements many kernels for the :param version: each version of c_code implements many kernels for the
convolution. By default we try to guess the best one. convolution. By default we try to guess the best one.
You can force one version with this parameter. This You can force one version with this parameter. This
parameter is used by the tests. parameter is used by the tests.
:param direction_hint: 'forward', 'bprop weights' or 'bprop inputs'.
Serves as a hint for graph optimizers replacing
GpuConv by other implementations. If the GpuConv is
inserted automatically, we take its value from ConvOp.
:param verbose: for value of 1,2 and 3. Print more information during :param verbose: for value of 1,2 and 3. Print more information during
the execution of the convolution. Mostly used for the execution of the convolution. Mostly used for
optimization or debugging. optimization or debugging.
...@@ -49,6 +57,19 @@ class GpuConv(gof.Op): ...@@ -49,6 +57,19 @@ class GpuConv(gof.Op):
:param max_threads_dim0: The maximum number of threads for the :param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the block size dimensions 0 (blockDim.x) used by the
GPU function. GPU function.
:param nkern: The number of kernels. Not used for this op, but can be
used by graph optimizers to select a more optimal
convolution implementation. If the GpuConv op is inserted
automatically, we take its value from the Conv op.
:param bsize: The batch size. Not used for this op, but can be
used by graph optimizers to select a more optimal
convolution implementation. If the GpuConv op is inserted
automatically, we take its value from the Conv op.
:param fft_opt: Deactivate the fft_opt optimization at the op level when
set to False. Note that by default the fft optimizations
aren't enabled. See
:ref:`convolution documentation <libdoc_tensor_nnet_conv>`
to enable them.
""" """
self.border_mode = border_mode self.border_mode = border_mode
...@@ -69,10 +90,14 @@ class GpuConv(gof.Op): ...@@ -69,10 +90,14 @@ class GpuConv(gof.Op):
self.logical_kern_hw = logical_kern_hw self.logical_kern_hw = logical_kern_hw
self.logical_kern_align_top = logical_kern_align_top self.logical_kern_align_top = logical_kern_align_top
self.version = version self.version = version
self.direction_hint = direction_hint
self.verbose = verbose self.verbose = verbose
self.kshp = kshp self.kshp = kshp
self.imshp = imshp self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0 self.max_threads_dim0 = max_threads_dim0
self.nkern = nkern
self.bsize = bsize
self.fft_opt = fft_opt
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) \ return type(self) == type(other) \
...@@ -93,6 +118,14 @@ class GpuConv(gof.Op): ...@@ -93,6 +118,14 @@ class GpuConv(gof.Op):
self.imshp = None self.imshp = None
if not hasattr(self, "max_threads_dim0"): if not hasattr(self, "max_threads_dim0"):
self.max_threads_dim0 = None self.max_threads_dim0 = None
if not hasattr(self, "direction_hint"):
self.direction_hint = None
if not hasattr(self, "nkern"):
self.nkern = None
if not hasattr(self, "bsize"):
self.bsize = None
if not hasattr(self, "fft_opt"):
self.fft_opt = True
def __hash__(self): def __hash__(self):
# don't use hash(self.version) as hash(-1)==-2 and # don't use hash(self.version) as hash(-1)==-2 and
......
...@@ -669,8 +669,12 @@ def local_gpu_conv(node): ...@@ -669,8 +669,12 @@ def local_gpu_conv(node):
logical_kern_align_top=op.kshp_logical_top_aligned, logical_kern_align_top=op.kshp_logical_top_aligned,
kshp=op.kshp, kshp=op.kshp,
version=op.version, version=op.version,
direction_hint=op.direction_hint,
verbose=op.verbose, verbose=op.verbose,
imshp=op.imshp, imshp=op.imshp,
nkern=op.nkern,
bsize=op.bsize,
fft_opt=op.fft_opt
) )
if op.imshp_logical is not None: if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3] logical_img_hw = op.imshp_logical[1:3]
......
...@@ -27,6 +27,8 @@ from .test_basic_ops import (mode_with_gpu, ...@@ -27,6 +27,8 @@ from .test_basic_ops import (mode_with_gpu,
mode_without_gpu) mode_without_gpu)
from ..type import GpuArrayType from ..type import GpuArrayType
from ..conv import GpuConv from ..conv import GpuConv
from theano.sandbox.gpuarray import dnn
import pygpu import pygpu
gftensor4 = GpuArrayType('float32', [False] * 4) gftensor4 = GpuArrayType('float32', [False] * 4)
...@@ -501,6 +503,9 @@ def test_subsample(): ...@@ -501,6 +503,9 @@ def test_subsample():
class TestConv2DGPU(unittest.TestCase): class TestConv2DGPU(unittest.TestCase):
conv_ops = (GpuConv,
dnn.DnnBase)
def test_logical_shapes(self): def test_logical_shapes(self):
seed_rng() seed_rng()
for stride in range(1, 4): for stride in range(1, 4):
...@@ -527,7 +532,7 @@ class TestConv2DGPU(unittest.TestCase): ...@@ -527,7 +532,7 @@ class TestConv2DGPU(unittest.TestCase):
func = theano.function([a, A], image_estimate, mode=mode_with_gpu) func = theano.function([a, A], image_estimate, mode=mode_with_gpu)
# theano.printing.debugprint(func,) # theano.printing.debugprint(func,)
assert any([isinstance(node.op, GpuConv) assert any([isinstance(node.op, self.conv_ops)
for node in func.maker.fgraph.toposort()]) for node in func.maker.fgraph.toposort()])
a_in = numpy.random.randn(*featshp).astype("float32") a_in = numpy.random.randn(*featshp).astype("float32")
......
...@@ -1239,7 +1239,7 @@ class ShapeFeature(object): ...@@ -1239,7 +1239,7 @@ class ShapeFeature(object):
continue continue
if shpnode.outputs[0] in theano.gof.graph.ancestors([repl]): if shpnode.outputs[0] in theano.gof.graph.ancestors([repl]):
raise AssertionError( raise InconsistencyError(
"This substitution would insert a cycle in the graph:" "This substitution would insert a cycle in the graph:"
"node: %s, i: %i, r: %s, new_r: %s" "node: %s, i: %i, r: %s, new_r: %s"
% (node, i, r, new_r)) % (node, i, r, new_r))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论