提交 13989ba3 authored 作者: lamblin's avatar lamblin

Merge pull request #1207 from nouiz/infer_shape_broadcast

Infer shape broadcast
# Prevent git from showing duplicate names with commands like "git shortlog"
# # See the manpage of git-shortlog for details.
# # The syntax is:
# # Name that should be used <email that should be used> Bad name <bad email>
# #
# # You can skip Bad name if it is the same as the one that should be used, and is unique.
# #
# # This file is up-to-date if the command git log --format="%aN <%aE>" | sort -u
# # gives no duplicates.
<abergeron@gmail.com> <anakha@kami.(none)> <abergeron@gmail.com> <anakha@kami.(none)>
David Warde-Farley <wardefar@iro.umontreal.ca> David Warde-Farley <dwf@cs.toronto.edu> David Warde-Farley <wardefar@iro.umontreal.ca> David Warde-Farley <dwf@cs.toronto.edu>
David Warde-Farley <wardefar@iro.umontreal.ca> David Warde Farley <dwf@cs.toronto.edu> David Warde-Farley <wardefar@iro.umontreal.ca> David Warde Farley <dwf@cs.toronto.edu>
......
...@@ -39,7 +39,7 @@ probably do something similar on older computer. ...@@ -39,7 +39,7 @@ probably do something similar on older computer.
Installation steps Installation steps
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
Ubuntu 11.10/12.04: Ubuntu 11.10/12.04/12.10:
1) ``sudo apt-get install python-numpy python-scipy python-dev python-pip python-nose g++ libopenblas-dev git`` 1) ``sudo apt-get install python-numpy python-scipy python-dev python-pip python-nose g++ libopenblas-dev git``
2) ``sudo pip install Theano`` 2) ``sudo pip install Theano``
...@@ -70,7 +70,7 @@ Theano/BLAS speed test: ...@@ -70,7 +70,7 @@ Theano/BLAS speed test:
.. code-block:: bash .. code-block:: bash
python /usr/lib/python2.*/site-packages/theano/misc/check_blas.py python `python -c "import os, theano; print os.path.dirname(theano.__file__)"`/misc/check_blas.py
This will print a table with different versions of BLAS/numbers of This will print a table with different versions of BLAS/numbers of
threads on multiple CPUs and GPUs. It will also print some Theano/NumPy threads on multiple CPUs and GPUs. It will also print some Theano/NumPy
...@@ -163,6 +163,8 @@ Test GPU configuration ...@@ -163,6 +163,8 @@ Test GPU configuration
Ubuntu 12.04 LTS: default gcc version 4.6.3. gcc 4.4.7 and 4.5.3 availables. Ubuntu 12.04 LTS: default gcc version 4.6.3. gcc 4.4.7 and 4.5.3 availables.
Ubuntu 12.10: default gcc version 4.7.2. gcc 4.4.7, 4.5.4 and 4.6.3 available.
......
...@@ -1472,7 +1472,7 @@ class GCC_compiler(object): ...@@ -1472,7 +1472,7 @@ class GCC_compiler(object):
#cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION") #cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]] numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
# numpy 1.7 deprecated the following macro but the didn't # numpy 1.7 deprecated the following macro but the new one didn't
# exist in the past # exist in the past
if bool(numpy_ver < [1, 7]): if bool(numpy_ver < [1, 7]):
cxxflags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY") cxxflags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
...@@ -1609,6 +1609,7 @@ class GCC_compiler(object): ...@@ -1609,6 +1609,7 @@ class GCC_compiler(object):
try: try:
p = call_subprocess_Popen(cmd, stderr=subprocess.PIPE) p = call_subprocess_Popen(cmd, stderr=subprocess.PIPE)
p.wait()
compile_stderr = p.communicate()[1] compile_stderr = p.communicate()[1]
except Exception: except Exception:
# An exception can occur e.g. if `g++` is not found. # An exception can occur e.g. if `g++` is not found.
......
...@@ -194,41 +194,28 @@ if __name__ == "__main__": ...@@ -194,41 +194,28 @@ if __name__ == "__main__":
goto2 1.13/16 3.16s goto2 1.13/16 3.16s
Test time in float32 Test time in float32
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)
cuda version 5.0 4.2 4.1 4.0 3.2 3.0 # note
gpu/cuda version gpu
M2050(Amazon)/5.0 0.25s M2070 0.25s 0.27s 0.32s
M2050(Amazon) 0.25s
GTX680/4.2 0.154s C2075 0.25s
GTX580/4.2 0.164s C1060 0.46s
GTX480/4.2 0.192s
GTX470/4.2 0.238s GTX680 0.154s 0.218s
C2075/4.2 0.25s GTX580 0.164s 0.203s
GTX285/4.2 0.452s #cuda 3.0 seam faster? driver version? GTX480 0.192s 0.237s 0.27s
GT520/4.2 2.68s GTX470 0.238s 0.297s 0.34s
GTX560/4.2 0.30s GTX660 0.24s
GTX560 0.30s
GTX460/4.0 0.45s GTX460 0.37s 0.45s
GTX285 0.452s 0.452s 0.40s # cuda 3.0 seam faster? driver version?
GTX580/3.2 0.203s GTX550Ti 0.57s
GTX680/3.2 0.218s GT520 2.68s 3.06s
GTX480/3.2 0.237s 520M 3.19s # with bumblebee on Ubuntu 12.04
GTX470/3.2 0.297s GT220 3.80s
GTX285/3.2 0.452s #cuda 3.0 seam faster? driver version? GT210 6.35s
8500GT 10.68s
GTX480/3.0 0.27s
M2070/4.1 0.27s
GTX470/3.2 0.29s
M2070/3.2 0.32s
GTX470/3.0 0.34s
GTX285/3.0 0.40s
C1060/3.2 0.46s
GTX550Ti/4.0 0.57s
520/3.2 3.06s
520M/3.2 3.19s with bumblebee on Ubuntu 12.04
GT220/3.2RC 3.80s
GT210/4.0 6.35s
8500GT/3.0 10.68s
""" """
t, impl = execute(not options.print_only, not options.quiet, t, impl = execute(not options.print_only, not options.quiet,
......
...@@ -218,7 +218,7 @@ if cuda_available: ...@@ -218,7 +218,7 @@ if cuda_available:
atexit.register(gpu_shutdown) atexit.register(gpu_shutdown)
except EnvironmentError, e: except EnvironmentError, e:
cuda_available = False cuda_available = False
cuda_initialization_error_message = e.message cuda_initialization_error_message = " ".join(e.args)
class GpuOp(theano.gof.Op): class GpuOp(theano.gof.Op):
......
...@@ -561,6 +561,9 @@ class ScalarVariable(_scalar_py_operators, Variable): ...@@ -561,6 +561,9 @@ class ScalarVariable(_scalar_py_operators, Variable):
class ScalarConstant(_scalar_py_operators, Constant): class ScalarConstant(_scalar_py_operators, Constant):
pass pass
# Register ScalarConstant as the type of Constant corresponding to Scalar
Scalar.Constant = ScalarConstant
# Easy constructors # Easy constructors
......
...@@ -519,7 +519,6 @@ def get_scalar_constant_value(v): ...@@ -519,7 +519,6 @@ def get_scalar_constant_value(v):
if isinstance(v, numpy.ndarray): if isinstance(v, numpy.ndarray):
return numpy_scalar(v) return numpy_scalar(v)
if isinstance(v, Constant): if isinstance(v, Constant):
if getattr(v.tag, 'unique_value', None) is not None: if getattr(v.tag, 'unique_value', None) is not None:
data = v.tag.unique_value data = v.tag.unique_value
...@@ -528,11 +527,9 @@ def get_scalar_constant_value(v): ...@@ -528,11 +527,9 @@ def get_scalar_constant_value(v):
return numpy_scalar(data) return numpy_scalar(data)
if v.owner: if v.owner:
if isinstance(v.owner.op, Alloc): if isinstance(v.owner.op, (Alloc, DimShuffle, Rebroadcast,
return get_scalar_constant_value(v.owner.inputs[0]) compile.ops.OutputGuard,
if isinstance(v.owner.op, DimShuffle): compile.DeepCopyOp)):
return get_scalar_constant_value(v.owner.inputs[0])
if isinstance(v.owner.op, Rebroadcast):
return get_scalar_constant_value(v.owner.inputs[0]) return get_scalar_constant_value(v.owner.inputs[0])
if isinstance(v.owner.op, Elemwise) and \ if isinstance(v.owner.op, Elemwise) and \
isinstance(v.owner.op.scalar_op, scal.Second): isinstance(v.owner.op.scalar_op, scal.Second):
...@@ -2007,6 +2004,13 @@ class TensorConstant(_tensor_py_operators, Constant): ...@@ -2007,6 +2004,13 @@ class TensorConstant(_tensor_py_operators, Constant):
def signature(self): def signature(self):
return TensorConstantSignature((self.type, self.data)) return TensorConstantSignature((self.type, self.data))
def equals(self, other):
    """Return True if `other` represents the same constant tensor as `self`.

    Overrides ``Constant.equals`` so that comparison against a raw
    ``numpy.ndarray`` also works: the array is first wrapped in a
    ``TensorConstant`` so that the two signatures can be compared.
    """
    if isinstance(other, numpy.ndarray):
        # Wrap the ndarray in a TensorConstant so both sides expose a
        # comparable signature().
        other = constant(other)
    return (isinstance(other, TensorConstant) and
            self.signature() == other.signature())
TensorType.Constant = TensorConstant TensorType.Constant = TensorConstant
......
...@@ -813,7 +813,18 @@ class ShapeFeature(object): ...@@ -813,7 +813,18 @@ class ShapeFeature(object):
"for a variable with %d dimensions." % ( "for a variable with %d dimensions." % (
len(s), r.ndim)) len(s), r.ndim))
shape_vars = [self.unpack(s_i) for s_i in s] shape_vars = []
for i in range(r.ndim):
if (hasattr(r.type, 'broadcastable') and
r.type.broadcastable[i]):
shape_vars.append(self.lscalar_one)
else:
shape_vars.append(self.unpack(s[i]))
assert all([not r.type.broadcastable[i] or
self.lscalar_one.equals(shape_vars[i]) or
self.lscalar_one.equals(
T.extract_constant(shape_vars[i]))
for i in range(r.ndim)])
self.shape_of[r] = tuple(shape_vars) self.shape_of[r] = tuple(shape_vars)
for sv in shape_vars: for sv in shape_vars:
self.shape_of_reverse_index.setdefault(sv, set()).add(r) self.shape_of_reverse_index.setdefault(sv, set()).add(r)
...@@ -855,6 +866,12 @@ class ShapeFeature(object): ...@@ -855,6 +866,12 @@ class ShapeFeature(object):
merged_shape.append(r_shape[i]) merged_shape.append(r_shape[i])
else: else:
merged_shape.append(other_shape[i]) merged_shape.append(other_shape[i])
assert all([(not r.type.broadcastable[i] and
not other_r.type.broadcastable[i]) or
self.lscalar_one.equals(merged_shape[i]) or
self.lscalar_one.equals(
T.extract_constant(merged_shape[i]))
for i in range(r.ndim)])
self.shape_of[r] = tuple(merged_shape) self.shape_of[r] = tuple(merged_shape)
for sv in self.shape_of[r]: for sv in self.shape_of[r]:
self.shape_of_reverse_index.setdefault(sv, set()).add(r) self.shape_of_reverse_index.setdefault(sv, set()).add(r)
...@@ -871,6 +888,10 @@ class ShapeFeature(object): ...@@ -871,6 +888,10 @@ class ShapeFeature(object):
new_shape.append(self.unpack(s_i)) new_shape.append(self.unpack(s_i))
else: else:
new_shape.append(s_j) new_shape.append(s_j)
assert all([not r.type.broadcastable[i] or
self.lscalar_one.equals(new_shape[i]) or
self.lscalar_one.equals(T.extract_constant(new_shape[i]))
for i in range(r.ndim)])
self.shape_of[r] = tuple(new_shape) self.shape_of[r] = tuple(new_shape)
for sv in self.shape_of[r]: for sv in self.shape_of[r]:
self.shape_of_reverse_index.setdefault(sv, set()).add(r) self.shape_of_reverse_index.setdefault(sv, set()).add(r)
......
...@@ -5456,8 +5456,9 @@ class test_tensordot(unittest.TestCase): ...@@ -5456,8 +5456,9 @@ class test_tensordot(unittest.TestCase):
f1 = inplace_func([avec, bvec], c) f1 = inplace_func([avec, bvec], c)
aval = rand(5) aval = rand(5)
bval = rand(5) bval = rand(5)
self.assertTrue(numpy.tensordot(aval, bval, axes) == \ out0 = numpy.tensordot(aval, bval, axes)
f1(aval, bval)) out1 = f1(aval, bval)
self.assertTrue(numpy.allclose(out0, out1), (out0, out1))
utt.verify_grad(self.TensorDot(axes), [aval, bval]) utt.verify_grad(self.TensorDot(axes), [aval, bval])
# Test matrix-vector # Test matrix-vector
......
...@@ -2475,6 +2475,57 @@ class test_shapeoptimizer(unittest.TestCase): ...@@ -2475,6 +2475,57 @@ class test_shapeoptimizer(unittest.TestCase):
assert len(topo) == 1 assert len(topo) == 1
assert topo[0].op == deep_copy_op assert topo[0].op == deep_copy_op
@staticmethod
def max_pool_c01b(c01b, pool_shp, pool_stride, img_shp):
    """Symbolic max pooling over a 4D tensor in ('c', 0, 1, 'b') layout
    (Alex Krizhevsky axis order: channels, rows, cols, batch).

    Like max_pool, but for the c01b layout.  `pool_shp`, `pool_stride`
    and `img_shp` are single ints used for BOTH the row and column
    dimensions (square pools over a square image).

    Returns a symbolic variable holding the pooled output.
    """
    # Accumulator for the running elementwise maximum over pool offsets.
    mx = None

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        # Sanity checks: the last pool covers the end of the image,
        # and the one before it does not already cover it.
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval

    # Compute starting row of the last pool
    last_pool_r = last_pool(img_shp, pool_shp, pool_stride) * pool_stride
    # Compute number of rows needed in img for all indexes to work out
    required_r = last_pool_r + pool_shp
    # Columns are computed identically since the image/pool are square.
    last_pool_c = last_pool(img_shp, pool_shp, pool_stride) * pool_stride
    required_c = last_pool_c + pool_shp

    # Pad the image with -inf so that out-of-bounds positions never win
    # the elementwise maximum below.
    wide_infinity = T.alloc(-numpy.inf, c01b.shape[0],
                            required_r, required_c, c01b.shape[3])
    c01b = T.set_subtensor(wide_infinity[:, 0:img_shp, 0:img_shp, :], c01b)

    # For each offset inside a pool window, take a strided slice of the
    # padded image and fold it into the running maximum.  After visiting
    # every (row, col) offset, mx holds the max over each full window.
    for row_within_pool in xrange(pool_shp):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shp):
            col_stop = last_pool_c + col_within_pool + 1
            cur = c01b[:, row_within_pool:row_stop:pool_stride,
                       col_within_pool:col_stop:pool_stride, :]
            if mx is None:
                mx = cur
            else:
                mx = T.maximum(mx, cur)
    return mx
def test_broadcasted_dims(self):
    # Regression test: pooling a (1, 1, 1, 1) shared variable (every
    # dimension broadcastable) used to crash the shape optimizer
    # during compilation.
    shape = (1, 1, 1, 1)
    rng = numpy.random.RandomState(utt.fetch_seed())
    data = rng.rand(*shape).astype(config.floatX)
    pooled = self.max_pool_c01b(shared(data), 1, 1, 1)
    fn = theano.function([], pooled)
    fn()
def test_local_track_shape_i(self): def test_local_track_shape_i(self):
class IdentityNoShape(gof.Op): class IdentityNoShape(gof.Op):
'''Op that does not infer the output shape from the input one''' '''Op that does not infer the output shape from the input one'''
......
...@@ -55,10 +55,12 @@ nosetests. ...@@ -55,10 +55,12 @@ nosetests.
import cPickle import cPickle
import datetime
import os import os
import subprocess import subprocess
import sys import sys
import datetime import time
import theano import theano
from theano.misc.windows import call_subprocess_Popen from theano.misc.windows import call_subprocess_Popen
...@@ -261,8 +263,8 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile, ...@@ -261,8 +263,8 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile,
n_tests + 1)): n_tests + 1)):
# Print the test we will start in the raw log to help # Print the test we will start in the raw log to help
# debug tests that are too long. # debug tests that are too long.
f_rawlog.write("\nWill run test #%d %s\n" % (test_id, f_rawlog.write("\n%s Will run test #%d %s\n" % (
data["ids"][test_id])) time.ctime(), test_id, data["ids"][test_id]))
f_rawlog.flush() f_rawlog.flush()
proc = call_subprocess_Popen( proc = call_subprocess_Popen(
......
...@@ -64,7 +64,8 @@ class OrderedUpdates(OrderedDict): ...@@ -64,7 +64,8 @@ class OrderedUpdates(OrderedDict):
# Warn about non-determinism. # Warn about non-determinism.
warnings.warn('Updating an `OrderedUpdates` with a ' warnings.warn('Updating an `OrderedUpdates` with a '
'non-ordered dictionary with 2+ elements could ' 'non-ordered dictionary with 2+ elements could '
'make your code non-deterministic') 'make your code non-deterministic',
stacklevel=2)
for key, val in OrderedDict(other).iteritems(): for key, val in OrderedDict(other).iteritems():
if key in self: if key in self:
if self[key] == val: if self[key] == val:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论