Commit 13989ba3 authored by lamblin

Merge pull request #1207 from nouiz/infer_shape_broadcast

Infer shape broadcast
# Prevent git from showing duplicate names with commands like "git shortlog"
# # See the manpage of git-shortlog for details.
# # The syntax is:
# # Name that should be used <email that should be used> Bad name <bad email>
# #
# # You can skip Bad name if it is the same as the one that should be used, and is unique.
# #
# # This file is up-to-date if the command git log --format="%aN <%aE>" | sort -u
# # gives no duplicates.
<abergeron@gmail.com> <anakha@kami.(none)>
David Warde-Farley <wardefar@iro.umontreal.ca> David Warde-Farley <dwf@cs.toronto.edu>
David Warde-Farley <wardefar@iro.umontreal.ca> David Warde Farley <dwf@cs.toronto.edu>
......
......@@ -39,7 +39,7 @@ probably do something similar on older computer.
Installation steps
~~~~~~~~~~~~~~~~~~
Ubuntu 11.10/12.04:
Ubuntu 11.10/12.04/12.10:
1) ``sudo apt-get install python-numpy python-scipy python-dev python-pip python-nose g++ libopenblas-dev git``
2) ``sudo pip install Theano``
......@@ -70,7 +70,7 @@ Theano/BLAS speed test:
.. code-block:: bash
python /usr/lib/python2.*/site-packages/theano/misc/check_blas.py
python `python -c "import os, theano; print os.path.dirname(theano.__file__)"`/misc/check_blas.py
This will print a table with different versions of BLAS/numbers of
threads on multiple CPUs and GPUs. It will also print some Theano/NumPy
......@@ -163,6 +163,8 @@ Test GPU configuration
Ubuntu 12.04 LTS: default gcc version 4.6.3. gcc 4.4.7 and 4.5.3 available.
Ubuntu 12.10: default gcc version 4.7.2. gcc 4.4.7, 4.5.4 and 4.6.3 available.
......
......@@ -1472,7 +1472,7 @@ class GCC_compiler(object):
#cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
# numpy 1.7 deprecated the following macro but the didn't
# numpy 1.7 deprecated the following macro but the new one didn't
# existed in the past
if bool(numpy_ver < [1, 7]):
cxxflags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
......@@ -1609,6 +1609,7 @@ class GCC_compiler(object):
try:
p = call_subprocess_Popen(cmd, stderr=subprocess.PIPE)
p.wait()
compile_stderr = p.communicate()[1]
except Exception:
# An exception can occur e.g. if `g++` is not found.
......
......@@ -194,41 +194,28 @@ if __name__ == "__main__":
goto2 1.13/16 3.16s
Test time in float32
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)
gpu/cuda version
M2050(Amazon)/5.0 0.25s
GTX680/4.2 0.154s
GTX580/4.2 0.164s
GTX480/4.2 0.192s
GTX470/4.2 0.238s
C2075/4.2 0.25s
GTX285/4.2 0.452s #cuda 3.0 seam faster? driver version?
GT520/4.2 2.68s
GTX560/4.2 0.30s
GTX460/4.0 0.45s
GTX580/3.2 0.203s
GTX680/3.2 0.218s
GTX480/3.2 0.237s
GTX470/3.2 0.297s
GTX285/3.2 0.452s #cuda 3.0 seam faster? driver version?
GTX480/3.0 0.27s
M2070/4.1 0.27s
GTX470/3.2 0.29s
M2070/3.2 0.32s
GTX470/3.0 0.34s
GTX285/3.0 0.40s
C1060/3.2 0.46s
GTX550Ti/4.0 0.57s
520/3.2 3.06s
520M/3.2 3.19s with bumblebee on Ubuntu 12.04
GT220/3.2RC 3.80s
GT210/4.0 6.35s
8500GT/3.0 10.68s
cuda version 5.0 4.2 4.1 4.0 3.2 3.0 # note
gpu
M2070 0.25s 0.27s 0.32s
M2050(Amazon) 0.25s
C2075 0.25s
C1060 0.46s
GTX680 0.154s 0.218s
GTX580 0.164s 0.203s
GTX480 0.192s 0.237s 0.27s
GTX470 0.238s 0.297s 0.34s
GTX660 0.24s
GTX560 0.30s
GTX460 0.37s 0.45s
GTX285                       0.452s        0.452s 0.40s  # cuda 3.0 seems faster? driver version?
GTX550Ti 0.57s
GT520 2.68s 3.06s
520M 3.19s # with bumblebee on Ubuntu 12.04
GT220 3.80s
GT210 6.35s
8500GT 10.68s
"""
t, impl = execute(not options.print_only, not options.quiet,
......
......@@ -218,7 +218,7 @@ if cuda_available:
atexit.register(gpu_shutdown)
except EnvironmentError, e:
cuda_available = False
cuda_initialization_error_message = e.message
cuda_initialization_error_message = " ".join(e.args)
class GpuOp(theano.gof.Op):
......
......@@ -561,6 +561,9 @@ class ScalarVariable(_scalar_py_operators, Variable):
class ScalarConstant(_scalar_py_operators, Constant):
pass
# Register ScalarConstant as the type of Constant corresponding to Scalar
Scalar.Constant = ScalarConstant
# Easy constructors
......
......@@ -519,7 +519,6 @@ def get_scalar_constant_value(v):
if isinstance(v, numpy.ndarray):
return numpy_scalar(v)
if isinstance(v, Constant):
if getattr(v.tag, 'unique_value', None) is not None:
data = v.tag.unique_value
......@@ -528,11 +527,9 @@ def get_scalar_constant_value(v):
return numpy_scalar(data)
if v.owner:
if isinstance(v.owner.op, Alloc):
return get_scalar_constant_value(v.owner.inputs[0])
if isinstance(v.owner.op, DimShuffle):
return get_scalar_constant_value(v.owner.inputs[0])
if isinstance(v.owner.op, Rebroadcast):
if isinstance(v.owner.op, (Alloc, DimShuffle, Rebroadcast,
compile.ops.OutputGuard,
compile.DeepCopyOp)):
return get_scalar_constant_value(v.owner.inputs[0])
if isinstance(v.owner.op, Elemwise) and \
isinstance(v.owner.op.scalar_op, scal.Second):
......@@ -2007,6 +2004,13 @@ class TensorConstant(_tensor_py_operators, Constant):
def signature(self):
    """Build this constant's signature from its type and data."""
    sig_contents = (self.type, self.data)
    return TensorConstantSignature(sig_contents)
def equals(self, other):
    """Override Constant.equals so that comparison against a raw
    numpy.ndarray also works: the array is first wrapped in a
    TensorConstant, then the two signatures are compared."""
    if isinstance(other, numpy.ndarray):
        # Wrap the ndarray so both sides expose signature().
        other = constant(other)
    if not isinstance(other, TensorConstant):
        return False
    return self.signature() == other.signature()
TensorType.Constant = TensorConstant
......
......@@ -813,7 +813,18 @@ class ShapeFeature(object):
"for a variable with %d dimensions." % (
len(s), r.ndim))
shape_vars = [self.unpack(s_i) for s_i in s]
shape_vars = []
for i in range(r.ndim):
if (hasattr(r.type, 'broadcastable') and
r.type.broadcastable[i]):
shape_vars.append(self.lscalar_one)
else:
shape_vars.append(self.unpack(s[i]))
assert all([not r.type.broadcastable[i] or
self.lscalar_one.equals(shape_vars[i]) or
self.lscalar_one.equals(
T.extract_constant(shape_vars[i]))
for i in range(r.ndim)])
self.shape_of[r] = tuple(shape_vars)
for sv in shape_vars:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
......@@ -855,6 +866,12 @@ class ShapeFeature(object):
merged_shape.append(r_shape[i])
else:
merged_shape.append(other_shape[i])
assert all([(not r.type.broadcastable[i] and
not other_r.type.broadcastable[i]) or
self.lscalar_one.equals(merged_shape[i]) or
self.lscalar_one.equals(
T.extract_constant(merged_shape[i]))
for i in range(r.ndim)])
self.shape_of[r] = tuple(merged_shape)
for sv in self.shape_of[r]:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
......@@ -871,6 +888,10 @@ class ShapeFeature(object):
new_shape.append(self.unpack(s_i))
else:
new_shape.append(s_j)
assert all([not r.type.broadcastable[i] or
self.lscalar_one.equals(new_shape[i]) or
self.lscalar_one.equals(T.extract_constant(new_shape[i]))
for i in range(r.ndim)])
self.shape_of[r] = tuple(new_shape)
for sv in self.shape_of[r]:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
......
......@@ -5456,8 +5456,9 @@ class test_tensordot(unittest.TestCase):
f1 = inplace_func([avec, bvec], c)
aval = rand(5)
bval = rand(5)
self.assertTrue(numpy.tensordot(aval, bval, axes) == \
f1(aval, bval))
out0 = numpy.tensordot(aval, bval, axes)
out1 = f1(aval, bval)
self.assertTrue(numpy.allclose(out0, out1), (out0, out1))
utt.verify_grad(self.TensorDot(axes), [aval, bval])
# Test matrix-vector
......
......@@ -2475,6 +2475,57 @@ class test_shapeoptimizer(unittest.TestCase):
assert len(topo) == 1
assert topo[0].op == deep_copy_op
@staticmethod
def max_pool_c01b(c01b, pool_shp, pool_stride, img_shp):
    """Like max_pool but with input using axes ('c', 0, 1, 'b')
    (Alex Krizhevsky format)

    pool_shp, pool_stride and img_shp are int that represent
    the same shp in x and y.
    """
    # Running elementwise maximum over all pool-window offsets.
    mx = None

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        # Smallest pool index whose window reaches the image's last pixel.
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        # Sanity checks: this pool covers the end of the image, and the
        # previous one did not.
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval

    # Compute starting row of the last pool
    last_pool_r = last_pool(img_shp, pool_shp, pool_stride) * pool_stride
    # Compute number of rows needed in img for all indexes to work out
    required_r = last_pool_r + pool_shp

    # Same computation for columns; the image is square (img_shp for both
    # dimensions), so the values are identical.
    last_pool_c = last_pool(img_shp, pool_shp, pool_stride) * pool_stride
    required_c = last_pool_c + pool_shp

    # Pad the image with -inf so positions outside the original image can
    # never win the max below.
    wide_infinity = T.alloc(-numpy.inf, c01b.shape[0],
                            required_r, required_c, c01b.shape[3])
    c01b = T.set_subtensor(wide_infinity[:, 0:img_shp, 0:img_shp, :], c01b)

    # For each offset inside the pool window, take a strided slice that
    # picks that offset's pixel from every pool, and fold it into the
    # elementwise maximum.
    for row_within_pool in xrange(pool_shp):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shp):
            col_stop = last_pool_c + col_within_pool + 1
            cur = c01b[:, row_within_pool:row_stop:pool_stride,
                       col_within_pool:col_stop:pool_stride, :]
            if mx is None:
                mx = cur
            else:
                mx = T.maximum(mx, cur)
    return mx
def test_broadcasted_dims(self):
    """Regression test: max_pool_c01b on a fully-broadcastable
    (1, 1, 1, 1) input used to crash during optimization."""
    input_shape = (1, 1, 1, 1)
    seed = utt.fetch_seed()
    rng = numpy.random.RandomState(seed)
    data = rng.rand(*input_shape).astype(config.floatX)
    a = shared(data)
    pooled = self.max_pool_c01b(a, 1, 1, 1)
    fn = theano.function([], pooled)
    # Compilation and execution must not raise.
    fn()
def test_local_track_shape_i(self):
class IdentityNoShape(gof.Op):
'''Op that does not infer the output shape from the input one'''
......
......@@ -55,10 +55,12 @@ nosetests.
# Standard-library imports (the duplicated `import datetime` was removed).
import cPickle
import datetime
import os
import subprocess
import sys
import time

# Project imports.
import theano
from theano.misc.windows import call_subprocess_Popen
......@@ -261,8 +263,8 @@ def run(stdout, stderr, argv, theano_nose, batch_size, time_profile,
n_tests + 1)):
# Print the test we will start in the raw log to help
# debug tests that are too long.
f_rawlog.write("\nWill run test #%d %s\n" % (test_id,
data["ids"][test_id]))
f_rawlog.write("\n%s Will run test #%d %s\n" % (
time.ctime(), test_id, data["ids"][test_id]))
f_rawlog.flush()
proc = call_subprocess_Popen(
......
......@@ -64,7 +64,8 @@ class OrderedUpdates(OrderedDict):
# Warn about non-determinism.
warnings.warn('Updating an `OrderedUpdates` with a '
'non-ordered dictionary with 2+ elements could '
'make your code non-deterministic')
'make your code non-deterministic',
stacklevel=2)
for key, val in OrderedDict(other).iteritems():
if key in self:
if self[key] == val:
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment