提交 38e2f502 authored 作者: abergeron's avatar abergeron

Merge pull request #1946 from nouiz/scan

Small scan speed up on the GPU.
......@@ -21,6 +21,8 @@ Montreal).
News
====
* Colin Raffel `tutorial on Theano <http://nbviewer.ipython.org/github/craffel/theano-tutorial/blob/master/Theano%20Tutorial.ipynb>`_.
* Ian Goodfellow did a `12h class with exercises on Theano <https://github.com/goodfeli/theano_exercises>`_.
* Theano 0.6 was released. Everybody is encouraged to update.
......
......@@ -120,8 +120,18 @@ enum = EnumStr("g++", "")
try:
rc = call_subprocess_Popen(['g++', '-v'])
except OSError:
enum = EnumStr("")
rc = 1
if rc == 0:
AddConfigVar('cxx',
"The C++ compiler to use. Currently only g++ is"
" supported, but supporting additional compilers should not be "
"too difficult. "
"If it is empty, no C++ code is compiled.",
enum,
in_c_key=False)
del enum
if rc == 0 and config.cxx != "":
# Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar('linker',
("Default linker used if the theano flags mode is Mode "
......@@ -140,16 +150,6 @@ else:
'optimized C-implementations (for both CPU and GPU) and will '
'default to Python implementations. Performance will be severely '
'degraded.')
enum = EnumStr("")
AddConfigVar('cxx',
"The C++ compiler to use. Currently only g++ is"
" supported, but supporting additional compilers should not be "
"too difficult. "
"If it is empty, no C++ code is compiled.",
enum,
in_c_key=False)
del enum
#Keep the default value the same as the one for the mode FAST_RUN
......
......@@ -12,7 +12,8 @@ import warnings
from theano.gof.python25 import all
from theano.configparser import config, AddConfigVar, BoolParam, ConfigParam
from theano.configparser import (config, AddConfigVar,
BoolParam, ConfigParam, _config_var_list)
import theano.gof.cmodule
......@@ -560,7 +561,7 @@ except (OSError, theano.gof.cmodule.MissingGXX), e:
# already changed the default linker to something else than CVM.
# Currently this is the py linker.
# Here we assert that the default linker is not cvm.
assert not [x for x in theano.configparser._config_var_list
assert not [x for x in _config_var_list
if x.fullname == 'linker'][0].default.startswith('cvm'), e
pass
......
......@@ -1411,9 +1411,10 @@ def norm(x,ord):
elif ndim > 2:
raise NotImplementedError("We don't support norm witn ndim > 2")
class lstsq(theano.Op):
def __eq__(self, other):
pass
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
......
......@@ -422,9 +422,17 @@ class Scan(PureOp):
raise ValueError('For output %s you need to provide a '
'scalar int !', str(outer_nitsot))
assert len(new_inputs) == len(inputs)
self.vector_seqs = [seq.ndim == 1 for seq in
# The vector_seqs and vector_outs are just a workaround for
# strange NumPy behavior: vector_ndarray[int] returns a NumPy
# scalar and not a NumPy ndarray of 0 dimensions.
self.vector_seqs = [isinstance(seq, (tensor.TensorVariable,
tensor.TensorConstant)) and
seq.ndim == 1 for seq in
new_inputs[1:1 + self.n_seqs]]
self.vector_outs = [arg.ndim == 1 for arg in
self.vector_outs = [isinstance(arg, (tensor.TensorVariable,
tensor.TensorConstant)) and
arg.ndim == 1 for arg in
new_inputs[1 + self.n_seqs: (1 + self.n_seqs +
self.n_outs)]]
self.vector_outs += [False] * self.n_nit_sot
......@@ -598,12 +606,6 @@ class Scan(PureOp):
for _d1 in range(cython_mit_mot_out_nslices[_d0]):
cython_mit_mot_out_slices[_d0, _d1] = \
self.mit_mot_out_slices[_d0][_d1]
vector_seqs = [seq.ndim == 1 for seq in
node.inputs[1:1 + self.n_seqs]]
vector_outs = [arg.ndim == 1 for arg in
node.inputs[1 + self.n_seqs:
(1 + self.n_seqs + self.n_outs)]]
vector_outs += [False] * self.n_nit_sot
cython_vector_seqs = numpy.asarray(self.vector_seqs,
dtype='int32')
......
import os, logging, sys
import logging
import os
import sys
import numpy
import theano
from theano import config
......@@ -60,6 +64,28 @@ except ImportError:
os.mkdir(loc)
preargs = ['-fwrapv', '-O2', '-fno-strict-aliasing']
preargs += cmodule.GCC_compiler.compile_args()
# Cython 0.19.1 always uses the old NumPy interface. So we
# need to manually modify the .c file to get it compiled
# by Theano, since by default we tell NumPy not to import
# the old interface.
if False:
# During scan Cython development, it is helpful to keep the old interface, to avoid manually editing the c file each time.
preargs.remove('-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION')
else:
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
# Add some macros to lower the number of edits
# needed to the c file.
if bool(numpy_ver >= [1, 7]):
# Needed when we disable the old API, as Cython
# uses the old interface
preargs.append("-D NPY_ENSUREARRAY=NPY_ARRAY_ENSUREARRAY")
preargs.append("-D NPY_ENSURECOPY=NPY_ARRAY_ENSURECOPY")
preargs.append("-D NPY_ALIGNED=NPY_ARRAY_ALIGNED")
preargs.append("-D NPY_WRITEABLE=NPY_ARRAY_WRITEABLE")
preargs.append("-D NPY_UPDATE_ALL=NPY_ARRAY_UPDATE_ALL")
preargs.append("-D NPY_C_CONTIGUOUS=NPY_ARRAY_C_CONTIGUOUS")
preargs.append("-D NPY_F_CONTIGUOUS=NPY_ARRAY_F_CONTIGUOUS")
cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
preargs=preargs)
# Save version into the __init__.py file.
......
......@@ -86,7 +86,7 @@ class multiple_outputs_numeric_grad:
dtype_eps = _eps
self.ndarray_mask = ndarray_mask
#'''
# '''
# Compute clean output:
f_x = f(*pt)
gx = []
......@@ -148,7 +148,7 @@ class multiple_outputs_numeric_grad:
return numpy.inf, 0
#TODO: Test this function, and if it works,
# TODO: Test this function, and if it works,
# use it with the normal verify_grad rather than the
# copy-and-pasted one above.
# Also - add a reference to this technique in the
......@@ -201,7 +201,6 @@ def grab_scan_node(output):
class T_Scan(unittest.TestCase):
#class T_Scan(object):
def setUp(self):
utt.seed_rng()
......@@ -230,7 +229,7 @@ class T_Scan(unittest.TestCase):
updates=updates,
allow_input_downcast=True)
### TESTING PICKLE-ing this function
# TESTING PICKLE-ing this function
origdir = os.getcwd()
tmpdir = None
try:
......@@ -367,7 +366,7 @@ class T_Scan(unittest.TestCase):
# This first version tests the first case in the optimizer for the GPU.
def test_one_sequence_one_output_weights_gpu1(self):
from theano.sandbox import cuda
if cuda.cuda_available == False:
if not cuda.cuda_available:
raise SkipTest('Optional package cuda disabled')
def f_rnn(u_t, x_tm1, W_in, W):
......@@ -447,7 +446,7 @@ class T_Scan(unittest.TestCase):
# This second version tests the second case in the optimizer for the GPU.
def test_one_sequence_one_output_weights_gpu2(self):
from theano.sandbox import cuda
if cuda.cuda_available == False:
if not cuda.cuda_available:
raise SkipTest('Optional package cuda disabled')
def f_rnn(u_t, x_tm1, W_in, W):
......@@ -511,7 +510,7 @@ class T_Scan(unittest.TestCase):
# outputs when running on the GPU
def test_gpu3_mixture_dtype_outputs(self):
from theano.sandbox import cuda
if cuda.cuda_available == False:
if not cuda.cuda_available:
raise SkipTest('Optional package cuda disabled')
def f_rnn(u_t, x_tm1, W_in, W):
......@@ -595,8 +594,8 @@ class T_Scan(unittest.TestCase):
v_out = numpy.zeros((4,))
v_out[0] = v_u[0] * W_in.get_value() + v_x0 * W.get_value()
for step in xrange(1, 4):
v_out[step] = v_u[step] * W_in.get_value() + \
v_out[step - 1] * W.get_value()
v_out[step] = (v_u[step] * W_in.get_value() +
v_out[step - 1] * W.get_value())
theano_values = f3(v_u, v_x0)
assert numpy.allclose(theano_values, v_out)
......@@ -624,7 +623,7 @@ class T_Scan(unittest.TestCase):
y0 = theano.tensor.scalar('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return [theano.dot(u1_t, W_in1) + u2_t * W_in2 + \
return [theano.dot(u1_t, W_in1) + u2_t * W_in2 +
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan(f_rnn_cmpl,
......@@ -643,12 +642,12 @@ class T_Scan(unittest.TestCase):
# compute the values in numpy
v_x = numpy.zeros((3, 2), dtype=theano.config.floatX)
v_y = numpy.zeros((3,), dtype=theano.config.floatX)
v_x[0] = numpy.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
numpy.dot(v_x0, vW)
v_x[0] = (numpy.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 +
numpy.dot(v_x0, vW))
v_y[0] = numpy.dot(v_x0, vWout)
for i in xrange(1, 3):
v_x[i] = numpy.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
numpy.dot(v_x[i - 1], vW)
v_x[i] = (numpy.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 +
numpy.dot(v_x[i - 1], vW))
v_y[i] = numpy.dot(v_x[i - 1], vWout)
(theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)
......@@ -684,8 +683,8 @@ class T_Scan(unittest.TestCase):
y_tm1,
y_tm3,
W_in1):
return [theano.dot(u1_t, W_in1) + \
(u2_t + u2_tm1 * u2_tp1) * W_in2 + \
return [theano.dot(u1_t, W_in1) +
(u2_t + u2_tm1 * u2_tp1) * W_in2 +
theano.dot(x_tm1, W),
(y_tm1 + y_tm3) * theano.dot(x_tm1, W_out),
theano.dot(u1_t, W_in1)]
......@@ -891,10 +890,10 @@ class T_Scan(unittest.TestCase):
numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0] * vu2[0]
numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu1[0] + vu2[0]
for i in xrange(1, 3):
numpy_x0[i] = vu0[i] * vW_in + numpy_x0[i - 1] * vW + \
vu1[i] * vu2[i]
numpy_x1[i] = vu0[i] * vW_in + numpy_x1[i - 1] * vW + \
vu1[i] + vu2[i]
numpy_x0[i] = (vu0[i] * vW_in + numpy_x0[i - 1] * vW +
vu1[i] * vu2[i])
numpy_x1[i] = (vu0[i] * vW_in + numpy_x1[i - 1] * vW +
vu1[i] + vu2[i])
# note theano computes inplace, so call function after numpy
# equivalent is done
......@@ -908,8 +907,8 @@ class T_Scan(unittest.TestCase):
# Old way of doing inplace operations is deprecated .. tests don't
# make sense anymore.
##utt.assert_allclose( theano_x0 , vu2)
## utt.assert_allclose( theano_x1 , vu1)
## utt.assert_allclose(theano_x0 , vu2)
## utt.assert_allclose(theano_x1 , vu1)
# simple rnn ; compute inplace version 2
def test_inplace2(self):
......@@ -971,10 +970,10 @@ class T_Scan(unittest.TestCase):
numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0] * vu1[1]
numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu2[0] + vu2[1] + vu2[2]
for i in xrange(1, 3):
numpy_x0[i] = vu0[i] * vW_in + numpy_x0[i - 1] * vW + \
vu1[i] * vu1[i + 1]
numpy_x1[i] = vu0[i] * vW_in + numpy_x1[i - 1] * vW + \
vu2[i] + vu2[i + 1] + vu2[i + 2]
numpy_x0[i] = (vu0[i] * vW_in + numpy_x0[i - 1] * vW +
vu1[i] * vu1[i + 1])
numpy_x1[i] = (vu0[i] * vW_in + numpy_x1[i - 1] * vW +
vu2[i] + vu2[i + 1] + vu2[i + 2])
# note theano computes inplace, so call function after numpy
# equivalent is done
......@@ -1069,8 +1068,8 @@ class T_Scan(unittest.TestCase):
y1 = theano.shared(vy1, 'y1')
def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
y0_t = theano.dot(theano.dot(u1_t, W1), W2) + 0.1 * y0_tm1 + \
0.33 * y0_tm2 + 0.17 * y0_tm3
y0_t = (theano.dot(theano.dot(u1_t, W1), W2) + 0.1 * y0_tm1 +
0.33 * y0_tm2 + 0.17 * y0_tm3)
y1_t = theano.dot(u2_t, W2) + y1_tm1
y2_t = theano.dot(u1_t, W1)
nwW1 = W1 + .1
......@@ -1106,14 +1105,13 @@ class T_Scan(unittest.TestCase):
numpy_W1 = vW1.copy()
numpy_W2 = vW2.copy()
for idx in xrange(3):
numpy_y0[idx + 3] = numpy.dot(\
numpy.dot(vu1[idx, :], numpy_W1), \
numpy_y0[idx + 3] = numpy.dot(numpy.dot(vu1[idx, :], numpy_W1),
numpy_W2) + \
0.1 * numpy_y0[idx + 2] + \
0.33 * numpy_y0[idx + 1] + \
0.17 * numpy_y0[idx]
numpy_y1[idx + 1] = numpy.dot(vu2[idx, :], numpy_W2) +\
numpy_y1[idx]
numpy_y1[idx + 1] = (numpy.dot(vu2[idx, :], numpy_W2) +
numpy_y1[idx])
numpy_y2[idx] = numpy.dot(vu1[idx, :], numpy_W1)
numpy_W1 = numpy_W1 + .1
numpy_W2 = numpy_W2 + .05
......@@ -1196,7 +1194,7 @@ class T_Scan(unittest.TestCase):
def test_cuda_gibbs_chain(self):
from theano.sandbox import cuda
if cuda.cuda_available == False:
if not cuda.cuda_available:
raise SkipTest('Optional package cuda disabled')
rng = numpy.random.RandomState(utt.fetch_seed())
......
......@@ -5033,7 +5033,7 @@ def power(x, y):
return x**y
def swapaxes(y,axis1,axis2):
def swapaxes(y, axis1, axis2):
"swap axes of inputted tensor"
y = as_tensor_variable(y)
ndim = y.ndim
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论