Commit b4dc02d6, authored by Brandon T. Willard

Replace theano.tensor alias T with tt in tests.gpuarray

Parent commit: b4c51eb2
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
class Model(object): class Model(object):
...@@ -125,16 +125,16 @@ class GRU(Layer): ...@@ -125,16 +125,16 @@ class GRU(Layer):
"""step through processed input to create output""" """step through processed input to create output"""
def step(inp, s_prev): def step(inp, s_prev):
i_t = T.nnet.sigmoid( i_t = tt.nnet.sigmoid(
T.dot(inp, self.W_i) + T.dot(s_prev, self.R_i) + self.b_wi + self.b_ru tt.dot(inp, self.W_i) + tt.dot(s_prev, self.R_i) + self.b_wi + self.b_ru
) )
r_t = T.nnet.sigmoid( r_t = tt.nnet.sigmoid(
T.dot(inp, self.W_r) + T.dot(s_prev, self.R_r) + self.b_wr + self.b_rr tt.dot(inp, self.W_r) + tt.dot(s_prev, self.R_r) + self.b_wr + self.b_rr
) )
h_hat_t = T.tanh( h_hat_t = tt.tanh(
T.dot(inp, self.W_h) tt.dot(inp, self.W_h)
+ (r_t * (T.dot(s_prev, self.R_h) + self.b_rh)) + (r_t * (tt.dot(s_prev, self.R_h) + self.b_rh))
+ self.b_wh + self.b_wh
) )
...@@ -229,21 +229,21 @@ class LSTM(Layer): ...@@ -229,21 +229,21 @@ class LSTM(Layer):
"""step through processed input to create output""" """step through processed input to create output"""
def step(x_t, h_tm1, c_tm1): def step(x_t, h_tm1, c_tm1):
i_t = T.nnet.sigmoid( i_t = tt.nnet.sigmoid(
T.dot(x_t, self.W_i) + T.dot(h_tm1, self.R_i) + self.b_wi + self.b_ri tt.dot(x_t, self.W_i) + tt.dot(h_tm1, self.R_i) + self.b_wi + self.b_ri
) )
f_t = T.nnet.sigmoid( f_t = tt.nnet.sigmoid(
T.dot(x_t, self.W_f) + T.dot(h_tm1, self.R_f) + self.b_wf + self.b_rf tt.dot(x_t, self.W_f) + tt.dot(h_tm1, self.R_f) + self.b_wf + self.b_rf
) )
o_t = T.nnet.sigmoid( o_t = tt.nnet.sigmoid(
T.dot(x_t, self.W_o) + T.dot(h_tm1, self.R_o) + self.b_ro + self.b_wo tt.dot(x_t, self.W_o) + tt.dot(h_tm1, self.R_o) + self.b_ro + self.b_wo
) )
c_hat_t = T.tanh( c_hat_t = tt.tanh(
T.dot(x_t, self.W_c) + T.dot(h_tm1, self.R_c) + self.b_wc + self.b_rc tt.dot(x_t, self.W_c) + tt.dot(h_tm1, self.R_c) + self.b_wc + self.b_rc
) )
c_t = f_t * c_tm1 + i_t * c_hat_t c_t = f_t * c_tm1 + i_t * c_hat_t
h_t = o_t * T.tanh(c_t) h_t = o_t * tt.tanh(c_t)
return h_t, c_t return h_t, c_t
...@@ -275,7 +275,7 @@ class FC(Layer): ...@@ -275,7 +275,7 @@ class FC(Layer):
self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b") self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b")
def output(self): def output(self):
return T.dot(self.X, self.W) + self.b return tt.dot(self.X, self.W) + self.b
class WrapperLayer(Layer): class WrapperLayer(Layer):
......
...@@ -4,7 +4,7 @@ pygpu = pytest.importorskip("pygpu") ...@@ -4,7 +4,7 @@ pygpu = pytest.importorskip("pygpu")
gpuarray = pygpu.gpuarray gpuarray = pygpu.gpuarray
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.tensor.basic import alloc from theano.tensor.basic import alloc
...@@ -217,7 +217,7 @@ def makeTester( ...@@ -217,7 +217,7 @@ def makeTester(
def test_transfer_cpu_gpu(): def test_transfer_cpu_gpu():
a = T.fmatrix("a") a = tt.fmatrix("a")
g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g") g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")
av = np.asarray(rng.rand(5, 4), dtype="float32") av = np.asarray(rng.rand(5, 4), dtype="float32")
...@@ -254,7 +254,7 @@ def test_transfer_strided(): ...@@ -254,7 +254,7 @@ def test_transfer_strided():
# This is just to ensure that it works in theano # This is just to ensure that it works in theano
# libgpuarray has a much more comprehensive suit of tests to # libgpuarray has a much more comprehensive suit of tests to
# ensure correctness # ensure correctness
a = T.fmatrix("a") a = tt.fmatrix("a")
g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g") g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")
av = np.asarray(rng.rand(5, 8), dtype="float32") av = np.asarray(rng.rand(5, 8), dtype="float32")
...@@ -300,7 +300,7 @@ class TestGPUAlloc(TestAlloc): ...@@ -300,7 +300,7 @@ class TestGPUAlloc(TestAlloc):
dtype = "float32" dtype = "float32"
mode = mode_with_gpu mode = mode_with_gpu
shared = staticmethod(gpuarray_shared_constructor) shared = staticmethod(gpuarray_shared_constructor)
allocs = [GpuAlloc(test_ctx_name), GpuAlloc(test_ctx_name), T.Alloc()] allocs = [GpuAlloc(test_ctx_name), GpuAlloc(test_ctx_name), tt.Alloc()]
def test_alloc_empty(): def test_alloc_empty():
...@@ -343,21 +343,21 @@ def test_shape(): ...@@ -343,21 +343,21 @@ def test_shape():
assert np.all(f(v) == (3, 4, 5)) assert np.all(f(v) == (3, 4, 5))
if theano.config.mode != "FAST_COMPILE": if theano.config.mode != "FAST_COMPILE":
assert len(topo) == 4 assert len(topo) == 4
assert isinstance(topo[0].op, T.opt.Shape_i) assert isinstance(topo[0].op, tt.opt.Shape_i)
assert isinstance(topo[1].op, T.opt.Shape_i) assert isinstance(topo[1].op, tt.opt.Shape_i)
assert isinstance(topo[2].op, T.opt.Shape_i) assert isinstance(topo[2].op, tt.opt.Shape_i)
assert isinstance(topo[3].op, T.opt.MakeVector) assert isinstance(topo[3].op, tt.opt.MakeVector)
mode = mode_with_gpu.excluding("local_shape_to_shape_i") mode = mode_with_gpu.excluding("local_shape_to_shape_i")
f = theano.function([x], x.shape, mode=mode) f = theano.function([x], x.shape, mode=mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert np.all(f(v) == (3, 4, 5)) assert np.all(f(v) == (3, 4, 5))
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, T.Shape) assert isinstance(topo[0].op, tt.Shape)
def test_gpu_contiguous(): def test_gpu_contiguous():
a = T.fmatrix("a") a = tt.fmatrix("a")
i = T.iscalar("i") i = tt.iscalar("i")
a_val = np.asarray(np.random.rand(4, 5), dtype="float32") a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
# The reshape is needed otherwise we make the subtensor on the CPU # The reshape is needed otherwise we make the subtensor on the CPU
# to transfer less data. # to transfer less data.
...@@ -383,8 +383,8 @@ class TestGPUReshape(TestReshape): ...@@ -383,8 +383,8 @@ class TestGPUReshape(TestReshape):
theano.compile.DeepCopyOp, theano.compile.DeepCopyOp,
GpuDimShuffle, GpuDimShuffle,
GpuElemwise, GpuElemwise,
theano.tensor.opt.Shape_i, tt.opt.Shape_i,
theano.tensor.opt.MakeVector, tt.opt.MakeVector,
) )
assert self.op == GpuReshape assert self.op == GpuReshape
...@@ -418,7 +418,7 @@ class TestGPUJoinAndSplit(TestJoinAndSplit): ...@@ -418,7 +418,7 @@ class TestGPUJoinAndSplit(TestJoinAndSplit):
# Also test float16 computation at the same time. # Also test float16 computation at the same time.
rng = np.random.RandomState(seed=utt.fetch_seed()) rng = np.random.RandomState(seed=utt.fetch_seed())
m = self.shared(rng.rand(4, 6).astype("float16")) m = self.shared(rng.rand(4, 6).astype("float16"))
o = T.Split(2)(m, 0, [2, 2]) o = tt.Split(2)(m, 0, [2, 2])
assert o[0].dtype == "float16" assert o[0].dtype == "float16"
f = theano.function([], o, mode=self.mode) f = theano.function([], o, mode=self.mode)
assert any( assert any(
...@@ -433,22 +433,22 @@ class TestGPUJoinAndSplit(TestJoinAndSplit): ...@@ -433,22 +433,22 @@ class TestGPUJoinAndSplit(TestJoinAndSplit):
def test_gpujoin_gpualloc(): def test_gpujoin_gpualloc():
a = T.fmatrix("a") a = tt.fmatrix("a")
a_val = np.asarray(np.random.rand(4, 5), dtype="float32") a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
b = T.fmatrix("b") b = tt.fmatrix("b")
b_val = np.asarray(np.random.rand(3, 5), dtype="float32") b_val = np.asarray(np.random.rand(3, 5), dtype="float32")
f = theano.function( f = theano.function(
[a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4, mode=mode_without_gpu [a, b], tt.join(0, tt.zeros_like(a), tt.ones_like(b)) + 4, mode=mode_without_gpu
) )
f_gpu = theano.function( f_gpu = theano.function(
[a, b], T.join(0, T.zeros_like(a), T.ones_like(b)), mode=mode_with_gpu [a, b], tt.join(0, tt.zeros_like(a), tt.ones_like(b)), mode=mode_with_gpu
) )
f_gpu2 = theano.function( f_gpu2 = theano.function(
[a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4, mode=mode_with_gpu [a, b], tt.join(0, tt.zeros_like(a), tt.ones_like(b)) + 4, mode=mode_with_gpu
) )
assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2 assert sum([node.op == tt.alloc for node in f.maker.fgraph.toposort()]) == 2
assert sum([node.op == T.join_ for node in f.maker.fgraph.toposort()]) == 1 assert sum([node.op == tt.join_ for node in f.maker.fgraph.toposort()]) == 1
assert ( assert (
sum([isinstance(node.op, GpuAlloc) for node in f_gpu.maker.fgraph.toposort()]) sum([isinstance(node.op, GpuAlloc) for node in f_gpu.maker.fgraph.toposort()])
== 2 == 2
...@@ -471,10 +471,10 @@ def test_gpueye(): ...@@ -471,10 +471,10 @@ def test_gpueye():
# allowed. # allowed.
if M is None: if M is None:
M = N M = N
N_symb = T.iscalar() N_symb = tt.iscalar()
M_symb = T.iscalar() M_symb = tt.iscalar()
k_symb = T.iscalar() k_symb = tt.iscalar()
out = T.eye(N_symb, M_symb, k_symb, dtype=dtype) + np.array(1).astype(dtype) out = tt.eye(N_symb, M_symb, k_symb, dtype=dtype) + np.array(1).astype(dtype)
f = theano.function([N_symb, M_symb, k_symb], out, mode=mode_with_gpu) f = theano.function([N_symb, M_symb, k_symb], out, mode=mode_with_gpu)
result = np.asarray(f(N, M, k)) - np.array(1).astype(dtype) result = np.asarray(f(N, M, k)) - np.array(1).astype(dtype)
...@@ -511,7 +511,7 @@ def test_hostfromgpu_shape_i(): ...@@ -511,7 +511,7 @@ def test_hostfromgpu_shape_i():
m = mode_with_gpu.including( m = mode_with_gpu.including(
"local_dot_to_dot22", "local_dot22_to_dot22scalar", "specialize" "local_dot_to_dot22", "local_dot22_to_dot22scalar", "specialize"
) )
a = T.fmatrix("a") a = tt.fmatrix("a")
ca = theano.gpuarray.type.GpuArrayType("float32", (False, False))() ca = theano.gpuarray.type.GpuArrayType("float32", (False, False))()
av = np.asarray(np.random.rand(5, 4), dtype="float32") av = np.asarray(np.random.rand(5, 4), dtype="float32")
cv = gpuarray.asarray( cv = gpuarray.asarray(
...@@ -522,9 +522,9 @@ def test_hostfromgpu_shape_i(): ...@@ -522,9 +522,9 @@ def test_hostfromgpu_shape_i():
assert any(isinstance(x.op, GpuFromHost) for x in f.maker.fgraph.toposort()) assert any(isinstance(x.op, GpuFromHost) for x in f.maker.fgraph.toposort())
f = theano.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m) f = theano.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert isinstance(topo[0].op, T.opt.Shape_i) assert isinstance(topo[0].op, tt.opt.Shape_i)
assert isinstance(topo[1].op, T.opt.Shape_i) assert isinstance(topo[1].op, tt.opt.Shape_i)
assert isinstance(topo[2].op, T.opt.MakeVector) assert isinstance(topo[2].op, tt.opt.MakeVector)
assert tuple(f(av)) == (5, 4) assert tuple(f(av)) == (5, 4)
f = theano.function([ca], host_from_gpu(ca), mode=m) f = theano.function([ca], host_from_gpu(ca), mode=m)
...@@ -533,7 +533,7 @@ def test_hostfromgpu_shape_i(): ...@@ -533,7 +533,7 @@ def test_hostfromgpu_shape_i():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert isinstance(topo[0].op, theano.compile.Shape_i) assert isinstance(topo[0].op, theano.compile.Shape_i)
assert isinstance(topo[1].op, theano.compile.Shape_i) assert isinstance(topo[1].op, theano.compile.Shape_i)
assert isinstance(topo[2].op, theano.tensor.opt.MakeVector) assert isinstance(topo[2].op, tt.opt.MakeVector)
assert tuple(f(cv)) == (5, 4) assert tuple(f(cv)) == (5, 4)
...@@ -544,10 +544,10 @@ def test_Gpujoin_inplace(): ...@@ -544,10 +544,10 @@ def test_Gpujoin_inplace():
# Gpujoin function but all except one of them are empty. In this case # Gpujoin function but all except one of them are empty. In this case
# Gpujoin should work inplace and the output should be the view of the # Gpujoin should work inplace and the output should be the view of the
# non-empty element. # non-empty element.
s = T.lscalar() s = tt.lscalar()
data = np.array([3, 4, 5], dtype=theano.config.floatX) data = np.array([3, 4, 5], dtype=theano.config.floatX)
x = gpuarray_shared_constructor(data, borrow=True) x = gpuarray_shared_constructor(data, borrow=True)
z = T.zeros((s,)) z = tt.zeros((s,))
join = GpuJoin(view=0) join = GpuJoin(view=0)
c = join(0, x, z) c = join(0, x, z)
...@@ -560,11 +560,11 @@ def test_Gpujoin_inplace(): ...@@ -560,11 +560,11 @@ def test_Gpujoin_inplace():
def test_gpu_tril_triu(): def test_gpu_tril_triu():
def check_l(m, k=0): def check_l(m, k=0):
m_symb = T.matrix(dtype=m.dtype) m_symb = tt.matrix(dtype=m.dtype)
k_symb = T.iscalar() k_symb = tt.iscalar()
f = theano.function( f = theano.function(
[m_symb, k_symb], T.tril(m_symb, k_symb), mode=mode_with_gpu [m_symb, k_symb], tt.tril(m_symb, k_symb), mode=mode_with_gpu
) )
result = f(m, k) result = f(m, k)
assert np.allclose(result, np.tril(m, k)) assert np.allclose(result, np.tril(m, k))
...@@ -572,10 +572,10 @@ def test_gpu_tril_triu(): ...@@ -572,10 +572,10 @@ def test_gpu_tril_triu():
assert any([isinstance(node.op, GpuTri) for node in f.maker.fgraph.toposort()]) assert any([isinstance(node.op, GpuTri) for node in f.maker.fgraph.toposort()])
def check_u(m, k=0): def check_u(m, k=0):
m_symb = T.matrix(dtype=m.dtype) m_symb = tt.matrix(dtype=m.dtype)
k_symb = T.iscalar() k_symb = tt.iscalar()
f = theano.function( f = theano.function(
[m_symb, k_symb], T.triu(m_symb, k_symb), mode=mode_with_gpu [m_symb, k_symb], tt.triu(m_symb, k_symb), mode=mode_with_gpu
) )
result = f(m, k) result = f(m, k)
assert np.allclose(result, np.triu(m, k)) assert np.allclose(result, np.triu(m, k))
...@@ -624,10 +624,10 @@ def test_gputri(): ...@@ -624,10 +624,10 @@ def test_gputri():
# allowed. # allowed.
if M is None: if M is None:
M = N M = N
N_symb = T.iscalar() N_symb = tt.iscalar()
M_symb = T.iscalar() M_symb = tt.iscalar()
k_symb = T.iscalar() k_symb = tt.iscalar()
out = T.tri(N_symb, M_symb, k_symb, dtype=dtype) + np.array(1).astype(dtype) out = tt.tri(N_symb, M_symb, k_symb, dtype=dtype) + np.array(1).astype(dtype)
f = theano.function([N_symb, M_symb, k_symb], out, mode=mode_with_gpu) f = theano.function([N_symb, M_symb, k_symb], out, mode=mode_with_gpu)
result = np.asarray(f(N, M, k)) - np.array(1).astype(dtype) result = np.asarray(f(N, M, k)) - np.array(1).astype(dtype)
assert np.allclose(result, np.tri(N, M_, k, dtype=dtype)) assert np.allclose(result, np.tri(N, M_, k, dtype=dtype))
......
...@@ -2,7 +2,7 @@ import pytest ...@@ -2,7 +2,7 @@ import pytest
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
import theano.gpuarray import theano.gpuarray
from theano.gpuarray.ctc import gpu_ctc, GpuConnectionistTemporalClassification from theano.gpuarray.ctc import gpu_ctc, GpuConnectionistTemporalClassification
...@@ -56,7 +56,7 @@ class TestCTC: ...@@ -56,7 +56,7 @@ class TestCTC:
outputs = [cpu_ctc_cost] outputs = [cpu_ctc_cost]
if compute_grad: if compute_grad:
# Symbolic gradient of CTC cost # Symbolic gradient of CTC cost
cpu_ctc_grad = T.grad(T.mean(cpu_ctc_cost), activations) cpu_ctc_grad = tt.grad(tt.mean(cpu_ctc_cost), activations)
outputs += [cpu_ctc_grad] outputs += [cpu_ctc_grad]
return theano.function([], outputs, mode=mode) return theano.function([], outputs, mode=mode)
...@@ -65,7 +65,7 @@ class TestCTC: ...@@ -65,7 +65,7 @@ class TestCTC:
outputs = [gpu_ctc_cost] outputs = [gpu_ctc_cost]
if compute_grad: if compute_grad:
# Symbolic gradient of CTC cost # Symbolic gradient of CTC cost
gpu_ctc_grad = T.grad(T.mean(gpu_ctc_cost), activations) gpu_ctc_grad = tt.grad(tt.mean(gpu_ctc_cost), activations)
outputs += [gpu_ctc_grad] outputs += [gpu_ctc_grad]
return theano.function([], outputs, mode=mode_with_gpu) return theano.function([], outputs, mode=mode_with_gpu)
......
...@@ -2,9 +2,10 @@ import logging ...@@ -2,9 +2,10 @@ import logging
import pytest import pytest
import numpy as np import numpy as np
pygpu = pytest.importorskip("pygpu") pygpu = pytest.importorskip("pygpu") # noqa
import theano import theano
import theano.tensor as T import theano.tensor as tt
import tests.unittest_tools as utt import tests.unittest_tools as utt
from itertools import product, chain from itertools import product, chain
...@@ -12,17 +13,27 @@ from collections import OrderedDict ...@@ -12,17 +13,27 @@ from collections import OrderedDict
from six import StringIO from six import StringIO
from theano.tensor.signal.pool import pool_2d, pool_3d from theano.tensor.nnet import (
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad bn,
softmax_op,
SoftmaxGrad,
softmax,
LogSoftmax,
Softmax,
conv2d,
)
from theano.tensor.nnet.corr3d import Corr3dMM
from theano.tensor.nnet.corr import CorrMM
from theano.tensor.nnet.abstract_conv import ( from theano.tensor.nnet.abstract_conv import (
get_conv_output_shape, get_conv_output_shape,
get_conv_gradinputs_shape, get_conv_gradinputs_shape,
) )
from theano.tensor.nnet import bn from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD
from theano.gpuarray import dnn from theano.gpuarray import dnn
from theano.gpuarray.basic_ops import GpuAllocEmpty from theano.gpuarray.basic_ops import GpuAllocEmpty
from theano.gpuarray.type import gpuarray_shared_constructor, GpuArrayType from theano.gpuarray.type import gpuarray_shared_constructor, GpuArrayType
...@@ -76,7 +87,7 @@ def set_precision(floatX): ...@@ -76,7 +87,7 @@ def set_precision(floatX):
def test_dnn_conv_desc_merge(): def test_dnn_conv_desc_merge():
kern_shp = T.as_tensor_variable(np.asarray([3, 1, 2, 2]).astype("int64")) kern_shp = tt.as_tensor_variable(np.asarray([3, 1, 2, 2]).astype("int64"))
desc1 = dnn.GpuDnnConvDesc( desc1 = dnn.GpuDnnConvDesc(
border_mode="valid", subsample=(2, 2), dilation=(1, 1), conv_mode="conv" border_mode="valid", subsample=(2, 2), dilation=(1, 1), conv_mode="conv"
)(kern_shp) )(kern_shp)
...@@ -101,9 +112,9 @@ def test_dnn_conv_merge(): ...@@ -101,9 +112,9 @@ def test_dnn_conv_merge():
# This test that we merge correctly multiple dnn_conv. # This test that we merge correctly multiple dnn_conv.
img_shp = [2, 5, 6, 8] img_shp = [2, 5, 6, 8]
kern_shp = [3, 5, 5, 6] kern_shp = [3, 5, 5, 6]
img = T.tensor4("img") img = tt.tensor4("img")
kern = T.tensor4("kern") kern = tt.tensor4("kern")
out = T.tensor4("out") out = tt.tensor4("out")
desc = dnn.GpuDnnConvDesc(border_mode="valid")(kern.shape) desc = dnn.GpuDnnConvDesc(border_mode="valid")(kern.shape)
# Test forward op # Test forward op
...@@ -139,9 +150,9 @@ def test_dnn_conv_inplace(): ...@@ -139,9 +150,9 @@ def test_dnn_conv_inplace():
img_shp = [2, 5, 6, 8] img_shp = [2, 5, 6, 8]
kern_shp = [3, 5, 5, 6] kern_shp = [3, 5, 5, 6]
img = T.tensor4("img") img = tt.tensor4("img")
kern = T.tensor4("kern") kern = tt.tensor4("kern")
out = T.tensor4("out") out = tt.tensor4("out")
desc1 = dnn.GpuDnnConvDesc(border_mode="valid", conv_mode="conv")(kern.shape) desc1 = dnn.GpuDnnConvDesc(border_mode="valid", conv_mode="conv")(kern.shape)
desc2 = dnn.GpuDnnConvDesc(border_mode="valid", conv_mode="cross")(kern.shape) desc2 = dnn.GpuDnnConvDesc(border_mode="valid", conv_mode="cross")(kern.shape)
...@@ -184,9 +195,9 @@ def test_dnn_conv_inplace(): ...@@ -184,9 +195,9 @@ def test_dnn_conv_inplace():
def run_dnn_conv_invalid_precision(ndim): def run_dnn_conv_invalid_precision(ndim):
bc = (False,) * (ndim + 2) bc = (False,) * (ndim + 2)
img = T.tensor(theano.config.floatX, broadcastable=bc) img = tt.tensor(theano.config.floatX, broadcastable=bc)
kerns = T.tensor(theano.config.floatX, broadcastable=bc) kerns = tt.tensor(theano.config.floatX, broadcastable=bc)
topgrad = T.tensor(theano.config.floatX, broadcastable=bc) topgrad = tt.tensor(theano.config.floatX, broadcastable=bc)
shape = np.arange(ndim + 2) shape = np.arange(ndim + 2)
if ndim == 2: if ndim == 2:
dnn_conv_func = dnn.dnn_conv dnn_conv_func = dnn.dnn_conv
...@@ -242,8 +253,8 @@ def test_dnn_conv_invalid_precision(): ...@@ -242,8 +253,8 @@ def test_dnn_conv_invalid_precision():
def test_dnn_conv_mixed_dtype(): def test_dnn_conv_mixed_dtype():
mf = T.ftensor4() mf = tt.ftensor4()
md = T.dtensor4() md = tt.dtensor4()
def assert_types(conv): def assert_types(conv):
dt = conv.owner.inputs[0].dtype dt = conv.owner.inputs[0].dtype
...@@ -259,8 +270,8 @@ def test_dnn_conv_mixed_dtype(): ...@@ -259,8 +270,8 @@ def test_dnn_conv_mixed_dtype():
def test_dnn_conv3d_mixed_dtype(): def test_dnn_conv3d_mixed_dtype():
mf = T.ftensor5() mf = tt.ftensor5()
md = T.dtensor5() md = tt.dtensor5()
def assert_types(conv): def assert_types(conv):
dt = conv.owner.inputs[0].dtype dt = conv.owner.inputs[0].dtype
...@@ -280,7 +291,7 @@ def test_pooling(): ...@@ -280,7 +291,7 @@ def test_pooling():
modes = get_dnn_pool_modes() modes = get_dnn_pool_modes()
x = T.tensor4() x = tt.tensor4()
for mode, pad in product(modes, ((0, 0), (1, 0), (0, 1), (2, 3), (3, 2))): for mode, pad in product(modes, ((0, 0), (1, 0), (0, 1), (2, 3), (3, 2))):
if pad != (0, 0) and mode == "average_exc_pad": if pad != (0, 0) and mode == "average_exc_pad":
# Not implemented # Not implemented
...@@ -386,7 +397,7 @@ def test_pooling(): ...@@ -386,7 +397,7 @@ def test_pooling():
def run_pooling_with_tensor_vars(mode): def run_pooling_with_tensor_vars(mode):
utt.seed_rng() utt.seed_rng()
x = T.tensor4() x = tt.tensor4()
ws = theano.shared(np.array([2, 2], dtype="int32")) ws = theano.shared(np.array([2, 2], dtype="int32"))
stride = theano.shared(np.array([1, 1], dtype="int32")) stride = theano.shared(np.array([1, 1], dtype="int32"))
pad = theano.shared(np.array([0, 0], dtype="int32")) pad = theano.shared(np.array([0, 0], dtype="int32"))
...@@ -446,7 +457,7 @@ def test_pooling3d(): ...@@ -446,7 +457,7 @@ def test_pooling3d():
modes = get_dnn_pool_modes() modes = get_dnn_pool_modes()
x = T.tensor5() x = tt.tensor5()
for mode, pad in product( for mode, pad in product(
modes, modes,
((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1), (2, 3, 2), (3, 2, 2), (2, 2, 3)), ((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1), (2, 3, 2), (3, 2, 2), (2, 2, 3)),
...@@ -549,7 +560,7 @@ def test_pooling_opt(): ...@@ -549,7 +560,7 @@ def test_pooling_opt():
utt.seed_rng() utt.seed_rng()
# 2D pooling # 2D pooling
x = T.matrix() x = tt.matrix()
f = theano.function( f = theano.function(
[x], [x],
...@@ -564,7 +575,7 @@ def test_pooling_opt(): ...@@ -564,7 +575,7 @@ def test_pooling_opt():
# gradient of 2D pooling # gradient of 2D pooling
f = theano.function( f = theano.function(
[x], [x],
T.grad( tt.grad(
pool_2d(x, ws=(2, 2), mode="average_inc_pad", ignore_border=True).sum(), x pool_2d(x, ws=(2, 2), mode="average_inc_pad", ignore_border=True).sum(), x
), ),
mode=mode_with_gpu.including("cudnn"), mode=mode_with_gpu.including("cudnn"),
...@@ -586,7 +597,7 @@ def test_pooling_opt(): ...@@ -586,7 +597,7 @@ def test_pooling_opt():
f(data) f(data)
# 3D pooling # 3D pooling
x = T.tensor3() x = tt.tensor3()
f = theano.function( f = theano.function(
[x], [x],
...@@ -601,7 +612,7 @@ def test_pooling_opt(): ...@@ -601,7 +612,7 @@ def test_pooling_opt():
# gradient of 3D pooling # gradient of 3D pooling
f = theano.function( f = theano.function(
[x], [x],
T.grad( tt.grad(
pool_3d(x, ws=(2, 2, 2), mode="average_inc_pad", ignore_border=True).sum(), pool_3d(x, ws=(2, 2, 2), mode="average_inc_pad", ignore_border=True).sum(),
x, x,
), ),
...@@ -632,7 +643,7 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -632,7 +643,7 @@ def test_pooling_opt_arbitrary_dimensions():
for mode in modes: for mode in modes:
out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws) out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws)
out_pool_grad = T.grad(T.sum(out_pool), wrt=input) out_pool_grad = tt.grad(tt.sum(out_pool), wrt=input)
out = [out_pool, out_pool_grad] out = [out_pool, out_pool_grad]
# run on GPU # run on GPU
...@@ -679,14 +690,14 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -679,14 +690,14 @@ def test_pooling_opt_arbitrary_dimensions():
def test_pooling_empty_batch(): def test_pooling_empty_batch():
img_shp = (0, 5, 6, 8) img_shp = (0, 5, 6, 8)
img = T.ftensor4("img") img = tt.ftensor4("img")
o = dnn.dnn_pool(img, (2, 2), (2, 2)) o = dnn.dnn_pool(img, (2, 2), (2, 2))
f = theano.function([img], o, mode=mode_with_gpu) f = theano.function([img], o, mode=mode_with_gpu)
d = f(np.random.rand(*img_shp).astype("float32")) d = f(np.random.rand(*img_shp).astype("float32"))
assert d.shape == (0, 5, 3, 4) assert d.shape == (0, 5, 3, 4)
g = T.grad(T.sum(o), wrt=img) g = tt.grad(tt.sum(o), wrt=img)
f = theano.function([img], g, mode=mode_with_gpu) f = theano.function([img], g, mode=mode_with_gpu)
d = f(np.random.rand(*img_shp).astype("float32")) d = f(np.random.rand(*img_shp).astype("float32"))
# Not sure what to assert, it should just pass, that's all. # Not sure what to assert, it should just pass, that's all.
...@@ -696,7 +707,7 @@ def test_pooling_empty_batch(): ...@@ -696,7 +707,7 @@ def test_pooling_empty_batch():
def test_dnn_tag(): def test_dnn_tag():
# Test that if cudnn isn't avail we crash and that if it is avail, we use it. # Test that if cudnn isn't avail we crash and that if it is avail, we use it.
x = T.tensor4() x = tt.tensor4()
old = theano.config.on_opt_error old = theano.config.on_opt_error
theano.config.on_opt_error = "raise" theano.config.on_opt_error = "raise"
...@@ -737,7 +748,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -737,7 +748,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
super().setup_method() super().setup_method()
def test_softmax(self): def test_softmax(self):
t = T.tensor4("t") t = tt.tensor4("t")
rand_tensor = np.asarray(np.random.rand(5, 4, 3, 2), dtype=theano.config.floatX) rand_tensor = np.asarray(np.random.rand(5, 4, 3, 2), dtype=theano.config.floatX)
self._compile_and_check( self._compile_and_check(
[t], [t],
...@@ -748,7 +759,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -748,7 +759,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
self._compile_and_check( self._compile_and_check(
[t], [t],
[T.grad(dnn.GpuDnnSoftmax("accurate", "channel")(t).mean(), t)], [tt.grad(dnn.GpuDnnSoftmax("accurate", "channel")(t).mean(), t)],
[rand_tensor], [rand_tensor],
dnn.GpuDnnSoftmaxGrad, dnn.GpuDnnSoftmaxGrad,
) )
...@@ -815,9 +826,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -815,9 +826,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
dilations += [(2, 2)] dilations += [(2, 2)]
self._test_conv( self._test_conv(
T.tensor4("img"), tt.tensor4("img"),
T.tensor4("kerns"), tt.tensor4("kerns"),
T.tensor4("out"), tt.tensor4("out"),
np.random.rand(7, 2, 12, 16), np.random.rand(7, 2, 12, 16),
np.random.rand(8, 2, 4, 3), np.random.rand(8, 2, 4, 3),
border_mode, border_mode,
...@@ -834,9 +845,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -834,9 +845,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
dilations = [(1, 1, 1), (2, 2, 2)] if dnn.version() >= 6000 else [(1, 1, 1)] dilations = [(1, 1, 1), (2, 2, 2)] if dnn.version() >= 6000 else [(1, 1, 1)]
self._test_conv( self._test_conv(
T.tensor5("img"), tt.tensor5("img"),
T.tensor5("kerns"), tt.tensor5("kerns"),
T.tensor5("out"), tt.tensor5("out"),
np.random.rand(10, 2, 15, 16, 17), np.random.rand(10, 2, 15, 16, 17),
np.random.rand(8, 2, 4, 3, 1), np.random.rand(8, 2, 4, 3, 1),
border_mode, border_mode,
...@@ -901,9 +912,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -901,9 +912,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
dilations = [(1, 1), (2, 2)] if dnn.version() >= 6000 else [(1, 1)] dilations = [(1, 1), (2, 2)] if dnn.version() >= 6000 else [(1, 1)]
self._test_conv_gradw( self._test_conv_gradw(
T.tensor4("img"), tt.tensor4("img"),
T.tensor4("topgrad"), tt.tensor4("topgrad"),
T.tensor4("kerns"), tt.tensor4("kerns"),
(5, 2, 6, 13), (5, 2, 6, 13),
(1, 2, 3, 7), (1, 2, 3, 7),
border_mode, border_mode,
...@@ -913,9 +924,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -913,9 +924,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
) )
def test_conv_gradi(self): def test_conv_gradi(self):
img = T.tensor4("img") img = tt.tensor4("img")
kerns = T.tensor4("kerns") kerns = tt.tensor4("kerns")
out = T.tensor4("out") out = tt.tensor4("out")
kern_vals = np.asarray(np.random.rand(13, 4, 5, 6), dtype=theano.config.floatX) kern_vals = np.asarray(np.random.rand(13, 4, 5, 6), dtype=theano.config.floatX)
out_vals = np.asarray(np.random.rand(3, 13, 9, 11), dtype=theano.config.floatX) out_vals = np.asarray(np.random.rand(3, 13, 9, 11), dtype=theano.config.floatX)
...@@ -948,7 +959,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -948,7 +959,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
) )
def test_pool(self): def test_pool(self):
img = T.tensor4("img") img = tt.tensor4("img")
img_val = np.asarray(np.random.rand(2, 3, 4, 5), dtype=theano.config.floatX) img_val = np.asarray(np.random.rand(2, 3, 4, 5), dtype=theano.config.floatX)
modes = get_dnn_pool_modes() modes = get_dnn_pool_modes()
...@@ -964,7 +975,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -964,7 +975,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
) )
def test_pool_3d(self): def test_pool_3d(self):
img = T.tensor5("img") img = tt.tensor5("img")
img_val = np.asarray(np.random.rand(2, 3, 4, 5, 6), dtype=theano.config.floatX) img_val = np.asarray(np.random.rand(2, 3, 4, 5, 6), dtype=theano.config.floatX)
modes = get_dnn_pool_modes() modes = get_dnn_pool_modes()
...@@ -980,9 +991,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -980,9 +991,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
) )
def test_pool_grad(self): def test_pool_grad(self):
img = T.tensor4("img") img = tt.tensor4("img")
img_grad = T.tensor4("img_grad") img_grad = tt.tensor4("img_grad")
out = T.tensor4("out") out = tt.tensor4("out")
img_val = np.asarray(np.random.rand(2, 3, 4, 5), dtype=theano.config.floatX) img_val = np.asarray(np.random.rand(2, 3, 4, 5), dtype=theano.config.floatX)
img_grad_val = np.asarray( img_grad_val = np.asarray(
np.random.rand(2, 3, 4, 5), dtype=theano.config.floatX np.random.rand(2, 3, 4, 5), dtype=theano.config.floatX
...@@ -1006,9 +1017,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -1006,9 +1017,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
) )
def test_pool_3d_grad(self): def test_pool_3d_grad(self):
img = T.tensor5("img") img = tt.tensor5("img")
img_grad = T.tensor5("img_grad") img_grad = tt.tensor5("img_grad")
out = T.tensor5("out") out = tt.tensor5("out")
img_val = np.asarray(np.random.rand(2, 3, 4, 5, 6), dtype=theano.config.floatX) img_val = np.asarray(np.random.rand(2, 3, 4, 5, 6), dtype=theano.config.floatX)
img_grad_val = np.asarray( img_grad_val = np.asarray(
np.random.rand(2, 3, 4, 5, 6), dtype=theano.config.floatX np.random.rand(2, 3, 4, 5, 6), dtype=theano.config.floatX
...@@ -1034,8 +1045,8 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -1034,8 +1045,8 @@ class TestDnnInferShapes(utt.InferShapeTester):
# this has been a problem in the past # this has been a problem in the past
def test_dnn_conv_border_mode(): def test_dnn_conv_border_mode():
img = T.tensor4() img = tt.tensor4()
kern = T.tensor4() kern = tt.tensor4()
dnn.dnn_conv(img, kern, border_mode=1) dnn.dnn_conv(img, kern, border_mode=1)
dnn.dnn_conv(img, kern, border_mode=(2, 3)) dnn.dnn_conv(img, kern, border_mode=(2, 3))
...@@ -1047,9 +1058,9 @@ def test_dnn_conv_border_mode(): ...@@ -1047,9 +1058,9 @@ def test_dnn_conv_border_mode():
def test_dnn_conv_alpha_output_merge(): def test_dnn_conv_alpha_output_merge():
utt.seed_rng() utt.seed_rng()
img = T.tensor4() img = tt.tensor4()
kern = T.tensor4() kern = tt.tensor4()
out = T.tensor4() out = tt.tensor4()
b = 1 b = 1
c = 4 c = 4
...@@ -1313,7 +1324,7 @@ def test_conv3d_fwd(): ...@@ -1313,7 +1324,7 @@ def test_conv3d_fwd():
flipped_filters = filters flipped_filters = filters
# Compile a theano function for the reference implementation # Compile a theano function for the reference implementation
conv_ref = theano.tensor.nnet.corr3d.Corr3dMM( conv_ref = Corr3dMM(
border_mode=border_mode, border_mode=border_mode,
subsample=subsample, subsample=subsample,
filter_dilation=dilation, filter_dilation=dilation,
...@@ -1365,7 +1376,7 @@ def test_conv3d_bwd(): ...@@ -1365,7 +1376,7 @@ def test_conv3d_bwd():
conv_mode=conv_mode, conv_mode=conv_mode,
) )
grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters]) grad_i, grad_w = tt.grad(conv.sum(), [inputs, filters])
f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu) f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)
...@@ -1377,12 +1388,12 @@ def test_conv3d_bwd(): ...@@ -1377,12 +1388,12 @@ def test_conv3d_bwd():
flipped_filters = filters flipped_filters = filters
# Compile a theano function for the reference implementation # Compile a theano function for the reference implementation
conv_ref = theano.tensor.nnet.corr3d.Corr3dMM( conv_ref = Corr3dMM(
border_mode=border_mode, border_mode=border_mode,
subsample=subsample, subsample=subsample,
filter_dilation=dilation, filter_dilation=dilation,
)(ref_cast(inputs), flipped_filters) )(ref_cast(inputs), flipped_filters)
(grad_i_ref, grad_w_ref) = theano.tensor.grad(conv_ref.sum(), [inputs, filters]) (grad_i_ref, grad_w_ref) = tt.grad(conv_ref.sum(), [inputs, filters])
f_ref = theano.function([], [grad_i_ref, grad_w_ref], mode="FAST_RUN") f_ref = theano.function([], [grad_i_ref, grad_w_ref], mode="FAST_RUN")
# Compare the results of the two implementations # Compare the results of the two implementations
...@@ -1418,22 +1429,22 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1418,22 +1429,22 @@ class TestSoftMax(test_nnet.TestSoftMax):
data = np.arange(np.product(dims), dtype=theano.config.floatX).reshape(dims) data = np.arange(np.product(dims), dtype=theano.config.floatX).reshape(dims)
# Verify the forward op # Verify the forward op
x_gpu = T.tensor4("x_gpu") x_gpu = tt.tensor4("x_gpu")
f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")(x_gpu) f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")(x_gpu)
f_gpu = theano.function([x_gpu], f_gpu, mode=self.mode) f_gpu = theano.function([x_gpu], f_gpu, mode=self.mode)
assert f_gpu(data).shape == dims assert f_gpu(data).shape == dims
# Verify the gradient op # Verify the gradient op
dy_gpu = T.tensor4("dy_gpu") dy_gpu = tt.tensor4("dy_gpu")
sm_gpu = T.tensor4("sm_gpu") sm_gpu = tt.tensor4("sm_gpu")
f_grad_gpu = dnn.GpuDnnSoftmaxGrad("accurate", "channel")(dy_gpu, sm_gpu) f_grad_gpu = dnn.GpuDnnSoftmaxGrad("accurate", "channel")(dy_gpu, sm_gpu)
f_grad_gpu = theano.function([dy_gpu, sm_gpu], f_grad_gpu, mode=self.mode) f_grad_gpu = theano.function([dy_gpu, sm_gpu], f_grad_gpu, mode=self.mode)
assert f_grad_gpu(data, data).shape == dims assert f_grad_gpu(data, data).shape == dims
def test_softmax_f16(self): def test_softmax_f16(self):
x = T.matrix("x", "float16") x = tt.matrix("x", "float16")
x_gpu = T.tensor4("x_gpu", "float16") x_gpu = tt.tensor4("x_gpu", "float16")
f_z = T.nnet.softmax_op f_z = softmax_op
f_gpu = dnn.GpuDnnSoftmax("accurate", "channel") f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
...@@ -1455,15 +1466,15 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1455,15 +1466,15 @@ class TestSoftMax(test_nnet.TestSoftMax):
gout = np.asarray(f_gpu(gdata))[:, :, 0, 0] gout = np.asarray(f_gpu(gdata))[:, :, 0, 0]
utt.assert_allclose(out, gout) utt.assert_allclose(out, gout)
x = T.matrix("x") x = tt.matrix("x")
x_gpu = T.tensor4("x_gpu") x_gpu = tt.tensor4("x_gpu")
f_z = T.nnet.softmax_op f_z = softmax_op
f_gpu = dnn.GpuDnnSoftmax("accurate", "channel") f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")
# Verify the grad operation # Verify the grad operation
dims = (2, 3, 4, 5) dims = (2, 3, 4, 5)
gdata = np.arange(np.product(dims), dtype=theano.config.floatX).reshape(dims) gdata = np.arange(np.product(dims), dtype=theano.config.floatX).reshape(dims)
T.verify_grad(f_gpu, [gdata], rng=np.random, mode=mode_with_gpu) tt.verify_grad(f_gpu, [gdata], rng=np.random, mode=mode_with_gpu)
# Verify that the CPU and GPU implementations return the same results # Verify that the CPU and GPU implementations return the same results
# up to a tolerance. # up to a tolerance.
...@@ -1474,65 +1485,34 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1474,65 +1485,34 @@ class TestSoftMax(test_nnet.TestSoftMax):
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# optimization is applied when cudnn is required # optimization is applied when cudnn is required
y = T.vector("y") y = tt.vector("y")
f = theano.function( f = theano.function([y], tt.grad(softmax(y).mean(), y), mode=mode_with_gpu)
[y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_with_gpu
)
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
val = np.random.rand(5).astype(theano.config.floatX) val = np.random.rand(5).astype(theano.config.floatX)
out_dnn = f(val) out_dnn = f(val)
assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1 assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1
assert ( assert len([i for i in sorted_f if isinstance(i.op, SoftmaxGrad)]) == 0
len(
[
i
for i in sorted_f
if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
]
)
== 0
)
# Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
# optimization is not applied when cudnn is excluded or not # optimization is not applied when cudnn is excluded or not
# available # available
mode_wo_cudnn = mode_with_gpu.excluding("cudnn") mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
y = T.vector("y") y = tt.vector("y")
f = theano.function( f = theano.function([y], tt.grad(softmax(y).mean(), y), mode=mode_wo_cudnn)
[y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_wo_cudnn
)
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
out_cpu = f(val) out_cpu = f(val)
utt.assert_allclose(out_dnn, out_cpu) utt.assert_allclose(out_dnn, out_cpu)
assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 0 assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 0
assert ( assert len([i for i in sorted_f if isinstance(i.op, SoftmaxGrad)]) == 1
len(
[
i
for i in sorted_f
if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
]
)
== 1
)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
# crash with manual graph # crash with manual graph
y = T.vector("y") y = tt.vector("y")
o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2) o = SoftmaxGrad()(y, y * 2)
f = theano.function([y], o, mode=mode_with_gpu) f = theano.function([y], o, mode=mode_with_gpu)
sorted_f = f.maker.fgraph.toposort() sorted_f = f.maker.fgraph.toposort()
assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1 assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1
assert ( assert len([i for i in sorted_f if isinstance(i.op, SoftmaxGrad)]) == 0
len(
[
i
for i in sorted_f
if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
]
)
== 0
)
@pytest.mark.skipif( @pytest.mark.skipif(
dnn.version(raises=False) < 3000, reason="Log-softmax is only in cudnn v3+" dnn.version(raises=False) < 3000, reason="Log-softmax is only in cudnn v3+"
...@@ -1540,9 +1520,9 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1540,9 +1520,9 @@ class TestSoftMax(test_nnet.TestSoftMax):
def test_log_softmax(self): def test_log_softmax(self):
# This is a test for an optimization that depends on cuDNN v3 or # This is a test for an optimization that depends on cuDNN v3 or
# more recent. Don't test if the cuDNN version is too old. # more recent. Don't test if the cuDNN version is too old.
x = T.tensor4() x = tt.tensor4()
softmax_out = dnn.GpuDnnSoftmax("accurate", "channel")(x) softmax_out = dnn.GpuDnnSoftmax("accurate", "channel")(x)
log_out = T.log(T.as_tensor_variable(softmax_out)) log_out = tt.log(tt.as_tensor_variable(softmax_out))
f = theano.function([x], log_out, mode=mode_with_gpu) f = theano.function([x], log_out, mode=mode_with_gpu)
...@@ -1585,11 +1565,11 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1585,11 +1565,11 @@ class TestSoftMax(test_nnet.TestSoftMax):
# Compile a reference function, on the CPU, to be used to validate the # Compile a reference function, on the CPU, to be used to validate the
# results of the other function. # results of the other function.
x = T.matrix() x = tt.matrix()
f_ref = theano.function([x], T.nnet.LogSoftmax()(x)) f_ref = theano.function([x], LogSoftmax()(x))
# Build the first graph and ensure that the optimization is applied # Build the first graph and ensure that the optimization is applied
log_softmax_out = T.nnet.LogSoftmax()(x) log_softmax_out = LogSoftmax()(x)
f = theano.function([x], log_softmax_out, mode=mode_with_gpu) f = theano.function([x], log_softmax_out, mode=mode_with_gpu)
dnn_softmax_nodes = [ dnn_softmax_nodes = [
...@@ -1603,7 +1583,7 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1603,7 +1583,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
utt.assert_allclose(f(inp), f_ref(inp)) utt.assert_allclose(f(inp), f_ref(inp))
# Build the first graph and ensure that the optimization is applied # Build the first graph and ensure that the optimization is applied
log_softmax_out = T.log(T.nnet.Softmax()(x)) log_softmax_out = tt.log(Softmax()(x))
f = theano.function([x], log_softmax_out, mode=mode_with_gpu) f = theano.function([x], log_softmax_out, mode=mode_with_gpu)
dnn_softmax_nodes = [ dnn_softmax_nodes = [
...@@ -1618,7 +1598,7 @@ class TestSoftMax(test_nnet.TestSoftMax): ...@@ -1618,7 +1598,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
def dnn_reduction(nd, idtype, acc_dtype, odtype): def dnn_reduction(nd, idtype, acc_dtype, odtype):
inp = T.TensorType(idtype, (False,) * nd)() inp = tt.TensorType(idtype, (False,) * nd)()
res = inp.sum(acc_dtype=acc_dtype, dtype=odtype) res = inp.sum(acc_dtype=acc_dtype, dtype=odtype)
f = theano.function([inp], res, mode=mode_with_gpu) f = theano.function([inp], res, mode=mode_with_gpu)
assert any( assert any(
...@@ -1641,7 +1621,7 @@ def test_dnn_reduction_opt(): ...@@ -1641,7 +1621,7 @@ def test_dnn_reduction_opt():
@pytest.mark.skipif(dnn.version(raises=False) < 6000, reason=dnn.dnn_available.msg) @pytest.mark.skipif(dnn.version(raises=False) < 6000, reason=dnn.dnn_available.msg)
def test_dnn_reduction_sum_squares(): def test_dnn_reduction_sum_squares():
M = T.matrix() M = tt.matrix()
for axis in (None, 0, 1): for axis in (None, 0, 1):
out = (M ** 2).sum(axis=axis) out = (M ** 2).sum(axis=axis)
f = theano.function([M], out, mode=mode_with_gpu) f = theano.function([M], out, mode=mode_with_gpu)
...@@ -1655,7 +1635,7 @@ def test_dnn_reduction_sum_squares(): ...@@ -1655,7 +1635,7 @@ def test_dnn_reduction_sum_squares():
@pytest.mark.skipif(dnn.version(raises=False) < 6000, reason=dnn.dnn_available.msg) @pytest.mark.skipif(dnn.version(raises=False) < 6000, reason=dnn.dnn_available.msg)
def test_dnn_reduction_sum_abs(): def test_dnn_reduction_sum_abs():
M = T.matrix() M = tt.matrix()
for axis in (None, 0, 1): for axis in (None, 0, 1):
out = abs(M).sum(axis=axis) out = abs(M).sum(axis=axis)
f = theano.function([M], out, mode=mode_with_gpu) f = theano.function([M], out, mode=mode_with_gpu)
...@@ -1669,7 +1649,7 @@ def test_dnn_reduction_sum_abs(): ...@@ -1669,7 +1649,7 @@ def test_dnn_reduction_sum_abs():
@pytest.mark.skipif(dnn.version(raises=False) < 6000, reason=dnn.dnn_available.msg) @pytest.mark.skipif(dnn.version(raises=False) < 6000, reason=dnn.dnn_available.msg)
def test_dnn_reduction_absmax(): def test_dnn_reduction_absmax():
M = T.matrix() M = tt.matrix()
for axis in (None, 0, 1): for axis in (None, 0, 1):
out = abs(M).max(axis=axis) out = abs(M).max(axis=axis)
f = theano.function([M], out, mode=mode_with_gpu) f = theano.function([M], out, mode=mode_with_gpu)
...@@ -1692,9 +1672,7 @@ def test_dnn_reduction_axis_size_one(): ...@@ -1692,9 +1672,7 @@ def test_dnn_reduction_axis_size_one():
[(4, 1, 6, 1), (1, 3)], [(4, 1, 6, 1), (1, 3)],
]: ]:
x = theano.tensor.TensorType( x = tt.TensorType(dtype=dtype, broadcastable=[False] * len(shape))()
dtype=dtype, broadcastable=[False] * len(shape)
)()
sum = x.sum(axis=axis) sum = x.sum(axis=axis)
sum_squares = (x ** 2).sum(axis=axis) sum_squares = (x ** 2).sum(axis=axis)
sum_abs = abs(x).sum(axis=axis) sum_abs = abs(x).sum(axis=axis)
...@@ -1779,9 +1757,9 @@ def test_dnn_reduction_error(): ...@@ -1779,9 +1757,9 @@ def test_dnn_reduction_error():
slow_output = np.sum(slow_output.transpose(), axis=1) slow_output = np.sum(slow_output.transpose(), axis=1)
vecT = T.vector(dtype=theano.config.floatX) vecT = tt.vector(dtype=theano.config.floatX)
outputT = T.alloc(2.0 * vecT, 5, vecT.shape[0]) outputT = tt.alloc(2.0 * vecT, 5, vecT.shape[0])
outputSummedT = T.sum(T.transpose(outputT), axis=1) outputSummedT = tt.sum(tt.transpose(outputT), axis=1)
f3 = theano.function(inputs=[vecT], outputs=outputSummedT) f3 = theano.function(inputs=[vecT], outputs=outputSummedT)
output = f3(vec) output = f3(vec)
...@@ -1789,8 +1767,8 @@ def test_dnn_reduction_error(): ...@@ -1789,8 +1767,8 @@ def test_dnn_reduction_error():
def dnn_maxargmax(nd, idtype, axis): def dnn_maxargmax(nd, idtype, axis):
inp = T.TensorType(idtype, (False,) * nd)() inp = tt.TensorType(idtype, (False,) * nd)()
res = T.max_and_argmax(inp, axis=axis) res = tt.max_and_argmax(inp, axis=axis)
f = theano.function([inp], res, mode=mode_with_gpu) f = theano.function([inp], res, mode=mode_with_gpu)
assert any( assert any(
isinstance(n.op, dnn.GpuDnnReduction) for n in f.maker.fgraph.apply_nodes isinstance(n.op, dnn.GpuDnnReduction) for n in f.maker.fgraph.apply_nodes
...@@ -1819,7 +1797,14 @@ def test_dnn_batchnorm_train(): ...@@ -1819,7 +1797,14 @@ def test_dnn_batchnorm_train():
utt.seed_rng() utt.seed_rng()
for mode in ("per-activation", "spatial"): for mode in ("per-activation", "spatial"):
for vartype in (T.tensor6, T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector): for vartype in (
tt.tensor6,
tt.tensor5,
tt.tensor4,
tt.tensor3,
tt.matrix,
tt.vector,
):
x, scale, bias, running_mean, running_var = ( x, scale, bias, running_mean, running_var = (
vartype(n) vartype(n)
for n in ("x", "scale", "bias", "running_mean", "running_var") for n in ("x", "scale", "bias", "running_mean", "running_var")
...@@ -1869,10 +1854,10 @@ def test_dnn_batchnorm_train(): ...@@ -1869,10 +1854,10 @@ def test_dnn_batchnorm_train():
axes = (0,) + tuple(range(2, ndim)) axes = (0,) + tuple(range(2, ndim))
x_mean_ref = x.mean(axis=axes, keepdims=True) x_mean_ref = x.mean(axis=axes, keepdims=True)
x_var_ref = x.var(axis=axes, keepdims=True) x_var_ref = x.var(axis=axes, keepdims=True)
x_invstd_ref = T.inv(T.sqrt(x_var_ref + eps)) x_invstd_ref = tt.inv(tt.sqrt(x_var_ref + eps))
scale_ref = T.addbroadcast(scale, *axes) scale_ref = tt.addbroadcast(scale, *axes)
bias_ref = T.addbroadcast(bias, *axes) bias_ref = tt.addbroadcast(bias, *axes)
m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX) m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
out_ref = (x - x_mean_ref) * (scale_ref * x_invstd_ref) + bias_ref out_ref = (x - x_mean_ref) * (scale_ref * x_invstd_ref) + bias_ref
out_running_mean_ref = ( out_running_mean_ref = (
running_mean * (1 - running_average_factor) running_mean * (1 - running_average_factor)
...@@ -1884,12 +1869,12 @@ def test_dnn_batchnorm_train(): ...@@ -1884,12 +1869,12 @@ def test_dnn_batchnorm_train():
) )
# backward pass # backward pass
dy = vartype("dy") dy = vartype("dy")
grads_gpu = T.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy}) grads_gpu = tt.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy})
grads_abstract = T.grad( grads_abstract = tt.grad(
None, wrt=[x, scale, bias], known_grads={out_abstract: dy} None, wrt=[x, scale, bias], known_grads={out_abstract: dy}
) )
# reference backward pass # reference backward pass
grads_ref = T.grad(None, wrt=[x, scale, bias], known_grads={out_ref: dy}) grads_ref = tt.grad(None, wrt=[x, scale, bias], known_grads={out_ref: dy})
# compile # compile
f_gpu = theano.function( f_gpu = theano.function(
[x, scale, bias, running_mean, running_var, dy], [x, scale, bias, running_mean, running_var, dy],
...@@ -2011,10 +1996,10 @@ def test_dnn_batchnorm_train_without_running_averages(): ...@@ -2011,10 +1996,10 @@ def test_dnn_batchnorm_train_without_running_averages():
utt.seed_rng() utt.seed_rng()
x, scale, bias, dy = ( x, scale, bias, dy = (
T.tensor4("x"), tt.tensor4("x"),
T.tensor4("scale"), tt.tensor4("scale"),
T.tensor4("bias"), tt.tensor4("bias"),
T.tensor4("dy"), tt.tensor4("dy"),
) )
data_shape = (5, 10, 30, 25) data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25) param_shape = (1, 10, 30, 25)
...@@ -2027,8 +2012,8 @@ def test_dnn_batchnorm_train_without_running_averages(): ...@@ -2027,8 +2012,8 @@ def test_dnn_batchnorm_train_without_running_averages():
x, scale, bias, "per-activation" x, scale, bias, "per-activation"
) )
# backward pass # backward pass
grads_gpu = T.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy}) grads_gpu = tt.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy})
grads_abstract = T.grad(None, wrt=[x, scale, bias], known_grads={out_abstract: dy}) grads_abstract = tt.grad(None, wrt=[x, scale, bias], known_grads={out_abstract: dy})
# compile # compile
f_gpu = theano.function( f_gpu = theano.function(
[x, scale, bias, dy], [x, scale, bias, dy],
...@@ -2081,10 +2066,10 @@ def test_without_dnn_batchnorm_train_without_running_averages(): ...@@ -2081,10 +2066,10 @@ def test_without_dnn_batchnorm_train_without_running_averages():
utt.seed_rng() utt.seed_rng()
x, scale, bias, dy = ( x, scale, bias, dy = (
T.tensor4("x"), tt.tensor4("x"),
T.tensor4("scale"), tt.tensor4("scale"),
T.tensor4("bias"), tt.tensor4("bias"),
T.tensor4("dy"), tt.tensor4("dy"),
) )
data_shape = (5, 10, 30, 25) data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25) param_shape = (1, 10, 30, 25)
...@@ -2094,7 +2079,7 @@ def test_without_dnn_batchnorm_train_without_running_averages(): ...@@ -2094,7 +2079,7 @@ def test_without_dnn_batchnorm_train_without_running_averages():
x, scale, bias, "per-activation" x, scale, bias, "per-activation"
) )
# backward pass # backward pass
grads_abstract = T.grad(None, wrt=[x, scale, bias], known_grads={out_abstract: dy}) grads_abstract = tt.grad(None, wrt=[x, scale, bias], known_grads={out_abstract: dy})
# compile # compile
f_abstract = theano.function( f_abstract = theano.function(
[x, scale, bias, dy], [x, scale, bias, dy],
...@@ -2143,7 +2128,7 @@ def test_dnn_batchnorm_train_inplace(): ...@@ -2143,7 +2128,7 @@ def test_dnn_batchnorm_train_inplace():
# test inplace_running_mean and inplace_running_var # test inplace_running_mean and inplace_running_var
utt.seed_rng() utt.seed_rng()
x, scale, bias = T.tensor4("x"), T.tensor4("scale"), T.tensor4("bias") x, scale, bias = tt.tensor4("x"), tt.tensor4("scale"), tt.tensor4("bias")
data_shape = (5, 10, 30, 25) data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25) param_shape = (1, 10, 30, 25)
running_mean = gpuarray_shared_constructor( running_mean = gpuarray_shared_constructor(
...@@ -2199,7 +2184,14 @@ def test_batchnorm_inference(): ...@@ -2199,7 +2184,14 @@ def test_batchnorm_inference():
utt.seed_rng() utt.seed_rng()
for mode in ("per-activation", "spatial"): for mode in ("per-activation", "spatial"):
for vartype in (T.tensor6, T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector): for vartype in (
tt.tensor6,
tt.tensor5,
tt.tensor4,
tt.tensor3,
tt.matrix,
tt.vector,
):
x, scale, bias, mean, var = ( x, scale, bias, mean, var = (
vartype(n) for n in ("x", "scale", "bias", "mean", "var") vartype(n) for n in ("x", "scale", "bias", "mean", "var")
) )
...@@ -2220,19 +2212,19 @@ def test_batchnorm_inference(): ...@@ -2220,19 +2212,19 @@ def test_batchnorm_inference():
elif mode == "spatial": elif mode == "spatial":
axes = (0,) + tuple(range(2, ndim)) axes = (0,) + tuple(range(2, ndim))
scale_ref, bias_ref, mean_ref, var_ref = ( scale_ref, bias_ref, mean_ref, var_ref = (
T.addbroadcast(t, *axes) for t in (scale, bias, mean, var) tt.addbroadcast(t, *axes) for t in (scale, bias, mean, var)
) )
out_ref = (x - mean_ref) * (scale_ref / T.sqrt(var_ref + eps)) + bias_ref out_ref = (x - mean_ref) * (scale_ref / tt.sqrt(var_ref + eps)) + bias_ref
# backward pass # backward pass
dy = vartype("dy") dy = vartype("dy")
grads_gpu = T.grad( grads_gpu = tt.grad(
None, wrt=[x, scale, bias, mean, var], known_grads={out_gpu: dy} None, wrt=[x, scale, bias, mean, var], known_grads={out_gpu: dy}
) )
grads_abstract = T.grad( grads_abstract = tt.grad(
None, wrt=[x, scale, bias, mean, var], known_grads={out_abstract: dy} None, wrt=[x, scale, bias, mean, var], known_grads={out_abstract: dy}
) )
# reference backward pass # reference backward pass
grads_ref = T.grad( grads_ref = tt.grad(
None, wrt=[x, scale, bias, mean, var], known_grads={out_ref: dy} None, wrt=[x, scale, bias, mean, var], known_grads={out_ref: dy}
) )
# compile # compile
...@@ -2318,7 +2310,7 @@ def test_batchnorm_inference_inplace(): ...@@ -2318,7 +2310,7 @@ def test_batchnorm_inference_inplace():
utt.seed_rng() utt.seed_rng()
x, scale, bias, mean, var = ( x, scale, bias, mean, var = (
T.tensor4(n) for n in ("x", "scale", "bias", "mean", "var") tt.tensor4(n) for n in ("x", "scale", "bias", "mean", "var")
) )
data_shape = (5, 10, 30, 25) data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25) param_shape = (1, 10, 30, 25)
...@@ -2345,7 +2337,7 @@ def test_batchnorm_inference_inplace(): ...@@ -2345,7 +2337,7 @@ def test_batchnorm_inference_inplace():
def test_dnn_batchnorm_valid_and_invalid_axes(): def test_dnn_batchnorm_valid_and_invalid_axes():
for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix): for vartype in (tt.tensor5, tt.tensor4, tt.tensor3, tt.matrix):
x, scale, bias, mean, var, dy = ( x, scale, bias, mean, var, dy = (
vartype(n) for n in ("x", "scale", "bias", "mean", "var", "dy") vartype(n) for n in ("x", "scale", "bias", "mean", "var", "dy")
) )
...@@ -2363,10 +2355,10 @@ def test_dnn_batchnorm_valid_and_invalid_axes(): ...@@ -2363,10 +2355,10 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
out_test = bn.batch_normalization_test(x, scale, bias, mean, var, axes) out_test = bn.batch_normalization_test(x, scale, bias, mean, var, axes)
# backward pass # backward pass
dy = vartype("dy") dy = vartype("dy")
grads_train = T.grad( grads_train = tt.grad(
None, wrt=[x, scale, bias], known_grads={out_train: dy} None, wrt=[x, scale, bias], known_grads={out_train: dy}
) )
grads_test = T.grad( grads_test = tt.grad(
None, wrt=[x, scale, bias, mean, var], known_grads={out_test: dy} None, wrt=[x, scale, bias, mean, var], known_grads={out_test: dy}
) )
# compile # compile
...@@ -2439,9 +2431,9 @@ def test_dnn_rnn_gru(): ...@@ -2439,9 +2431,9 @@ def test_dnn_rnn_gru():
timesteps = 5 timesteps = 5
# test code # test code
X = T.tensor3("X") X = tt.tensor3("X")
Y = T.tensor3("Y") Y = tt.tensor3("Y")
h0 = T.tensor3("h0") h0 = tt.tensor3("h0")
rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, "gru") rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, "gru")
psize = rnnb.get_param_size([batch_size, input_dim]) psize = rnnb.get_param_size([batch_size, input_dim])
...@@ -2465,10 +2457,10 @@ def test_dnn_rnn_gru(): ...@@ -2465,10 +2457,10 @@ def test_dnn_rnn_gru():
def funcs(out, params, hy=None): def funcs(out, params, hy=None):
cost = 0 cost = 0
if out: if out:
cost += T.mean((Y - out) ** 2) cost += tt.mean((Y - out) ** 2)
if hy: if hy:
cost += T.mean(hy ** 2) cost += tt.mean(hy ** 2)
grad = T.grad(cost, [X, h0] + params) grad = tt.grad(cost, [X, h0] + params)
grad_fn = theano.function( grad_fn = theano.function(
[X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore" [X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore"
) )
...@@ -2477,7 +2469,7 @@ def test_dnn_rnn_gru(): ...@@ -2477,7 +2469,7 @@ def test_dnn_rnn_gru():
ref_y = last_layer.output() ref_y = last_layer.output()
# This will grab the hy from the scan implementation # This will grab the hy from the scan implementation
ref_hy = T.stack( ref_hy = tt.stack(
[model.layers[0].Y[-1], model.layers[1].Y[-1], model.layers[2].Y[-1]] [model.layers[0].Y[-1], model.layers[1].Y[-1], model.layers[2].Y[-1]]
) )
...@@ -2548,9 +2540,9 @@ def test_dnn_rnn_gru_bidi(): ...@@ -2548,9 +2540,9 @@ def test_dnn_rnn_gru_bidi():
timesteps = 5 timesteps = 5
# test code # test code
X = T.tensor3("X") X = tt.tensor3("X")
Y = T.tensor3("Y") Y = tt.tensor3("Y")
h0 = T.tensor3("h0") h0 = tt.tensor3("h0")
rnnb = dnn.RNNBlock( rnnb = dnn.RNNBlock(
theano.config.floatX, hidden_dim, depth, "gru", direction_mode="bidirectional" theano.config.floatX, hidden_dim, depth, "gru", direction_mode="bidirectional"
...@@ -2563,10 +2555,10 @@ def test_dnn_rnn_gru_bidi(): ...@@ -2563,10 +2555,10 @@ def test_dnn_rnn_gru_bidi():
def funcs(out, params, hy=None): def funcs(out, params, hy=None):
cost = 0 cost = 0
if out: if out:
cost += T.mean((Y - out) ** 2) cost += tt.mean((Y - out) ** 2)
if hy: if hy:
cost += T.mean(hy ** 2) cost += tt.mean(hy ** 2)
grad = T.grad(cost, [X, h0] + params) grad = tt.grad(cost, [X, h0] + params)
grad_fn = theano.function( grad_fn = theano.function(
[X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore" [X, Y, h0], grad, mode=mode_with_gpu, on_unused_input="ignore"
) )
...@@ -2609,10 +2601,10 @@ def test_dnn_rnn_lstm(): ...@@ -2609,10 +2601,10 @@ def test_dnn_rnn_lstm():
timesteps = 5 timesteps = 5
# test code # test code
X = T.tensor3("X") X = tt.tensor3("X")
Y = T.tensor3("Y") Y = tt.tensor3("Y")
h0 = T.tensor3("h0") h0 = tt.tensor3("h0")
c0 = T.tensor3("c0") c0 = tt.tensor3("c0")
rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, "lstm") rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, "lstm")
psize = rnnb.get_param_size([batch_size, input_dim]) psize = rnnb.get_param_size([batch_size, input_dim])
...@@ -2635,8 +2627,8 @@ def test_dnn_rnn_lstm(): ...@@ -2635,8 +2627,8 @@ def test_dnn_rnn_lstm():
def funcs(out, params): def funcs(out, params):
fn = theano.function([X, h0, c0], out, mode=mode_with_gpu) fn = theano.function([X, h0, c0], out, mode=mode_with_gpu)
cost = T.mean((Y - out) ** 2) cost = tt.mean((Y - out) ** 2)
grad = T.grad(cost, [X, h0, c0] + params) grad = tt.grad(cost, [X, h0, c0] + params)
grad_fn = theano.function([X, Y, h0, c0], grad, mode=mode_with_gpu) grad_fn = theano.function([X, Y, h0, c0], grad, mode=mode_with_gpu)
return fn, grad_fn return fn, grad_fn
...@@ -2695,10 +2687,10 @@ def test_dnn_rnn_lstm_grad_c(): ...@@ -2695,10 +2687,10 @@ def test_dnn_rnn_lstm_grad_c():
timesteps = 5 timesteps = 5
# test code # test code
X = T.tensor3("X") X = tt.tensor3("X")
CY = T.tensor3("CY") CY = tt.tensor3("CY")
h0 = T.tensor3("h0") h0 = tt.tensor3("h0")
c0 = T.tensor3("c0") c0 = tt.tensor3("c0")
rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, "lstm") rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, "lstm")
psize = rnnb.get_param_size([batch_size, input_dim]) psize = rnnb.get_param_size([batch_size, input_dim])
...@@ -2720,13 +2712,13 @@ def test_dnn_rnn_lstm_grad_c(): ...@@ -2720,13 +2712,13 @@ def test_dnn_rnn_lstm_grad_c():
p[:] = layer_params[j].get_value(borrow=True, return_internal_type=True) p[:] = layer_params[j].get_value(borrow=True, return_internal_type=True)
def funcs(out, params): def funcs(out, params):
cost = T.mean((CY - out) ** 2) cost = tt.mean((CY - out) ** 2)
grad = T.grad(cost, [X, h0, c0] + params) grad = tt.grad(cost, [X, h0, c0] + params)
grad_fn = theano.function([X, CY, h0, c0], grad, mode=mode_with_gpu) grad_fn = theano.function([X, CY, h0, c0], grad, mode=mode_with_gpu)
return grad_fn return grad_fn
_, _, cy = rnnb.apply(params_cudnn, X, h0, c0) _, _, cy = rnnb.apply(params_cudnn, X, h0, c0)
ref_cy = T.stack( ref_cy = tt.stack(
[model.layers[0].C[-1], model.layers[1].C[-1], model.layers[2].C[-1]] [model.layers[0].C[-1], model.layers[1].C[-1], model.layers[2].C[-1]]
) )
...@@ -2797,14 +2789,14 @@ def test_dnn_spatialtf(): ...@@ -2797,14 +2789,14 @@ def test_dnn_spatialtf():
def spatialtf_cpu(inp, theta, scale_height, scale_width, border_mode="nearest"): def spatialtf_cpu(inp, theta, scale_height, scale_width, border_mode="nearest"):
num_batch, num_channels, height, width = inp.shape num_batch, num_channels, height, width = inp.shape
theta = T.reshape(theta, (-1, 2, 3)) theta = tt.reshape(theta, (-1, 2, 3))
# grid of (x_t, y_t, 1), eq (1) in ref [1] # grid of (x_t, y_t, 1), eq (1) in ref [1]
out_height = T.cast(T.ceil(height * scale_height), "int64") out_height = tt.cast(tt.ceil(height * scale_height), "int64")
out_width = T.cast(T.ceil(width * scale_width), "int64") out_width = tt.cast(tt.ceil(width * scale_width), "int64")
grid = _meshgrid(out_height, out_width) grid = _meshgrid(out_height, out_width)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s) # transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
t_g = T.dot(theta, grid) t_g = tt.dot(theta, grid)
x_s = t_g[:, 0] x_s = t_g[:, 0]
y_s = t_g[:, 1] y_s = t_g[:, 1]
x_s_flat = x_s.flatten() x_s_flat = x_s.flatten()
...@@ -2816,7 +2808,7 @@ def test_dnn_spatialtf(): ...@@ -2816,7 +2808,7 @@ def test_dnn_spatialtf():
input_dim, x_s_flat, y_s_flat, out_height, out_width, border_mode input_dim, x_s_flat, y_s_flat, out_height, out_width, border_mode
) )
output = T.reshape( output = tt.reshape(
input_transformed, (num_batch, out_height, out_width, num_channels) input_transformed, (num_batch, out_height, out_width, num_channels)
) )
output = output.dimshuffle(0, 3, 1, 2) # dimshuffle to conv format output = output.dimshuffle(0, 3, 1, 2) # dimshuffle to conv format
...@@ -2825,8 +2817,8 @@ def test_dnn_spatialtf(): ...@@ -2825,8 +2817,8 @@ def test_dnn_spatialtf():
def _interpolate(im, x, y, out_height, out_width, border_mode): def _interpolate(im, x, y, out_height, out_width, border_mode):
# *_f are floats # *_f are floats
num_batch, height, width, channels = im.shape num_batch, height, width, channels = im.shape
height_f = T.cast(height, theano.config.floatX) height_f = tt.cast(height, theano.config.floatX)
width_f = T.cast(width, theano.config.floatX) width_f = tt.cast(width, theano.config.floatX)
# scale coordinates from [-1, 1] to [0, dimension - 1], where dimension # scale coordinates from [-1, 1] to [0, dimension - 1], where dimension
# can be the width or height # can be the width or height
...@@ -2835,42 +2827,42 @@ def test_dnn_spatialtf(): ...@@ -2835,42 +2827,42 @@ def test_dnn_spatialtf():
# obtain indices of the 2x2 pixel neighborhood surrounding the coordinates; # obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
# we need those in floatX for interpolation and in int64 for indexing. # we need those in floatX for interpolation and in int64 for indexing.
x0_f = T.floor(x) x0_f = tt.floor(x)
y0_f = T.floor(y) y0_f = tt.floor(y)
x1_f = x0_f + 1 x1_f = x0_f + 1
y1_f = y0_f + 1 y1_f = y0_f + 1
# for indexing, we need to take care of the border mode for outside pixels. # for indexing, we need to take care of the border mode for outside pixels.
if border_mode == "nearest": if border_mode == "nearest":
x0 = T.clip(x0_f, 0, width_f - 1) x0 = tt.clip(x0_f, 0, width_f - 1)
x1 = T.clip(x1_f, 0, width_f - 1) x1 = tt.clip(x1_f, 0, width_f - 1)
y0 = T.clip(y0_f, 0, height_f - 1) y0 = tt.clip(y0_f, 0, height_f - 1)
y1 = T.clip(y1_f, 0, height_f - 1) y1 = tt.clip(y1_f, 0, height_f - 1)
elif border_mode == "mirror": elif border_mode == "mirror":
w = 2 * (width_f - 1) w = 2 * (width_f - 1)
x0 = T.minimum(x0_f % w, -x0_f % w) x0 = tt.minimum(x0_f % w, -x0_f % w)
x1 = T.minimum(x1_f % w, -x1_f % w) x1 = tt.minimum(x1_f % w, -x1_f % w)
h = 2 * (height_f - 1) h = 2 * (height_f - 1)
y0 = T.minimum(y0_f % h, -y0_f % h) y0 = tt.minimum(y0_f % h, -y0_f % h)
y1 = T.minimum(y1_f % h, -y1_f % h) y1 = tt.minimum(y1_f % h, -y1_f % h)
elif border_mode == "wrap": elif border_mode == "wrap":
x0 = T.mod(x0_f, width_f) x0 = tt.mod(x0_f, width_f)
x1 = T.mod(x1_f, width_f) x1 = tt.mod(x1_f, width_f)
y0 = T.mod(y0_f, height_f) y0 = tt.mod(y0_f, height_f)
y1 = T.mod(y1_f, height_f) y1 = tt.mod(y1_f, height_f)
else: else:
raise ValueError( raise ValueError(
"border_mode must be one of " "'nearest', 'mirror', 'wrap'" "border_mode must be one of " "'nearest', 'mirror', 'wrap'"
) )
x0, x1, y0, y1 = (T.cast(v, "int64") for v in (x0, x1, y0, y1)) x0, x1, y0, y1 = (tt.cast(v, "int64") for v in (x0, x1, y0, y1))
# The input is [num_batch, height, width, channels]. We do the lookup in # The input is [num_batch, height, width, channels]. We do the lookup in
# the flattened input, i.e [num_batch*height*width, channels]. We need # the flattened input, i.e [num_batch*height*width, channels]. We need
# to offset all indices to match the flat version # to offset all indices to match the flat version
dim2 = width dim2 = width
dim1 = width * height dim1 = width * height
base = T.repeat( base = tt.repeat(
T.arange(num_batch, dtype="int64") * dim1, out_height * out_width tt.arange(num_batch, dtype="int64") * dim1, out_height * out_width
) )
base_y0 = base + y0 * dim2 base_y0 = base + y0 * dim2
base_y1 = base + y1 * dim2 base_y1 = base + y1 * dim2
...@@ -2891,16 +2883,16 @@ def test_dnn_spatialtf(): ...@@ -2891,16 +2883,16 @@ def test_dnn_spatialtf():
wb = ((x1_f - x) * (y - y0_f)).dimshuffle(0, "x") wb = ((x1_f - x) * (y - y0_f)).dimshuffle(0, "x")
wc = ((x - x0_f) * (y1_f - y)).dimshuffle(0, "x") wc = ((x - x0_f) * (y1_f - y)).dimshuffle(0, "x")
wd = ((x - x0_f) * (y - y0_f)).dimshuffle(0, "x") wd = ((x - x0_f) * (y - y0_f)).dimshuffle(0, "x")
output = T.sum([wa * Ia, wb * Ib, wc * Ic, wd * Id], axis=0) output = tt.sum([wa * Ia, wb * Ib, wc * Ic, wd * Id], axis=0)
return output return output
def _linspace(start, stop, num):
    """Symbolic counterpart of ``np.linspace`` with both end points included.

    ``start``, ``stop`` and ``num`` are cast to ``theano.config.floatX``; the
    result is ``arange(num) * step + start`` where ``step`` is the distance
    between two consecutive samples.
    """
    # Theano linspace. Behaves similar to np.linspace
    start = tt.cast(start, theano.config.floatX)
    stop = tt.cast(stop, theano.config.floatX)
    num = tt.cast(num, theano.config.floatX)
    # Guard the divisor: with num == 1 the plain ``num - 1`` is zero, which
    # makes ``step`` infinite and the single sample ``0 * inf + start`` NaN.
    # Clamping to 1 yields ``[start]``, matching np.linspace(start, stop, 1).
    step = (stop - start) / tt.maximum(num - 1, 1)
    return tt.arange(num, dtype=theano.config.floatX) * step + start
def _meshgrid(height, width): def _meshgrid(height, width):
# This function is the grid generator from eq. (1) in reference [1]. # This function is the grid generator from eq. (1) in reference [1].
...@@ -2913,13 +2905,17 @@ def test_dnn_spatialtf(): ...@@ -2913,13 +2905,17 @@ def test_dnn_spatialtf():
# Note: If the image size is known at layer construction time, we could # Note: If the image size is known at layer construction time, we could
# compute the meshgrid offline in numpy instead of doing it dynamically # compute the meshgrid offline in numpy instead of doing it dynamically
# in Theano. However, it hardly affected performance when we tried. # in Theano. However, it hardly affected performance when we tried.
x_t = T.dot(T.ones((height, 1)), _linspace(-1.0, 1.0, width).dimshuffle("x", 0)) x_t = tt.dot(
y_t = T.dot(_linspace(-1.0, 1.0, height).dimshuffle(0, "x"), T.ones((1, width))) tt.ones((height, 1)), _linspace(-1.0, 1.0, width).dimshuffle("x", 0)
)
y_t = tt.dot(
_linspace(-1.0, 1.0, height).dimshuffle(0, "x"), tt.ones((1, width))
)
x_t_flat = x_t.reshape((1, -1)) x_t_flat = x_t.reshape((1, -1))
y_t_flat = y_t.reshape((1, -1)) y_t_flat = y_t.reshape((1, -1))
ones = T.ones_like(x_t_flat) ones = tt.ones_like(x_t_flat)
grid = T.concatenate([x_t_flat, y_t_flat, ones], axis=0) grid = tt.concatenate([x_t_flat, y_t_flat, ones], axis=0)
return grid return grid
img_dims = (5, 3, 16, 16) img_dims = (5, 3, 16, 16)
...@@ -2933,8 +2929,8 @@ def test_dnn_spatialtf(): ...@@ -2933,8 +2929,8 @@ def test_dnn_spatialtf():
theta = np.asarray(img_dims[0] * [transform], dtype=theano.config.floatX) theta = np.asarray(img_dims[0] * [transform], dtype=theano.config.floatX)
# Create symbolic variables for inputs and transformations # Create symbolic variables for inputs and transformations
t_img = T.tensor4("img") t_img = tt.tensor4("img")
t_theta = T.tensor3("theta") t_theta = tt.tensor3("theta")
st_dnn = dnn.dnn_spatialtf( st_dnn = dnn.dnn_spatialtf(
t_img, t_theta, scale_height=scale_height, scale_width=scale_width t_img, t_theta, scale_height=scale_height, scale_width=scale_width
...@@ -2963,8 +2959,8 @@ def test_dnn_spatialtf(): ...@@ -2963,8 +2959,8 @@ def test_dnn_spatialtf():
def test_dnn_spatialtf_invalid_shapes(): def test_dnn_spatialtf_invalid_shapes():
inputs = T.tensor4("inputs") inputs = tt.tensor4("inputs")
theta = T.tensor3("theta") theta = tt.tensor3("theta")
st_dnn = dnn.dnn_spatialtf(inputs, theta) st_dnn = dnn.dnn_spatialtf(inputs, theta)
st_dnn_func = theano.function([inputs, theta], st_dnn, mode=mode_with_gpu) st_dnn_func = theano.function([inputs, theta], st_dnn, mode=mode_with_gpu)
...@@ -2994,13 +2990,13 @@ def test_dnn_spatialtf_invalid_shapes(): ...@@ -2994,13 +2990,13 @@ def test_dnn_spatialtf_invalid_shapes():
def test_dnn_spatialtf_grad(): def test_dnn_spatialtf_grad():
utt.seed_rng() utt.seed_rng()
inputs = T.tensor4("inputs") inputs = tt.tensor4("inputs")
theta = T.tensor3("theta") theta = tt.tensor3("theta")
out = dnn.dnn_spatialtf(inputs, theta, scale_height=0.25, scale_width=0.75) out = dnn.dnn_spatialtf(inputs, theta, scale_height=0.25, scale_width=0.75)
out_mean = T.mean(out) out_mean = tt.mean(out)
mean_gi = T.grad(out_mean, [inputs]) mean_gi = tt.grad(out_mean, [inputs])
mean_gt = T.grad(out_mean, [theta]) mean_gt = tt.grad(out_mean, [theta])
f_gi = theano.function([inputs, theta], mean_gi, mode=mode_with_gpu) f_gi = theano.function([inputs, theta], mean_gi, mode=mode_with_gpu)
assert any( assert any(
...@@ -3053,7 +3049,7 @@ def test_dnn_spatialtf_grad(): ...@@ -3053,7 +3049,7 @@ def test_dnn_spatialtf_grad():
class TestDnnConv2DRuntimeAlgorithms(object): class TestDnnConv2DRuntimeAlgorithms(object):
ndim = 2 ndim = 2
cpu_conv_class = theano.tensor.nnet.corr.CorrMM cpu_conv_class = CorrMM
runtime_shapes = [ runtime_shapes = [
(3, [(2, 3, 10, 9), (5, 3, 7, 7)]), (3, [(2, 3, 10, 9), (5, 3, 7, 7)]),
(1, [(1, 1, 100, 200), (1, 1, 50, 200)]), (1, [(1, 1, 100, 200), (1, 1, 50, 200)]),
...@@ -3080,8 +3076,8 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3080,8 +3076,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
_broadcastable = [False] * (2 + self.ndim) _broadcastable = [False] * (2 + self.ndim)
def run_fwd_runtime_algorithm(algo): def run_fwd_runtime_algorithm(algo):
inputs = theano.tensor.TensorType(dtype, _broadcastable)() inputs = tt.TensorType(dtype, _broadcastable)()
filters = theano.tensor.TensorType(dtype, _broadcastable)() filters = tt.TensorType(dtype, _broadcastable)()
# Scale down the input values to prevent very large absolute errors # Scale down the input values to prevent very large absolute errors
# due to float rounding # due to float rounding
lower_inputs = inputs / 10 lower_inputs = inputs / 10
...@@ -3127,8 +3123,8 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3127,8 +3123,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
def run_gradinput_runtime_algorithm(algo): def run_gradinput_runtime_algorithm(algo):
theano.config.dnn.conv.algo_bwd_data = algo theano.config.dnn.conv.algo_bwd_data = algo
inputs = theano.tensor.TensorType(dtype, _broadcastable)() inputs = tt.TensorType(dtype, _broadcastable)()
filters = theano.tensor.TensorType(dtype, _broadcastable)() filters = tt.TensorType(dtype, _broadcastable)()
conv = dnn.dnn_conv( conv = dnn.dnn_conv(
img=inputs, img=inputs,
kerns=filters, kerns=filters,
...@@ -3137,7 +3133,7 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3137,7 +3133,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
subsample=unit_shape, subsample=unit_shape,
dilation=unit_shape, dilation=unit_shape,
) )
(grad_i,) = theano.tensor.grad(conv.sum(), [inputs]) (grad_i,) = tt.grad(conv.sum(), [inputs])
f = theano.function([inputs, filters], grad_i, mode=mode_with_gpu) f = theano.function([inputs, filters], grad_i, mode=mode_with_gpu)
assert 1 == len( assert 1 == len(
[ [
...@@ -3161,7 +3157,7 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3161,7 +3157,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
conv_ref = self.cpu_conv_class(subsample=unit_shape)( conv_ref = self.cpu_conv_class(subsample=unit_shape)(
ref_cast(inputs), flipped_filters ref_cast(inputs), flipped_filters
) )
(grad_i_ref,) = theano.tensor.grad(conv_ref.sum(), [inputs]) (grad_i_ref,) = tt.grad(conv_ref.sum(), [inputs])
f_ref = theano.function([inputs, filters], grad_i_ref, mode="FAST_RUN") f_ref = theano.function([inputs, filters], grad_i_ref, mode="FAST_RUN")
runtime_shapes = self.runtime_shapes runtime_shapes = self.runtime_shapes
if algo in ("time_once", "guess_once"): if algo in ("time_once", "guess_once"):
...@@ -3185,8 +3181,8 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3185,8 +3181,8 @@ class TestDnnConv2DRuntimeAlgorithms(object):
def run_gradweight_runtime_algorithm(algo): def run_gradweight_runtime_algorithm(algo):
theano.config.dnn.conv.algo_bwd_filter = algo theano.config.dnn.conv.algo_bwd_filter = algo
inputs = theano.tensor.TensorType(dtype, _broadcastable)() inputs = tt.TensorType(dtype, _broadcastable)()
filters = theano.tensor.TensorType(dtype, _broadcastable)() filters = tt.TensorType(dtype, _broadcastable)()
conv = dnn.dnn_conv( conv = dnn.dnn_conv(
img=inputs, img=inputs,
kerns=filters, kerns=filters,
...@@ -3195,7 +3191,7 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3195,7 +3191,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
subsample=unit_shape, subsample=unit_shape,
dilation=unit_shape, dilation=unit_shape,
) )
(grad_w,) = theano.tensor.grad(conv.sum(), [filters]) (grad_w,) = tt.grad(conv.sum(), [filters])
f = theano.function([inputs, filters], grad_w, mode=mode_with_gpu) f = theano.function([inputs, filters], grad_w, mode=mode_with_gpu)
assert 1 == len( assert 1 == len(
[ [
...@@ -3219,7 +3215,7 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3219,7 +3215,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
conv_ref = self.cpu_conv_class(subsample=unit_shape)( conv_ref = self.cpu_conv_class(subsample=unit_shape)(
ref_cast(inputs), flipped_filters ref_cast(inputs), flipped_filters
) )
(grad_w_ref,) = theano.tensor.grad(conv_ref.sum(), [filters]) (grad_w_ref,) = tt.grad(conv_ref.sum(), [filters])
f_ref = theano.function([inputs, filters], grad_w_ref, mode="FAST_RUN") f_ref = theano.function([inputs, filters], grad_w_ref, mode="FAST_RUN")
runtime_shapes = self.runtime_shapes runtime_shapes = self.runtime_shapes
if algo in ("time_once", "guess_once"): if algo in ("time_once", "guess_once"):
...@@ -3239,7 +3235,7 @@ class TestDnnConv2DRuntimeAlgorithms(object): ...@@ -3239,7 +3235,7 @@ class TestDnnConv2DRuntimeAlgorithms(object):
class TestDnnConv3DRuntimeAlgorithms(TestDnnConv2DRuntimeAlgorithms): class TestDnnConv3DRuntimeAlgorithms(TestDnnConv2DRuntimeAlgorithms):
ndim = 3 ndim = 3
cpu_conv_class = theano.tensor.nnet.corr3d.Corr3dMM cpu_conv_class = Corr3dMM
runtime_shapes = [ runtime_shapes = [
(3, [(2, 3, 5, 10, 9), (5, 3, 4, 7, 7)]), (3, [(2, 3, 5, 10, 9), (5, 3, 4, 7, 7)]),
(1, [(1, 1, 5, 100, 200), (1, 1, 4, 50, 200)]), (1, [(1, 1, 5, 100, 200), (1, 1, 4, 50, 200)]),
...@@ -3293,9 +3289,9 @@ def test_conv_guess_once_with_dtypes(): ...@@ -3293,9 +3289,9 @@ def test_conv_guess_once_with_dtypes():
def test_opt_f16_prec32(): def test_opt_f16_prec32():
inputs = T.TensorType("float16", (False,) * 4)() inputs = tt.TensorType("float16", (False,) * 4)()
filters = T.TensorType("float16", (False,) * 4)() filters = tt.TensorType("float16", (False,) * 4)()
conv = T.nnet.conv2d(inputs, filters) conv = conv2d(inputs, filters)
gfilt = theano.grad(conv.sum(), filters) gfilt = theano.grad(conv.sum(), filters)
......
...@@ -3,12 +3,11 @@ import pytest ...@@ -3,12 +3,11 @@ import pytest
import numpy as np import numpy as np
import theano import theano
import theano.tensor as tt
from functools import partial from functools import partial
from itertools import product from itertools import product
from theano import tensor as T
from theano.tensor.extra_ops import CumOp from theano.tensor.extra_ops import CumOp
from theano.gpuarray.extra_ops import GpuCumOp from theano.gpuarray.extra_ops import GpuCumOp
from theano.gpuarray.type import get_context from theano.gpuarray.type import get_context
...@@ -33,13 +32,13 @@ class TestGpuCumOp(TestCumOp): ...@@ -33,13 +32,13 @@ class TestGpuCumOp(TestCumOp):
# The CPU implementation is not so accurate, which throws out DebugMode.
# Since propagating .tag.values_eq_approx to the output of every
# GpuFromHost seems overkill, we just relax the rtol for these tests.
#
# The tolerance is rebound on `theano.tensor.basic`, the module the
# pre-refactor code wrote to.  Assigning through the package alias
# (`tt.float32_rtol *= 2`) would only rebind the name re-exported by the
# `theano.tensor` package and leave the module-level value untouched.
self.old_rtol = tt.basic.float32_rtol
tt.basic.float32_rtol *= 2

def teardown_method(self):
    """Undo the float32 tolerance relaxation applied during set-up."""
    super().teardown_method()
    # Restore rtol on the same module attribute that set-up modified.
    tt.basic.float32_rtol = self.old_rtol
@pytest.mark.skipif( @pytest.mark.skipif(
theano.config.floatX != "float32", theano.config.floatX != "float32",
...@@ -48,7 +47,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -48,7 +47,7 @@ class TestGpuCumOp(TestCumOp):
@pytest.mark.parametrized("mode", ["mul", "add"]) @pytest.mark.parametrized("mode", ["mul", "add"])
def test_infer_shape(self, mode): def test_infer_shape(self, mode):
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
x = T.tensor3("x") x = tt.tensor3("x")
a = np.random.random((3, 5, 2)).astype(theano.config.floatX) a = np.random.random((3, 5, 2)).astype(theano.config.floatX)
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
...@@ -58,7 +57,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -58,7 +57,7 @@ class TestGpuCumOp(TestCumOp):
def test_Strides1D(self, mode): def test_Strides1D(self, mode):
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
np_func = dict(add=np.cumsum, mul=np.cumprod)[mode] np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
x = T.fvector("x") x = tt.fvector("x")
for axis in [0, None, -1]: for axis in [0, None, -1]:
a = np.random.random((42,)).astype("float32") a = np.random.random((42,)).astype("float32")
...@@ -89,7 +88,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -89,7 +88,7 @@ class TestGpuCumOp(TestCumOp):
def test_Strides2D(self, mode): def test_Strides2D(self, mode):
np_func = dict(add=np.cumsum, mul=np.cumprod)[mode] np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
x = T.fmatrix("x") x = tt.fmatrix("x")
for axis in [0, 1, None, -1, -2]: for axis in [0, 1, None, -1, -2]:
a = np.random.random((42, 30)).astype("float32") a = np.random.random((42, 30)).astype("float32")
...@@ -120,7 +119,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -120,7 +119,7 @@ class TestGpuCumOp(TestCumOp):
def test_Strides3D(self, mode): def test_Strides3D(self, mode):
np_func = dict(add=np.cumsum, mul=np.cumprod)[mode] np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
x = T.ftensor3("x") x = tt.ftensor3("x")
for axis in [0, 1, 2, None, -1, -2, -3]: for axis in [0, 1, 2, None, -1, -2, -3]:
a = np.random.random((42, 30, 25)).astype("float32") a = np.random.random((42, 30, 25)).astype("float32")
...@@ -153,7 +152,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -153,7 +152,7 @@ class TestGpuCumOp(TestCumOp):
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
x = T.fvector("x") x = tt.fvector("x")
f = theano.function([x], op_class(axis=0)(x), mode=self.mode) f = theano.function([x], op_class(axis=0)(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)] assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)]
...@@ -176,7 +175,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -176,7 +175,7 @@ class TestGpuCumOp(TestCumOp):
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
x = T.fmatrix("x") x = tt.fmatrix("x")
for shape_axis, axis in zip([0, 1, 0, 1, 0], [0, 1, None, -1, -2]): for shape_axis, axis in zip([0, 1, 0, 1, 0], [0, 1, None, -1, -2]):
f = theano.function([x], op_class(axis=axis)(x), mode=self.mode) f = theano.function([x], op_class(axis=axis)(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)] assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)]
...@@ -217,7 +216,7 @@ class TestGpuCumOp(TestCumOp): ...@@ -217,7 +216,7 @@ class TestGpuCumOp(TestCumOp):
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
x = T.ftensor3("x") x = tt.ftensor3("x")
for shape_axis, axis in zip([0, 1, 2, 0, 2, 1, 0], [0, 1, 2, None, -1, -2, -3]): for shape_axis, axis in zip([0, 1, 2, 0, 2, 1, 0], [0, 1, 2, None, -1, -2, -3]):
f = theano.function([x], op_class(axis=axis)(x), mode=self.mode) f = theano.function([x], op_class(axis=axis)(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)] assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)]
...@@ -267,6 +266,6 @@ class TestGpuCumOp(TestCumOp): ...@@ -267,6 +266,6 @@ class TestGpuCumOp(TestCumOp):
def test_GpuCumOp4D(self, mode): def test_GpuCumOp4D(self, mode):
op_class = partial(self.op_class, mode=mode) op_class = partial(self.op_class, mode=mode)
# Should not use the GPU version. # Should not use the GPU version.
x = T.ftensor4("x") x = tt.ftensor4("x")
f = theano.function([x], op_class(axis=1)(x), mode=self.mode) f = theano.function([x], op_class(axis=1)(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, CumOp)] assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, CumOp)]
...@@ -3,7 +3,7 @@ import numpy as np ...@@ -3,7 +3,7 @@ import numpy as np
import pytest import pytest
import theano import theano
import theano.tensor as T import theano.tensor as tt
import theano.gpuarray.fft import theano.gpuarray.fft
from theano.gpuarray.fft import pygpu_available, skcuda_available, pycuda_available from theano.gpuarray.fft import pygpu_available, skcuda_available, pycuda_available
...@@ -27,7 +27,7 @@ class TestFFT: ...@@ -27,7 +27,7 @@ class TestFFT:
def test_1Dfft(self): def test_1Dfft(self):
inputs_val = np.random.random((1, N)).astype("float32") inputs_val = np.random.random((1, N)).astype("float32")
x = T.matrix("x", dtype="float32") x = tt.matrix("x", dtype="float32")
rfft = theano.gpuarray.fft.curfft(x) rfft = theano.gpuarray.fft.curfft(x)
f_rfft = theano.function([x], rfft, mode=mode_with_gpu) f_rfft = theano.function([x], rfft, mode=mode_with_gpu)
res_rfft = f_rfft(inputs_val) res_rfft = f_rfft(inputs_val)
......
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
import tests.unittest_tools as utt import tests.unittest_tools as utt
from theano.tensor.nnet import crossentropy_softmax_1hot_with_bias_dx
from theano.gpuarray.nnet import ( from theano.gpuarray.nnet import (
GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmaxWithBias, GpuSoftmaxWithBias,
GpuSoftmax, GpuSoftmax,
) )
from tests.gpuarray.config import mode_with_gpu, mode_without_gpu from tests.gpuarray.config import mode_with_gpu, mode_without_gpu
mode_wo_cudnn = mode_with_gpu.excluding("cudnn") mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
...@@ -29,16 +29,16 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -29,16 +29,16 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
n_in = 4098 n_in = 4098
n_out = 4099 n_out = 4099
y = T.lvector("y") y = tt.lvector("y")
b = T.fvector("b") b = tt.fvector("b")
# We precompute the dot with the big shape beforehand so that the test of # We precompute the dot with the big shape beforehand so that the test of
# GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error # GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
# (the launch timed out and was terminated) on GPU cards that are not # (the launch timed out and was terminated) on GPU cards that are not
# powerful enough. We need the big shape to check for corner # powerful enough. We need the big shape to check for corner
# cases. # cases.
dot_result = T.fmatrix("dot_result") dot_result = tt.fmatrix("dot_result")
# Seed numpy.random with config.unittests.rseed # Seed numpy.random with config.unittests.rseed
utt.seed_rng() utt.seed_rng()
...@@ -50,10 +50,10 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -50,10 +50,10 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
dot_value = np.asarray(np.dot(xx, W_values), dtype="float32") dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
del W_values del W_values
p_y_given_x = T.nnet.softmax(dot_result + b) p_y_given_x = tt.nnet.softmax(dot_result + b)
y_pred = T.argmax(p_y_given_x, axis=-1) y_pred = tt.argmax(p_y_given_x, axis=-1)
loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y]) loss = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y])
dW = T.grad(loss, dot_result) dW = tt.grad(loss, dot_result)
classify = theano.function( classify = theano.function(
inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
) )
...@@ -63,7 +63,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -63,7 +63,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
assert any( assert any(
[ [
isinstance(node.op, T.nnet.CrossentropySoftmaxArgmax1HotWithBias) isinstance(node.op, tt.nnet.CrossentropySoftmaxArgmax1HotWithBias)
for node in classify.maker.fgraph.toposort() for node in classify.maker.fgraph.toposort()
] ]
) )
...@@ -100,11 +100,9 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx(): ...@@ -100,11 +100,9 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
dnll_value = np.asarray(np.random.rand(batch_size), dtype="float32") dnll_value = np.asarray(np.random.rand(batch_size), dtype="float32")
y_idx_value = np.random.randint(low=0, high=5, size=batch_size) y_idx_value = np.random.randint(low=0, high=5, size=batch_size)
softmax_output = T.fmatrix() softmax_output = tt.fmatrix()
softmax_output /= softmax_output.sum(axis=1).reshape(softmax_output.shape[1], 1) softmax_output /= softmax_output.sum(axis=1).reshape(softmax_output.shape[1], 1)
op = theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx( op = crossentropy_softmax_1hot_with_bias_dx(dnll_value, softmax_output, y_idx_value)
dnll_value, softmax_output, y_idx_value
)
cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu) cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu)
gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu) gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu)
...@@ -113,7 +111,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx(): ...@@ -113,7 +111,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
assert any( assert any(
[ [
isinstance(node.op, T.nnet.CrossentropySoftmax1HotWithBiasDx) isinstance(node.op, tt.nnet.CrossentropySoftmax1HotWithBiasDx)
for node in cpu_f.maker.fgraph.toposort() for node in cpu_f.maker.fgraph.toposort()
] ]
) )
...@@ -156,14 +154,14 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias): ...@@ -156,14 +154,14 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
# TODO: check that we loop when there are too many threads. (THIS IS # TODO: check that we loop when there are too many threads. (THIS IS
# NOT IMPLEMENTED) # NOT IMPLEMENTED)
x = T.matrix("x", dtype=dtypeInput) x = tt.matrix("x", dtype=dtypeInput)
b = T.vector("b", dtype=dtypeBias) b = tt.vector("b", dtype=dtypeBias)
z = T.nnet.softmax_with_bias(x, b) z = tt.nnet.softmax_with_bias(x, b)
f = theano.function([x, b], z, mode=mode_without_gpu) f = theano.function([x, b], z, mode=mode_without_gpu)
f_gpu = theano.function([x, b], z, mode=mode_with_gpu) f_gpu = theano.function([x, b], z, mode=mode_with_gpu)
assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax_with_bias assert f.maker.fgraph.toposort()[-1].op == tt.nnet.softmax_with_bias
assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op, GpuSoftmaxWithBias) assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op, GpuSoftmaxWithBias)
def cmp(n, m): def cmp(n, m):
...@@ -209,12 +207,12 @@ def softmax_unittest_template(dtypeInput): ...@@ -209,12 +207,12 @@ def softmax_unittest_template(dtypeInput):
# We check that we loop when there are too many blocks # We check that we loop when there are too many blocks
# We use slower code when there isn't enough shared memory # We use slower code when there isn't enough shared memory
x = T.matrix("x", dtype=dtypeInput) x = tt.matrix("x", dtype=dtypeInput)
z = T.nnet.softmax(x) z = tt.nnet.softmax(x)
f = theano.function([x], z, mode=mode_without_gpu) f = theano.function([x], z, mode=mode_without_gpu)
f_gpu = theano.function([x], z, mode=mode_wo_cudnn) f_gpu = theano.function([x], z, mode=mode_wo_cudnn)
assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax_op assert f.maker.fgraph.toposort()[-1].op == tt.nnet.softmax_op
assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op, GpuSoftmax) assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op, GpuSoftmax)
def cmp(n, m): def cmp(n, m):
...@@ -256,7 +254,7 @@ class TestSoftMax: ...@@ -256,7 +254,7 @@ class TestSoftMax:
f = theano.function([x], f_z_out, mode=mode_without_gpu) f = theano.function([x], f_z_out, mode=mode_without_gpu)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=self.mode) f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=self.mode)
self._check_types(f, f_gpu, T.nnet.Softmax, self.gpu_op) self._check_types(f, f_gpu, tt.nnet.Softmax, self.gpu_op)
# we need to test n>32*1024 to check that we make the block loop. # we need to test n>32*1024 to check that we make the block loop.
cmp(1, 5, f, f_gpu) cmp(1, 5, f, f_gpu)
...@@ -303,16 +301,16 @@ class TestSoftMax: ...@@ -303,16 +301,16 @@ class TestSoftMax:
) )
def test_softmax(self):
    """Run the shared softmax comparison on a float32 matrix, then a tall input."""
    x = tt.fmatrix("x")
    z = tt.nnet.softmax_op
    # The same variable and the same op are passed for both argument pairs.
    f, f_gpu = self._test_softmax(x, x, z, z, self._cmp)

    # Extra comparison with 2 << 15 == 65536 as the first size argument.
    self._cmp(2 << 15, 5, f, f_gpu)
def test_softmax_shape_0(self): def test_softmax_shape_0(self):
x = T.fmatrix("x") x = tt.fmatrix("x")
z = T.nnet.softmax_op z = tt.nnet.softmax_op
f, f_gpu = self._test_softmax(x, x, z, z, self._cmp) f, f_gpu = self._test_softmax(x, x, z, z, self._cmp)
# Theano can handle that case, but cudnn can't # Theano can handle that case, but cudnn can't
......
...@@ -5,7 +5,7 @@ import pytest ...@@ -5,7 +5,7 @@ import pytest
import numpy as np import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as tt
from theano.gpuarray import GpuArrayType from theano.gpuarray import GpuArrayType
from theano.gpuarray.reduction import GpuMaxAndArgmax from theano.gpuarray.reduction import GpuMaxAndArgmax
...@@ -96,7 +96,7 @@ class BaseTest: ...@@ -96,7 +96,7 @@ class BaseTest:
def get_host_tensor(self):
    """Return a symbolic tensor of dtype ``self.dtype`` with ``self.tensor_size`` dims.

    No dimension is marked broadcastable.
    """
    broadcastable = (False,) * self.tensor_size
    return tt.tensor(self.dtype, broadcastable)
def get_gpu_tensor(self): def get_gpu_tensor(self):
broadcastable = (False,) * self.tensor_size broadcastable = (False,) * self.tensor_size
...@@ -116,7 +116,7 @@ class BaseTest: ...@@ -116,7 +116,7 @@ class BaseTest:
M = self.get_host_tensor() M = self.get_host_tensor()
f = theano.function( f = theano.function(
[M], [M],
[T.max(M, axis=axis), T.argmax(M, axis=axis)], [tt.max(M, axis=axis), tt.argmax(M, axis=axis)],
name="shape:" + str(test_tensor.shape) + "/axis:" + str(axis) + "/HOST", name="shape:" + str(test_tensor.shape) + "/axis:" + str(axis) + "/HOST",
mode=mode_without_gpu, mode=mode_without_gpu,
) )
...@@ -131,7 +131,7 @@ class BaseTest: ...@@ -131,7 +131,7 @@ class BaseTest:
M = self.get_gpu_tensor() M = self.get_gpu_tensor()
f = theano.function( f = theano.function(
[M], [M],
[T.max(M, axis=axis), T.argmax(M, axis=axis)], [tt.max(M, axis=axis), tt.argmax(M, axis=axis)],
name="shape:" + str(test_gpu_tensor.shape) + "/axis:" + str(axis) + "/GPU", name="shape:" + str(test_gpu_tensor.shape) + "/axis:" + str(axis) + "/GPU",
mode=mode_with_gpu, mode=mode_with_gpu,
) )
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论