提交 5e5e5cc5 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4413 from nouiz/harmdevries89-multinomial_newbackend

multinomial newbackend
...@@ -293,6 +293,9 @@ class GpuKernelBase(object): ...@@ -293,6 +293,9 @@ class GpuKernelBase(object):
# This is a shorthand for if your op only has a fixed version # This is a shorthand for if your op only has a fixed version
# You can reimplement it, but make sure to call kernel_version() # You can reimplement it, but make sure to call kernel_version()
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
v = self.c_code_cache_version()
if not v:
return ()
return (self.c_code_cache_version(), self.kernel_version(node)) return (self.c_code_cache_version(), self.kernel_version(node))
def kernel_version(self, node): def kernel_version(self, node):
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os import os
from theano import Apply from theano import Apply, Op
from theano.tensor.extra_ops import CumsumOp from theano.tensor.extra_ops import CumsumOp
try: try:
...@@ -13,7 +13,7 @@ from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, ...@@ -13,7 +13,7 @@ from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
from .opt import register_opt as register_gpu_opt, op_lifter from .opt import register_opt as register_gpu_opt, op_lifter
class GpuCumsum(GpuKernelBase): class GpuCumsum(GpuKernelBase, Op):
""" """
Parameters Parameters
---------- ----------
......
# TODO test dtype != float32
from __future__ import absolute_import, print_function, division
import os
try:
import pygpu
except ImportError:
pass
import theano
import theano.sandbox.multinomial
from theano import Apply, config
from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano.sandbox import gpuarray
from .basic_ops import as_gpuarray_variable, infer_context_name
from .opt import register_opt, op_lifter
from .type import GpuArrayType
class GPUAMultinomialFromUniform(gpuarray.basic_ops.GpuKernelBase, Op):
    """GPU (libgpuarray backend) one-hot multinomial sampler.

    For each row ``i`` of ``pvals`` (2D float32 probabilities) and the
    matching uniform draw ``unis[i]``, emits a one-hot row marking the
    first outcome whose cumulative probability exceeds ``unis[i]``.

    The result is written *transposed* — shape ``(n_outcomes, n_rows)``
    — for memory-access speed (see the kernel comment "write out
    transposed for speed"); the graph optimizer that introduces this op
    adds a GpuDimShuffle to restore the CPU op's orientation.

    Only float32 inputs/outputs are supported (asserted in make_node).
    """

    # The op's identity (for __eq__/__hash__) is its output dtype choice.
    __props__ = ("odtype",)

    def __init__(self, odtype):
        # odtype: either 'auto' (resolved to pvals.dtype in make_node)
        # or an explicit dtype string.
        Op.__init__(self)
        self.odtype = odtype

    def get_params(self, node):
        # Kernels run in the GPU context of the output variable.
        return node.outputs[0].type.context

    def c_headers(self):
        # numpy_compat.h comes from theano; gpuarray_helper.h ships next
        # to this module (see c_header_dirs).
        return ['<numpy_compat.h>', 'gpuarray_helper.h']

    def c_header_dirs(self):
        # Let the C compiler find gpuarray_helper.h in this package dir.
        return [os.path.dirname(__file__)]

    def make_node(self, pvals, unis):
        """Build the Apply node.

        Parameters
        ----------
        pvals : 2D float32 tensor — one probability row per draw.
        unis : 1D float32 tensor — one uniform sample per row of pvals.

        Returns
        -------
        Apply whose single output has the *transposed* shape
        ``(pvals.shape[1], pvals.shape[0])`` (note the swapped
        broadcastable pattern below).
        """
        assert pvals.dtype == 'float32'
        assert unis.dtype == 'float32'
        ctx_name = infer_context_name(pvals, unis)
        pvals = as_gpuarray_variable(pvals, ctx_name)
        unis = as_gpuarray_variable(unis, ctx_name)
        if pvals.ndim != 2:
            raise NotImplementedError('pvals ndim should be 2', pvals.ndim)
        if unis.ndim != 1:
            raise NotImplementedError('unis ndim should be 1', unis.ndim)
        if self.odtype == 'auto':
            odtype = pvals.dtype
        else:
            odtype = self.odtype
        # Kernel only handles float32 outputs.
        assert odtype == 'float32', odtype
        if odtype != pvals.dtype:
            raise NotImplementedError(
                'GpuMultinomialFromUniform works only if '
                'self.odtype == pvals.dtype', odtype, pvals.dtype)
        # Swapped broadcastable pattern: the output is transposed.
        br = (pvals.broadcastable[1], pvals.broadcastable[0])
        out = GpuArrayType(broadcastable=br,
                           dtype=odtype,
                           context_name=ctx_name)()
        return Apply(self, [pvals, unis], [out])

    def gpu_kernels(self, node, name):
        """Return the GPU kernel: one thread per multinomial row.

        Each thread walks its row's outcomes, accumulating probability
        until it passes the row's uniform sample, and writes a one-hot
        column into the transposed output.
        """
        code = """
KERNEL void k_multi_warp_multinomial(
        const ga_size nb_multi,
        const ga_size nb_outcomes,
        GLOBAL_MEM float * global_pvals,
        const ga_ssize pvals_row_stride,
        const ga_ssize pvals_col_stride,
        GLOBAL_MEM float * global_unis,
        const ga_ssize unis_stride,
        GLOBAL_MEM float * global_outs,
        const ga_ssize outs_row_stride,
        const ga_ssize outs_col_stride
)
{
    // each thread takes care of one multinomial draw
    int n = LDIM_0*GID_0 + LID_0;
    if (n < nb_multi)
    {
        float cummul = 0.;
        bool done = false;
        const float unis_n = global_unis[n*unis_stride];
        for (ga_size m = 0; m < nb_outcomes; ++m)
        {
            float current_out = 0.;
            if (!done)
            {
                cummul += global_pvals[m * pvals_col_stride +
                                       n * pvals_row_stride];
                if (unis_n < cummul)
                {
                    current_out = 1.;
                    done = true;
                }
            }
            //write out transposed for speed.
            global_outs[n * outs_col_stride +
                        m * outs_row_stride] = current_out;
        }
    }
}
"""
        # Param list must match the kernel signature above, in order.
        return [gpuarray.basic_ops.Kernel(
            code=code, name="k_multi_warp_multinomial",
            params=[pygpu.gpuarray.SIZE,
                    pygpu.gpuarray.SIZE,
                    pygpu.gpuarray.GpuArray,
                    pygpu.gpuarray.SSIZE,
                    pygpu.gpuarray.SSIZE,
                    pygpu.gpuarray.GpuArray,
                    pygpu.gpuarray.SSIZE,
                    pygpu.gpuarray.GpuArray,
                    pygpu.gpuarray.SSIZE,
                    pygpu.gpuarray.SSIZE],
            flags=gpuarray.basic_ops.Kernel.get_flags(node.outputs[0].dtype),
            objvar='k_multi_warp_multinomial_' + name)]

    def c_code(self, node, name, inp, outputs, sub):
        """Emit host C code: validate inputs, allocate the transposed
        output, pick a launch configuration, and call the kernel.

        NOTE(review): inside the template, ``2<<15 - 1`` parses as
        ``2 << (15 - 1)`` == 32768 in C ('-' binds tighter than '<<');
        the surrounding code suggests 65535 may have been intended —
        confirm upstream.
        NOTE(review): ``nb_multi % %nb_threads`` looks like a mangled
        ``%%`` escape (a literal '%' in this %%-formatted template) —
        confirm against the original source.
        """
        pvals, unis = inp
        out, = outputs
        fail = sub['fail']
        ctx = sub['params']
        # %(sync)d below: optionally block until the kernel finishes.
        sync = bool(config.gpuarray.sync)
        kname = self.gpu_kernels(node, name)[0].objvar
        s = """
        PyGpuArrayObject * pvals = %(pvals)s;
        PyGpuArrayObject * unis = %(unis)s;
        PyGpuArrayObject * out = %(out)s;
        size_t dims[2];
        if (PyGpuArray_NDIM(pvals) != 2)
        {
            PyErr_Format(PyExc_TypeError, "pvals wrong rank");
            %(fail)s
        }
        if (PyGpuArray_NDIM(unis) != 1)
        {
            PyErr_Format(PyExc_TypeError, "unis wrong rank");
            %(fail)s
        }
        if (PyGpuArray_DIMS(unis)[0] != PyGpuArray_DIMS(pvals)[0])
        {
            PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]");
            %(fail)s
        }
        dims[0] = PyGpuArray_DIMS(pvals)[1];
        dims[1] = PyGpuArray_DIMS(pvals)[0];
        if (theano_prep_output(&out, 2, dims, unis->ga.typecode,
                               GA_C_ORDER, %(ctx)s) != 0){
            %(fail)s
        }
        %(out)s = out;
        GpuArray_memset(&(out->ga), 0);
        { // NESTED SCOPE
        int nb_multi = PyGpuArray_DIMS(pvals)[0];
        int nb_outcomes = PyGpuArray_DIMS(pvals)[1];
        //TODO : change this for a beautiful constant
        int max_nb_blocks = 2<<15 - 1;
        size_t nb_blocks = max_nb_blocks + 1;
        size_t nb_threads=16; // so it really starts at 32, because of the *2
        do
        {
            nb_threads*=2;
            if (nb_multi % %nb_threads == 0)
                nb_blocks = nb_multi/nb_threads;
            else
                nb_blocks = (int)((float)nb_multi/(float)nb_threads + 1.);
        } while (nb_blocks > max_nb_blocks);
        //printf("\\nN=%%i b=%%i t=%%i t*b=%%i",
        //  nb_multi, nb_blocks, nb_threads, nb_blocks*nb_threads);
        // TODO : next line is a bit hardcoded...
        if (nb_threads > 512)
        {
            PyErr_Format(
                PyExc_ValueError,
                "Multinomial is not implemented for so many rows in the matrix (%%i)",
                nb_multi);
            %(fail)s
        }
        assert(nb_blocks*nb_threads >= nb_multi);
        void *args[10];
        ssize_t strides[5] = {
            PyGpuArray_STRIDES(pvals)[0]/sizeof(float),
            PyGpuArray_STRIDES(pvals)[1]/sizeof(float),
            PyGpuArray_STRIDES(unis)[0]/sizeof(float),
            PyGpuArray_STRIDES(out)[0]/sizeof(float),
            PyGpuArray_STRIDES(out)[1]/sizeof(float)
        };
        int err;
        args[0] = (void*)&PyGpuArray_DIMS(out)[1];
        args[1] = (void*)&PyGpuArray_DIMS(out)[0];
        args[2] = pvals->ga.data; //PyGpuArray_DEV_DATA(pvals);
        args[3] = (void*)&strides[0];
        args[4] = (void*)&strides[1];
        args[5] = unis->ga.data; //PyGpuArray_DEV_DATA(unis);
        args[6] = (void*)&strides[2];
        args[7] = out->ga.data; //PyGpuArray_DEV_DATA(out);
        args[8] = (void*)&strides[3];
        args[9] = (void*)&strides[4];
        err = GpuKernel_call(&%(kname)s, 1, &nb_threads, &nb_blocks, 0, args);
        if (err != GA_NO_ERROR) {
            PyErr_Format(
                PyExc_RuntimeError,
                "gpuarray error: %%s: %%s.\\n",
                "k_multi_warp_%(name)s",
                GpuKernel_error(&%(kname)s, err));
            %(fail)s;
        }
        if(%(sync)d)
            GpuArray_sync(&(out->ga));
        } // END NESTED SCOPE
        """ % locals()
        return s

    def c_code_cache_version(self):
        # Bump when the generated C code or the kernel changes.
        return (1,)
@register_opt()
@op_lifter([theano.sandbox.multinomial.MultinomialFromUniform])
def local_gpua_multinomial(node, context_name):
    """Lift a single-sample float32 MultinomialFromUniform to the GPU.

    Returns the GPU replacement variable, or None when the node does
    not qualify (n_samples is not the constant 1, or any dtype differs
    from float32).
    """
    inputs = node.inputs
    if len(inputs) == 2:
        p, u = inputs
        n_samples = 1
    else:
        p, u, n_samples = inputs
    # Only the single-sample case has a GPU implementation.
    try:
        if get_scalar_constant_value(n_samples) != 1:
            return None
    except NotScalarConstantError:
        return None
    m, = node.outputs
    if not (p.dtype == u.dtype == m.dtype == 'float32'):
        return None
    # The GPU op emits its result transposed; dimshuffle it back so the
    # replacement matches the CPU op's output orientation.
    gpu_op = GPUAMultinomialFromUniform(node.op.odtype)
    transposed = gpu_op(p, u)
    return gpuarray.elemwise.GpuDimShuffle([False, False], [1, 0])(transposed)
...@@ -763,7 +763,7 @@ def local_gpua_gemm(node, context_name): ...@@ -763,7 +763,7 @@ def local_gpua_gemm(node, context_name):
@op_lifter([tensor.blas.BatchedDot]) @op_lifter([tensor.blas.BatchedDot])
def local_gpua_gemmbatch(node, context_name): def local_gpua_gemmbatch(node, context_name):
a, b = node.inputs a, b = node.inputs
c = tensor.AllocEmpty((a.shape[0], a.shape[1], b.shape[2])) c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0) return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0)
......
from __future__ import absolute_import, print_function, division
import numpy
import theano
from theano import config, function, tensor
from ..multinomial import GPUAMultinomialFromUniform
import theano.tests.unittest_tools as utt
from .config import mode_with_gpu, mode_without_gpu
def get_mode(gpu):
    """Return the compilation mode to test with: GPU mode when *gpu*
    is true, otherwise the plain CPU mode."""
    return mode_with_gpu if gpu else mode_without_gpu
def run_with_c(f, gpu=False):
    """Invoke test body *f* with the mode matching *gpu* and the flag itself."""
    f(get_mode(gpu), gpu)
def test_multinomial_0():
    # Exercises the MultinomialFromUniform Op directly, rather than via
    # the multinomial() helper of GPU random generation.
    probs = tensor.fmatrix()
    uni = tensor.fvector()
    draws = theano.sandbox.multinomial.MultinomialFromUniform('auto')(probs, uni)

    def check(mode, gpu):
        # Multiplying by 2 lets the multinomial reuse its output storage.
        fn = function([probs, uni], draws * 2, allow_input_downcast=True,
                      mode=mode)
        if gpu:
            assert any(type(node.op) is GPUAMultinomialFromUniform
                       for node in fn.maker.fgraph.toposort())

        # Both the first and the second sample can be drawn.
        utt.assert_allclose(fn([[1, 0], [0, 1]], [.1, .1]),
                            [[2, 0], [0, 2]])

        # Both second labels can be drawn.
        res = fn([[.2, .8], [.3, .7]], [.31, .31])
        utt.assert_allclose(res, [[0, 2], [0, 2]])

        # Both first labels can be drawn.
        res = fn([[.2, .8], [.3, .7]], [.21, .21])
        utt.assert_allclose(res, [[0, 2], [2, 0]])

        # A different input size checks that the output is reallocated
        # correctly and that the GPU version keeps the transposed layout
        # straight.
        res = fn([[.2, .8]], [.25])
        utt.assert_allclose(res, [[0, 2]])

    run_with_c(check)
    run_with_c(check, True)
# TODO: check a bigger example (make sure blocking on GPU is handled correctly)
def test_multinomial_large():
    # DEBUG_MODE will exercise this on the GPU as well.
    def check(mode, gpu):
        probs = tensor.fmatrix()
        uni = tensor.fvector()
        draws = theano.sandbox.multinomial.MultinomialFromUniform('auto')(
            probs, uni)
        fn = function([probs, uni], draws * 2, allow_input_downcast=True,
                      mode=mode)
        if gpu:
            assert any(type(node.op) is GPUAMultinomialFromUniform
                       for node in fn.maker.fgraph.toposort())

        # 10000 rows of 4 outcomes, normalized per row.
        pv = numpy.arange(10000 * 4,
                          dtype='float32').reshape((10000, 4)) + 0.1
        pv = pv / pv.sum(axis=1)[:, None]
        uv = numpy.ones_like(pv[:, 0]) * 0.5
        mv = fn(pv, uv)

        assert mv.shape == pv.shape
        if config.cast_policy == 'custom':
            assert mv.dtype == pv.dtype
        elif config.cast_policy == 'numpy+floatX':
            assert mv.dtype == config.floatX
        elif config.cast_policy == 'numpy':
            assert mv.dtype == 'float64'
        else:
            raise NotImplementedError(config.cast_policy)
        utt.assert_allclose(mv.sum(axis=1), 2)
        expected = numpy.asarray([0, 0, 2, 0]) + 0 * pv
        utt.assert_allclose(mv, expected)  # broadcast over all rows

    run_with_c(check)
    run_with_c(check, True)
def test_gpu_opt():
    # Overlaps somewhat with test_multinomial_0; here we check that the
    # op lands on the GPU when its output is moved there by the optimizer.
    uni = tensor.fvector()

    def compile_and_run(pv_var, rows):
        # Build, compile and run the graph for a given pvals variable
        # shape (matrix or row) and row count.
        samp = theano.sandbox.multinomial.MultinomialFromUniform('auto')(
            pv_var, uni)
        assert samp.dtype == 'float32', samp.dtype
        fn = function([pv_var, uni], samp, allow_input_downcast=True,
                      mode=get_mode(True))
        assert any(type(node.op) is GPUAMultinomialFromUniform
                   for node in fn.maker.fgraph.toposort())
        pv = numpy.arange(rows * 4,
                          dtype='float32').reshape((rows, 4)) + 0.1
        pv = pv / pv.sum(axis=1)[:, None]
        uv = numpy.ones_like(pv[:, 0]) * 0.5
        fn(pv, uv)

    compile_and_run(tensor.fmatrix(), 10000)
    # A row input used to fail in the past.
    compile_and_run(tensor.frow(), 1)
...@@ -9,11 +9,10 @@ from theano.tensor import NotScalarConstantError, get_scalar_constant_value ...@@ -9,11 +9,10 @@ from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano.scalar import as_scalar from theano.scalar import as_scalar
import copy import copy
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp, register_opt
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt
class MultinomialFromUniform(Op): class MultinomialFromUniform(Op):
...@@ -565,6 +564,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -565,6 +564,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
""" % locals() """ % locals()
@register_opt()
@local_optimizer([MultinomialFromUniform]) @local_optimizer([MultinomialFromUniform])
def local_gpu_multinomial(node): def local_gpu_multinomial(node):
# TODO : need description for function # TODO : need description for function
...@@ -608,7 +608,3 @@ def local_gpu_multinomial(node): ...@@ -608,7 +608,3 @@ def local_gpu_multinomial(node):
# The dimshuffle is on the cpu, but will be moved to the # The dimshuffle is on the cpu, but will be moved to the
# gpu by an opt. # gpu by an opt.
return [gpu_from_host(ret)] return [gpu_from_host(ret)]
if cuda_available:
register_opt()(local_gpu_multinomial)
pass
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import copy
import os import os
import sys import sys
from six import reraise from six import reraise
...@@ -10,7 +9,7 @@ import numpy ...@@ -10,7 +9,7 @@ import numpy
import theano import theano
from theano import config, function, tensor from theano import config, function, tensor
from theano.sandbox import multinomial from theano.sandbox import multinomial
from theano.compile.mode import get_default_mode, predefined_linkers from theano.compile.mode import get_default_mode
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.compat import PY3 from theano.compat import PY3
...@@ -19,15 +18,12 @@ from theano.misc.pkl_utils import CompatUnpickler ...@@ -19,15 +18,12 @@ from theano.misc.pkl_utils import CompatUnpickler
def get_mode(gpu): def get_mode(gpu):
mode = get_default_mode() mode = get_default_mode()
mode = copy.copy(mode) if theano.config.mode == 'FAST_COMPILE':
mode = theano.compile.get_mode('FAST_RUN')
if gpu: if gpu:
mode = mode.including('gpu', 'gpu_local_optimizations', mode = mode.including('gpu', 'gpu_local_optimizations',
'local_cut_gpu_host_gpu', 'local_cut_gpu_host_gpu',
'local_gpu_multinomial') 'local_gpu_multinomial')
if isinstance(mode.linker, theano.gof.PerformLinker):
mode.linker = predefined_linkers['c|py']
if hasattr(mode.linker, 'c_thunks'):
mode.linker.c_thunks = True
return mode return mode
......
...@@ -6218,7 +6218,7 @@ class AllocEmpty(gof.Op): ...@@ -6218,7 +6218,7 @@ class AllocEmpty(gof.Op):
# specify the type of the data # specify the type of the data
def __init__(self, dtype): def __init__(self, dtype):
assert isinstance(dtype, str) assert isinstance(dtype, str), dtype
self.dtype = dtype.lower() self.dtype = dtype.lower()
def validate_shape(self, shape): def validate_shape(self, shape):
......
...@@ -285,7 +285,9 @@ class TestCorrConv2d(BaseTestConv2d): ...@@ -285,7 +285,9 @@ class TestCorrConv2d(BaseTestConv2d):
def tcase(self, i, f, s, b, flip, provide_shape): def tcase(self, i, f, s, b, flip, provide_shape):
o = self.get_output_shape(i, f, s, b) o = self.get_output_shape(i, f, s, b)
if not theano.config.blas.ldflags: if (not theano.config.blas.ldflags or
not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"):
raise SkipTest("Need blas to test conv2d") raise SkipTest("Need blas to test conv2d")
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s, self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, provide_shape=provide_shape, verify_grad=True, provide_shape=provide_shape,
...@@ -541,7 +543,10 @@ class TestBilinearUpsampling(unittest.TestCase): ...@@ -541,7 +543,10 @@ class TestBilinearUpsampling(unittest.TestCase):
# If BLAS is not available on CPU, then we accept the fallback to the # If BLAS is not available on CPU, then we accept the fallback to the
# slow Python implementation for that test. # slow Python implementation for that test.
compile_mode = theano.compile.mode.get_default_mode() compile_mode = theano.compile.mode.get_default_mode()
if not theano.config.blas.ldflags: if theano.config.mode == "FAST_COMPILE":
compile_mode = compile_mode.excluding("conv_gemm")
compile_mode = compile_mode.excluding('AbstractConvCheck')
elif not theano.config.blas.ldflags or not theano.config.cxx:
compile_mode = compile_mode.excluding('AbstractConvCheck') compile_mode = compile_mode.excluding('AbstractConvCheck')
def numerical_kernel_1D(self, ratio): def numerical_kernel_1D(self, ratio):
......
...@@ -60,11 +60,11 @@ def test_bn_feature_maps(): ...@@ -60,11 +60,11 @@ def test_bn_feature_maps():
return n * G + B return n * G + B
numpy.random.seed(1234) numpy.random.seed(1234)
X = 1 + numpy.random.random([10, 20, 4, 4]).astype('float32') X = 1 + numpy.random.random([2, 3, 4, 4]).astype('float32')
B = 1 + numpy.random.random([20]).astype('float32') B = 1 + numpy.random.random([3]).astype('float32')
G = 1 + numpy.random.random([20]).astype('float32') G = 1 + numpy.random.random([3]).astype('float32')
M = 1 + numpy.random.random([20]).astype('float32') M = 1 + numpy.random.random([3]).astype('float32')
V = 1 + numpy.random.random([20]).astype('float32') V = 1 + numpy.random.random([3]).astype('float32')
x = theano.tensor.tensor4('x') x = theano.tensor.tensor4('x')
b = theano.tensor.vector('b') b = theano.tensor.vector('b')
......
...@@ -132,7 +132,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -132,7 +132,8 @@ class TestCorr2D(utt.InferShapeTester):
# TEST GRADIENT # TEST GRADIENT
if verify_grad: if verify_grad:
utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data]) utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data],
mode=self.mode)
@attr('slow') @attr('slow')
def test_basic(self): def test_basic(self):
...@@ -235,6 +236,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -235,6 +236,8 @@ class TestCorr2D(utt.InferShapeTester):
@attr('slow') @attr('slow')
def test_infer_shape_forward(self): def test_infer_shape_forward(self):
if theano.config.mode == "FAST_COMPILE":
raise SkipTest("CorrMM don't work in FAST_COMPILE")
def rand(*shape): def rand(*shape):
r = numpy.asarray(numpy.random.rand(*shape), dtype='float64') r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
...@@ -264,6 +267,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -264,6 +267,8 @@ class TestCorr2D(utt.InferShapeTester):
@attr('slow') @attr('slow')
def test_infer_shape_gradW(self): def test_infer_shape_gradW(self):
if theano.config.mode == "FAST_COMPILE":
raise SkipTest("CorrMM don't work in FAST_COMPILE")
def rand(*shape): def rand(*shape):
r = numpy.asarray(numpy.random.rand(*shape), dtype='float64') r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
...@@ -300,6 +305,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -300,6 +305,8 @@ class TestCorr2D(utt.InferShapeTester):
@attr('slow') @attr('slow')
def test_infer_shape_gradI(self): def test_infer_shape_gradI(self):
if theano.config.mode == "FAST_COMPILE":
raise SkipTest("CorrMM don't work in FAST_COMPILE")
def rand(*shape): def rand(*shape):
r = numpy.asarray(numpy.random.rand(*shape), dtype='float64') r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
......
...@@ -279,16 +279,20 @@ class test_RopLop(RopLop_checker): ...@@ -279,16 +279,20 @@ class test_RopLop(RopLop_checker):
return conv_op(input, filters, border_mode=border_mode) return conv_op(input, filters, border_mode=border_mode)
output = sym_conv2d(input, filters).flatten() output = sym_conv2d(input, filters).flatten()
yv = tensor.Rop(output, [input, filters], [ev_input, ev_filters]) yv = tensor.Rop(output, [input, filters], [ev_input, ev_filters])
mode = None
if theano.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
rop_f = function([input, filters, ev_input, ev_filters], rop_f = function([input, filters, ev_input, ev_filters],
yv, on_unused_input='ignore') yv, on_unused_input='ignore', mode=mode)
sy, _ = theano.scan(lambda i, y, x1, x2, v1, v2: sy, _ = theano.scan(lambda i, y, x1, x2, v1, v2:
(tensor.grad(y[i], x1) * v1).sum() + (tensor.grad(y[i], x1) * v1).sum() +
(tensor.grad(y[i], x2) * v2).sum(), (tensor.grad(y[i], x2) * v2).sum(),
sequences=tensor.arange(output.shape[0]), sequences=tensor.arange(output.shape[0]),
non_sequences=[output, input, filters, non_sequences=[output, input, filters,
ev_input, ev_filters]) ev_input, ev_filters],
mode=mode)
scan_f = function([input, filters, ev_input, ev_filters], sy, scan_f = function([input, filters, ev_input, ev_filters], sy,
on_unused_input='ignore') on_unused_input='ignore', mode=mode)
dtype = theano.config.floatX dtype = theano.config.floatX
image_data = numpy.random.random(image_shape).astype(dtype) image_data = numpy.random.random(image_shape).astype(dtype)
filter_data = numpy.random.random(filter_shape).astype(dtype) filter_data = numpy.random.random(filter_shape).astype(dtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论