提交 5df0cfd8 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6091 from notoraptor/params-for-other-ops

Params for other ops
...@@ -346,6 +346,18 @@ class Shape_i(gof.Op): ...@@ -346,6 +346,18 @@ class Shape_i(gof.Op):
i = int(i) i = int(i)
self.i = i self.i = i
# NB:
# 1) params_type is defined as a property to avoid
# loop in Python import caused by importing theano.scalar below
# when params_type is defined directly in class code.
# 2) We wrap scalar into ParamsType (instead of directly using scalar as op param)
# to avoid Theano converting scalar param to constant that would be later
# hardcoded as literal in C code, making us lose all the advantages of
# using params.
@property
def params_type(self):
    """Op params wrapping ``self.i`` as an int64, exposed as ``params->i`` in C code."""
    return gof.ParamsType(i=theano.scalar.basic.int64)
def __str__(self): def __str__(self):
return '%s{%i}' % (self.__class__.__name__, self.i) return '%s{%i}' % (self.__class__.__name__, self.i)
...@@ -360,7 +372,7 @@ class Shape_i(gof.Op): ...@@ -360,7 +372,7 @@ class Shape_i(gof.Op):
(x, self.i)) (x, self.i))
return theano.Apply(self, [x], [theano.tensor.lscalar()]) return theano.Apply(self, [x], [theano.tensor.lscalar()])
def perform(self, node, inp, out_): def perform(self, node, inp, out_, params):
x, = inp x, = inp
out, = out_ out, = out_
if out[0] is None: if out[0] is None:
...@@ -383,7 +395,7 @@ class Shape_i(gof.Op): ...@@ -383,7 +395,7 @@ class Shape_i(gof.Op):
version.append((str(t), v)) version.append((str(t), v))
if version: if version:
version.append(1) version.append(2)
return tuple(version) return tuple(version)
...@@ -391,7 +403,8 @@ class Shape_i(gof.Op): ...@@ -391,7 +403,8 @@ class Shape_i(gof.Op):
iname, = inames iname, = inames
oname, = onames oname, = onames
fail = sub['fail'] fail = sub['fail']
i = self.i # i is then 'params->i', not just 'params'.
i = sub['params'] + '->i'
itype = node.inputs[0].type.__class__ itype = node.inputs[0].type.__class__
if itype in self.c_code_and_version: if itype in self.c_code_and_version:
......
...@@ -10,6 +10,9 @@ except ImportError: ...@@ -10,6 +10,9 @@ except ImportError:
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name) from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name)
from .opt import register_opt, op_lifter, register_opt2 from .opt import register_opt, op_lifter, register_opt2
from .type import gpu_context_type
from theano.gof import ParamsType
import theano.scalar as scalar
class GpuCumOp(GpuKernelBase, Op): class GpuCumOp(GpuKernelBase, Op):
...@@ -21,9 +24,12 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -21,9 +24,12 @@ class GpuCumOp(GpuKernelBase, Op):
""" """
SUPPORTED_NDIMS = 3 SUPPORTED_NDIMS = 3
__props__ = ('axis', 'mode') __props__ = ('axis', 'mode')
params_type = ParamsType(axis=scalar.int32,
context=gpu_context_type)
def __init__(self, axis, mode='add'): def __init__(self, axis, mode='add'):
self.axis = axis if axis else 0 assert axis is not None
self.axis = int(axis)
self.mode = mode self.mode = mode
def __eq__(self, other): def __eq__(self, other):
...@@ -35,7 +41,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -35,7 +41,7 @@ class GpuCumOp(GpuKernelBase, Op):
return hash(self.axis) ^ hash(self.mode) return hash(self.axis) ^ hash(self.mode)
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (7,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>']
...@@ -43,6 +49,9 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -43,6 +49,9 @@ class GpuCumOp(GpuKernelBase, Op):
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [os.path.dirname(__file__)]
def get_params(self, node):
    """Build the wrapped op params, binding the GPU context of the first input.

    The axis comes from ``self.axis`` (via the declared ``params_type``);
    only the context has to be supplied explicitly since it is
    node-dependent rather than an op attribute.
    """
    ctx = node.inputs[0].type.context
    return self.params_type.get_params(self, context=ctx)
def make_node(self, x): def make_node(self, x):
assert x.type.dtype == 'float32', "Only float32 supported for GpuCumOp" assert x.type.dtype == 'float32', "Only float32 supported for GpuCumOp"
...@@ -244,24 +253,18 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -244,24 +253,18 @@ class GpuCumOp(GpuKernelBase, Op):
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != b'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
x, = inp return """
z, = out
axis = self.axis if self.axis is not None else 0
fail = sub['fail']
ctx = sub['params']
code = """
const size_t* shape = PyGpuArray_DIMS(%(x)s); const size_t* shape = PyGpuArray_DIMS(%(x)s);
bool needAllocation = !%(z)s || PyGpuArray_NDIM(%(x)s) != PyGpuArray_NDIM(%(z)s); bool needAllocation = !%(z)s || PyGpuArray_NDIM(%(x)s) != PyGpuArray_NDIM(%(z)s);
int axis = %(axis)s; int axis = %(params)s->axis;
if (axis < 0) { if (axis < 0) {
// Convert negative axis to positive axis. // Convert negative axis to positive axis.
axis += PyGpuArray_NDIM(%(x)s); axis += PyGpuArray_NDIM(%(x)s);
} }
if (theano_prep_output(&%(z)s, PyGpuArray_NDIM(%(x)s), PyGpuArray_DIMS(%(x)s), %(x)s->ga.typecode, GA_C_ORDER, %(ctx)s) != 0){ if (theano_prep_output(&%(z)s, PyGpuArray_NDIM(%(x)s), PyGpuArray_DIMS(%(x)s),
%(x)s->ga.typecode, GA_C_ORDER, %(params)s->context) != 0) {
%(fail)s; %(fail)s;
} }
...@@ -270,17 +273,17 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -270,17 +273,17 @@ class GpuCumOp(GpuKernelBase, Op):
size_t max_grid_size1; size_t max_grid_size1;
size_t max_grid_size2; size_t max_grid_size2;
int err; int err;
err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0); err = gpucontext_property(%(params)s->context->ctx, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0"); PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
%(fail)s; %(fail)s;
} }
err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1); err = gpucontext_property(%(params)s->context->ctx, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1"); PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
%(fail)s; %(fail)s;
} }
err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2); err = gpucontext_property(%(params)s->context->ctx, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2"); PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
%(fail)s; %(fail)s;
...@@ -289,9 +292,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -289,9 +292,7 @@ class GpuCumOp(GpuKernelBase, Op):
%(fail)s; %(fail)s;
} }
} }
""" % locals() """ % dict(x=inp[0], z=out[0], nodename=nodename, fail=sub['fail'], params=sub['params'])
return code
def c_support_code_struct(self, node, nodename): def c_support_code_struct(self, node, nodename):
code = """ code = """
......
...@@ -7,16 +7,15 @@ http://www.iro.umontreal.ca/~simardr/ssj/indexe.html ...@@ -7,16 +7,15 @@ http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
""" """
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy as np
from theano import Apply, tensor from theano import Apply, tensor
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.rng_mrg import mrg_uniform_base, mrg_uniform from theano.sandbox.rng_mrg import mrg_uniform_base, mrg_uniform
from theano.tensor import as_tensor_variable, get_vector_length from theano.tensor import as_tensor_variable, get_vector_length
from theano.scalar import int32 as int_t
from .basic_ops import (GpuKernelBase, Kernel, infer_context_name, from .basic_ops import (GpuKernelBase, Kernel, infer_context_name,
host_from_gpu, as_gpuarray_variable) host_from_gpu, as_gpuarray_variable)
from .type import GpuArrayType from .type import GpuArrayType, gpu_context_type
from .fp16_help import write_w from .fp16_help import write_w
from .opt import register_opt, register_opt2 from .opt import register_opt, register_opt2
...@@ -24,6 +23,9 @@ from .opt import register_opt, register_opt2 ...@@ -24,6 +23,9 @@ from .opt import register_opt, register_opt2
class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
# GpuArray version # GpuArray version
_f16_ok = True _f16_ok = True
params_type = mrg_uniform_base.params_type.extended(otypecode=int_t, context=gpu_context_type)
otypecode = property(lambda self: self.output_type.typecode)
def make_node(self, rstate, size): def make_node(self, rstate, size):
# error checking slightly redundant here, since # error checking slightly redundant here, since
...@@ -39,6 +41,9 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -39,6 +41,9 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
[rstate, size], [rstate, size],
[rstate.type(), output_type]) [rstate.type(), output_type])
def get_params(self, node):
    """Return the wrapped op params with the GPU context taken from the rstate input."""
    input_context = node.inputs[0].type.context
    return self.params_type.get_params(self, context=input_context)
@classmethod @classmethod
def new(cls, rstate, ndim, dtype, size): def new(cls, rstate, ndim, dtype, size):
v_size = as_tensor_variable(size) v_size = as_tensor_variable(size)
...@@ -168,40 +173,34 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -168,40 +173,34 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
] ]
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
rstate, size = inp
o_rstate, o_sample = out
inplace = int(self.inplace)
ndim = self.output_type.ndim
o_type_num = np.asarray(0, dtype=self.output_type.dtype).dtype.num
fail = sub['fail']
ctx = sub['params']
kname = self.gpu_kernels(node, nodename)[0].objvar
otypecode = str(self.output_type.typecode)
return """ return """
npy_int64 M1 = 2147483647; //2^31 - 1 npy_int64 M1 = 2147483647; //2^31 - 1
// The +1 is to avoid odims[0] which fails on windows
size_t odims[%(ndim)s+1];
size_t n_elements = 1; size_t n_elements = 1;
unsigned int n_streams; unsigned int n_streams;
int must_alloc_sample = ((NULL == %(o_sample)s) int must_alloc_sample = ((NULL == %(o_sample)s)
|| !pygpu_GpuArray_Check((PyObject*)%(o_sample)s) || !pygpu_GpuArray_Check((PyObject*)%(o_sample)s)
|| !(%(o_sample)s->ga.flags & GA_C_CONTIGUOUS) || !(%(o_sample)s->ga.flags & GA_C_CONTIGUOUS)
|| (PyGpuArray_NDIM(%(o_sample)s) != %(ndim)s)); || (PyGpuArray_NDIM(%(o_sample)s) != %(params)s->ndim));
size_t* odims = (size_t*)malloc(%(params)s->ndim * sizeof(size_t));
if (odims == NULL) {
PyErr_NoMemory();
%(just_fail)s
}
if (PyArray_NDIM(%(size)s) != 1) if (PyArray_NDIM(%(size)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "size must be vector"); PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s %(fail)s
} }
if (PyArray_DIMS(%(size)s)[0] != %(ndim)s) if (PyArray_DIMS(%(size)s)[0] != %(params)s->ndim)
{ {
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%li)", PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%li)",
%(ndim)s, PyArray_DIMS(%(size)s)[0]); %(params)s->ndim, PyArray_DIMS(%(size)s)[0]);
%(fail)s %(fail)s
} }
for (int i = 0; i < %(ndim)s; ++i) for (int i = 0; i < %(params)s->ndim; ++i)
{ {
odims[i] = *(dtype_%(size)s *)PyArray_GETPTR1(%(size)s, i); odims[i] = *(dtype_%(size)s *)PyArray_GETPTR1(%(size)s, i);
n_elements *= odims[i]; n_elements *= odims[i];
...@@ -219,8 +218,8 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -219,8 +218,8 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
if (must_alloc_sample) if (must_alloc_sample)
{ {
Py_XDECREF(%(o_sample)s); Py_XDECREF(%(o_sample)s);
%(o_sample)s = pygpu_empty(%(ndim)s, odims, %(otypecode)s, GA_C_ORDER, %(o_sample)s = pygpu_empty(%(params)s->ndim, odims, %(params)s->otypecode, GA_C_ORDER,
%(ctx)s, Py_None); %(params)s->context, Py_None);
if(!%(o_sample)s) if(!%(o_sample)s)
{ {
%(fail)s; %(fail)s;
...@@ -233,7 +232,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -233,7 +232,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
} }
Py_XDECREF(%(o_rstate)s); Py_XDECREF(%(o_rstate)s);
if (%(inplace)s) if (%(params)s->inplace)
{ {
Py_INCREF(%(rstate)s); Py_INCREF(%(rstate)s);
%(o_rstate)s = %(rstate)s; %(o_rstate)s = %(rstate)s;
...@@ -285,10 +284,22 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -285,10 +284,22 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
%(fail)s %(fail)s
} }
} }
""" % locals()
free(odims);
""" % dict(rstate=inp[0], size=inp[1],
o_rstate=out[0], o_sample=out[1],
kname=self.gpu_kernels(node, nodename)[0].objvar,
params=sub['params'],
just_fail=sub['fail'],
fail="""
{
free(odims);
%(fail)s
}
""" % dict(fail=sub['fail']))
def c_code_cache_version(self): def c_code_cache_version(self):
return (14,) return (15,)
@register_opt2([mrg_uniform], 'fast_compile') @register_opt2([mrg_uniform], 'fast_compile')
......
...@@ -8,6 +8,7 @@ import numpy as np ...@@ -8,6 +8,7 @@ import numpy as np
import theano import theano
from theano import Op, Apply from theano import Op, Apply
from theano.gof import EnumList
import theano.tensor as T import theano.tensor as T
from theano.gradient import grad_not_implemented from theano.gradient import grad_not_implemented
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
...@@ -39,13 +40,21 @@ class Images2Neibs(Op): ...@@ -39,13 +40,21 @@ class Images2Neibs(Op):
""" """
__props__ = ("mode",) __props__ = ("mode",)
# Enum of the supported border modes: each C-level constant name is paired
# with its Python string alias (e.g. MODE_VALID <-> 'valid'), so C code can
# branch on integer constants instead of string comparisons.
BORDER_MODE = EnumList(('MODE_VALID', 'valid'),
                       ('MODE_HALF', 'half'),
                       ('MODE_FULL', 'full'),
                       ('MODE_WRAP_CENTERED', 'wrap_centered'),
                       ('MODE_IGNORE_BORDERS', 'ignore_borders'))
# The op param IS the border mode enum.
params_type = BORDER_MODE

def get_params(self, node):
    # ``self.mode`` is the string alias; presumably the EnumList params_type
    # maps it to the C enum constant used in c_code — confirm against
    # theano.gof.EnumList docs.
    return self.mode
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'half', 'full', implemented_modes = self.BORDER_MODE.get_aliases()
'wrap_centered', 'ignore_borders']: if mode not in implemented_modes:
raise NotImplementedError("Only the mode valid, half, full, " raise NotImplementedError("Only modes %s have been implemented for %s"
"ignore_borders and wrap_centered have " % (', '.join(implemented_modes), type(self).__name__))
"been implemented for Images2Neibs")
self.mode = mode self.mode = mode
def __str__(self): def __str__(self):
...@@ -159,9 +168,9 @@ class Images2Neibs(Op): ...@@ -159,9 +168,9 @@ class Images2Neibs(Op):
grad_undefined(self, 2, neib_step)] grad_undefined(self, 2, neib_step)]
def c_code_cache_version(self): def c_code_cache_version(self):
return (8,) return (10,)
def perform(self, node, inp, out_): def perform(self, node, inp, out_, params):
ten4, neib_shape, neib_step = inp ten4, neib_shape, neib_step = inp
z, = out_ z, = out_
# GpuImages2Neibs should not run this perform in DebugMode # GpuImages2Neibs should not run this perform in DebugMode
...@@ -344,11 +353,6 @@ class Images2Neibs(Op): ...@@ -344,11 +353,6 @@ class Images2Neibs(Op):
return [(z_dim0, z_dim1)] return [(z_dim0, z_dim1)]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
ten4, neib_shape, neib_step = inp
z, = out
fail = sub['fail']
mode = self.mode
return """ return """
#ifndef CEIL_INTDIV #ifndef CEIL_INTDIV
#define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0)) #define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
...@@ -408,7 +412,7 @@ class Images2Neibs(Op): ...@@ -408,7 +412,7 @@ class Images2Neibs(Op):
%(fail)s; %(fail)s;
} }
if ( "%(mode)s" == "wrap_centered") { if (%(mode)s == MODE_WRAP_CENTERED) {
if (c%%2!=1 || d%%2!=1){ if (c%%2!=1 || d%%2!=1){
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"Images2Neibs: in mode wrap_centered" "Images2Neibs: in mode wrap_centered"
...@@ -430,7 +434,7 @@ class Images2Neibs(Op): ...@@ -430,7 +434,7 @@ class Images2Neibs(Op):
grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x); grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x);
grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y); grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y);
}else if ( "%(mode)s" == "valid") { } else if (%(mode)s == MODE_VALID) {
if ( ((PyArray_DIMS(%(ten4)s))[2] < c) || if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0)) ( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
{ {
...@@ -455,12 +459,12 @@ class Images2Neibs(Op): ...@@ -455,12 +459,12 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "ignore_borders") { } else if (%(mode)s == MODE_IGNORE_BORDERS) {
//number of patch in height //number of patch in height
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "half") { } else if (%(mode)s == MODE_HALF) {
if ( ((PyArray_DIMS(%(ten4)s))[2] < c) || if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0)) ( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
{ {
...@@ -485,7 +489,7 @@ class Images2Neibs(Op): ...@@ -485,7 +489,7 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x); grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y); grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
}else if ( "%(mode)s" == "full") { } else if (%(mode)s == MODE_FULL) {
if ( ((PyArray_DIMS(%(ten4)s))[2] < c) || if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
( (((PyArray_DIMS(%(ten4)s))[2]+c-2) %% step_x)!=0)) ( (((PyArray_DIMS(%(ten4)s))[2]+c-2) %% step_x)!=0))
{ {
...@@ -510,9 +514,9 @@ class Images2Neibs(Op): ...@@ -510,9 +514,9 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]+c-2)/step_x); grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]+c-2)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]+d-2)/step_y); grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]+d-2)/step_y);
}else { } else {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode '%(mode)s'"); "Images2Neibs: unknow mode %%d", %(mode)s);
%(fail)s; %(fail)s;
} }
...@@ -572,13 +576,13 @@ class Images2Neibs(Op): ...@@ -572,13 +576,13 @@ class Images2Neibs(Op):
for (int i = 0; i < c; i++) // loop over c for (int i = 0; i < c; i++) // loop over c
{ {
int ten4_2 = i + a * step_x; int ten4_2 = i + a * step_x;
if ( "%(mode)s" == "wrap_centered" ){ if (%(mode)s == MODE_WRAP_CENTERED) {
ten4_2 -= wrap_centered_half_idx_shift_x; ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height; if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height; else if (ten4_2 >= height) ten4_2 -= height;
} else if ( "%(mode)s" == "half" ){ } else if (%(mode)s == MODE_HALF) {
ten4_2 -= wrap_centered_half_idx_shift_x; ten4_2 -= wrap_centered_half_idx_shift_x;
} else if ( "%(mode)s" == "full" ){ } else if (%(mode)s == MODE_FULL) {
ten4_2 -= c - 1; ten4_2 -= c - 1;
} }
if (ten4_2 < 0 | ten4_2 >= height) { if (ten4_2 < 0 | ten4_2 >= height) {
...@@ -588,13 +592,13 @@ class Images2Neibs(Op): ...@@ -588,13 +592,13 @@ class Images2Neibs(Op):
for (int j = 0; j < d; j++) // loop over d for (int j = 0; j < d; j++) // loop over d
{ {
int ten4_3 = j + b * step_y; int ten4_3 = j + b * step_y;
if ( "%(mode)s" == "wrap_centered" ){ if (%(mode)s == MODE_WRAP_CENTERED) {
ten4_3 -= wrap_centered_half_idx_shift_y; ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width; if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width; else if (ten4_3 >= width) ten4_3 -= width;
} else if ( "%(mode)s" == "half" ){ } else if (%(mode)s == MODE_HALF) {
ten4_3 -= wrap_centered_half_idx_shift_y; ten4_3 -= wrap_centered_half_idx_shift_y;
} else if ( "%(mode)s" == "full" ){ } else if (%(mode)s == MODE_FULL) {
ten4_3 -= d - 1; ten4_3 -= d - 1;
} }
int z_col = j + d * i; int z_col = j + d * i;
...@@ -609,7 +613,8 @@ class Images2Neibs(Op): ...@@ -609,7 +613,8 @@ class Images2Neibs(Op):
} }
} }
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % dict(ten4=inp[0], neib_shape=inp[1], neib_step=inp[2], z=out[0],
fail=sub['fail'], mode=sub['params'])
def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
......
...@@ -12,7 +12,7 @@ import theano ...@@ -12,7 +12,7 @@ import theano
from theano.compat import izip from theano.compat import izip
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
from theano import gof from theano import gof
from theano.gof import Apply, hashtype, Op, Type, MethodNotDefined from theano.gof import Apply, hashtype, Op, Type, MethodNotDefined, ParamsType
from theano.printing import pprint from theano.printing import pprint
from theano import scalar as scal from theano import scalar as scal
from theano.tensor.basic import alloc from theano.tensor.basic import alloc
...@@ -1685,6 +1685,7 @@ class AdvancedSubtensor1(Op): ...@@ -1685,6 +1685,7 @@ class AdvancedSubtensor1(Op):
# of the grad() method. # of the grad() method.
__props__ = () __props__ = ()
_f16_ok = True _f16_ok = True
check_input = False
def __init__(self, sparse_grad=False): def __init__(self, sparse_grad=False):
self.sparse_grad = sparse_grad self.sparse_grad = sparse_grad
...@@ -1872,10 +1873,13 @@ class AdvancedIncSubtensor1(Op): ...@@ -1872,10 +1873,13 @@ class AdvancedIncSubtensor1(Op):
""" """
__props__ = ('inplace', 'set_instead_of_inc') __props__ = ('inplace', 'set_instead_of_inc')
check_input = False
params_type = ParamsType(inplace=scal.bool,
set_instead_of_inc=scal.bool)
def __init__(self, inplace=False, set_instead_of_inc=False): def __init__(self, inplace=False, set_instead_of_inc=False):
self.inplace = inplace self.inplace = bool(inplace)
self.set_instead_of_inc = set_instead_of_inc self.set_instead_of_inc = bool(set_instead_of_inc)
if inplace: if inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -1955,17 +1959,11 @@ class AdvancedIncSubtensor1(Op): ...@@ -1955,17 +1959,11 @@ class AdvancedIncSubtensor1(Op):
raise NotImplementedError raise NotImplementedError
x, y, idx = input_names x, y, idx = input_names
out = output_names[0] out = output_names[0]
fail = sub['fail']
inc_or_set = 1 - self.set_instead_of_inc
if self.inplace: # convert bool to int
inplace = 1
else:
inplace = 0
copy_of_x = self.copy_of_x(x) copy_of_x = self.copy_of_x(x)
return """ return """
PyObject* rval = NULL; PyObject* rval = NULL;
if (%(inplace)s) if (%(params)s->inplace)
{ {
if (%(x)s != %(out)s) if (%(x)s != %(out)s)
{ {
...@@ -1983,16 +1981,17 @@ class AdvancedIncSubtensor1(Op): ...@@ -1983,16 +1981,17 @@ class AdvancedIncSubtensor1(Op):
%(fail)s %(fail)s
} }
} }
if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, %(inc_or_set)d)) { if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, (1 - %(params)s->set_instead_of_inc))) {
%(fail)s; %(fail)s;
} }
Py_XDECREF(rval); Py_XDECREF(rval);
""" % locals() """ % dict(x=x, y=y, idx=idx, out=out, copy_of_x=copy_of_x,
params=sub['params'], fail=sub['fail'])
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (8,)
def perform(self, node, inp, out_): def perform(self, node, inp, out_, params):
# TODO opt to make this inplace # TODO opt to make this inplace
x, y, idx = inp x, y, idx = inp
out, = out_ out, = out_
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论