Commit 8d2ea245 authored by Frédéric Bastien, committed by GitHub

Merge pull request #6218 from tfjgeorge/alloc_ops_params

Alloc ops params
...@@ -8,6 +8,8 @@ import theano ...@@ -8,6 +8,8 @@ import theano
from theano import Op, Apply, Type, Variable from theano import Op, Apply, Type, Variable
from theano import tensor, config from theano import tensor, config
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from theano.scalar import (bool as bool_t,
int32 as int32_t)
from theano.tensor.basic import ( from theano.tensor.basic import (
Alloc, AllocEmpty, alloc_validate_shape, Join, Split) Alloc, AllocEmpty, alloc_validate_shape, Join, Split)
...@@ -808,14 +810,15 @@ class GpuAlloc(HideC, Alloc): ...@@ -808,14 +810,15 @@ class GpuAlloc(HideC, Alloc):
__props__ = ('memset_0', 'context_name') __props__ = ('memset_0', 'context_name')
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = ParamsType(context=gpu_context_type, memset_0=bool_t)
def __init__(self, context_name, memset_0=False):
    # context_name selects which GPU context the allocation lives in;
    # memset_0 marks that the op may implement the fill with a plain
    # memset (the C code calls GpuArray_memset(..., 0) on this path,
    # so presumably it is only set when the fill value is zero —
    # NOTE(review): confirm with the optimization that sets memset_0).
    self.context_name = context_name
    self.memset_0 = memset_0
def get_params(self, node):
    # Bundle the runtime GPU context together with the memset flag so
    # both perform() and the generated C code can read them from a
    # single params object (see params_type above).
    return self.params_type.get_params(context=get_context(self.context_name),
                                       memset_0=self.memset_0)
def __str__(self): def __str__(self):
# Hide the memset parameter when not used to prevent confusion. # Hide the memset parameter when not used to prevent confusion.
...@@ -837,15 +840,15 @@ class GpuAlloc(HideC, Alloc): ...@@ -837,15 +840,15 @@ class GpuAlloc(HideC, Alloc):
def c_headers(self):
    """Return the list of C headers required by this op's generated code."""
    headers = ['<numpy_compat.h>']
    return headers
def perform(self, node, inputs, outs, params):
    # inputs[0] is the fill value, inputs[1:] are the target shape entries.
    out, = outs
    v = inputs[0]
    sh = tuple(map(int, inputs[1:]))
    if out[0] is None or out[0].shape != sh:
        if self.memset_0:
            # gpuarray.zeros can be faster than empty followed by a fill
            # (same rationale as the pygpu_zeros path in c_code).
            # NOTE(review): reconstruction assumes no extra fill is needed
            # here because memset_0 implies the fill value is 0 — confirm.
            out[0] = gpuarray.zeros(sh, dtype=v.dtype, context=params.context)
        else:
            out[0] = gpuarray.empty(sh, dtype=v.dtype, context=params.context)
            out[0][...] = v
    else:
        # Existing output already has the right shape: reuse the buffer
        # and just broadcast the fill value into it.
        out[0][...] = v
...@@ -855,7 +858,6 @@ class GpuAlloc(HideC, Alloc): ...@@ -855,7 +858,6 @@ class GpuAlloc(HideC, Alloc):
ndim = len(inp[1:]) ndim = len(inp[1:])
zz, = out zz, = out
memset_0 = int(self.memset_0)
code = """ code = """
int i; int i;
size_t %(name)s_shape[%(ndim)s]; size_t %(name)s_shape[%(ndim)s];
...@@ -873,12 +875,12 @@ class GpuAlloc(HideC, Alloc): ...@@ -873,12 +875,12 @@ class GpuAlloc(HideC, Alloc):
for (i = 0; i < %(ndim)s; i++) for (i = 0; i < %(ndim)s; i++)
need_new_out |= %(zz)s->ga.dimensions[i] != %(name)s_shape[i]; need_new_out |= %(zz)s->ga.dimensions[i] != %(name)s_shape[i];
if (need_new_out && (%(memset_0)s)) { if (need_new_out && (%(params)s->memset_0)) {
//pygpu_zeros can be faster then empty followed by memset. //pygpu_zeros can be faster then empty followed by memset.
Py_XDECREF(%(zz)s); Py_XDECREF(%(zz)s);
%(zz)s = pygpu_zeros(%(ndim)s, %(name)s_shape, %(zz)s = pygpu_zeros(%(ndim)s, %(name)s_shape,
%(vv)s->ga.typecode, GA_C_ORDER, %(vv)s->ga.typecode, GA_C_ORDER,
%(ctx)s, Py_None); %(params)s->context, Py_None);
if (!%(zz)s) { if (!%(zz)s) {
%(fail)s %(fail)s
} }
...@@ -887,12 +889,12 @@ class GpuAlloc(HideC, Alloc): ...@@ -887,12 +889,12 @@ class GpuAlloc(HideC, Alloc):
Py_XDECREF(%(zz)s); Py_XDECREF(%(zz)s);
%(zz)s = pygpu_empty(%(ndim)s, %(name)s_shape, %(zz)s = pygpu_empty(%(ndim)s, %(name)s_shape,
%(vv)s->ga.typecode, GA_C_ORDER, %(vv)s->ga.typecode, GA_C_ORDER,
%(ctx)s, Py_None); %(params)s->context, Py_None);
if (!%(zz)s) { if (!%(zz)s) {
%(fail)s %(fail)s
} }
} }
if (%(memset_0)s && GpuArray_ISONESEGMENT(&%(zz)s->ga)) if (%(params)s->memset_0 && GpuArray_ISONESEGMENT(&%(zz)s->ga))
{ {
int err = GpuArray_memset(&%(zz)s->ga, 0); int err = GpuArray_memset(&%(zz)s->ga, 0);
if (err != GA_NO_ERROR) if (err != GA_NO_ERROR)
...@@ -910,8 +912,8 @@ class GpuAlloc(HideC, Alloc): ...@@ -910,8 +912,8 @@ class GpuAlloc(HideC, Alloc):
%(fail)s %(fail)s
} }
} }
""" % dict(name=name, ndim=ndim, zz=zz, vv=vv, ctx=sub['params'], """ % dict(name=name, ndim=ndim, zz=zz, vv=vv, params=sub['params'],
fail=sub['fail'], memset_0=memset_0) fail=sub['fail'])
return code return code
...@@ -957,14 +959,20 @@ class GpuAllocEmpty(HideC, AllocEmpty): ...@@ -957,14 +959,20 @@ class GpuAllocEmpty(HideC, AllocEmpty):
""" """
__props__ = ('dtype', 'context_name') __props__ = ('dtype', 'context_name')
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = ParamsType(context=gpu_context_type,
typecode=int32_t)
def __init__(self, dtype, context_name):
    # dtype of the (uninitialized) output buffer and the name of the
    # GPU context in which the allocation happens; both are __props__,
    # so they also define op equality/hashing.
    self.dtype = dtype
    self.context_name = context_name
@property
def typecode(self):
    # gpuarray typecode corresponding to self.dtype; stored in the
    # params struct (see params_type) and consumed by the generated
    # C code in place of a hard-coded type constant.
    return gpuarray.dtype_to_typecode(self.dtype)
def get_params(self, node):
    # Pack the GPU context and the gpuarray typecode into one params
    # object so both perform() and c_code can access them.
    return self.params_type.get_params(context=get_context(self.context_name),
                                       typecode=self.typecode)
def make_node(self, *shape): def make_node(self, *shape):
sh, bcast = alloc_validate_shape(shape) sh, bcast = alloc_validate_shape(shape)
...@@ -980,11 +988,11 @@ class GpuAllocEmpty(HideC, AllocEmpty): ...@@ -980,11 +988,11 @@ class GpuAllocEmpty(HideC, AllocEmpty):
self.perform(node, inputs, out_, ctx) self.perform(node, inputs, out_, ctx)
out_[0][0][:] = -123456789 out_[0][0][:] = -123456789
def perform(self, node, inputs, out_, params):
    """Allocate an uninitialized GPU array of the requested shape.

    ``inputs`` holds the shape entries; the previously allocated output
    is reused when it already has the right shape.
    """
    out = out_[0]
    # BUG FIX: the shape must be a tuple.  ``out[0].shape`` is a tuple,
    # and ``tuple != list`` is always True in Python, so comparing
    # against a list forced a fresh pygpu.empty() on every call and the
    # reuse branch below never triggered.  The CPU AllocEmpty.perform
    # already builds a tuple for exactly this comparison.
    sh = tuple(int(i) for i in inputs)
    if out[0] is None or out[0].shape != sh:
        out[0] = pygpu.empty(sh, dtype=self.dtype, context=params.context)
    # if out[0] is the right shape, we just return it
# if out[0] is the right shape, we just return it # if out[0] is the right shape, we just return it
def c_headers(self): def c_headers(self):
...@@ -1009,17 +1017,16 @@ shape[%(i)s] = ((dtype_%(shp_i)s *)PyArray_DATA(%(shp_i)s))[0]; ...@@ -1009,17 +1017,16 @@ shape[%(i)s] = ((dtype_%(shp_i)s *)PyArray_DATA(%(shp_i)s))[0];
""" % dict(i=i, shp_i=shp_i)) """ % dict(i=i, shp_i=shp_i))
code.append(""" code.append("""
if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER, if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(params)s->typecode, GA_C_ORDER,
%(ctx)s)) { %(params)s->context)) {
%(fail)s %(fail)s
} }
""" % dict(zz=zz, ndim=ndim, type=gpuarray.dtype_to_typecode(self.dtype), """ % dict(zz=zz, ndim=ndim, fail=fail, params=sub['params']))
fail=fail, ctx=sub['params']))
return ''.join(code) return ''.join(code)
def c_code_cache_version(self):
    """Version tag for the compiled-code cache; bump when c_code changes."""
    version = 2
    return (version,)
def do_constant_folding(self, node):
    """Opt out of constant folding: the output is uninitialized memory."""
    return False
......
...@@ -17,6 +17,7 @@ from theano import gof ...@@ -17,6 +17,7 @@ from theano import gof
from theano.gof import Apply, Constant, Op, Variable, ParamsType from theano.gof import Apply, Constant, Op, Variable, ParamsType
from theano.gof.type import Generic from theano.gof.type import Generic
from theano.scalar import int32 as int32_t
from theano.tensor import elemwise from theano.tensor import elemwise
from theano.tensor.var import (AsTensorError, TensorVariable, from theano.tensor.var import (AsTensorError, TensorVariable,
TensorConstant, TensorConstantSignature, TensorConstant, TensorConstantSignature,
...@@ -6632,13 +6633,18 @@ class Choose(Op): ...@@ -6632,13 +6633,18 @@ class Choose(Op):
class AllocEmpty(gof.Op): class AllocEmpty(gof.Op):
"""Implement Alloc on the cpu, but without initializing memory.""" """Implement Alloc on the cpu, but without initializing memory."""
__props__ = ("dtype",) __props__ = ("dtype", )
params_type = ParamsType(typecode=int32_t)
# specify the type of the data # specify the type of the data
def __init__(self, dtype):
    # dtype must be a string; it is lower-cased so that __props__-based
    # equality/hash treats e.g. 'Float32' and 'float32' as the same op.
    assert isinstance(dtype, str), dtype
    self.dtype = dtype.lower()
@property
def typecode(self):
    """NumPy type number for ``self.dtype`` (fed to PyArray_EMPTY via params)."""
    dt = np.dtype(self.dtype)
    return dt.num
def make_node(self, *shape): def make_node(self, *shape):
shape, bcast = alloc_validate_shape(shape) shape, bcast = alloc_validate_shape(shape)
otype = TensorType(dtype=self.dtype, broadcastable=bcast) otype = TensorType(dtype=self.dtype, broadcastable=bcast)
...@@ -6661,18 +6667,18 @@ class AllocEmpty(gof.Op): ...@@ -6661,18 +6667,18 @@ class AllocEmpty(gof.Op):
self.perform(node, inputs, out_) self.perform(node, inputs, out_)
out_[0][0].fill(-123456789) out_[0][0].fill(-123456789)
def perform(self, node, inputs, out_, params):
    """Allocate an uninitialized ndarray of the requested shape.

    ``inputs`` holds the shape entries; the previous output is reused
    when it already has the right shape.
    """
    out_storage, = out_
    shape = tuple(int(dim) for dim in inputs)
    cached = out_storage[0]
    if cached is None or cached.shape != shape:
        out_storage[0] = np.empty(shape, dtype=self.dtype)
def c_code(self, node, name, inputs, out_, sub): def c_code(self, node, name, inputs, out_, sub):
dtype = "NPY_" + self.dtype.upper()
out, = out_ out, = out_
fail = sub['fail'] fail = sub['fail']
shps = inputs shps = inputs
nd = len(shps) nd = len(shps)
params = sub['params']
str = "npy_intp dims[%(nd)s];\n" % locals() str = "npy_intp dims[%(nd)s];\n" % locals()
for idx, sh in enumerate(shps): for idx, sh in enumerate(shps):
str += "dims[%(idx)s] =" \ str += "dims[%(idx)s] =" \
...@@ -6691,7 +6697,7 @@ class AllocEmpty(gof.Op): ...@@ -6691,7 +6697,7 @@ class AllocEmpty(gof.Op):
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = (PyArrayObject*)PyArray_EMPTY(%(nd)s, %(out)s = (PyArrayObject*)PyArray_EMPTY(%(nd)s,
dims, dims,
%(dtype)s, %(params)s->typecode,
0); 0);
if (!%(out)s) if (!%(out)s)
{ {
...@@ -6706,7 +6712,7 @@ class AllocEmpty(gof.Op): ...@@ -6706,7 +6712,7 @@ class AllocEmpty(gof.Op):
return [node.inputs] return [node.inputs]
def c_code_cache_version(self):
    """Version tag for the compiled-code cache; bump when c_code changes."""
    version = 4
    return (version,)
def do_constant_folding(self, node):
    """Opt out of constant folding: the output is uninitialized memory."""
    return False
......
Markdown formatting
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment.