提交 a536464a authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4323 from abergeron/gpua_newelem

Use the new GpuElemwise from libgpuarray
......@@ -42,7 +42,7 @@ register_transfer(transfer)
def init_dev(dev, name=None):
v = pygpu.gpuarray.api_version()
if v[0] != -10000:
if v[0] != -9999:
raise RuntimeError("Wrong major API version for gpuarray:", v[0],
"Make sure Theano and libgpuarray/pygpu "
"are in sync.")
......
from __future__ import absolute_import, print_function, division
import os
import copy
import numpy
from six import integer_types
from six.moves import StringIO
import theano
from theano import tensor, gof
from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list
import theano.tensor.inplace
try:
import pygpu
......@@ -18,10 +15,9 @@ try:
except ImportError:
pass
from .type import GpuArrayType
from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
infer_context_name)
from .elemwise import GpuElemwise
class GpuSubtensor(HideC, Subtensor):
......@@ -168,7 +164,7 @@ class GpuSubtensor(HideC, Subtensor):
return (6,)
class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
class GpuIncSubtensor(IncSubtensor):
"""
Implement IncSubtensor on the gpu.
......@@ -181,45 +177,20 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
:meth:`copy_of_x`, etc. specialize the c_code for this Op.
"""
@property
def _f16_ok(self):
return self.iadd_node.op._f16_ok
def c_headers(self):
return self.iadd_node.op.c_headers()
def c_init_code(self):
return self.iadd_node.op.c_init_code()
def gpu_kernels(self, node, nodename):
subname = nodename + "_add_to_zview"
return self.iadd_node.op.gpu_kernels(self.iadd_node, subname)
_f16_ok = True
params_type = gpu_context_type
def make_node(self, x, y, *inputs):
ctx_name = infer_context_name(x, y)
x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name)
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
op = copy.copy(self)
ret = gof.Apply(op, [x, y] + rval.inputs[2:], [x.type()])
op.create_iadd_node(ret)
ret = gof.Apply(self, [x, y] + rval.inputs[2:], [x.type()])
return ret
def get_params(self, node):
return node.outputs[0].type.context
def create_iadd_node(self, node):
# We store an iadd_node in the op that contains the info needed
# for the inplace add.
cop = theano.tensor.inplace.add_inplace
gop = GpuElemwise(cop.scalar_op, copy.copy(cop.inplace_pattern),
"Gpu" + cop.name, cop.nfunc_spec)
y = node.inputs[1]
xview = y.type()
iadd_node = gop(xview, y).owner
self.iadd_node = iadd_node
def perform(self, node, inputs, out_, ctx):
out, = out_
x, y = inputs[:2]
......@@ -261,18 +232,6 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
x.__setitem__(cdata, y)
out[0] = x
def __setstate__(self, d):
self.__dict__.update(d)
owner = getattr(self, "owner", None)
if owner:
self.create_iadd_node(owner)
def __getstate__(self):
d = copy.copy(self.__dict__)
if "iadd_node" in d:
d.pop('iadd_node')
return d
def do_type_checking(self, node):
"""
Should raise NotImplementedError if c_code does not support
......@@ -365,47 +324,52 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
"""
return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals()
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/error.h>', '<gpuarray/array.h>',
'<gpuarray/elemwise.h>']
def c_support_code_struct(self, node, nodename):
gop = self.iadd_node.op
sub_name = nodename + "_add_to_zview"
ret = gop.c_support_code_struct(self.iadd_node, sub_name)
ret += """
PyGpuArrayObject* inc_sub_iadd_%(nodename)s(PyGpuArrayObject* dst,
PyGpuArrayObject* src){
PyGpuArrayObject* ret = NULL;
""" % locals()
inputs = ["dst", "src"]
outputs = ["ret"]
sub = {"fail": "return NULL;", "params": "dst->context"}
ret += gop.c_code(self.iadd_node, sub_name, inputs, outputs, sub)
ret += """
return ret;
return "\nGpuElemwise *iadd;\n"
def c_init_code_struct(self, node, name, sub):
return """
gpuelemwise_arg args[2] = {{0}};
args[0].name = "a";
args[0].typecode = %(type1)s;
args[0].flags = GE_READ|GE_WRITE;
args[1].name = "b";
args[1].typecode = %(type2)s;
args[1].flags = GE_READ;
iadd = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, "", "a += b",
2, args, %(nd)s, 0);
if (iadd == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
%(fail)s
}
"""
return ret
""" % dict(ctx=sub['params'], fail=sub['fail'],
type1=node.inputs[0].type.typecode,
type2=node.inputs[1].type.typecode,
nd=node.inputs[1].ndim)
def add_to_zview(self, nodename, x, fail):
return """
PyGpuArrayObject * add_result = inc_sub_iadd_%(nodename)s(zview, %(x)s);
if (! add_result )
{
void *args[2];
args[0] = &zview->ga;
args[1] = &%(x)s->ga;
if (GpuElemwise_call(iadd, args, GE_BROADCAST) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error doing inplace add");
Py_DECREF(zview);
%(fail)s;
}
else
{
Py_DECREF(add_result);
%(fail)s
}
}
""" % locals()
def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
elemwise_version = self.iadd_node.c_code_cache_version()
if not parent_version or not elemwise_version:
if not parent_version:
return
return parent_version + elemwise_version + (3,)
return parent_version + (5,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
......
......@@ -18,40 +18,18 @@ from pygpu import ndgpuarray as gpuarray
# This is actually a test for GpuElemwise
class test_gpu_Broadcast(test_elemwise.test_Broadcast):
op = GpuElemwise
type = GpuArrayType
cop = GpuElemwise
ctype = GpuArrayType
# The order is important
linkers = [gof.PerformLinker, gof.CLinker]
def setUp(self):
if get_context(test_ctx_name).kind != 'cuda':
self.linkers = [gof.PerformLinker]
def rand_val(self, shp):
return rand_gpuarray(*shp, **dict(cls=gpuarray))
def rand_cval(self, shp):
return rand_gpuarray(*shp, **dict(cls=gpuarray))
def test_c(self):
if get_context(test_ctx_name).kind != 'cuda':
raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c()
def test_c_inplace(self):
if get_context(test_ctx_name).kind != 'cuda':
raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c_inplace()
def test_elemwise_pow():
# Test that GpuElemwise(pow) can compile with any combination of integer
# or float input dtype.
if get_context(test_ctx_name).kind != 'cuda':
raise SkipTest("Cuda specific tests")
dtypes = ["uint8", "uint16", "uint32", "uint64",
"int8", "int16", "int32", "int64",
"float16", "float32", "float64"]
......@@ -65,10 +43,10 @@ def test_elemwise_pow():
output = base ** exp
f = theano.function([base, exp], output)
# Call the function to make sure the output is valid
base_val = numpy.random.randint(0, 5, size=10).astype(dtype_base)
exp_val = numpy.random.randint(0, 3, size=10).astype(dtype_exp)
# Call the function to make sure the output is valid
out = f(base_val, exp_val)
expected_out = base_val ** exp_val
assert_allclose(out, expected_out)
......
......@@ -166,10 +166,12 @@ class test_Broadcast(unittest.TestCase):
linkers = [gof.PerformLinker, gof.CLinker]
def rand_val(self, shp):
return numpy.asarray(numpy.random.rand(*shp))
return numpy.asarray(numpy.random.rand(*shp),
dtype=theano.config.floatX)
def rand_cval(self, shp):
return numpy.asarray(numpy.random.rand(*shp))
return numpy.asarray(numpy.random.rand(*shp),
dtype=theano.config.floatX)
def setUp(self):
unittest_tools.seed_rng()
......@@ -189,8 +191,10 @@ class test_Broadcast(unittest.TestCase):
((2, 3, 4, 5), (1, 3, 1, 5)),
((2, 3, 4, 5), (1, 1, 1, 1)),
((), ())]:
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
xv = rand_val(xsh)
......@@ -202,8 +206,10 @@ class test_Broadcast(unittest.TestCase):
# test Elemwise.infer_shape
# the Shape op doesn't implement c_code!
if isinstance(linker, gof.PerformLinker):
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.add)(x, y)
f = copy(linker).accept(FunctionGraph(
[x, y], [e.shape])).make_function()
......@@ -218,8 +224,10 @@ class test_Broadcast(unittest.TestCase):
((2, 3, 4, 5), (1, 3, 1, 5)),
((2, 3, 4, 5), (1, 1, 1, 1)),
((), ())]:
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
xv = rand_val(xsh)
......@@ -232,8 +240,10 @@ class test_Broadcast(unittest.TestCase):
# test Elemwise.infer_shape
# the Shape op doesn't implement c_code!
if isinstance(linker, gof.PerformLinker):
x = type('float64', [(entry == 1) for entry in xsh])('x')
y = type('float64', [(entry == 1) for entry in ysh])('y')
x = type(theano.config.floatX,
[(entry == 1) for entry in xsh])('x')
y = type(theano.config.floatX,
[(entry == 1) for entry in ysh])('y')
e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = copy(linker).accept(FunctionGraph(
[x, y], [e.shape])).make_function()
......@@ -267,13 +277,15 @@ class test_Broadcast(unittest.TestCase):
def test_fill(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0])('x')
y = self.ctype('float64', [1, 1])('y')
for linker, op in zip(self.linkers, [self.op, self.cop]):
for linker, op, t, rval in zip(self.linkers, [self.op, self.cop],
[self.type, self.ctype],
[self.rand_val, self.rand_cval]):
x = t(theano.config.floatX, [0, 0])('x')
y = t(theano.config.floatX, [1, 1])('y')
e = op(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
f = linker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((5, 5))
yv = self.rand_cval((1, 1))
xv = rval((5, 5))
yv = rval((1, 1))
f(xv, yv)
assert (xv == yv).all()
......@@ -292,24 +304,28 @@ class test_Broadcast(unittest.TestCase):
def test_weird_strides(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0, 0, 0, 0])('x')
y = self.ctype('float64', [0, 0, 0, 0, 0])('y')
for linker, op in zip(self.linkers, [self.op, self.cop]):
for linker, op, t, rval in zip(self.linkers, [self.op, self.cop],
[self.type, self.ctype],
[self.rand_val, self.rand_cval]):
x = t(theano.config.floatX, [0, 0, 0, 0, 0])('x')
y = t(theano.config.floatX, [0, 0, 0, 0, 0])('y')
e = op(scalar.add)(x, y)
f = linker().accept(FunctionGraph([x, y], [e])).make_function()
xv = self.rand_cval((2, 2, 2, 2, 2))
yv = self.rand_cval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2)
xv = rval((2, 2, 2, 2, 2))
yv = rval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2)
zv = xv + yv
assert (f(xv, yv) == zv).all()
def test_same_inputs(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = self.ctype('float64', [0, 0])('x')
for linker, op in zip(self.linkers, [self.op, self.cop]):
for linker, op, t, rval in zip(self.linkers, [self.op, self.cop],
[self.type, self.ctype],
[self.rand_val, self.rand_cval]):
x = t(theano.config.floatX, [0, 0])('x')
e = op(scalar.add)(x, x)
f = linker().accept(FunctionGraph([x], [e])).make_function()
xv = self.rand_cval((2, 2))
xv = rval((2, 2))
zv = xv + xv
assert (f(xv) == zv).all()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论