提交 7f36ca79，作者：abergeron

Merge pull request #2930 from carriepl/gpuarray_elemwise_pow

Add opt for gpuarray.GpuElemwise so exponent of pow has same dtype as output
......@@ -189,8 +189,8 @@ class GpuElemwise(HideC, Elemwise):
pass
for npy, ga in [("npy_uint8", "ga_ubyte"),
("npy_uint16", "ga_ushort"),
("npy_uin32", "ga_uint"),
("npy_uin64", "ga_ulong"),
("npy_uint32", "ga_uint"),
("npy_uint64", "ga_ulong"),
("npy_int8", "ga_byte"),
("npy_int16", "ga_short"),
("npy_int32", "ga_int"),
......
......@@ -14,13 +14,15 @@ from theano.gof import (local_optimizer, EquilibriumDB,
SequenceDB, Optimizer, toolbox)
from theano.gof.optdb import LocalGroupDB
from theano.scalar.basic import Scalar, Pow, Cast
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.nnet.conv import ConvOp
from theano.tests.breakpoint import PdbBreakpoint
from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host,
from .basic_ops import (as_gpuarray_variable,
host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape,
......@@ -262,10 +264,38 @@ def local_gpu_elemwise(node):
name = op.name
if name:
name = 'Gpu' + name
res = GpuElemwise(scal_op, name=name,
inplace_pattern=copy.copy(op.inplace_pattern),
nfunc_spec=op.nfunc_spec)
return res
# If the elemwise operation is a pow, casts might be required on the
# inputs and or outputs because only the (float, float)->float and
# (double, double)->double cases are implemented at the moment.
if isinstance(op.scalar_op, Pow):
# Only transfer the computation on the gpu if the output dtype is
# floating point. Else, give up on the transfer to the gpu.
out_dtype = node.outputs[0].dtype
if out_dtype not in ['float16', 'float32', 'float64']:
return
# Transfer the inputs on the GPU and cast them to the right dtype.
new_inputs = []
for inp in node.inputs:
if inp.dtype != out_dtype:
gpu_cast_op = GpuElemwise(Cast(Scalar(out_dtype)))
new_inputs.append(gpu_cast_op(as_gpuarray_variable(inp)))
else:
new_inputs.append(as_gpuarray_variable(inp))
# Perform the exponent on the gpu and transfer the output back to the
# cpu.
gpu_output = res(*new_inputs)
cpu_output = host_from_gpu(gpu_output)
return [cpu_output]
else:
return res
def max_inputs_to_GpuElemwise(node):
......
import numpy
import theano
from theano import scalar, gof
from theano.tests.unittest_tools import SkipTest
from theano.tests.unittest_tools import SkipTest, assert_allclose
from theano.tensor.tests.test_elemwise import (test_Broadcast, test_DimShuffle,
test_CAReduce, T_reduce_dtype)
......@@ -46,6 +48,35 @@ class test_gpu_Broadcast(test_Broadcast):
super(test_gpu_Broadcast, self).test_c_inplace()
def test_elemwise_pow():
    """GpuElemwise(pow) must compile and run for every pair of integer or
    float input dtypes, producing the same result as numpy's ``**``."""
    # This exercises CUDA-specific code paths; skip on any other backend.
    if not theano.sandbox.gpuarray.init_dev.device.startswith('cuda'):
        raise SkipTest("Cuda specific tests")
    all_dtypes = ("uint8", "uint16", "uint32", "uint64",
                  "int8", "int16", "int32", "int64",
                  "float16", "float32", "float64")
    for d_base in all_dtypes:
        for d_exp in all_dtypes:
            # Build and compile base ** exp for this dtype combination.
            b = theano.tensor.vector(dtype=d_base)
            e = theano.tensor.vector(dtype=d_exp)
            fn = theano.function([b, e], b ** e)
            # Small non-negative values keep integer pow well-defined
            # and avoid overflow in the narrow dtypes.
            b_val = numpy.random.randint(0, 5, size=10).astype(d_base)
            e_val = numpy.random.randint(0, 3, size=10).astype(d_exp)
            # The compiled output must match numpy's reference result.
            assert_allclose(fn(b_val, e_val), b_val ** e_val)
class test_GpuDimShuffle(test_DimShuffle):
op = GpuDimShuffle
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论