提交 b730df38 authored 作者: --global's avatar --global

Only transfer the Pow (exponentiation) elemwise op to the GPU if the output dtype is floating point

上级 12ace374
...@@ -21,7 +21,8 @@ from theano.tensor.nnet.conv import ConvOp ...@@ -21,7 +21,8 @@ from theano.tensor.nnet.conv import ConvOp
from theano.tests.breakpoint import PdbBreakpoint from theano.tests.breakpoint import PdbBreakpoint
from .type import GpuArrayType, GpuArrayConstant from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host, from .basic_ops import (as_gpuarray_variable,
host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape, gpu_alloc, GpuAlloc, GpuReshape,
...@@ -272,36 +273,25 @@ def local_gpu_elemwise(node): ...@@ -272,36 +273,25 @@ def local_gpu_elemwise(node):
# inputs and/or outputs because only the (float, float)->float and # inputs and/or outputs because only the (float, float)->float and
# (double, double)->double cases are implemented at the moment. # (double, double)->double cases are implemented at the moment.
if isinstance(op.scalar_op, Pow): if isinstance(op.scalar_op, Pow):
old_out_dtype = node.outputs[0].dtype
old_inp_dtypes = [inp.dtype for inp in node.inputs]
# Upcast the input dtypes with 'float32' to obtain a floating-point # Only transfer the computation on the gpu if the output dtype is
# dtype in which to do the computation. # floating point. Else, give up on the transfer to the gpu.
# TODO : Currently, a bug in GpuElemwise prevents support for float16. out_dtype = node.outputs[0].dtype
# It should be fixed and then the upcast below can use 'float16' if out_dtype not in ['float16', 'float32', 'float64']:
# instead of 'float32' return
new_out_dtype = upcast("float32", *old_inp_dtypes)
# Transfer the inputs on the GPU and cast them to the right dtype # Transfer the inputs on the GPU and cast them to the right dtype.
new_inputs = [] new_inputs = []
for inp in node.inputs: for inp in node.inputs:
if inp.dtype != new_out_dtype: if inp.dtype != out_dtype:
gpu_cast_op = GpuElemwise(Cast(Scalar(new_out_dtype))) gpu_cast_op = GpuElemwise(Cast(Scalar(out_dtype)))
new_inputs.append(gpu_cast_op(gpu_from_host(inp))) new_inputs.append(gpu_cast_op(as_gpuarray_variable(inp)))
else: else:
new_inputs.append(gpu_from_host(inp)) new_inputs.append(as_gpuarray_variable(inp))
# Perform the exponent on the gpu
casted_gpu_output = res(*new_inputs)
# If needed, cast the output back to the right dtype and transfer it
# to the cpu.
if casted_gpu_output.dtype != old_out_dtype:
gpu_cast_op = GpuElemwise(Cast(Scalar(old_out_dtype)))
gpu_output = gpu_cast_op(casted_gpu_output)
else:
gpu_output = casted_gpu_output
# Perform the exponent on the gpu and transfer the output back to the
# cpu.
gpu_output = res(*new_inputs)
cpu_output = host_from_gpu(gpu_output) cpu_output = host_from_gpu(gpu_output)
return [cpu_output] return [cpu_output]
else: else:
......
import numpy
import theano import theano
from theano import scalar, gof from theano import scalar, gof
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest, assert_allclose
from theano.tensor.tests.test_elemwise import (test_Broadcast, test_DimShuffle, from theano.tensor.tests.test_elemwise import (test_Broadcast, test_DimShuffle,
test_CAReduce, T_reduce_dtype) test_CAReduce, T_reduce_dtype)
...@@ -59,12 +61,21 @@ def test_elemwise_pow(): ...@@ -59,12 +61,21 @@ def test_elemwise_pow():
for dtype_base in dtypes: for dtype_base in dtypes:
for dtype_exp in dtypes: for dtype_exp in dtypes:
# Compile a gpu function with the specified dtypes # Compile a gpu function with the specified dtypes
base = theano.tensor.vector(dtype=dtype_base) base = theano.tensor.vector(dtype=dtype_base)
exp = theano.tensor.vector(dtype=dtype_exp) exp = theano.tensor.vector(dtype=dtype_exp)
output = base ** exp output = base ** exp
f = theano.function([base, exp], output) f = theano.function([base, exp], output)
# Call the function to make sure the output is valid
base_val = numpy.random.randint(0, 5, size=10).astype(dtype_base)
exp_val = numpy.random.randint(0, 3, size=10).astype(dtype_exp)
out = f(base_val, exp_val)
expected_out = base_val ** exp_val
assert_allclose(out, expected_out)
class test_GpuDimShuffle(test_DimShuffle): class test_GpuDimShuffle(test_DimShuffle):
op = GpuDimShuffle op = GpuDimShuffle
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论