提交 7f3ea07c authored 作者: Guillaume Desjardins's avatar Guillaume Desjardins

Make erfinv work on the GPU.

上级 487aa554
...@@ -9,6 +9,8 @@ import copy, logging, StringIO, sys ...@@ -9,6 +9,8 @@ import copy, logging, StringIO, sys
import numpy import numpy
from theano.scalar.basic import upgrade_to_float_no_complex, complex_types
from theano.scalar.basic_scipy import Erfinv
from theano import Apply, Constant, Op, Type, Variable from theano import Apply, Constant, Op, Type, Variable
from theano import gof, scalar, tensor from theano import gof, scalar, tensor
...@@ -1021,3 +1023,20 @@ nd_collapse_[i]=0; ...@@ -1021,3 +1023,20 @@ nd_collapse_[i]=0;
#print sio.getvalue() #print sio.getvalue()
return sio.getvalue() return sio.getvalue()
class ErfinvGPU(Erfinv):
    """GPU variant of the scalar inverse-error-function op.

    CUDA's ``math_functions.h`` provides a device-side ``erfinv()``, so the
    C code emitted here can run inside a GpuElemwise kernel instead of
    requiring scipy on the host.
    """

    def c_libraries(self):
        # No extra libraries are needed: 'math.h' (which this previously
        # returned) is a header, not a linkable library, and would have
        # produced a bogus `-lmath.h` link flag.
        return []

    def c_headers(self):
        # math_functions.h supplies the device erfinv(); cublas_v2.h is
        # kept for the CUDA toolchain environment this op compiles under.
        return ['math_functions.h', 'cublas_v2.h']

    def c_code(self, node, name, inp, out, sub):
        """Return the C statement computing erfinv of the single input.

        Raises NotImplementedError for complex inputs, which erfinv does
        not support.
        """
        x, = inp
        z, = out
        if node.inputs[0].type in complex_types:
            # Report the actual offending type (the original passed the
            # builtin `type`, which is meaningless in the message).
            raise NotImplementedError('type not supported',
                                      node.inputs[0].type)
        return "%(z)s = erfinv(%(x)s);" % locals()


# Pre-built instance used by the GPU optimizer when substituting Erfinv.
erfinv_gpu = ErfinvGPU(upgrade_to_float_no_complex, name='erfinv_gpu')
...@@ -33,6 +33,8 @@ from theano.sandbox.cuda.nnet import ( ...@@ -33,6 +33,8 @@ from theano.sandbox.cuda.nnet import (
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmax, GpuSoftmaxWithBias) GpuSoftmax, GpuSoftmaxWithBias)
from theano.sandbox.cuda.elemwise import SupportCodeError from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.scalar.basic_scipy import Erfinv
from theano.sandbox.cuda.elemwise import ErfinvGPU, erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.scan_module import scan_utils, scan_op from theano.scan_module import scan_utils, scan_op
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
...@@ -177,11 +179,15 @@ def local_gpu_elemwise_0(node): ...@@ -177,11 +179,15 @@ def local_gpu_elemwise_0(node):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]): if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
try:
new_op = GpuElemwise(node.op.scalar_op) if isinstance(node.op.scalar_op, Erfinv):
except SupportCodeError: new_op = GpuElemwise(erfinv_gpu)
# This happens when scalar_op requires support code else:
return False try:
new_op = GpuElemwise(node.op.scalar_op)
except SupportCodeError:
# This happens when scalar_op requires support code
return False
# first establish that float32 can store all inputs # first establish that float32 can store all inputs
upcastable = set(['float32', 'int8', 'int16', 'uint8', upcastable = set(['float32', 'int8', 'int16', 'uint8',
...@@ -234,11 +240,16 @@ def local_gpu_elemwise_1(node): ...@@ -234,11 +240,16 @@ def local_gpu_elemwise_1(node):
elemwise_node = host_i.owner elemwise_node = host_i.owner
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
try:
new_op = GpuElemwise(elemwise_node.op.scalar_op) if isinstance(node.op.scalar_op, Erfinv):
except SupportCodeError: new_op = GpuElemwise(erfinv_gpu)
# This happens when scalar_op requires support code else:
return False try:
new_op = GpuElemwise(elemwise_node.op.scalar_op)
except SupportCodeError:
# This happens when scalar_op requires support code
return False
if all([i.dtype == 'float32' for i in elemwise_node.inputs]): if all([i.dtype == 'float32' for i in elemwise_node.inputs]):
gpu_elemwise = new_op(*[gpu_from_host(i) gpu_elemwise = new_op(*[gpu_from_host(i)
for i in elemwise_node.inputs]) for i in elemwise_node.inputs])
......
...@@ -17,6 +17,7 @@ if cuda.cuda_available == False: ...@@ -17,6 +17,7 @@ if cuda.cuda_available == False:
from theano.sandbox.cuda import basic_ops from theano.sandbox.cuda import basic_ops
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.scalar.basic_scipy import erfinv
if theano.config.mode=='FAST_COMPILE': if theano.config.mode=='FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
...@@ -368,6 +369,18 @@ def test_incsubtensor_mixed(): ...@@ -368,6 +369,18 @@ def test_incsubtensor_mixed():
client, idx = packed client, idx = packed
assert isinstance(client.op, cuda.GpuFromHost) assert isinstance(client.op, cuda.GpuFromHost)
def test_erfinvgpu():
    """Check that the GPU optimizer swaps Erfinv for ErfinvGPU and that
    the GPU and CPU results agree numerically."""
    inp = tensor.fmatrix()
    expr = tensor.Elemwise(erfinv)(inp)
    f_gpu = theano.function([inp], expr, mode=mode_with_gpu)
    f_cpu = theano.function([inp], expr, mode=mode_without_gpu)
    gpu_node = f_gpu.maker.fgraph.toposort()[1]
    assert isinstance(gpu_node.op, cuda.GpuElemwise)
    assert isinstance(gpu_node.op.scalar_op, cuda.elemwise.ErfinvGPU)
    data = numpy.random.rand(7, 8).astype('float32')
    assert numpy.allclose(f_gpu(data), f_cpu(data))
if __name__ == '__main__': if __name__ == '__main__':
test_gpualloc() test_gpualloc()
test_opt_gpujoin_onlyajoin() test_opt_gpujoin_onlyajoin()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论