提交 60ec239c authored 作者: nouiz's avatar nouiz

Merge pull request #1295 from lamblin/fix_erfinvgpuopt

Fix bug in erfinv GPU optimization
......@@ -8,15 +8,14 @@ import warnings
import numpy
import theano
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano import scalar as scal
from theano import tensor, compile, gof
from theano import tensor, gof
import theano.ifelse
from theano.compile import optdb
from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
Optimizer, toolbox, DestroyHandler,
InconsistencyError, EquilibriumOptimizer)
EquilibriumOptimizer)
from theano.gof.python25 import all, any
from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType
......@@ -34,9 +33,9 @@ from theano.sandbox.cuda.nnet import (
GpuSoftmax, GpuSoftmaxWithBias)
from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.scalar.basic_scipy import Erfinv
from theano.sandbox.cuda.elemwise import ErfinvGPU, erfinv_gpu
from theano.sandbox.cuda.elemwise import erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.scan_module import scan_utils, scan_op
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.blas import _is_real_vector, _is_real_matrix
#optdb.print_summary() # shows what is currently registered
......@@ -241,7 +240,7 @@ def local_gpu_elemwise_1(node):
# Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later
if isinstance(node.op.scalar_op, Erfinv):
if isinstance(elemwise_node.op.scalar_op, Erfinv):
new_op = GpuElemwise(erfinv_gpu)
else:
try:
......@@ -622,8 +621,8 @@ def local_gpu_careduce(node):
# Try to make a simpler pattern based on reshaping
# The principle is that if two adjacent dimensions have
# the same value in the reduce_mask, then we can reshape
# to make them a single dimension, do the reduction, and then
# reshape to get them back.
# to make them a single dimension, do the reduction, and
# then reshape to get them back.
shape_of = node.fgraph.shape_feature.shape_of
......@@ -641,7 +640,7 @@ def local_gpu_careduce(node):
new_greduce = GpuCAReduce(new_mask, scalar_op)
reshaped_x = x.reshape(tensor.stack(*new_in_shp))
gpu_reshaped_x = gpu_from_host(reshaped_x)
reshaped_gpu_inputs = [ gpu_reshaped_x ]
reshaped_gpu_inputs = [gpu_reshaped_x]
if new_greduce.supports_c_code(reshaped_gpu_inputs):
reduce_reshaped_x = host_from_gpu(
new_greduce(gpu_reshaped_x))
......@@ -655,11 +654,11 @@ def local_gpu_careduce(node):
return [unreshaped_reduce]
else:
print >> sys.stderr, \
"WARNING: local_gpu_careduce got type wrong"
"WARNING: local_gpu_careduce got type wrong"
return None
raise Exception(
"GpuCAReduce does not yet implement this pattern:",
"GpuCAReduce does not yet implement this pattern:",
pattern)
return False
......@@ -1020,6 +1019,7 @@ def local_gpu_conv(node):
float(op.imshp[1])))
cstride = int(numpy.ceil(op.imshp_logical[2] /
float(op.imshp[2])))
def make_graph(img, kern):
buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
img.shape[0], *op.imshp_logical)
......@@ -1027,6 +1027,7 @@ def local_gpu_conv(node):
img)
img = gpu_from_host(img)
return ret(img, kern)
return make_graph
return ret
......@@ -1344,7 +1345,6 @@ def local_gpualloc(node):
@register_opt()
@local_optimizer([tensor.Alloc])
def local_gpualloc_memset_0(node):
replace = False
if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
inp = node.inputs[0]
if (isinstance(inp, CudaNdarrayConstant) and
......@@ -1522,9 +1522,11 @@ def gpuScanOptimization(node):
local_fgraph = gof.FunctionGraph(tmp_in, tmp_out)
_cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
info['gpu_hash'] = hash(_cmodule_key)
def typeConstructor(broadcastable, dtype):
assert dtype == 'float32'
return CudaNdarrayType(broadcastable=broadcastable)
_outputs = scan_op.Scan(
scan_ins,
scan_outs,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论