提交 3bd237f5 authored 作者: sentient07's avatar sentient07

Added fft opts to new opt, fixed broadcasting test, few cleanups

上级 ebcf5615
......@@ -9,7 +9,7 @@ from theano.gradient import DisconnectedType
from theano.gpuarray import (basic_ops, GpuArrayType)
import theano.tensor.fft
from .opt import register_opt, op_lifter
from .opt import register_opt, op_lifter, register_opt2
try:
import pygpu
......@@ -373,10 +373,12 @@ def _unitary(norm):
if scikits_cuda_available:
@register_opt('fast_compile')
@op_lifter([theano.tensor.fft.RFFTOp])
def local_curfft_op(node, context_name):
@register_opt2([theano.tensor.fft.RFFTOp], 'fast_compile')
def local_gpua_curfft_op(node, context_name):
return curfft_op
@register_opt('fast_compile')
@op_lifter([theano.tensor.fft.IRFFTOp])
def local_cuirfft_op(node, context_name):
@register_opt2([theano.tensor.fft.IRFFTOp], 'fast_compile')
def local_gpua_cuirfft_op(node, context_name):
return cuirfft_op
......@@ -112,11 +112,11 @@ def register_opt2(tracks, *tags, **kwargs):
Parameters
----------
tracks : Op
tracks : List of Op classes, or Op instance, or None
The Node's Op to which optimization is being applied.
tags : String
The tag optimization mode to which the optimizer will be registered.
The optimization tag to which the optimizer will be registered.
'''
def f(local_opt):
......@@ -180,10 +180,10 @@ def op_lifter(OP, cuda_only=False):
context_name = i.owner.inputs[0].type.context_name
replace = True
break
clients = [c for o in node.outputs for c in o.clients]
if not replace:
# We replace if *all* clients are on the GPU
clients = [c for o in node.outputs for c in o.clients]
replace = len(clients) != 0
for c, idx in clients:
if (c == 'output' or
......@@ -273,7 +273,7 @@ class GraphToGPU(NavigatorOptimizer):
Parameters
----------
local_optimizers_all : List or Set
local_optimizers_all : List or SortedSet
The local optimizations to apply to a node.
local_optimizers_map : Dict
Dictionary object containing the mapping of Op to list of
......@@ -349,7 +349,6 @@ class GraphToGPU(NavigatorOptimizer):
self.local_optimizers_map.get(type(c.op), []))):
move_to_GPU = True
new_ops = None
outputs = []
# Apply the lifter
if move_to_GPU:
for lopt in (self.local_optimizers_map.get(node.op, []) +
......@@ -365,24 +364,23 @@ class GraphToGPU(NavigatorOptimizer):
if new_ops:
process_count[lopt] += 1
break
if not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i)
for i in node.inputs])
outputs = []
if isinstance(new_ops, theano.Op):
outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True)
elif not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs])
outputs = newnode.outputs
elif isinstance(new_ops, (tuple, list)):
outputs = []
for o in new_ops:
outputs.append(o)
outputs = new_ops
elif isinstance(new_ops, theano.Variable):
outputs = [new_ops]
else:
outputs = new_ops(*[mapping[i] for i in node.inputs],
return_list=True)
if new_ops:
node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs))
for new_o, old_o in zip(outputs, node.outputs):
assert len(outputs) == len(node.outputs)
mapping[old_o] = new_o
new_nodes = []
......@@ -473,15 +471,6 @@ class GraphToGPU(NavigatorOptimizer):
prof2[0].local_optimizers_map)
new_opt = GraphToGPU(local_optimizers, local_optimizers_map)
def merge_list(l1, l2):
l = copy.copy(l1)
for idx, nb in enumerate(l2):
if idx < len(l):
l[idx] += nb
else:
l.append(nb)
return l
toposort_timing = prof1[1] + prof2[1]
time_opts = merge_dict(prof1[2], prof2[2])
node_created = merge_dict(prof1[3], prof2[3])
......@@ -583,7 +572,7 @@ def local_gpua_alloc(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([tensor.AllocEmpty])
@register_opt2([tensor.AllocEmpty], 'fast_compile')
def local_gpua_allocempty(op, context_name, inputs, outputs):
def local_gpua_alloc_empty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the
# CPU op props.
return gpu_alloc_empty(context_name, **op._props_dict())
......@@ -949,7 +938,7 @@ def local_gpua_subtensor_graph(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor])
@register_opt2([tensor.IncSubtensor], 'fast_compile')
def local_gpua_incsubtensor(op, context_name, inputs, outputs):
def local_gpua_inc_subtensor(op, context_name, inputs, outputs):
op = GpuIncSubtensor(op.idx_list, op.inplace,
op.set_instead_of_inc,
op.destroyhandler_tolerate_aliased)
......@@ -1229,11 +1218,11 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
def local_gpua_assert(op, context_name, inputs, outputs):
if isinstance(inputs[0].type, GpuArrayType):
return
return local_assert_graph(op, context_name, inputs, outputs)
return local_gpua_assert_graph(op, context_name, inputs, outputs)
@register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert_graph(op, context_name, inputs, outputs):
def local_gpua_assert_graph(op, context_name, inputs, outputs):
return [op(as_gpuarray_variable(inputs[0], context_name),
*inputs[1:])]
......@@ -1253,7 +1242,7 @@ theano.tensor.nnet.conv2d()
@register_opt('fast_compile')
@op_lifter([SparseBlockGemv])
@register_opt2([SparseBlockGemv], 'fast_compile')
def local_gpua_lift_sparseblockgemv(op, context_name, inputs, outputs):
def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
if op.inplace:
return gpu_sparse_block_gemv_inplace
else:
......@@ -1263,7 +1252,7 @@ def local_gpua_lift_sparseblockgemv(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([SparseBlockOuter])
@register_opt2([SparseBlockOuter], 'fast_compile')
def local_gpua_lift_sparseblockouter(op, context_name, inputs, outputs):
def local_gpua_sparseblockouter(op, context_name, inputs, outputs):
if op.inplace:
return gpu_sparse_block_outer_inplace
else:
......@@ -1289,7 +1278,7 @@ def local_inplace_sparseblockouter(node):
@op_lifter([AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
def local_gpua_lift_abstractconv2d(op, context_name, inputs, outputs):
def local_gpua_abstractconv2d(op, context_name, inputs, outputs):
if isinstance(outputs[0].type, GpuArrayType):
# Don't handle this node here, it's already on the GPU.
return
......
......@@ -187,7 +187,7 @@ def test_local_gpualloc_empty():
ii = theano.tensor.iscalar()
# Test with vector
# Should not be moved as the only client is the uutput
# Should not be moved as the only client is the output
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
......
......@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object):
@register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg1(op, context_name, inputs, outputs):
def local_gpua_mrg_graph(op, context_name, inputs, outputs):
if (type(op) == mrg_uniform and
isinstance(inputs[0].type, GpuArrayType)):
outs = GPUA_mrg_uniform.new(inputs[0],
......@@ -1566,9 +1566,8 @@ def local_gpua_mrg1(op, context_name, inputs, outputs):
@register_gpua('fast_compile')
@local_optimizer([mrg_uniform])
def local_gpua_mrg(node):
# TODO : need description for function
context_name = infer_context_name(*node.inputs)
return local_gpua_mrg1(node.op, context_name, node.inputs, node.outputs)
return local_gpua_mrg_graph(node.op, context_name, node.inputs, node.outputs)
MRG_RNGs = (mrg_uniform, GPU_mrg_uniform, GPUA_mrg_uniform)
......
......@@ -7003,7 +7003,7 @@ class T_get_scalar_constant_value(unittest.TestCase):
assert get_scalar_constant_value(s) == 3
s = opt.Shape_i(1)(c)
assert get_scalar_constant_value(s) == 4
d = theano.tensor.constant(numpy.random.rand(1, 1))
d = theano.shared(numpy.random.randn(1,1), broadcastable=(True, True))
f = theano.tensor.basic.ScalarFromTensor()(opt.Shape_i(0)(d))
assert get_scalar_constant_value(f) == 1
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论