提交 3bd237f5 作者: sentient07

Added fft opts to new opt, fixed broadcasting test, few cleanups

上级 ebcf5615
...@@ -9,7 +9,7 @@ from theano.gradient import DisconnectedType ...@@ -9,7 +9,7 @@ from theano.gradient import DisconnectedType
from theano.gpuarray import (basic_ops, GpuArrayType) from theano.gpuarray import (basic_ops, GpuArrayType)
import theano.tensor.fft import theano.tensor.fft
from .opt import register_opt, op_lifter from .opt import register_opt, op_lifter, register_opt2
try: try:
import pygpu import pygpu
...@@ -373,10 +373,12 @@ def _unitary(norm): ...@@ -373,10 +373,12 @@ def _unitary(norm):
if scikits_cuda_available: if scikits_cuda_available:
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.fft.RFFTOp]) @op_lifter([theano.tensor.fft.RFFTOp])
def local_curfft_op(node, context_name): @register_opt2([theano.tensor.fft.RFFTOp], 'fast_compile')
def local_gpua_curfft_op(node, context_name):
return curfft_op return curfft_op
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.fft.IRFFTOp]) @op_lifter([theano.tensor.fft.IRFFTOp])
def local_cuirfft_op(node, context_name): @register_opt2([theano.tensor.fft.IRFFTOp], 'fast_compile')
def local_gpua_cuirfft_op(node, context_name):
return cuirfft_op return cuirfft_op
...@@ -112,11 +112,11 @@ def register_opt2(tracks, *tags, **kwargs): ...@@ -112,11 +112,11 @@ def register_opt2(tracks, *tags, **kwargs):
Parameters Parameters
---------- ----------
tracks : Op tracks : List of Op class Or Op instance or None
The Node's Op to which optimization is being applied. The Node's Op to which optimization is being applied.
tags : String tags : String
The tag optimization mode to which the optimizer will be registered. The optimization tag to which the optimizer will be registered.
''' '''
def f(local_opt): def f(local_opt):
...@@ -180,10 +180,10 @@ def op_lifter(OP, cuda_only=False): ...@@ -180,10 +180,10 @@ def op_lifter(OP, cuda_only=False):
context_name = i.owner.inputs[0].type.context_name context_name = i.owner.inputs[0].type.context_name
replace = True replace = True
break break
clients = [c for o in node.outputs for c in o.clients]
if not replace: if not replace:
# We replace if *all* clients are on the GPU # We replace if *all* clients are on the GPU
clients = [c for o in node.outputs for c in o.clients]
replace = len(clients) != 0 replace = len(clients) != 0
for c, idx in clients: for c, idx in clients:
if (c == 'output' or if (c == 'output' or
...@@ -273,7 +273,7 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -273,7 +273,7 @@ class GraphToGPU(NavigatorOptimizer):
Parameters Parameters
---------- ----------
local_optimizers_all : List or Set local_optimizers_all : List or SortedSet
The local optimizations to apply to a node. The local optimizations to apply to a node.
local_optimizers_map : Dict local_optimizers_map : Dict
Dictionary object containing the mapping of Op to list of Dictionary object containing the mapping of Op to list of
...@@ -349,7 +349,6 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -349,7 +349,6 @@ class GraphToGPU(NavigatorOptimizer):
self.local_optimizers_map.get(type(c.op), []))): self.local_optimizers_map.get(type(c.op), []))):
move_to_GPU = True move_to_GPU = True
new_ops = None new_ops = None
outputs = []
# Apply the lifter # Apply the lifter
if move_to_GPU: if move_to_GPU:
for lopt in (self.local_optimizers_map.get(node.op, []) + for lopt in (self.local_optimizers_map.get(node.op, []) +
...@@ -365,24 +364,23 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -365,24 +364,23 @@ class GraphToGPU(NavigatorOptimizer):
if new_ops: if new_ops:
process_count[lopt] += 1 process_count[lopt] += 1
break break
if not new_ops: outputs = []
newnode = node.clone_with_new_inputs([mapping.get(i)
for i in node.inputs]) if isinstance(new_ops, theano.Op):
outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True)
elif not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs])
outputs = newnode.outputs outputs = newnode.outputs
elif isinstance(new_ops, (tuple, list)): elif isinstance(new_ops, (tuple, list)):
outputs = [] outputs = new_ops
for o in new_ops:
outputs.append(o)
elif isinstance(new_ops, theano.Variable): elif isinstance(new_ops, theano.Variable):
outputs = [new_ops] outputs = [new_ops]
else:
outputs = new_ops(*[mapping[i] for i in node.inputs],
return_list=True)
if new_ops: if new_ops:
node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs)) node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs))
for new_o, old_o in zip(outputs, node.outputs): for new_o, old_o in zip(outputs, node.outputs):
assert len(outputs) == len(node.outputs)
mapping[old_o] = new_o mapping[old_o] = new_o
new_nodes = [] new_nodes = []
...@@ -473,15 +471,6 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -473,15 +471,6 @@ class GraphToGPU(NavigatorOptimizer):
prof2[0].local_optimizers_map) prof2[0].local_optimizers_map)
new_opt = GraphToGPU(local_optimizers, local_optimizers_map) new_opt = GraphToGPU(local_optimizers, local_optimizers_map)
def merge_list(l1, l2):
l = copy.copy(l1)
for idx, nb in enumerate(l2):
if idx < len(l):
l[idx] += nb
else:
l.append(nb)
return l
toposort_timing = prof1[1] + prof2[1] toposort_timing = prof1[1] + prof2[1]
time_opts = merge_dict(prof1[2], prof2[2]) time_opts = merge_dict(prof1[2], prof2[2])
node_created = merge_dict(prof1[3], prof2[3]) node_created = merge_dict(prof1[3], prof2[3])
...@@ -583,7 +572,7 @@ def local_gpua_alloc(op, context_name, inputs, outputs): ...@@ -583,7 +572,7 @@ def local_gpua_alloc(op, context_name, inputs, outputs):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.AllocEmpty]) @op_lifter([tensor.AllocEmpty])
@register_opt2([tensor.AllocEmpty], 'fast_compile') @register_opt2([tensor.AllocEmpty], 'fast_compile')
def local_gpua_allocempty(op, context_name, inputs, outputs): def local_gpua_alloc_empty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the # We use _props_dict() to make sure that the GPU op know all the
# CPU op props. # CPU op props.
return gpu_alloc_empty(context_name, **op._props_dict()) return gpu_alloc_empty(context_name, **op._props_dict())
...@@ -949,7 +938,7 @@ def local_gpua_subtensor_graph(op, context_name, inputs, outputs): ...@@ -949,7 +938,7 @@ def local_gpua_subtensor_graph(op, context_name, inputs, outputs):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor]) @op_lifter([tensor.IncSubtensor])
@register_opt2([tensor.IncSubtensor], 'fast_compile') @register_opt2([tensor.IncSubtensor], 'fast_compile')
def local_gpua_incsubtensor(op, context_name, inputs, outputs): def local_gpua_inc_subtensor(op, context_name, inputs, outputs):
op = GpuIncSubtensor(op.idx_list, op.inplace, op = GpuIncSubtensor(op.idx_list, op.inplace,
op.set_instead_of_inc, op.set_instead_of_inc,
op.destroyhandler_tolerate_aliased) op.destroyhandler_tolerate_aliased)
...@@ -1229,11 +1218,11 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs): ...@@ -1229,11 +1218,11 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
def local_gpua_assert(op, context_name, inputs, outputs): def local_gpua_assert(op, context_name, inputs, outputs):
if isinstance(inputs[0].type, GpuArrayType): if isinstance(inputs[0].type, GpuArrayType):
return return
return local_assert_graph(op, context_name, inputs, outputs) return local_gpua_assert_graph(op, context_name, inputs, outputs)
@register_opt2([theano.tensor.opt.Assert], 'fast_compile') @register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert_graph(op, context_name, inputs, outputs): def local_gpua_assert_graph(op, context_name, inputs, outputs):
return [op(as_gpuarray_variable(inputs[0], context_name), return [op(as_gpuarray_variable(inputs[0], context_name),
*inputs[1:])] *inputs[1:])]
...@@ -1253,7 +1242,7 @@ theano.tensor.nnet.conv2d() ...@@ -1253,7 +1242,7 @@ theano.tensor.nnet.conv2d()
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([SparseBlockGemv]) @op_lifter([SparseBlockGemv])
@register_opt2([SparseBlockGemv], 'fast_compile') @register_opt2([SparseBlockGemv], 'fast_compile')
def local_gpua_lift_sparseblockgemv(op, context_name, inputs, outputs): def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
if op.inplace: if op.inplace:
return gpu_sparse_block_gemv_inplace return gpu_sparse_block_gemv_inplace
else: else:
...@@ -1263,7 +1252,7 @@ def local_gpua_lift_sparseblockgemv(op, context_name, inputs, outputs): ...@@ -1263,7 +1252,7 @@ def local_gpua_lift_sparseblockgemv(op, context_name, inputs, outputs):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([SparseBlockOuter]) @op_lifter([SparseBlockOuter])
@register_opt2([SparseBlockOuter], 'fast_compile') @register_opt2([SparseBlockOuter], 'fast_compile')
def local_gpua_lift_sparseblockouter(op, context_name, inputs, outputs): def local_gpua_sparseblockouter(op, context_name, inputs, outputs):
if op.inplace: if op.inplace:
return gpu_sparse_block_outer_inplace return gpu_sparse_block_outer_inplace
else: else:
...@@ -1289,7 +1278,7 @@ def local_inplace_sparseblockouter(node): ...@@ -1289,7 +1278,7 @@ def local_inplace_sparseblockouter(node):
@op_lifter([AbstractConv2d, @op_lifter([AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
def local_gpua_lift_abstractconv2d(op, context_name, inputs, outputs): def local_gpua_abstractconv2d(op, context_name, inputs, outputs):
if isinstance(outputs[0].type, GpuArrayType): if isinstance(outputs[0].type, GpuArrayType):
# Don't handle this node here, it's already on the GPU. # Don't handle this node here, it's already on the GPU.
return return
......
...@@ -187,7 +187,7 @@ def test_local_gpualloc_empty(): ...@@ -187,7 +187,7 @@ def test_local_gpualloc_empty():
ii = theano.tensor.iscalar() ii = theano.tensor.iscalar()
# Test with vector # Test with vector
# Should not be moved as the only client is the uutput # Should not be moved as the only client is the output
a = tensor.AllocEmpty('float32')(i) a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a, mode=mode_with_gpu) f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
......
...@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object): ...@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object):
@register_opt2([mrg_uniform], 'fast_compile') @register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg1(op, context_name, inputs, outputs): def local_gpua_mrg_graph(op, context_name, inputs, outputs):
if (type(op) == mrg_uniform and if (type(op) == mrg_uniform and
isinstance(inputs[0].type, GpuArrayType)): isinstance(inputs[0].type, GpuArrayType)):
outs = GPUA_mrg_uniform.new(inputs[0], outs = GPUA_mrg_uniform.new(inputs[0],
...@@ -1566,9 +1566,8 @@ def local_gpua_mrg1(op, context_name, inputs, outputs): ...@@ -1566,9 +1566,8 @@ def local_gpua_mrg1(op, context_name, inputs, outputs):
@register_gpua('fast_compile') @register_gpua('fast_compile')
@local_optimizer([mrg_uniform]) @local_optimizer([mrg_uniform])
def local_gpua_mrg(node): def local_gpua_mrg(node):
# TODO : need description for function
context_name = infer_context_name(*node.inputs) context_name = infer_context_name(*node.inputs)
return local_gpua_mrg1(node.op, context_name, node.inputs, node.outputs) return local_gpua_mrg_graph(node.op, context_name, node.inputs, node.outputs)
MRG_RNGs = (mrg_uniform, GPU_mrg_uniform, GPUA_mrg_uniform) MRG_RNGs = (mrg_uniform, GPU_mrg_uniform, GPUA_mrg_uniform)
......
...@@ -7003,7 +7003,7 @@ class T_get_scalar_constant_value(unittest.TestCase): ...@@ -7003,7 +7003,7 @@ class T_get_scalar_constant_value(unittest.TestCase):
assert get_scalar_constant_value(s) == 3 assert get_scalar_constant_value(s) == 3
s = opt.Shape_i(1)(c) s = opt.Shape_i(1)(c)
assert get_scalar_constant_value(s) == 4 assert get_scalar_constant_value(s) == 4
d = theano.tensor.constant(numpy.random.rand(1, 1)) d = theano.shared(numpy.random.randn(1,1), broadcastable=(True, True))
f = theano.tensor.basic.ScalarFromTensor()(opt.Shape_i(0)(d)) f = theano.tensor.basic.ScalarFromTensor()(opt.Shape_i(0)(d))
assert get_scalar_constant_value(f) == 1 assert get_scalar_constant_value(f) == 1
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论