Commit f3afab87 authored by Pascal Lamblin

More pep8 / pyflakes

Parent 034bb5a3
from theano import gof from theano import gof
from theano import gradient as G from theano import gradient as G
from function_module import orig_function from function_module import orig_function
...@@ -33,16 +32,19 @@ class OpFromGraph(gof.Op): ...@@ -33,16 +32,19 @@ class OpFromGraph(gof.Op):
e2 = op(x, y, z) + op(z, y, x) e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2]) fn = function([x, y, z], [e2])
""" """
def __init__(self, inputs, outputs, grad_depth = 1, **kwargs): def __init__(self, inputs, outputs, grad_depth=1, **kwargs):
if not isinstance(outputs, list): if not isinstance(outputs, list):
raise TypeError('outputs must be list', outputs) raise TypeError('outputs must be list', outputs)
for i in inputs + outputs: for i in inputs + outputs:
if not isinstance(i, gof.Variable): if not isinstance(i, gof.Variable):
raise TypeError('inputs and outputs must be Variable instances', i) raise TypeError(
'inputs and outputs must be Variable instances', i)
if 'updates' in kwargs: if 'updates' in kwargs:
raise TypeError('updates are not allowed in kwargs') raise TypeError('updates are not allowed in kwargs')
# TODO: the graph may have implicit inputs like Value and SharedVariable instances.
# TODO: the graph may have implicit inputs like Value and
# SharedVariable instances.
# what impact to they have on the validity of this Op? # what impact to they have on the validity of this Op?
self.fn = orig_function(inputs, outputs, **kwargs) self.fn = orig_function(inputs, outputs, **kwargs)
self.inputs = inputs self.inputs = inputs
...@@ -52,7 +54,8 @@ class OpFromGraph(gof.Op): ...@@ -52,7 +54,8 @@ class OpFromGraph(gof.Op):
if grad_depth > 0: if grad_depth > 0:
output_grads = [t() for t in self.output_types] output_grads = [t() for t in self.output_types]
gd = G.grad_sources_inputs(zip(self.outputs, output_grads), self.inputs) gd = G.grad_sources_inputs(zip(self.outputs, output_grads),
self.inputs)
gs = map(gd.get, self.inputs) gs = map(gd.get, self.inputs)
self.grad_ops = [] self.grad_ops = []
for g in gs: for g in gs:
...@@ -63,8 +66,9 @@ class OpFromGraph(gof.Op): ...@@ -63,8 +66,9 @@ class OpFromGraph(gof.Op):
# to compute the gradient, so we ignore them. # to compute the gradient, so we ignore them.
self.grad_ops.append(OpFromGraph(inputs + output_grads, self.grad_ops.append(OpFromGraph(inputs + output_grads,
[g], [g],
grad_depth = grad_depth - 1, grad_depth=grad_depth - 1,
on_unused_input='ignore')) on_unused_input='ignore'))
def __eq__(self, other): def __eq__(self, other):
#TODO: recognize a copy #TODO: recognize a copy
return self is other return self is other
...@@ -76,7 +80,8 @@ class OpFromGraph(gof.Op): ...@@ -76,7 +80,8 @@ class OpFromGraph(gof.Op):
def make_node(self, *inputs): def make_node(self, *inputs):
for input, type in zip(inputs, self.input_types): for input, type in zip(inputs, self.input_types):
if not type == input.type: if not type == input.type:
raise TypeError("Wrong type, expected %s but got %s" % (type, input.type)) raise TypeError("Wrong type, expected %s but got %s"
% (type, input.type))
return gof.Apply(self, return gof.Apply(self,
inputs, inputs,
[type() for type in self.output_types]) [type() for type in self.output_types])
...@@ -85,8 +90,8 @@ class OpFromGraph(gof.Op): ...@@ -85,8 +90,8 @@ class OpFromGraph(gof.Op):
variables = self.fn(*inputs) variables = self.fn(*inputs)
assert len(variables) == len(outputs) assert len(variables) == len(outputs)
for output, variable in zip(outputs, variables): for output, variable in zip(outputs, variables):
##TODO: when function's output-borrowing semantics are correct, we wont need this ##TODO: when function's output-borrowing semantics are correct,
# copy anymore # we wont need this copy anymore
output[0] = variable.copy() output[0] = variable.copy()
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
...@@ -94,5 +99,3 @@ class OpFromGraph(gof.Op): ...@@ -94,5 +99,3 @@ class OpFromGraph(gof.Op):
return [go(*(inputs + output_grads)) for go in self.grad_ops] return [go(*(inputs + output_grads)) for go in self.grad_ops]
else: else:
raise NotImplementedError raise NotImplementedError
import sys
import numpy import numpy
from theano import config from theano import config
...@@ -10,36 +9,43 @@ import theano.compile ...@@ -10,36 +9,43 @@ import theano.compile
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import unittest import unittest
def test0(): def test0():
x = theano.tensor.dvector() x = theano.tensor.dvector()
f = theano.function([x], (2.*x + 7) / 2., mode=debugmode.DebugMode()) f = theano.function([x], ((2. * x) + 7) / 2., mode=debugmode.DebugMode())
print f([1,2]) print f([1, 2])
class BROKEN_ON_PURPOSE_Add(gof.Op): class BROKEN_ON_PURPOSE_Add(gof.Op):
def __init__(self, py_offset):
    # py_offset selects whether perform() adds the deliberately wrong
    # +0.5 offset (True) or computes the correct sum (False).
    gof.Op.__init__(self)
    self.py_offset = py_offset
def __eq__(self, other):
    # Two instances are equal iff same class and same offset flag.
    return (type(self) == type(other) and
            (self.py_offset == other.py_offset))
def __hash__(self):
    # Arbitrary salt keeps this hash distinct from similar Ops;
    # consistent with __eq__ (class + py_offset).
    return 29834 ^ hash(type(self)) ^ hash(self.py_offset)
def make_node(self, a, b):
    # Both inputs must be float64 vectors; the output takes a's type.
    a = theano.tensor.as_tensor_variable(a)
    b = theano.tensor.as_tensor_variable(b)
    assert a.type.dtype == 'float64'
    assert a.type.dtype == b.type.dtype
    assert a.type.ndim == 1
    r = gof.Apply(self, [a, b], [a.type()])
    return r
def perform(self, node, inp, out_):
    a, b = inp
    out, = out_
    z = a + b
    # ERROR TO ADD THIS CRAPPY OFFSET: the python implementation is
    # made intentionally inconsistent with c_code so DebugMode can
    # detect the mismatch when py_offset is set.
    if self.py_offset:
        out[0] = z + 0.5
    else:
        out[0] = z
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
a, b = inp a, b = inp
...@@ -76,26 +82,30 @@ class BROKEN_ON_PURPOSE_Add(gof.Op): ...@@ -76,26 +82,30 @@ class BROKEN_ON_PURPOSE_Add(gof.Op):
+ ((double*)PyArray_GETPTR1(%(b)s, m))[0] ; + ((double*)PyArray_GETPTR1(%(b)s, m))[0] ;
} }
} }
"""% dict(locals(), **sub) """ % dict(locals(), **sub)
# inconsistent is an invalid op, whose perform and c_code do not match
inconsistent = BROKEN_ON_PURPOSE_Add(False)

# off_by_half is a good op, that is different from theano.sparse.sd_csc
off_by_half = BROKEN_ON_PURPOSE_Add(True)
class WeirdBrokenOp(gof.Op): class WeirdBrokenOp(gof.Op):
""" """
This op can be inplace if behaviour is 'times1_inplace' This op can be inplace if behaviour is 'times1_inplace'
This op can be destructive if behaviour is 'times2_inplace' This op can be destructive if behaviour is 'times2_inplace'
In both cases, it does not set the destroy_map or view_map correctly so it should raise an In both cases, it does not set the destroy_map or view_map correctly so
error in DebugMode. it should raise an error in DebugMode.
""" """
def __init__(self, behaviour):
    # behaviour is a string tag; 'times1_inplace'/'times2_inplace'
    # select the (undeclared) inplace variants per the class docstring.
    gof.Op.__init__(self)
    self.behaviour = behaviour
def __eq__(self, other):
    # Equal iff same class and same behaviour tag.
    return (type(self) == type(other)
            and (self.behaviour == other.behaviour))
def __hash__(self):
    # Consistent with __eq__ (class + behaviour).
    return hash(type(self)) ^ hash(self.behaviour)
...@@ -168,7 +178,8 @@ class WeirdBrokenOp(gof.Op): ...@@ -168,7 +178,8 @@ class WeirdBrokenOp(gof.Op):
} }
""" """
total = (z_code + prep_vars + behaviour + prep_vars2)% dict(locals(), **sub) total = ((z_code + prep_vars + behaviour + prep_vars2)
% dict(locals(), **sub))
return total return total
# Module-level instances covering the four WeirdBrokenOp behaviours.
wb2i = WeirdBrokenOp('times2_inplace')
wb2 = WeirdBrokenOp('times2')
wb1i = WeirdBrokenOp('times1_inplace')
wb1 = WeirdBrokenOp('times1')
def test_badclinkeroutput(): def test_badclinkeroutput():
a = theano.tensor.dvector() a = theano.tensor.dvector()
...@@ -184,20 +196,20 @@ def test_badclinkeroutput(): ...@@ -184,20 +196,20 @@ def test_badclinkeroutput():
f_good = theano.function([a, b], f_good = theano.function([a, b],
off_by_half(a, b), off_by_half(a, b),
mode=debugmode.DebugMode(check_c_code=True)) mode=debugmode.DebugMode(check_c_code=True))
f_inconsistent = theano.function([a,b], f_inconsistent = theano.function([a, b],
inconsistent(a, b), inconsistent(a, b),
mode=debugmode.DebugMode(check_c_code=True)) mode=debugmode.DebugMode(check_c_code=True))
#this should evaluate with no error #this should evaluate with no error
f_good([1.0, 2.0, 3.0], [2,3,4]) f_good([1.0, 2.0, 3.0], [2, 3, 4])
try: try:
f_inconsistent([1.0, 2.0, 3.0], [2,3,4]) f_inconsistent([1.0, 2.0, 3.0], [2, 3, 4])
except debugmode.BadCLinkerOutput, e: except debugmode.BadCLinkerOutput, e:
print repr(e) print repr(e)
assert e.r.owner.op is inconsistent assert e.r.owner.op is inconsistent
return #TEST PASS return # TEST PASS
assert False #an error should have been detected assert False # an error should have been detected
def test_badoptimization(): def test_badoptimization():
...@@ -213,22 +225,24 @@ def test_badoptimization(): ...@@ -213,22 +225,24 @@ def test_badoptimization():
a = theano.tensor.dvector() a = theano.tensor.dvector()
b = theano.tensor.dvector() b = theano.tensor.dvector()
f = theano.function([a, b], a+b, f = theano.function([a, b], a + b,
mode=debugmode.DebugMode(optimizer=opt, check_c_code=True)) mode=debugmode.DebugMode(optimizer=opt, check_c_code=True))
try: try:
rval = f([1.0, 2.0, 3.0], [2,3,4],) f([1.0, 2.0, 3.0], [2, 3, 4],)
except debugmode.BadOptimization, e: except debugmode.BadOptimization, e:
assert str(e.reason) == 'insert_broken_add' assert str(e.reason) == 'insert_broken_add'
return #TEST PASS return # TEST PASS
assert False assert False
def test_stochasticoptimization(): def test_stochasticoptimization():
# this optimization alternates between triggering and not triggering. # this optimization alternates between triggering and not triggering.
last_time_replaced=[False] last_time_replaced = [False]
@gof.local_optimizer([theano.tensor.add]) @gof.local_optimizer([theano.tensor.add])
def insert_broken_add_sometimes(node): def insert_broken_add_sometimes(node):
if node.op == theano.tensor.add: if node.op == theano.tensor.add:
...@@ -236,32 +250,39 @@ def test_stochasticoptimization(): ...@@ -236,32 +250,39 @@ def test_stochasticoptimization():
if last_time_replaced[0]: if last_time_replaced[0]:
return [off_by_half(*node.inputs)] return [off_by_half(*node.inputs)]
return False return False
edb = gof.EquilibriumDB() edb = gof.EquilibriumDB()
edb.register('insert_broken_add_sometimes', insert_broken_add_sometimes, 'all') edb.register(
'insert_broken_add_sometimes',
insert_broken_add_sometimes,
'all')
opt = edb.query('+all') opt = edb.query('+all')
a = theano.tensor.dvector() a = theano.tensor.dvector()
b = theano.tensor.dvector() b = theano.tensor.dvector()
try: try:
f = theano.function([a, b], theano.function([a, b],
theano.tensor.add(a, b), theano.tensor.add(a, b),
mode=debugmode.DebugMode(optimizer=opt, check_c_code=True)) mode=debugmode.DebugMode(optimizer=opt, check_c_code=True))
except debugmode.StochasticOrder: except debugmode.StochasticOrder:
return #TEST PASS return # TEST PASS
assert False assert False
def test_just_c_code():
    # With python checking disabled, only wb2's C implementation runs;
    # it should double its input.
    x = theano.tensor.dvector()
    f = theano.function([x], wb2(x),
                        mode=debugmode.DebugMode(check_py_code=False))
    assert numpy.all(f([1, 2]) == [2, 4])
def test_baddestroymap(): def test_baddestroymap():
class BadAdd(gof.Op): class BadAdd(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = a.type()
return gof.Apply(self, [a,b], [c]) return gof.Apply(self, [a, b], [c])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, = out c, = out
...@@ -270,20 +291,22 @@ def test_baddestroymap(): ...@@ -270,20 +291,22 @@ def test_baddestroymap():
x = theano.tensor.dvector() x = theano.tensor.dvector()
y = theano.tensor.dvector() y = theano.tensor.dvector()
f = theano.function([x, y], BadAdd()(x,y), mode='DEBUG_MODE') f = theano.function([x, y], BadAdd()(x, y), mode='DEBUG_MODE')
try: try:
f([1,2], [3,4]) f([1, 2], [3, 4])
assert False #failed to raise error assert False # failed to raise error
except debugmode.BadDestroyMap: except debugmode.BadDestroyMap:
pass pass
def test_baddestroymap_c():
    # wb2i is destructive without declaring a destroy_map, so
    # DebugMode must raise BadDestroyMap instead of returning.
    x = theano.tensor.dvector()
    f = theano.function([x], wb2i(x),
                        mode=debugmode.DebugMode(check_py_code=False))
    try:
        assert numpy.all(f([1, 2]) == [2, 4])
        assert False  # failed to raise error
    except debugmode.BadDestroyMap:
        pass
...@@ -293,7 +316,8 @@ class Test_ViewMap(unittest.TestCase): ...@@ -293,7 +316,8 @@ class Test_ViewMap(unittest.TestCase):
class BadAddRef(gof.Op): class BadAddRef(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = b.type() c = b.type()
return gof.Apply(self, [a,b], [c]) return gof.Apply(self, [a, b], [c])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, = out c, = out
...@@ -302,7 +326,8 @@ class Test_ViewMap(unittest.TestCase): ...@@ -302,7 +326,8 @@ class Test_ViewMap(unittest.TestCase):
class BadAddSlice(gof.Op): class BadAddSlice(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = b.type() c = b.type()
return gof.Apply(self, [a,b], [c]) return gof.Apply(self, [a, b], [c])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, = out c, = out
...@@ -311,20 +336,21 @@ class Test_ViewMap(unittest.TestCase): ...@@ -311,20 +336,21 @@ class Test_ViewMap(unittest.TestCase):
def test_badviewmap_ref(self):
    # BadAddRef presumably aliases an input in its output without
    # declaring a view_map (its perform is defined above — confirm);
    # DebugMode must raise BadViewMap.
    x = theano.tensor.dvector()
    y = theano.tensor.dvector()
    f = theano.function([x, y], self.BadAddRef()(x, y), mode='DEBUG_MODE')
    try:
        f([1, 2], [3, 4])
        assert False  # failed to raise error
    except debugmode.BadViewMap:
        return
def test_badviewmap_slice(self):
    # BadAddSlice presumably returns a slice (view) of an input
    # without declaring a view_map — DebugMode must raise BadViewMap.
    x = theano.tensor.dvector()
    y = theano.tensor.dvector()
    f = theano.function([x, y], self.BadAddSlice()(x, y),
                        mode='DEBUG_MODE')
    try:
        f([1, 2], [3, 4])
        assert False  # failed to raise error
    except debugmode.BadViewMap:
        return
...@@ -333,31 +359,34 @@ class Test_ViewMap(unittest.TestCase): ...@@ -333,31 +359,34 @@ class Test_ViewMap(unittest.TestCase):
goodop.view_map = {0: [1]} goodop.view_map = {0: [1]}
x = theano.tensor.dvector() x = theano.tensor.dvector()
y = theano.tensor.dvector() y = theano.tensor.dvector()
f = theano.function([x, y], goodop(x,y), mode='DEBUG_MODE') f = theano.function([x, y], goodop(x, y), mode='DEBUG_MODE')
try: try:
f([1,5,1], [3,4,2,1,4]) f([1, 5, 1], [3, 4, 2, 1, 4])
return return
except debugmode.BadViewMap: except debugmode.BadViewMap:
assert False #failed to raise error assert False # failed to raise error
def test_badviewmap_c(self):
    # wb1i's C implementation returns a view/inplace result without a
    # correct view_map; DebugMode must raise BadViewMap.
    x = theano.tensor.dvector()
    f = theano.function([x], wb1i(x),
                        mode=debugmode.DebugMode(check_py_code=False))
    try:
        f([1, 2])
        assert False  # failed to raise error
    except debugmode.BadViewMap:
        pass
def test_aliased_outputs_ok(self): def test_aliased_outputs_ok(self):
#here aliased outputs is ok because they are both aliased to an input as well # here aliased outputs is ok because they are both aliased to an input
# as well
class CustomOp(gof.Op): class CustomOp(gof.Op):
view_map = {0:[0], 1:[0]} view_map = {0: [0], 1: [0]}
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = a.type()
d = a.type() d = a.type()
return gof.Apply(self, [a,b], [c,d]) return gof.Apply(self, [a, b], [c, d])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, d = out c, d = out
...@@ -366,21 +395,22 @@ class Test_ViewMap(unittest.TestCase): ...@@ -366,21 +395,22 @@ class Test_ViewMap(unittest.TestCase):
x = theano.tensor.dvector('x') x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y') y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE') f = theano.function([x, y], CustomOp()(x, y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8]) r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [1,2,3,4]) assert numpy.all(r0 == [1, 2, 3, 4])
assert numpy.all(r1 == [2,3,4]) assert numpy.all(r1 == [2, 3, 4])
def test_aliased_outputs_ok_output(self): def test_aliased_outputs_ok_output(self):
# here aliased outputs is ok because they are both outputs of the function as a whole and # here aliased outputs is ok because they are both outputs of the
# thus not destroy-able # function as a whole and thus not destroy-able
class CustomOp(gof.Op): class CustomOp(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = a.type()
d = a.type() d = a.type()
return gof.Apply(self, [a,b], [c,d]) return gof.Apply(self, [a, b], [c, d])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, d = out c, d = out
...@@ -390,22 +420,23 @@ class Test_ViewMap(unittest.TestCase): ...@@ -390,22 +420,23 @@ class Test_ViewMap(unittest.TestCase):
x = theano.tensor.dvector() x = theano.tensor.dvector()
y = theano.tensor.dvector() y = theano.tensor.dvector()
f = theano.function([x, y], CustomOp()(x,y), mode='DEBUG_MODE') f = theano.function([x, y], CustomOp()(x, y), mode='DEBUG_MODE')
r0, r1 = f([1,2,3,4],[5,6,7,8]) r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [2,4,6,8]) assert numpy.all(r0 == [2, 4, 6, 8])
assert numpy.all(r1 == [4,6,8]) assert numpy.all(r1 == [4, 6, 8])
def test_aliased_outputs_ok_shadow(self): def test_aliased_outputs_ok_shadow(self):
# here the alias between outputs is ok because one of them is not used for subsequent # here the alias between outputs is ok because one of them is not used
# computation. This is like the case where we use one output as a memory buffer to serve # for subsequent computation. This is like the case where we use one
# another output. # output as a memory buffer to serve another output.
class CustomOp(gof.Op): class CustomOp(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = a.type()
d = a.type() d = a.type()
return gof.Apply(self, [a,b], [c,d]) return gof.Apply(self, [a, b], [c, d])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, d = out c, d = out
...@@ -415,27 +446,29 @@ class Test_ViewMap(unittest.TestCase): ...@@ -415,27 +446,29 @@ class Test_ViewMap(unittest.TestCase):
x = theano.tensor.dvector('x') x = theano.tensor.dvector('x')
y = theano.tensor.dvector('y') y = theano.tensor.dvector('y')
f = theano.function([x, y], CustomOp()(x,y)[0] * 2, mode='DEBUG_MODE') f = theano.function([x, y], CustomOp()(x, y)[0] * 2, mode='DEBUG_MODE')
r0 = f([1,2,3,4],[5,6,7,8]) r0 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [2,4,6,8])
assert numpy.all(r0 == [2, 4, 6, 8])
def test_aliased_outputs_bad(self): def test_aliased_outputs_bad(self):
# here the alias between outputs is not ok because destroying one destroys the other, but # here the alias between outputs is not ok because destroying one
# there's no way to warn theano about it through the view_map mechanism. # destroys the other, but there's no way to warn theano about it
# through the view_map mechanism.
class CustomOp(gof.Op): class CustomOp(gof.Op):
def make_node(self, a, b): def make_node(self, a, b):
c = a.type() c = a.type()
d = a.type() d = a.type()
return gof.Apply(self, [a,b], [c,d]) return gof.Apply(self, [a, b], [c, d])
def perform(self, node, inp, out): def perform(self, node, inp, out):
a, b = inp a, b = inp
c, d = out c, d = out
r = a * 1 r = a * 1
c[0] = r[:-1] c[0] = r[:-1]
d[0] = r[1:] d[0] = r[1:]
custom_op = CustomOp() custom_op = CustomOp()
x = theano.tensor.dvector() x = theano.tensor.dvector()
...@@ -445,68 +478,78 @@ class Test_ViewMap(unittest.TestCase): ...@@ -445,68 +478,78 @@ class Test_ViewMap(unittest.TestCase):
f = theano.function([x, y], out, mode='DEBUG_MODE') f = theano.function([x, y], out, mode='DEBUG_MODE')
try: try:
r0 = f([1,2,3,4],[5,6,7,8]) f([1, 2, 3, 4], [5, 6, 7, 8])
assert False # DebugMode should have caught the error assert False # DebugMode should have caught the error
except debugmode.BadViewMap, e: except debugmode.BadViewMap, e:
print e print e
pass pass
# the situation can be rescued by picking one of the inputs and pretending that it is # the situation can be rescued by picking one of the inputs and
# aliased to both the outputs. This unfairly disables any destructive operations on the # pretending that it is aliased to both the outputs.
# This unfairly disables any destructive operations on the
# input, but guarantees correctness. # input, but guarantees correctness.
#custom_op.view_map = {0:[0], 1:[1]} #custom_op.view_map = {0:[0], 1:[1]}
#f([1,2,3,4],[5,6,7,8]) #f([1,2,3,4],[5,6,7,8])
class Test_check_isfinite(unittest.TestCase): class Test_check_isfinite(unittest.TestCase):
def setUp(self):
    # Save the global flags that test_check_isfinite mutates so
    # tearDown can restore them.
    self.old_ts = theano.tensor.TensorType.filter_checks_isfinite
    self.old_dm = theano.compile.mode.predefined_modes[
        'DEBUG_MODE'].check_isfinite
def tearDown(self):
    # Restore the global flags saved in setUp.
    theano.tensor.TensorType.filter_checks_isfinite = self.old_ts
    theano.compile.mode.predefined_modes[
        'DEBUG_MODE'].check_isfinite = self.old_dm
def test_check_isfinite(self):
    x = theano.tensor.vector()
    f = theano.function([x], (x + 2) * 5, mode='DEBUG_MODE')
    g = theano.function([x], theano.tensor.log(x), mode='DEBUG_MODE')

    # this should work
    f(numpy.log([3, 4, 5]).astype(config.floatX))

    # if TensorType.filter_checks_isfinite were true, these would raise
    # ValueError
    # if not, DebugMode will check internally, and raise InvalidValueError
    # passing an invalid value as an input should trigger ValueError
    self.assertRaises(debugmode.InvalidValueError, f,
                      numpy.log([3, -4, 5]).astype(config.floatX))
    self.assertRaises(debugmode.InvalidValueError, f,
                      (numpy.asarray([0, 1.0, 0]) / 0).astype(config.floatX))
    self.assertRaises(debugmode.InvalidValueError, f,
                      (numpy.asarray([1.0, 1.0, 1.0]) / 0).astype(
                          config.floatX))

    # generating an invalid value internally should trigger
    # InvalidValueError
    self.assertRaises(debugmode.InvalidValueError, g,
                      numpy.asarray([3, -4, 5], dtype=config.floatX))

    # this should disable the exception
    theano.tensor.TensorType.filter_checks_isfinite = False
    theano.compile.mode.predefined_modes[
        'DEBUG_MODE'].check_isfinite = False
    # insert several Inf
    f(numpy.asarray(numpy.asarray([1.0, 1.0, 1.0]) / 0,
                    dtype=config.floatX))
def test_check_isfinite_disabled(self): def test_check_isfinite_disabled(self):
x = theano.tensor.dvector() x = theano.tensor.dvector()
f = theano.function([x], (x+2) * 5, mode=debugmode.DebugMode(check_isfinite=False)) f = theano.function([x], (x + 2) * 5,
mode=debugmode.DebugMode(check_isfinite=False))
#nan should go through #nan should go through
f(numpy.log([3, -4, 5])) f(numpy.log([3, -4, 5]))
#inf should go through #inf should go through
infs = numpy.asarray([1.0,1.,1.])/0 infs = numpy.asarray([1.0, 1., 1.]) / 0
print infs print infs
f(infs) f(infs)
return return
class Test_preallocated_output(unittest.TestCase): class Test_preallocated_output(unittest.TestCase):
class BrokenCImplementationAdd(gof.Op): class BrokenCImplementationAdd(gof.Op):
...@@ -521,7 +564,7 @@ class Test_preallocated_output(unittest.TestCase): ...@@ -521,7 +564,7 @@ class Test_preallocated_output(unittest.TestCase):
b = theano.tensor.as_tensor_variable(b) b = theano.tensor.as_tensor_variable(b)
assert a.type.dtype == 'float32' assert a.type.dtype == 'float32'
assert a.type.dtype == b.type.dtype assert a.type.dtype == b.type.dtype
assert a.type.ndim==2 assert a.type.ndim == 2
r = gof.Apply(self, [a, b], [a.type()]) r = gof.Apply(self, [a, b], [a.type()])
return r return r
...@@ -595,17 +638,18 @@ class Test_preallocated_output(unittest.TestCase): ...@@ -595,17 +638,18 @@ class Test_preallocated_output(unittest.TestCase):
} }
} }
} }
"""% dict(locals(), **sub) """ % dict(locals(), **sub)
def test_f_contiguous(self): def test_f_contiguous(self):
a = theano.tensor.fmatrix('a') a = theano.tensor.fmatrix('a')
b = theano.tensor.fmatrix('b') b = theano.tensor.fmatrix('b')
z = self.BrokenCImplementationAdd()(a, b) z = self.BrokenCImplementationAdd()(a, b)
out = theano.tensor.dot(z, numpy.eye(7)) # Needed so that z is not the output of the graph # Needed so that z is not the output of the graph
out = theano.tensor.dot(z, numpy.eye(7))
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
a_val = rng.randn(7,7).astype('float32') a_val = rng.randn(7, 7).astype('float32')
b_val = rng.randn(7,7).astype('float32') b_val = rng.randn(7, 7).astype('float32')
init_conf_val = config.DebugMode.check_preallocated_output init_conf_val = config.DebugMode.check_preallocated_output
try: try:
......
import itertools
from unittest import TestCase from unittest import TestCase
from theano.compile.pfunc import pfunc from theano.compile.pfunc import pfunc
...@@ -15,7 +14,8 @@ if cuda_ndarray.cuda_available == False: ...@@ -15,7 +14,8 @@ if cuda_ndarray.cuda_available == False:
import theano.sandbox.cuda as tcn import theano.sandbox.cuda as tcn
from theano.tensor.signal.downsample import DownsampleFactorMax, DownsampleFactorMaxGrad from theano.tensor.signal.downsample import (DownsampleFactorMax,
DownsampleFactorMaxGrad)
import theano.compile.mode import theano.compile.mode
from theano.tensor.tests.test_blas import BaseGemv, TestBlasStrides, TestGer from theano.tensor.tests.test_blas import BaseGemv, TestBlasStrides, TestGer
...@@ -23,20 +23,24 @@ from theano.sandbox.cuda.blas import gpu_gemv_no_inplace, gpu_gemv_inplace ...@@ -23,20 +23,24 @@ from theano.sandbox.cuda.blas import gpu_gemv_no_inplace, gpu_gemv_inplace
from theano.sandbox.cuda.blas import gpu_ger_inplace, gpu_ger_no_inplace from theano.sandbox.cuda.blas import gpu_ger_inplace, gpu_ger_no_inplace
# Pick GPU / non-GPU compilation modes; FAST_COMPILE lacks the needed
# optimizations, so fall back to FAST_RUN in that case.
if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
    mode_without_gpu = theano.compile.mode.get_mode(
        'FAST_RUN').excluding('gpu')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
    mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def my_rand(*shape):
    # Uniform random array of the given shape, as float32 (GPU dtype).
    return theano._asarray(numpy.random.rand(*shape), dtype='float32')
def transpose(cuda_mat):
    # The easiest way to transpose a cuda matrix for now:
    # dimshuffle swaps the two axes without copying on the GPU.
    return tcn.dimshuffle(cuda_mat, [1, 0])
def test_dot22(): def test_dot22():
def cmp(a_shp, b_shp): def cmp(a_shp, b_shp):
a0 = my_rand(*a_shp) a0 = my_rand(*a_shp)
...@@ -44,7 +48,7 @@ def test_dot22(): ...@@ -44,7 +48,7 @@ def test_dot22():
b = tensor.fmatrix() b = tensor.fmatrix()
f = pfunc([b], [], updates=[(a, tensor.dot(a,b))], mode=mode_with_gpu) f = pfunc([b], [], updates=[(a, tensor.dot(a, b))], mode=mode_with_gpu)
bval = my_rand(*b_shp) bval = my_rand(*b_shp)
f(bval) f(bval)
...@@ -54,16 +58,18 @@ def test_dot22(): ...@@ -54,16 +58,18 @@ def test_dot22():
# Try with a matrix equal to a0, but with strides in both dims # Try with a matrix equal to a0, but with strides in both dims
a.set_value(a0) a.set_value(a0)
a.set_value( a.set_value(
a.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], a.get_value(borrow=True,
return_internal_type=True)[::-1, ::-1],
borrow=True) borrow=True)
f(bval) f(bval)
cmp((3,4),(4,5)) cmp((3, 4), (4, 5))
cmp((0,4),(4,5)) cmp((0, 4), (4, 5))
cmp((3,4),(4,0)) cmp((3, 4), (4, 0))
cmp((3,0),(0,5)) cmp((3, 0), (0, 5))
cmp((0,4),(4,0)) cmp((0, 4), (4, 0))
cmp((0,0),(0,0)) cmp((0, 0), (0, 0))
def test_dot22scalar(): def test_dot22scalar():
def cmp(a_shp, b_shp): def cmp(a_shp, b_shp):
...@@ -73,32 +79,39 @@ def test_dot22scalar(): ...@@ -73,32 +79,39 @@ def test_dot22scalar():
av = my_rand(*a_shp) av = my_rand(*a_shp)
bv = my_rand(*b_shp) bv = my_rand(*b_shp)
f = theano.function([a,b], tensor.dot(a,b)*numpy.asarray(4, 'float32'), mode=mode_with_gpu) f = theano.function(
f2 = theano.function([a,b], tensor.dot(a,b)*numpy.asarray(4, 'float32')) [a, b],
t=f.maker.env.toposort() tensor.dot(a, b) * numpy.asarray(4, 'float32'),
assert len(t)==4 mode=mode_with_gpu)
assert isinstance(t[0].op,tcn.GpuFromHost) f2 = theano.function(
assert isinstance(t[1].op,tcn.GpuFromHost) [a, b],
assert isinstance(t[2].op,tcn.blas.GpuDot22Scalar) tensor.dot(a, b) * numpy.asarray(4, 'float32'))
assert isinstance(t[3].op,tcn.HostFromGpu) t = f.maker.env.toposort()
assert numpy.allclose(f(av,bv),f2(av,bv)) assert len(t) == 4
assert isinstance(t[0].op, tcn.GpuFromHost)
f = theano.function([a,b,scalar], tensor.dot(a,b)*scalar, mode=mode_with_gpu) assert isinstance(t[1].op, tcn.GpuFromHost)
f2 = theano.function([a,b,scalar], tensor.dot(a,b)*scalar) assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
t=f.maker.env.toposort() assert isinstance(t[3].op, tcn.HostFromGpu)
assert len(t)==4 assert numpy.allclose(f(av, bv), f2(av, bv))
assert isinstance(t[0].op,tcn.GpuFromHost)
assert isinstance(t[1].op,tcn.GpuFromHost) f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
assert isinstance(t[2].op,tcn.blas.GpuDot22Scalar) mode=mode_with_gpu)
assert isinstance(t[3].op,tcn.HostFromGpu) f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
assert numpy.allclose(f(av,bv,0.5),f2(av,bv,0.5)) t = f.maker.env.toposort()
assert len(t) == 4
cmp((3,4),(4,5)) assert isinstance(t[0].op, tcn.GpuFromHost)
cmp((0,4),(4,5)) assert isinstance(t[1].op, tcn.GpuFromHost)
cmp((3,4),(4,0)) assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
cmp((3,0),(0,5)) assert isinstance(t[3].op, tcn.HostFromGpu)
cmp((0,4),(4,0)) assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
cmp((0,0),(0,0))
cmp((3, 4), (4, 5))
cmp((0, 4), (4, 5))
cmp((3, 4), (4, 0))
cmp((3, 0), (0, 5))
cmp((0, 4), (4, 0))
cmp((0, 0), (0, 0))
def test_gemm(): def test_gemm():
def cmp(a_shp, b_shp): def cmp(a_shp, b_shp):
...@@ -108,28 +121,33 @@ def test_gemm(): ...@@ -108,28 +121,33 @@ def test_gemm():
b = tensor.fmatrix('b') b = tensor.fmatrix('b')
c = tensor.fmatrix('c') c = tensor.fmatrix('c')
f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))], mode=mode_with_gpu) f = pfunc([b, c], [], updates=[(a, tensor.dot(a, b) + tensor.exp(c))],
assert any([node.op == tcn.blas.gpu_gemm_inplace for node in f.maker.env.toposort()]) mode=mode_with_gpu)
assert any([node.op == tcn.blas.gpu_gemm_inplace
for node in f.maker.env.toposort()])
bval = my_rand(*b_shp) bval = my_rand(*b_shp)
cval = my_rand(a_shp[0],b_shp[1]) cval = my_rand(a_shp[0], b_shp[1])
f(bval,cval) f(bval, cval)
assert numpy.allclose(numpy.dot(a0, bval)+numpy.exp(cval), a.get_value()) assert numpy.allclose(numpy.dot(a0, bval) + numpy.exp(cval),
a.get_value())
# Try with a matrix equal to a0, but with strides in both dims # Try with a matrix equal to a0, but with strides in both dims
a.set_value(a0) a.set_value(a0)
a.set_value( a.set_value(
a.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], a.get_value(borrow=True,
return_internal_type=True)[::-1, ::-1],
borrow=True) borrow=True)
f(bval, cval) f(bval, cval)
cmp((3,4),(4,5)) cmp((3, 4), (4, 5))
cmp((0,4),(4,5)) cmp((0, 4), (4, 5))
cmp((3,4),(4,0)) cmp((3, 4), (4, 0))
cmp((3,0),(0,5)) cmp((3, 0), (0, 5))
cmp((0,4),(4,0)) cmp((0, 4), (4, 0))
cmp((0,0),(0,0)) cmp((0, 0), (0, 0))
def test_gemm_no_inplace(): def test_gemm_no_inplace():
...@@ -142,29 +160,35 @@ def test_gemm_no_inplace(): ...@@ -142,29 +160,35 @@ def test_gemm_no_inplace():
b = tcn.fmatrix('b') b = tcn.fmatrix('b')
b2 = tcn.fmatrix('b2') b2 = tcn.fmatrix('b2')
f = pfunc([b,b2], [tensor.dot(a,b2) + c], updates=[(a, tensor.dot(a,b) + c)], mode=mode_with_gpu) f = pfunc(
[b, b2],
[tensor.dot(a, b2) + c],
updates=[(a, tensor.dot(a, b) + c)],
mode=mode_with_gpu)
assert any([node.op == tcn.blas.gpu_gemm_no_inplace for node in f.maker.env.toposort()]) assert any([node.op == tcn.blas.gpu_gemm_no_inplace
for node in f.maker.env.toposort()])
bval = my_rand(*b_shp) bval = my_rand(*b_shp)
bval2 = my_rand(*b_shp) bval2 = my_rand(*b_shp)
rval = f(bval,bval2) rval = f(bval, bval2)
assert numpy.allclose(numpy.dot(a0, bval)+cval, a.get_value()) assert numpy.allclose(numpy.dot(a0, bval) + cval, a.get_value())
assert numpy.allclose(numpy.dot(a0, bval2)+cval, rval) assert numpy.allclose(numpy.dot(a0, bval2) + cval, rval)
# Try with a matrix equal to a0, but with strides in both dims # Try with a matrix equal to a0, but with strides in both dims
a.set_value(a0) a.set_value(a0)
a.set_value( a.set_value(
a.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], a.get_value(borrow=True,
return_internal_type=True)[::-1, ::-1],
borrow=True) borrow=True)
f(bval, bval2) f(bval, bval2)
cmp((3,4),(4,5)) cmp((3, 4), (4, 5))
cmp((0,4),(4,5)) cmp((0, 4), (4, 5))
cmp((3,4),(4,0)) cmp((3, 4), (4, 0))
cmp((3,0),(0,5)) cmp((3, 0), (0, 5))
cmp((0,4),(4,0)) cmp((0, 4), (4, 0))
cmp((0,0),(0,0)) cmp((0, 0), (0, 0))
class TestBlasStridesGpu(TestBlasStrides): class TestBlasStridesGpu(TestBlasStrides):
...@@ -221,15 +245,15 @@ if 0: ...@@ -221,15 +245,15 @@ if 0:
print r, r.shape print r, r.shape
assert (ret==r).all() assert (ret==r).all()
def test_downsample(): def test_downsample():
import random shps = [(1, 1, 1, 12),
shps = [ (1, 1, 1, 12),
(1, 1, 2, 2), (1, 1, 2, 2),
(1, 1, 1, 1), (1, 1, 1, 1),
(1,1,4,4), (1, 1, 4, 4),
(1, 1, 10, 11), (1, 1, 10, 11),
(1, 2, 2, 2), (1, 2, 2, 2),
(3,5,4,4), (3, 5, 4, 4),
(25, 1, 7, 7), (25, 1, 7, 7),
(1, 1, 12, 12), (1, 1, 12, 12),
(1, 1, 2, 14), (1, 1, 2, 14),
...@@ -245,44 +269,61 @@ def test_downsample(): ...@@ -245,44 +269,61 @@ def test_downsample():
(30, 2, 24, 24), (30, 2, 24, 24),
(30, 6, 24, 24), (30, 6, 24, 24),
(10, 10, 10, 11), (10, 10, 10, 11),
(1,1,10,1025), (1, 1, 10, 1025),
(1,1,10,1023), (1, 1, 10, 1023),
(1,1,1025,10), (1, 1, 1025, 10),
(1,1,1023,10), (1, 1, 1023, 10),
] ]
numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps) numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)
for shp in shps: for shp in shps:
for ds in (2, 2), (3,2), (1,1): for ds in (2, 2), (3, 2), (1, 1):
if ds[0] > shp[2]: continue if ds[0] > shp[2]:
if ds[1] > shp[3]: continue continue
if ds[1] > shp[3]:
continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns # GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor. # in the output tensor.
if float(shp[3])/ds[1]>512: continue if float(shp[3]) / ds[1] > 512:
continue
for ignore_border in (True, False): for ignore_border in (True, False):
print 'test_downsample', shp, ds, ignore_border print 'test_downsample', shp, ds, ignore_border
ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border) ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a') a = tcn.shared_constructor(my_rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu) f = pfunc([], ds_op(tensor.as_tensor_variable(a)),
f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu) mode=mode_with_gpu)
assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)),
f.maker.env.toposort()]) mode=mode_without_gpu)
assert any([isinstance(node.op, DownsampleFactorMax) for node in assert any([isinstance(node.op,
f2.maker.env.toposort()]) tcn.blas.GpuDownsampleFactorMax)
assert numpy.allclose(f(),f2()) for node in f.maker.env.toposort()])
assert any([isinstance(node.op, DownsampleFactorMax)
g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),a), mode=mode_with_gpu) for node in f2.maker.env.toposort()])
g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),a), mode=mode_without_gpu) assert numpy.allclose(f(), f2())
assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad)
g = pfunc(
[],
tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),
a),
mode=mode_with_gpu)
g2 = pfunc(
[],
tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),
a),
mode=mode_without_gpu)
assert any([isinstance(node.op,
tcn.blas.GpuDownsampleFactorMaxGrad)
for node in g.maker.env.toposort()]) for node in g.maker.env.toposort()])
assert any([isinstance(node.op, DownsampleFactorMaxGrad) assert any([isinstance(node.op, DownsampleFactorMaxGrad)
for node in g2.maker.env.toposort()]) for node in g2.maker.env.toposort()])
assert numpy.allclose(g(),g2()) assert numpy.allclose(g(), g2())
#We already check that the gpu version return the same value as the gpu version # We already check that the gpu version return
#for GpuDownsampleFactorMaxGrad. So no need to call verify_grad here. # the same value as the gpu version for
# GpuDownsampleFactorMaxGrad. So no need to call
# verify_grad here.
class TestGpuGemv(TestCase, BaseGemv, class TestGpuGemv(TestCase, BaseGemv,
...@@ -295,6 +336,7 @@ class TestGpuGemv(TestCase, BaseGemv, ...@@ -295,6 +336,7 @@ class TestGpuGemv(TestCase, BaseGemv,
gemv = gpu_gemv_inplace gemv = gpu_gemv_inplace
gemv_inplace = gpu_gemv_inplace gemv_inplace = gpu_gemv_inplace
class TestGpuGemvNoTransfer(TestCase, BaseGemv, class TestGpuGemvNoTransfer(TestCase, BaseGemv,
unittest_tools.TestOptimizationMixin): unittest_tools.TestOptimizationMixin):
mode = mode_with_gpu mode = mode_with_gpu
...@@ -320,63 +362,70 @@ class TestVectorMatrixDot(TestCase): ...@@ -320,63 +362,70 @@ class TestVectorMatrixDot(TestCase):
def test_dot_vm(self): def test_dot_vm(self):
''' Test vector dot matrix ''' ''' Test vector dot matrix '''
v = theano.shared( numpy.array(numpy.random.rand(2), dtype='float32')) v = theano.shared(numpy.array(numpy.random.rand(2), dtype='float32'))
m = theano.shared( numpy.array(numpy.random.rand(2,5), m = theano.shared(numpy.array(numpy.random.rand(2, 5),
dtype='float32')) dtype='float32'))
no_gpu_f = theano.function([], theano.dot(v,m), mode = mode_without_gpu) no_gpu_f = theano.function([], theano.dot(v, m), mode=mode_without_gpu)
gpu_f = theano.function([], theano.dot(v,m), mode = mode_with_gpu) gpu_f = theano.function([], theano.dot(v, m), mode=mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu #gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu. #but the output is moved to the gpu.
gpu_f2 = theano.function([], tcn.gpu_from_host(theano.dot(v,m)), mode = mode_with_gpu) gpu_f2 = theano.function([], tcn.gpu_from_host(theano.dot(v, m)),
mode=mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol)
# Assert that the gpu version actually uses gpu # Assert that the gpu version actually uses gpu
assert sum([node.op is gpu_gemv_inplace for node in assert sum([node.op is gpu_gemv_inplace for node in
gpu_f.maker.env.toposort() ]) == 1 gpu_f.maker.env.toposort()]) == 1
assert sum([node.op is gpu_gemv_inplace for node in assert sum([node.op is gpu_gemv_inplace for node in
gpu_f2.maker.env.toposort() ]) == 1 gpu_f2.maker.env.toposort()]) == 1
# Check double-strided m # Check double-strided m
m.set_value( m.set_value(
m.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], m.get_value(borrow=True,
return_internal_type=True)[::-1, ::-1],
borrow=True) borrow=True)
assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol)
def test_dot_mv(self): def test_dot_mv(self):
''' Test matrix dot vector ''' ''' Test matrix dot vector '''
v = theano.shared( numpy.array(numpy.random.rand(2), dtype='float32')) v = theano.shared(numpy.array(numpy.random.rand(2), dtype='float32'))
m = theano.shared( numpy.array(numpy.random.rand(5,2), m = theano.shared(numpy.array(numpy.random.rand(5, 2),
dtype='float32')) dtype='float32'))
no_gpu_f = theano.function([], theano.dot(m,v), mode = mode_without_gpu) no_gpu_f = theano.function([], theano.dot(m, v), mode=mode_without_gpu)
gpu_f = theano.function([], theano.dot(m,v), mode = mode_with_gpu) gpu_f = theano.function([], theano.dot(m, v), mode=mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu #gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu. #but the output is moved to the gpu.
gpu_f2 = theano.function([], tcn.gpu_from_host(theano.dot(m,v)), mode = mode_with_gpu) gpu_f2 = theano.function([], tcn.gpu_from_host(theano.dot(m, v)),
mode=mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol)
assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol)
# Assert that the gpu version actually uses gpu # Assert that the gpu version actually uses gpu
assert sum([node.op is gpu_gemv_inplace for node in assert sum([node.op is gpu_gemv_inplace for node in
gpu_f.maker.env.toposort() ]) == 1 gpu_f.maker.env.toposort()]) == 1
assert sum([node.op is gpu_gemv_inplace for node in assert sum([node.op is gpu_gemv_inplace for node in
gpu_f2.maker.env.toposort() ]) == 1 gpu_f2.maker.env.toposort()]) == 1
def test_gemv1(self): def test_gemv1(self):
''' test vector1+dot(matrix,vector2) ''' ''' test vector1+dot(matrix,vector2) '''
v1 = theano.tensor._shared( numpy.array(numpy.random.rand(2) , dtype='float32')) v1 = theano.tensor._shared(numpy.array(numpy.random.rand(2),
v2 = theano.tensor._shared( numpy.array(numpy.random.rand(5) , dtype='float32')) dtype='float32'))
m = theano.tensor._shared( numpy.array(numpy.random.rand(5,2), dtype='float32')) v2 = theano.tensor._shared(numpy.array(numpy.random.rand(5),
dtype='float32'))
no_gpu_f = theano.function([], v2+theano.dot(m,v1), mode = mode_without_gpu) m = theano.tensor._shared(numpy.array(numpy.random.rand(5, 2),
gpu_f = theano.function([], v2+theano.dot(m,v1), mode = mode_with_gpu) dtype='float32'))
no_gpu_f = theano.function([], v2 + theano.dot(m, v1),
mode=mode_without_gpu)
gpu_f = theano.function([], v2 + theano.dot(m, v1), mode=mode_with_gpu)
#gpu_f2 is needed to test the case when the input is not on the gpu #gpu_f2 is needed to test the case when the input is not on the gpu
#but the output is moved to the gpu. #but the output is moved to the gpu.
gpu_f2 = theano.function([], tcn.gpu_from_host(v2+theano.dot(m,v1)), mode = mode_with_gpu) gpu_f2 = theano.function([], tcn.gpu_from_host(v2 + theano.dot(m, v1)),
mode=mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol)
...@@ -389,15 +438,19 @@ class TestVectorMatrixDot(TestCase): ...@@ -389,15 +438,19 @@ class TestVectorMatrixDot(TestCase):
def test_gemv2(self): def test_gemv2(self):
''' test vector1+dot(vector2,matrix) ''' ''' test vector1+dot(vector2,matrix) '''
v1 = theano.shared( numpy.array(numpy.random.rand(5) , dtype='float32')) v1 = theano.shared(numpy.array(numpy.random.rand(5), dtype='float32'))
v2 = theano.shared( numpy.array(numpy.random.rand(2) , dtype='float32')) v2 = theano.shared(numpy.array(numpy.random.rand(2), dtype='float32'))
m = theano.shared( numpy.array(numpy.random.rand(5,2), dtype='float32')) m = theano.shared(numpy.array(numpy.random.rand(5, 2),
dtype='float32'))
no_gpu_f = theano.function([], v2+theano.dot(v1,m), mode = mode_without_gpu)
gpu_f = theano.function([], v2+theano.dot(v1,m), mode = mode_with_gpu) no_gpu_f = theano.function([], v2 + theano.dot(v1, m),
#gpu_f2 is needed to test the case when the input is not on the gpu mode=mode_without_gpu)
#but the output is moved to the gpu. gpu_f = theano.function([], v2 + theano.dot(v1, m),
gpu_f2 = theano.function([], tcn.gpu_from_host(v2+theano.dot(v1,m)), mode = mode_with_gpu) mode=mode_with_gpu)
# gpu_f2 is needed to test the case when the input is not on the gpu
# but the output is moved to the gpu.
gpu_f2 = theano.function([], tcn.gpu_from_host(v2 + theano.dot(v1, m)),
mode=mode_with_gpu)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol) assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol)
...@@ -425,6 +478,7 @@ class TestGpuGer(TestGer): ...@@ -425,6 +478,7 @@ class TestGpuGer(TestGer):
self.ger = gpu_ger_inplace self.ger = gpu_ger_inplace
self.gemm = tcn.blas.gpu_gemm_inplace self.gemm = tcn.blas.gpu_gemm_inplace
class TestGpuGerNoTransfer(TestGer): class TestGpuGerNoTransfer(TestGer):
@staticmethod @staticmethod
def shared(val): def shared(val):
......
...@@ -88,7 +88,7 @@ class DimShuffle(Op): ...@@ -88,7 +88,7 @@ class DimShuffle(Op):
Adding, subtracting dimensions can be done with reshape. Adding, subtracting dimensions can be done with reshape.
""" """
def __init__(self, input_broadcastable, new_order, inplace = False): def __init__(self, input_broadcastable, new_order, inplace=False):
""" """
Usage: DimShuffle(input_broadcastable, new_order, inplace = False) Usage: DimShuffle(input_broadcastable, new_order, inplace = False)
...@@ -115,33 +115,41 @@ class DimShuffle(Op): ...@@ -115,33 +115,41 @@ class DimShuffle(Op):
self.new_order = new_order self.new_order = new_order
self.inplace = inplace self.inplace = inplace
for i in xrange(len(new_order)-1): for i in xrange(len(new_order) - 1):
j = new_order[i] j = new_order[i]
if j != 'x' and j in new_order[i+1:]: if j != 'x' and j in new_order[(i + 1):]:
raise ValueError("The same input dimension may not appear twice in the list of output dimensions", (new_order)) raise ValueError((
"The same input dimension may not appear twice in the "
"list of output dimensions", (new_order)))
# list of dimensions of the input to drop # list of dimensions of the input to drop
self.drop = [] self.drop = []
i2j = {} # this maps i before dropping dimensions to j after dropping dimensions so self.shuffle can be set properly later on # this maps i before dropping dimensions to j after dropping dimensions
# so self.shuffle can be set properly later on
i2j = {}
j = 0 j = 0
for i, b in enumerate(input_broadcastable): for i, b in enumerate(input_broadcastable):
if i not in new_order: if i not in new_order:
# we want to drop this dimension because it's not a value in new_order # we want to drop this dimension because it's not a value in
if b == 1: # 1 aka True # new_order
if b == 1: # 1 aka True
self.drop.append(i) self.drop.append(i)
else: else:
# we cannot drop non-broadcastable dimensions # we cannot drop non-broadcastable dimensions
raise ValueError("You cannot drop a non-broadcastable dimension.", (input_broadcastable, new_order)) raise ValueError(
"You cannot drop a non-broadcastable dimension.",
(input_broadcastable, new_order))
else: else:
i2j[i] = j i2j[i] = j
j += 1 j += 1
# transposition of non-broadcastable dimensions # transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for the extra # This is how the dimensions will be permuted, without accounting for
# 'x' broadcastable dimensions to insert. # the extra 'x' broadcastable dimensions to insert.
self.shuffle = [i2j[x] for x in new_order if x != 'x'] self.shuffle = [i2j[x] for x in new_order if x != 'x']
# list of dimensions of the output that are broadcastable and were not in the original input # list of dimensions of the output that are broadcastable and were not
# in the original input
self.augment = [i for i, x in enumerate(new_order) if x == 'x'] self.augment = [i for i, x in enumerate(new_order) if x == 'x']
if self.inplace: if self.inplace:
...@@ -162,7 +170,10 @@ class DimShuffle(Op): ...@@ -162,7 +170,10 @@ class DimShuffle(Op):
input = as_tensor_variable(_input) input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable) ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable: if not ib == self.input_broadcastable:
raise TypeError("The number of dimensions and/or broadcastable pattern of the input is incorrect for this op. Expected %s, got %s." % (self.input_broadcastable, ib)) raise TypeError((
"The number of dimensions and/or broadcastable pattern of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
ob = [] ob = []
for value in self.new_order: for value in self.new_order:
if value == 'x': if value == 'x':
...@@ -170,8 +181,8 @@ class DimShuffle(Op): ...@@ -170,8 +181,8 @@ class DimShuffle(Op):
else: else:
ob.append(ib[value]) ob.append(ib[value])
output = TensorType(dtype = input.type.dtype, output = TensorType(dtype=input.type.dtype,
broadcastable = ob).make_variable() broadcastable=ob).make_variable()
return Apply(self, [input], [output]) return Apply(self, [input], [output])
...@@ -183,15 +194,20 @@ class DimShuffle(Op): ...@@ -183,15 +194,20 @@ class DimShuffle(Op):
and self.input_broadcastable == other.input_broadcastable and self.input_broadcastable == other.input_broadcastable
def _rehash(self): def _rehash(self):
self._hashval = hash(type(self).__name__) ^ hash(type(self).__module__) ^ hash(self.inplace) \ self._hashval = (
^ hash(self.new_order) ^ hash(self.input_broadcastable) hash(type(self).__name__)
^ hash(type(self).__module__)
^ hash(self.inplace)
^ hash(self.new_order)
^ hash(self.input_broadcastable))
def __hash__(self): def __hash__(self):
return self._hashval return self._hashval
def __str__(self): def __str__(self):
if self.inplace: if self.inplace:
return "InplaceDimShuffle{%s}" % ",".join(str(x) for x in self.new_order) return "InplaceDimShuffle{%s}" % ",".join(str(x)
for x in self.new_order)
else: else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order) return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
...@@ -220,7 +236,7 @@ class DimShuffle(Op): ...@@ -220,7 +236,7 @@ class DimShuffle(Op):
if not self.inplace: if not self.inplace:
res = numpy.copy(res) res = numpy.copy(res)
storage[0] = numpy.asarray(res) #asarray puts scalars back into array storage[0] = numpy.asarray(res) # asarray puts scalars back into array
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
ishp, = shapes ishp, = shapes
...@@ -256,32 +272,33 @@ class DimShuffle(Op): ...@@ -256,32 +272,33 @@ class DimShuffle(Op):
clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}'] clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
#get the copy / view of the input depending on whether we're doing things inplace or not. #get the copy / view of the input depending on whether we're doingi
# things inplace or not.
if self.inplace: if self.inplace:
get_base = ['{ PyArrayObject * %(basename)s = %(input)s', 'Py_INCREF((PyObject*)%(basename)s)'] get_base = ['{ PyArrayObject * %(basename)s = %(input)s', 'Py_INCREF((PyObject*)%(basename)s)']
else: else:
get_base = [('{ PyArrayObject * %(basename)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,' get_base = [('{ PyArrayObject * %(basename)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,'
'0, 0, NPY_ALIGNED|NPY_ENSURECOPY, NULL)')] '0, 0, NPY_ALIGNED|NPY_ENSURECOPY, NULL)')]
shape_statements = ['npy_intp dimensions[%i]'%nd_out] shape_statements = ['npy_intp dimensions[%i]' % nd_out]
for i, o in enumerate(self.new_order): for i, o in enumerate(self.new_order):
if o != 'x': if o != 'x':
shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')] shape_statements += [('dimensions[' + str(i) + '] = %(basename)s->dimensions[' + str(o) + ']')]
else: else:
shape_statements += [('dimensions['+str(i)+'] = 1')] shape_statements += [('dimensions[' + str(i) + '] = 1')]
strides_statements = ['npy_intp strides[%i]' % nd_out]
strides_statements = ['npy_intp strides[%i]'%nd_out]
#set the strides of the non-broadcasted dimensions #set the strides of the non-broadcasted dimensions
for i, o in enumerate(self.new_order): for i, o in enumerate(self.new_order):
if o != 'x': if o != 'x':
strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')] strides_statements += [('strides[' + str(i) + '] = %(basename)s->strides[' + str(o) + ']')]
else: else:
strides_statements += [('strides['+str(i)+'] = 0')] strides_statements += [('strides[' + str(i) + '] = 0')]
# set the strides of the broadcasted dimensions # set the strides of the broadcasted dimensions
# this algorithm is from numpy: PyArray_Newshape() in cvs/numpy/numpy/core/src/multiarraymodule.c # this algorithm is from numpy: PyArray_Newshape() in
# cvs/numpy/numpy/core/src/multiarraymodule.c
if nd_out > 0: if nd_out > 0:
strides_statements.append( strides_statements.append(
'if (strides[' + 'if (strides[' +
...@@ -290,8 +307,8 @@ class DimShuffle(Op): ...@@ -290,8 +307,8 @@ class DimShuffle(Op):
str(nd_out) + str(nd_out) +
'-1] = %(basename)s->descr->elsize' '-1] = %(basename)s->descr->elsize'
) )
for i in xrange(nd_out-2,-1, -1): for i in xrange(nd_out - 2, -1, -1):
strides_statements.append("if (strides[%(i)s] == 0) strides[%(i)s] = strides[%(i)s+1] * dimensions[%(i)s+1]"%dict(i=str(i))) strides_statements.append("if (strides[%(i)s] == 0) strides[%(i)s] = strides[%(i)s+1] * dimensions[%(i)s+1]" % dict(i=str(i)))
# #
# PyObject* PyArray_New(PyTypeObject* subtype, int nd, npy_intp* dims, int type_num, # PyObject* PyArray_New(PyTypeObject* subtype, int nd, npy_intp* dims, int type_num,
...@@ -332,7 +349,6 @@ class DimShuffle(Op): ...@@ -332,7 +349,6 @@ class DimShuffle(Op):
print full_code print full_code
if 0: if 0:
import sys
sys.exit() sys.exit()
return full_code % dict(locals(), **sub) return full_code % dict(locals(), **sub)
...@@ -351,8 +367,8 @@ class DimShuffle(Op): ...@@ -351,8 +367,8 @@ class DimShuffle(Op):
# Do not make the DimShuffle inplace as an optimization at the # Do not make the DimShuffle inplace as an optimization at the
# canonicalization optimization phase will remove the implace. # canonicalization optimization phase will remove the implace.
# The inplace will be reintroduced automatically later in the graph. # The inplace will be reintroduced automatically later in the graph.
return [DimShuffle(gz.type.broadcastable, grad_order)(Elemwise(scalar.identity)(gz))] return [DimShuffle(gz.type.broadcastable, grad_order)(
Elemwise(scalar.identity)(gz))]
class DimShufflePrinter: class DimShufflePrinter:
...@@ -365,7 +381,8 @@ class DimShufflePrinter: ...@@ -365,7 +381,8 @@ class DimShufflePrinter:
return pstate.pprinter.process(r) return pstate.pprinter.process(r)
if list(new_order) == list(reversed(range(r.type.ndim))): if list(new_order) == list(reversed(range(r.type.ndim))):
return "%s.T" % pstate.pprinter.process(r) return "%s.T" % pstate.pprinter.process(r)
return "DimShuffle{%s}(%s)" % (", ".join(map(str, new_order)), pstate.pprinter.process(r)) return "DimShuffle{%s}(%s)" % (", ".join(map(str, new_order)),
pstate.pprinter.process(r))
def process(self, r, pstate): def process(self, r, pstate):
if r.owner is None: if r.owner is None:
...@@ -376,8 +393,8 @@ class DimShufflePrinter: ...@@ -376,8 +393,8 @@ class DimShufflePrinter:
else: else:
raise TypeError("Can only print DimShuffle.") raise TypeError("Can only print DimShuffle.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, DimShuffle), DimShufflePrinter()) pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, DimShuffle),
DimShufflePrinter())
################ ################
...@@ -405,30 +422,34 @@ class Elemwise(Op): ...@@ -405,30 +422,34 @@ class Elemwise(Op):
Elemwise(add) # represents + on tensors (x + y) Elemwise(add) # represents + on tensors (x + y)
Elemwise(add, {0 : 0}) # represents the += operation (x += y) Elemwise(add, {0 : 0}) # represents the += operation (x += y)
Elemwise(add, {0 : 1}) # represents += on the second argument (y += x) Elemwise(add, {0 : 1}) # represents += on the second argument (y += x)
Elemwise(mul)(rand(10, 5), rand(1, 5)) # the second input is completed along the first dimension to match the first input Elemwise(mul)(rand(10, 5), rand(1, 5)) # the second input is completed
Elemwise(true_div)(rand(10, 5), rand(10, 1)) # same but along the second dimension # along the first dimension to match the first input
Elemwise(true_div)(rand(10, 5), rand(10, 1)) # same but along the
# second dimension
Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5) Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5)
Elemwise(log)(rand(3, 4, 5)) Elemwise(log)(rand(3, 4, 5))
""" """
def __init__(self, scalar_op, inplace_pattern = {}, name = None, nfunc_spec = None): def __init__(self, scalar_op, inplace_pattern={}, name=None,
nfunc_spec=None):
""" """
Usage: Elemwise(scalar_op, inplace_pattern = {}) Usage: Elemwise(scalar_op, inplace_pattern = {})
* scalar_op: an instance of a subclass of scalar.ScalarOp which works uniquely on * scalar_op: an instance of a subclass of scalar.ScalarOp which works
scalars uniquely on scalars
* inplace_pattern: a dictionary that maps the index of an output to the * inplace_pattern: a dictionary that maps the index of an output to the
index of an input so the output is calculated inplace using index of an input so the output is calculated inplace using
the input's storage. (Just like destroymap, but without the lists.) the input's storage. (Just like destroymap, but without the lists.)
* nfunc_spec: either None or a tuple of three elements, (nfunc_name, nin, nout) such * nfunc_spec: either None or a tuple of three elements,
that getattr(numpy, nfunc_name) implements this operation, takes nin (nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
inputs and abs(nout) outputs (nout < 0 if the numpy function implements this operation, takes nin inputs and abs(nout) outputs
does not provide the option of providing a numpy array to store the (nout < 0 if the numpy function does not provide the option of
results in). Note that nin cannot always be inferred from the scalar op's providing a numpy array to store the results in). Note that nin
own nin field because that value is sometimes 0 (meaning a variable number cannot always be inferred from the scalar op's own nin field
of inputs), whereas the numpy function may not have varargs. NOTE: as of because that value is sometimes 0 (meaning a variable number of
now, the sign of the nout field is ignored (some work needs to be done inputs), whereas the numpy function may not have varargs.
to resize the destinations when needed). NOTE: as of now, the sign of the nout field is ignored (some work
needs to be done to resize the destinations when needed).
""" """
self.name = name self.name = name
self.scalar_op = scalar_op self.scalar_op = scalar_op
...@@ -441,7 +462,8 @@ class Elemwise(Op): ...@@ -441,7 +462,8 @@ class Elemwise(Op):
if nfunc_spec: if nfunc_spec:
self.nfunc = getattr(numpy, nfunc_spec[0]) self.nfunc = getattr(numpy, nfunc_spec[0])
elif scalar_op.nin > 0: elif scalar_op.nin > 0:
self.ufunc = numpy.frompyfunc(scalar_op.impl, scalar_op.nin, scalar_op.nout) self.ufunc = numpy.frompyfunc(scalar_op.impl, scalar_op.nin,
scalar_op.nout)
#precompute the hash of this node #precompute the hash of this node
self._rehash() self._rehash()
...@@ -461,7 +483,8 @@ class Elemwise(Op): ...@@ -461,7 +483,8 @@ class Elemwise(Op):
if getattr(self, 'nfunc_spec', None): if getattr(self, 'nfunc_spec', None):
self.nfunc = getattr(numpy, self.nfunc_spec[0]) self.nfunc = getattr(numpy, self.nfunc_spec[0])
elif self.scalar_op.nin > 0: elif self.scalar_op.nin > 0:
self.ufunc = numpy.frompyfunc(self.scalar_op.impl, self.scalar_op.nin, self.scalar_op.nout) self.ufunc = numpy.frompyfunc(self.scalar_op.impl,
self.scalar_op.nin, self.scalar_op.nout)
self._rehash() self._rehash()
def make_node(self, *inputs): def make_node(self, *inputs):
...@@ -472,7 +495,8 @@ class Elemwise(Op): ...@@ -472,7 +495,8 @@ class Elemwise(Op):
""" """
inputs = map(as_tensor_variable, inputs) inputs = map(as_tensor_variable, inputs)
shadow = self.scalar_op.make_node(*[Scalar(dtype=i.type.dtype)() for i in inputs]) shadow = self.scalar_op.make_node(
*[Scalar(dtype=i.type.dtype)() for i in inputs])
target_length = max([input.type.ndim for input in inputs]) target_length = max([input.type.ndim for input in inputs])
...@@ -486,30 +510,43 @@ class Elemwise(Op): ...@@ -486,30 +510,43 @@ class Elemwise(Op):
# TODO: use LComplete instead # TODO: use LComplete instead
args.append(DimShuffle( args.append(DimShuffle(
input.type.broadcastable, input.type.broadcastable,
['x']*difference + range(length), ['x'] * difference + range(length),
inplace = True)(input)) inplace=True)(input))
inputs = args inputs = args
#HERE: all the broadcast dims have the same length now #HERE: all the broadcast dims have the same length now
#cleverness: we iterate over the first, second, third broadcast flag of all inputs in # cleverness: we iterate over the first, second, third broadcast flag
#parallel... the all() gives us each output broadcastable bit in turn. # of all inputs in parallel... the all() gives us each output
# broadcastable bit in turn.
#it is multiplied by nout because Elemwise supports multiple outputs (nout of them) # it is multiplied by nout because Elemwise supports multiple outputs
out_broadcastables = [[all(bcast) for bcast in zip(*[input.type.broadcastable for input in inputs])]] * shadow.nout # (nout of them)
out_broadcastables = [[all(bcast)
for bcast in zip(*[input.type.broadcastable
for input in inputs])]] * shadow.nout
#inplace_pattern maps output idx -> input idx #inplace_pattern maps output idx -> input idx
inplace_pattern = self.inplace_pattern inplace_pattern = self.inplace_pattern
if inplace_pattern: if inplace_pattern:
for overwriter, overwritten in inplace_pattern.items(): for overwriter, overwritten in inplace_pattern.items():
for ob, ib in zip(out_broadcastables[overwriter], inputs[overwritten].type.broadcastable): for ob, ib in zip(out_broadcastables[overwriter],
inputs[overwritten].type.broadcastable):
if ib and not ob: if ib and not ob:
raise ValueError("Operation cannot be done inplace on an input with broadcasted dimensions.") raise ValueError((
"Operation cannot be done inplace on an input "
"with broadcasted dimensions."))
out_dtypes = [o.type.dtype for o in shadow.outputs] out_dtypes = [o.type.dtype for o in shadow.outputs]
if any(inputs[i].type.dtype != out_dtypes[o] for o, i in inplace_pattern.items()): if any(inputs[i].type.dtype != out_dtypes[o]
raise TypeError("Cannot do an inplace operation on incompatible data types.", for o, i in inplace_pattern.items()):
([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)) raise TypeError((
outputs = [TensorType(dtype = dtype, broadcastable = broadcastable)() for dtype, broadcastable in zip(out_dtypes, out_broadcastables)] "Cannot do an inplace operation on incompatible data types.",
([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)))
outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)()
for dtype, broadcastable in zip(out_dtypes, out_broadcastables)
]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def __eq__(self, other): def __eq__(self, other):
...@@ -518,25 +555,24 @@ class Elemwise(Op): ...@@ -518,25 +555,24 @@ class Elemwise(Op):
other_items = other.inplace_pattern.items() other_items = other.inplace_pattern.items()
items.sort() items.sort()
other_items.sort() other_items.sort()
rval = (self.scalar_op == other.scalar_op) and (items == other_items) rval = ((self.scalar_op == other.scalar_op)
and (items == other_items))
return rval return rval
return False return False
def _rehash(self): def _rehash(self):
items = self.inplace_pattern.items() items = self.inplace_pattern.items()
items.sort() items.sort()
first_part = [k for k,v in items] first_part = [k for k, v in items]
second_part = [] second_part = []
for k,v in items: for k, v in items:
if isinstance(v, (tuple, list)): if isinstance(v, (tuple, list)):
second_part += [tuple(v)] second_part += [tuple(v)]
else: else:
second_part += [v] second_part += [v]
tuple_items = tuple(first_part + second_part) tuple_items = tuple(first_part + second_part)
#backport
#tuple_items = tuple([k for k,v in items] + [(tuple(v) if isinstance(v, (tuple, list)) else v) for k,v in items])
h = hash('Elemwise') ^ hash(self.scalar_op) ^ hash(tuple_items) h = hash('Elemwise') ^ hash(self.scalar_op) ^ hash(tuple_items)
assert h == getattr(self,'_hashval', h) assert h == getattr(self, '_hashval', h)
self._hashval = h self._hashval = h
def __hash__(self): def __hash__(self):
...@@ -560,7 +596,7 @@ class Elemwise(Op): ...@@ -560,7 +596,7 @@ class Elemwise(Op):
for idx, out in enumerate(outs): for idx, out in enumerate(outs):
# make such that _bgrads computes only the gradients of the # make such that _bgrads computes only the gradients of the
# current output on the inputs ( and not all outputs) # current output on the inputs ( and not all outputs)
ograds = [ theano.tensor.zeros_like(x) for x in outs] ograds = [theano.tensor.zeros_like(x) for x in outs]
ograds[idx] = theano.tensor.ones_like(out) ograds[idx] = theano.tensor.ones_like(out)
bgrads = self._bgrad(inputs, ograds) bgrads = self._bgrad(inputs, ograds)
...@@ -578,9 +614,9 @@ class Elemwise(Op): ...@@ -578,9 +614,9 @@ class Elemwise(Op):
pass pass
elif eval_point is not None: elif eval_point is not None:
if rop_out is None: if rop_out is None:
rop_out = bgrads[jdx]*eval_point rop_out = bgrads[jdx] * eval_point
else: else:
rop_out = rop_out + bgrads[jdx]*eval_point rop_out = rop_out + bgrads[jdx] * eval_point
rval[idx] = rop_out rval[idx] = rop_out
...@@ -589,17 +625,18 @@ class Elemwise(Op): ...@@ -589,17 +625,18 @@ class Elemwise(Op):
def grad(self, inputs, ograds): def grad(self, inputs, ograds):
#compute grad with respect to broadcasted input #compute grad with respect to broadcasted input
rval = self._bgrad(inputs,ograds) rval = self._bgrad(inputs, ograds)
#sum out the broadcasted dimensions #sum out the broadcasted dimensions
for i, ipt in enumerate(inputs): for i, ipt in enumerate(inputs):
if rval[i] is None: if rval[i] is None:
continue continue
# list of all the dimensions that are broadcastable for input[i] so we # list of all the dimensions that are broadcastable for input[i] so
# can sum over them # we can sum over them
# todo: only count dimensions that were effectively broadcasted # todo: only count dimensions that were effectively broadcasted
to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable) if bcast] to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable)
if bcast]
if to_sum: if to_sum:
shuffle = [] shuffle = []
...@@ -612,7 +649,7 @@ class Elemwise(Op): ...@@ -612,7 +649,7 @@ class Elemwise(Op):
j += 1 j += 1
#close if #close if
#close for #close for
sr = Sum(axis = to_sum)(rval[i]) sr = Sum(axis=to_sum)(rval[i])
sr = sr.dimshuffle(shuffle) sr = sr.dimshuffle(shuffle)
#sr = DimShuffle(sr.type.broadcastable, shuffle)(sr) #sr = DimShuffle(sr.type.broadcastable, shuffle)(sr)
rval[i] = sr rval[i] = sr
...@@ -621,7 +658,6 @@ class Elemwise(Op): ...@@ -621,7 +658,6 @@ class Elemwise(Op):
return rval return rval
def _bgrad(self, inputs, ograds): def _bgrad(self, inputs, ograds):
# returns grad, with respect to broadcasted versions of inputs # returns grad, with respect to broadcasted versions of inputs
...@@ -636,15 +672,17 @@ class Elemwise(Op): ...@@ -636,15 +672,17 @@ class Elemwise(Op):
theano.config.compute_test_value = 'off' theano.config.compute_test_value = 'off'
scalar_inputs = [Scalar(dtype = t.type.dtype)() for t in inputs] scalar_inputs = [Scalar(dtype=t.type.dtype)() for t in inputs]
scalar_ograds = [Scalar(dtype = ograd.type.dtype)() for ograd in ograds] scalar_ograds = [Scalar(dtype=ograd.type.dtype)()
for ograd in ograds]
scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds) scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds)
finally: finally:
theano.config.compute_test_value = prev_setting theano.config.compute_test_value = prev_setting
nd = len(inputs[0].type.broadcastable) # this is the same for everyone nd = len(inputs[0].type.broadcastable) # this is the same for everyone
def transform(r): def transform(r):
# From a graph of ScalarOps, make a graph of Broadcast ops. # From a graph of ScalarOps, make a graph of Broadcast ops.
if r in scalar_inputs: if r in scalar_inputs:
...@@ -654,12 +692,14 @@ class Elemwise(Op): ...@@ -654,12 +692,14 @@ class Elemwise(Op):
node = r.owner node = r.owner
if node is None: if node is None:
# the gradient contains a constant, translate it as # the gradient contains a constant, translate it as
# an equivalent TensorType of size 1 and proper number of dimensions # an equivalent TensorType of size 1 and proper number of
res = TensorConstant(TensorType(dtype = r.type.dtype, # dimensions
broadcastable = ()), res = TensorConstant(TensorType(dtype=r.type.dtype,
numpy.asarray(r.data)) # .reshape(b) broadcastable=()),
return DimShuffle((), ['x']*nd, inplace = True)(res) numpy.asarray(r.data)) # .reshape(b)
new_r = Elemwise(node.op, {})(*[transform(ipt) for ipt in node.inputs]) return DimShuffle((), ['x'] * nd, inplace=True)(res)
new_r = Elemwise(node.op, {})(
*[transform(ipt) for ipt in node.inputs])
return new_r return new_r
ret = [] ret = []
for scalar_igrad, ipt in zip(scalar_igrads, inputs): for scalar_igrad, ipt in zip(scalar_igrads, inputs):
...@@ -667,18 +707,19 @@ class Elemwise(Op): ...@@ -667,18 +707,19 @@ class Elemwise(Op):
# undefined gradient # undefined gradient
ret.append(None) ret.append(None)
continue continue
ret.append( transform(scalar_igrad)) ret.append(transform(scalar_igrad))
return ret return ret
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage):
maxsize = max(len(input.shape) for input in inputs) maxsize = max(len(input.shape) for input in inputs)
for dims in zip(*[[(1, True)]*(maxsize - len(input.shape)) + zip(input.shape, sinput.type.broadcastable) for dims in zip(*[([(1, True)] * (maxsize - len(input.shape))
+ zip(input.shape, sinput.type.broadcastable))
for input, sinput in zip(inputs, node.inputs)]): for input, sinput in zip(inputs, node.inputs)]):
if max(d for d,b in dims) != 1 and (1, False) in dims: if max(d for d, b in dims) != 1 and (1, False) in dims:
# yes there may be more compact ways to write this code, # yes there may be more compact ways to write this code,
# but please maintain python 2.4 compatibility (no "x if c else y") # but please maintain python 2.4 compatibility
# (no "x if c else y")
msg = [] msg = []
assert len(inputs) == len(node.inputs) assert len(inputs) == len(node.inputs)
for input, sinput in zip(inputs, node.inputs): for input, sinput in zip(inputs, node.inputs):
...@@ -701,33 +742,32 @@ class Elemwise(Op): ...@@ -701,33 +742,32 @@ class Elemwise(Op):
raise ValueError('\n'.join(msg_chunks)) raise ValueError('\n'.join(msg_chunks))
else: else:
raise ValueError(base_exc_str) raise ValueError(base_exc_str)
#backport
#raise ValueError('Dimension mismatch; shapes are %s' %
# ', '.join('(%s)' % ', '.join('*' if b else str(d)
# for d, b in zip(input.shape, sinput.type.broadcastable))
# for input, sinput in zip(inputs, node.inputs)))
# Other mismatches will be caught by the ufunc # Other mismatches will be caught by the ufunc
if not self.inplace_pattern: if not self.inplace_pattern:
for output, storage in zip(node.outputs, output_storage): for output, storage in zip(node.outputs, output_storage):
odat = storage[0] odat = storage[0]
shape = [max(values) for values in zip(*[input.shape for input in inputs])] shape = [max(values)
for values in zip(*[input.shape for input in inputs])]
if odat is not None: if odat is not None:
# reuse storage if we can # reuse storage if we can
odat.resize(shape, refcheck = 0) odat.resize(shape, refcheck=0)
else: else:
odat = numpy.ndarray(shape, dtype = output.type.dtype) odat = numpy.ndarray(shape, dtype=output.type.dtype)
storage[0] = odat storage[0] = odat
else: else:
for i, (output, storage) in enumerate(zip(node.outputs, output_storage)): for i, (output, storage) in enumerate(zip(node.outputs,
output_storage)):
#i is an output idx #i is an output idx
if i in self.inplace_pattern: if i in self.inplace_pattern:
odat = inputs[self.inplace_pattern[i]] odat = inputs[self.inplace_pattern[i]]
else: else:
odat = storage[0] odat = storage[0]
shape = [max(values) for values in zip(*[input.shape for input in inputs])] shape = [max(values)
for values in zip(*[input.shape
for input in inputs])]
if odat is not None: if odat is not None:
odat.resize(shape, refcheck = 0) odat.resize(shape, refcheck=0)
else: else:
odat = numpy.ndarray(shape, dtype=output.type.dtype) odat = numpy.ndarray(shape, dtype=output.type.dtype)
storage[0] = odat storage[0] = odat
...@@ -784,10 +824,13 @@ class Elemwise(Op): ...@@ -784,10 +824,13 @@ class Elemwise(Op):
# Since numpy 1.6, function created with numpy.frompyfunc # Since numpy 1.6, function created with numpy.frompyfunc
# always return an ndarray with dtype object # always return an ndarray with dtype object
variable = numpy.asarray(variable, dtype=nout.dtype) variable = numpy.asarray(variable, dtype=nout.dtype)
if hasattr(variable, 'shape') and storage[0].shape != variable.shape:
if (hasattr(variable, 'shape')
and storage[0].shape != variable.shape):
if numpy.prod(variable.shape) == 0: if numpy.prod(variable.shape) == 0:
# numpy don't resize from a shape (1,5) to (0,5) # numpy don't resize from a shape (1,5) to (0,5)
# This bypass the inplace... But I it is important in this case. # This bypass the inplace...
# But I it is important in this case.
storage[0] = variable storage[0] = variable
continue continue
storage[0].resize(variable.shape) storage[0].resize(variable.shape)
...@@ -797,7 +840,8 @@ class Elemwise(Op): ...@@ -797,7 +840,8 @@ class Elemwise(Op):
else: else:
storage[0].itemset(variable) storage[0].itemset(variable)
assert str(storage[0].dtype) != 'object' assert str(storage[0].dtype) != 'object'
# the following should be used instead of the previous loop, unfortunately it tends to segfault # the following should be used instead of the previous loop,
# unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage])) # self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
def infer_shape(self, node, i_shapes): def infer_shape(self, node, i_shapes):
...@@ -806,12 +850,15 @@ class Elemwise(Op): ...@@ -806,12 +850,15 @@ class Elemwise(Op):
oshp = [] oshp = []
for dim, b in enumerate(o.type.broadcastable): for dim, b in enumerate(o.type.broadcastable):
b_dim = None b_dim = None
if b: # this is broadcastable if b:
# this is broadcastable
b_dim = 1 b_dim = 1
else: # there must be some input that is not broadcastable in dimension 'dim' else:
for ishp, i in zip(i_shapes,node.inputs): # there must be some input that is not broadcastable in
if isinstance(i.type,theano.scalar.Scalar): # dimension 'dim'
continue #we skip scalar for ishp, i in zip(i_shapes, node.inputs):
if isinstance(i.type, theano.scalar.Scalar):
continue # we skip scalar
if not i.type.broadcastable[dim]: if not i.type.broadcastable[dim]:
# input i is not broadcastable in position dim # input i is not broadcastable in position dim
# therefore if its shape is known, we can use it # therefore if its shape is known, we can use it
...@@ -819,12 +866,14 @@ class Elemwise(Op): ...@@ -819,12 +866,14 @@ class Elemwise(Op):
if ishp[dim]: if ishp[dim]:
b_dim = ishp[dim] b_dim = ishp[dim]
break break
# b_dim might still be None, if every input's shape was unknown in dimension 'dim'
# b_dim might still be None, if every input's shape was unknown
# in dimension 'dim'
oshp.append(b_dim) oshp.append(b_dim)
# TODO: it would be interesting to return the constraining information that if # TODO: it would be interesting to return the constraining
# one of the inputs shape[dim] is known and another input's shape[dim] is not, # information that if one of the inputs shape[dim] is known
# that we can now assume that the other input's shape[dim] is the same as the # and another input's shape[dim] is not, that we can now assume
# first. # that the other input's shape[dim] is the same as the first.
rval.append(tuple(oshp)) rval.append(tuple(oshp))
return rval return rval
...@@ -888,16 +937,17 @@ class Elemwise(Op): ...@@ -888,16 +937,17 @@ class Elemwise(Op):
# We loop over the "real" outputs, i.e., those that are not # We loop over the "real" outputs, i.e., those that are not
# inplace (must be allocated) and we declare/allocate/check # inplace (must be allocated) and we declare/allocate/check
# them # them
for output, oname, odtype in zip(real_outputs, real_onames, real_odtypes): for output, oname, odtype in zip(
i += 1 # before this loop, i = number of inputs real_outputs, real_onames, real_odtypes):
i += 1 # before this loop, i = number of inputs
sub['lv%i' % i] = oname sub['lv%i' % i] = oname
sub['olv'] = oname sub['olv'] = oname
alloc += cgen.make_declare([range(nnested)], [odtype], alloc += cgen.make_declare([range(nnested)], [odtype],
dict(sub, lv0 = oname)) dict(sub, lv0=oname))
alloc += cgen.make_alloc(orders, odtype, sub) alloc += cgen.make_alloc(orders, odtype, sub)
alloc += cgen.make_checks([range(nnested)], [odtype], alloc += cgen.make_checks([range(nnested)], [odtype],
dict(sub, lv0 = oname)) dict(sub, lv0=oname))
olv_index = i # index of the last output olv_index = i # index of the last output
# We loop over the "aliased" outputs, i.e., those that are # We loop over the "aliased" outputs, i.e., those that are
# inplace (overwrite the contents of one of the inputs) and # inplace (overwrite the contents of one of the inputs) and
...@@ -927,15 +977,18 @@ class Elemwise(Op): ...@@ -927,15 +977,18 @@ class Elemwise(Op):
# We declare the scalar variables used in the inner loop to do # We declare the scalar variables used in the inner loop to do
# the element-wise computation. Aliased scalar variables need # the element-wise computation. Aliased scalar variables need
# not be declared, as they are #defined in defines # not be declared, as they are #defined in defines
task_decl = "".join(["%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % locals() task_decl = "".join([
for name, dtype in zip(inames + list(real_onames), "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % locals()
idtypes + list(real_odtypes))]) for name, dtype in zip(inames + list(real_onames),
idtypes + list(real_odtypes))])
# We generate the C code of the inner loop using the scalar op # We generate the C code of the inner loop using the scalar op
task_code = self.scalar_op.c_code( task_code = self.scalar_op.c_code(
Apply(self.scalar_op, Apply(self.scalar_op,
[Scalar(dtype = input.type.dtype)() for input in node.inputs], [Scalar(dtype=input.type.dtype)()
[Scalar(dtype = output.type.dtype)() for output in node.outputs]), for input in node.inputs],
[Scalar(dtype=output.type.dtype)()
for output in node.outputs]),
nodename + '_scalar_', nodename + '_scalar_',
["%s_i" % s for s in _inames], ["%s_i" % s for s in _inames],
["%s_i" % s for s in onames], ["%s_i" % s for s in onames],
...@@ -950,11 +1003,11 @@ class Elemwise(Op): ...@@ -950,11 +1003,11 @@ class Elemwise(Op):
""" % locals() """ % locals()
loop = cgen.make_reordered_loop( loop = cgen.make_reordered_loop(
init_loop_orders = orders + [range(nnested)] * len(real_onames), init_loop_orders=orders + [range(nnested)] * len(real_onames),
olv_index = olv_index, olv_index=olv_index,
dtypes = idtypes + list(real_odtypes), dtypes=(idtypes + list(real_odtypes)),
inner_task = code, inner_task=code,
sub = sub) sub=sub)
return decl, checks, alloc, loop return decl, checks, alloc, loop
def c_code(self, node, nodename, inames, onames, sub): def c_code(self, node, nodename, inames, onames, sub):
...@@ -973,12 +1026,12 @@ class Elemwise(Op): ...@@ -973,12 +1026,12 @@ class Elemwise(Op):
return support_code return support_code
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [6] # the version corresponding to the c code in this Op version = [6] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
[Scalar(dtype = input.type.dtype)() for input in node.inputs], [Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype = output.type.dtype)() for output in node.outputs]) [Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node)) version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs: for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version()) version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
...@@ -996,7 +1049,6 @@ class Elemwise(Op): ...@@ -996,7 +1049,6 @@ class Elemwise(Op):
# raise TypeError('All ops in the graph must be Elemwise.') # raise TypeError('All ops in the graph must be Elemwise.')
################ ################
### CAReduce ### ### CAReduce ###
################ ################
...@@ -1040,7 +1092,9 @@ class CAReduce(Op): ...@@ -1040,7 +1092,9 @@ class CAReduce(Op):
- if None, all dimensions are reduced - if None, all dimensions are reduced
""" """
if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1: if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1:
raise NotImplementedError("CAReduce only supports binary functions with a single output.") raise NotImplementedError((
"CAReduce only supports binary functions with a single "
"output."))
self.scalar_op = scalar_op self.scalar_op = scalar_op
if axis is None: if axis is None:
...@@ -1081,20 +1135,23 @@ class CAReduce(Op): ...@@ -1081,20 +1135,23 @@ class CAReduce(Op):
if self.axis is not None: if self.axis is not None:
for axis in self.axis: for axis in self.axis:
if axis >= input.type.ndim or (axis<0 and abs(axis)>input.type.ndim): if (axis >= input.type.ndim
raise ValueError('Not enough dimensions on %s to reduce on axis %s' % (input, axis)) or (axis < 0 and abs(axis) > input.type.ndim)):
raise ValueError((
'Not enough dimensions on %s to reduce on axis %s'
% (input, axis)))
input = as_tensor_variable(input) input = as_tensor_variable(input)
axis = self.axis axis = self.axis
if axis is None: if axis is None:
axis = range(len(input.type.broadcastable)) axis = range(len(input.type.broadcastable))
if any([a<0 for a in axis]): if any([a < 0 for a in axis]):
axis2=[] axis2 = []
for a in self.axis: for a in self.axis:
if a<0: if a < 0:
axis2.append(a+input.type.ndim) axis2.append(a + input.type.ndim)
else: else:
axis2.append(a) axis2.append(a)
assert len(axis)==len(axis2) assert len(axis) == len(axis2)
axis = tuple(axis2) axis = tuple(axis2)
op = self.__class__(self.scalar_op, axis) op = self.__class__(self.scalar_op, axis)
else: else:
...@@ -1115,7 +1172,9 @@ class CAReduce(Op): ...@@ -1115,7 +1172,9 @@ class CAReduce(Op):
self.set_ufunc(self.scalar_op) self.set_ufunc(self.scalar_op)
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.scalar_op == other.scalar_op and self.axis == other.axis return (type(self) == type(other)
and self.scalar_op == other.scalar_op
and self.axis == other.axis)
def __hash__(self): def __hash__(self):
if self.axis is None: if self.axis is None:
...@@ -1125,7 +1184,8 @@ class CAReduce(Op): ...@@ -1125,7 +1184,8 @@ class CAReduce(Op):
def __str__(self): def __str__(self):
if self.axis is not None: if self.axis is not None:
return "Reduce{%s}{%s}" % (self.scalar_op, ", ".join(str(x) for x in self.axis)) return "Reduce{%s}{%s}" % (
self.scalar_op, ", ".join(str(x) for x in self.axis))
else: else:
return "Reduce{%s}" % self.scalar_op return "Reduce{%s}" % self.scalar_op
...@@ -1139,13 +1199,17 @@ class CAReduce(Op): ...@@ -1139,13 +1199,17 @@ class CAReduce(Op):
to_reduce = reversed(sorted(axis)) to_reduce = reversed(sorted(axis))
if to_reduce: if to_reduce:
for dimension in to_reduce: for dimension in to_reduce:
# If it's a zero-size array, use scalar_op.identity if available # If it's a zero-size array, use scalar_op.identity
# if available
if variable.shape[dimension] == 0: if variable.shape[dimension] == 0:
if hasattr(self.scalar_op, 'identity'): if hasattr(self.scalar_op, 'identity'):
variable = numpy.array(self.scalar_op.identity) variable = numpy.array(self.scalar_op.identity)
break break
else: else:
raise ValueError("Input (%s) has zero-size on axis %s, but self.scalar_op (%s) has no attribute 'identity'" % (variable, dimension, self.scalar_op)) raise ValueError((
"Input (%s) has zero-size on axis %s, but "
"self.scalar_op (%s) has no attribute 'identity'"
% (variable, dimension, self.scalar_op)))
else: else:
# Numpy 1.6 has a bug where you sometimes have to specify # Numpy 1.6 has a bug where you sometimes have to specify
# "dtype='object'" in reduce for it to work, if the ufunc # "dtype='object'" in reduce for it to work, if the ufunc
...@@ -1161,9 +1225,11 @@ class CAReduce(Op): ...@@ -1161,9 +1225,11 @@ class CAReduce(Op):
variable = numpy.asarray(variable) variable = numpy.asarray(variable)
if numpy.may_share_memory(variable, input): if numpy.may_share_memory(variable, input):
# perhaps numpy is clever for reductions of size 1? We don't want this. # perhaps numpy is clever for reductions of size 1?
# We don't want this.
variable = variable.copy() variable = variable.copy()
output[0] = theano._asarray(variable, dtype = node.outputs[0].type.dtype) output[0] = theano._asarray(variable,
dtype=node.outputs[0].type.dtype)
else: else:
output[0] = numpy.copy(variable) output[0] = numpy.copy(variable)
...@@ -1172,8 +1238,9 @@ class CAReduce(Op): ...@@ -1172,8 +1238,9 @@ class CAReduce(Op):
axis = self.axis axis = self.axis
if axis is None: if axis is None:
return (), return (),
return [ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i not in axis], return [ishape[i]
for (i, b) in enumerate(node.inputs[0].type.broadcastable)
if i not in axis],
def _c_all(self, node, name, inames, onames, sub): def _c_all(self, node, name, inames, onames, sub):
...@@ -1210,36 +1277,41 @@ class CAReduce(Op): ...@@ -1210,36 +1277,41 @@ class CAReduce(Op):
i += 1 i += 1
sub['lv%i' % i] = oname sub['lv%i' % i] = oname
sub['olv'] = oname sub['olv'] = oname
alloc += cgen.make_declare([range(nnested) + ['x'] * len(axis)], [odtype], dict(sub, lv0 = oname)) alloc += cgen.make_declare(
[range(nnested) + ['x'] * len(axis)],
[odtype], dict(sub, lv0=oname))
alloc += cgen.make_alloc([order1], odtype, sub) alloc += cgen.make_alloc([order1], odtype, sub)
alloc += cgen.make_checks([range(nnested) + ['x'] * len(axis)], [odtype], dict(sub, lv0 = oname)) alloc += cgen.make_checks(
[range(nnested) + ['x'] * len(axis)],
[odtype], dict(sub, lv0=oname))
if hasattr(self.scalar_op,'identity'): if hasattr(self.scalar_op, 'identity'):
identity = self.scalar_op.identity identity = self.scalar_op.identity
elif self.scalar_op in [scalar.maximum, scalar.minimum]: elif self.scalar_op in [scalar.maximum, scalar.minimum]:
if self.scalar_op == scalar.maximum: if self.scalar_op == scalar.maximum:
scal_name = 'maximum' scal_name = 'maximum'
if input.type.dtype in ["float32","float64"]: if input.type.dtype in ["float32", "float64"]:
identity = "-__builtin_inf()" identity = "-__builtin_inf()"
elif input.type.dtype.startswith("uint"): elif input.type.dtype.startswith("uint"):
# numpy1.5.1 don't define NPY_MIN_UINT* # numpy1.5.1 don't define NPY_MIN_UINT*
identity = "0" identity = "0"
else: else:
identity = "NPY_MIN_"+str(input.type.dtype).upper() identity = "NPY_MIN_" + str(input.type.dtype).upper()
if self.scalar_op == scalar.minimum: if self.scalar_op == scalar.minimum:
scal_name = 'minimum' scal_name = 'minimum'
if input.type.dtype in ["float32","float64"]: if input.type.dtype in ["float32", "float64"]:
identity = "__builtin_inf()" identity = "__builtin_inf()"
else: else:
identity = "NPY_MAX_"+str(input.type.dtype).upper() identity = "NPY_MAX_" + str(input.type.dtype).upper()
fail = sub["fail"] fail = sub["fail"]
pattern=[0]*len(node.inputs[0].broadcastable) pattern = [0] * len(node.inputs[0].broadcastable)
axis = self.axis axis = self.axis
if axis == None: axis = range(len(pattern)) if axis == None:
axis = range(len(pattern))
for i in axis: for i in axis:
pattern[i]=1 pattern[i] = 1
pattern_ = str(pattern)[1:-1] pattern_ = str(pattern)[1:-1]
decl +="""int tosum[]={%(pattern_)s};"""%locals() decl += """int tosum[]={%(pattern_)s};""" % locals()
alloc += """ alloc += """
for(int i=0;i<%(iname)s->nd;i++){ for(int i=0;i<%(iname)s->nd;i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){ if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
...@@ -1247,23 +1319,30 @@ for(int i=0;i<%(iname)s->nd;i++){ ...@@ -1247,23 +1319,30 @@ for(int i=0;i<%(iname)s->nd;i++){
%(fail)s; %(fail)s;
} }
} }
"""%locals() """ % locals()
else: else:
raise TypeError("The CAReduce.scalar_op must have an identity field.") raise TypeError(
"The CAReduce.scalar_op must have an identity field.")
task0_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n%(name)s_i = %(identity)s;" % dict(dtype = odtype,
name = onames[0], task0_decl = (
identity = identity) "%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
"%(name)s_i = %(identity)s;"
task1_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % dict(dtype = idtype, name = inames[0]) % dict(dtype=odtype, name=onames[0], identity=identity))
task1_code = self.scalar_op.c_code(Apply(self.scalar_op, task1_decl = ("%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
[Scalar(dtype = input.type.dtype)() for input in node.inputs*2], % dict(dtype=idtype, name=inames[0]))
[Scalar(dtype = output.type.dtype)() for input in node.outputs]),
None, task1_code = self.scalar_op.c_code(
["%s_i" % onames[0], "%s_i" % inames[0]], Apply(
["%s_i" % onames[0]], self.scalar_op,
sub) [Scalar(dtype=input.type.dtype)()
for input in (node.inputs * 2)],
[Scalar(dtype=output.type.dtype)()
for input in node.outputs]),
None,
["%s_i" % onames[0], "%s_i" % inames[0]],
["%s_i" % onames[0]],
sub)
code1 = """ code1 = """
{ {
%(task1_decl)s %(task1_decl)s
...@@ -1275,10 +1354,16 @@ for(int i=0;i<%(iname)s->nd;i++){ ...@@ -1275,10 +1354,16 @@ for(int i=0;i<%(iname)s->nd;i++){
if len(axis) == 1: if len(axis) == 1:
all_code = [("", "")] * nnested + [(task0_decl, code1), ""] all_code = [("", "")] * nnested + [(task0_decl, code1), ""]
else: else:
all_code = [("", "")] * nnested + [(task0_decl, "")] + [("", "")] * (len(axis) - 2) + [("", code1), ""] all_code = (
[("", "")] * nnested
+ [(task0_decl, "")]
+ [("", "")] * (len(axis) - 2)
+ [("", code1), ""])
else: else:
all_code = [task0_decl + code1] all_code = [task0_decl + code1]
loop = cgen.make_loop([order, range(nnested) + ['x'] * len(axis)], [idtype, odtype], all_code, sub) loop = cgen.make_loop(
[order, range(nnested) + ['x'] * len(axis)],
[idtype, odtype], all_code, sub)
return decl, checks, alloc, loop return decl, checks, alloc, loop
def c_code(self, node, name, inames, onames, sub): def c_code(self, node, name, inames, onames, sub):
...@@ -1290,12 +1375,12 @@ for(int i=0;i<%(iname)s->nd;i++){ ...@@ -1290,12 +1375,12 @@ for(int i=0;i<%(iname)s->nd;i++){
return ['<vector>', '<algorithm>'] return ['<vector>', '<algorithm>']
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [4] # the version corresponding to the c code in this Op version = [4] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
[Scalar(dtype = input.type.dtype)() for input in node.inputs], [Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype = output.type.dtype)() for output in node.outputs]) [Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node)) version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs: for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version()) version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
...@@ -1553,8 +1638,9 @@ class Prod(CAReduceDtype): ...@@ -1553,8 +1638,9 @@ class Prod(CAReduceDtype):
of the other terms). This is easy to do by broadcasting the original of the other terms). This is easy to do by broadcasting the original
product. product.
(Note that we also need to broadcast-multiply by the "incoming gradient", (Note that we also need to broadcast-multiply by the
ie. the gradient of the cost relative to the output/product). "incoming gradient", ie. the gradient of the cost relative to the
output/product).
----- -----
...@@ -1565,8 +1651,8 @@ class Prod(CAReduceDtype): ...@@ -1565,8 +1651,8 @@ class Prod(CAReduceDtype):
non-zero, but is zero for all others. non-zero, but is zero for all others.
* If more than one zero is present, then all the derivatives are zero. * If more than one zero is present, then all the derivatives are zero.
For the last two cases (with 1 or more zeros), we can't use the division For the last two cases (with 1 or more zeros), we can't use the
trick, as this gives divisions by 0. division trick, as this gives divisions by 0.
Implementing that case-by-case logic is not as trivial, so a bunch of Implementing that case-by-case logic is not as trivial, so a bunch of
hacks are piled down here to do it. Notably, for the "only one zero" hacks are piled down here to do it. Notably, for the "only one zero"
...@@ -1583,10 +1669,9 @@ class Prod(CAReduceDtype): ...@@ -1583,10 +1669,9 @@ class Prod(CAReduceDtype):
''' '''
prod_in, = inp prod_in, = inp
gz, = grads gz, = grads
if prod_in.dtype[0:3] in ('int','uin'): if prod_in.dtype[0:3] in ('int', 'uin'):
return [None] return [None]
# Prepare the broadcasting that is used everywhere to broadcast # Prepare the broadcasting that is used everywhere to broadcast
# over the original groups (ie. broadcast over the elements of a given # over the original groups (ie. broadcast over the elements of a given
# product) # product)
...@@ -1622,7 +1707,8 @@ class Prod(CAReduceDtype): ...@@ -1622,7 +1707,8 @@ class Prod(CAReduceDtype):
where_zeros = T.eq(prod_in, 0.0) where_zeros = T.eq(prod_in, 0.0)
sum_where_zeros = T.sum(where_zeros, axis=self.axis) sum_where_zeros = T.sum(where_zeros, axis=self.axis)
groups_with_single_zero = T.eq(sum_where_zeros, 1).dimshuffle(new_dims) groups_with_single_zero = T.eq(sum_where_zeros, 1).dimshuffle(
new_dims)
# tensor with 0 everywhere except for those places where # tensor with 0 everywhere except for those places where
# a 0 part of a group with a single zero was to be found # a 0 part of a group with a single zero was to be found
where_single_zero = groups_with_single_zero * where_zeros where_single_zero = groups_with_single_zero * where_zeros
...@@ -1631,8 +1717,8 @@ class Prod(CAReduceDtype): ...@@ -1631,8 +1717,8 @@ class Prod(CAReduceDtype):
where_gz_not_zero = T.neq(gz, 0.0) where_gz_not_zero = T.neq(gz, 0.0)
# only take ProdWithoutZeros for the groups with single zeros # only take ProdWithoutZeros for the groups with single zeros
# with non-null incoming gradient # with non-null incoming gradient
where_to_take_prod_without_zeros = \ where_to_take_prod_without_zeros = (
groups_with_single_zero * where_gz_not_zero groups_with_single_zero * where_gz_not_zero)
# preprocess the original input so that we set 0 everywhere # preprocess the original input so that we set 0 everywhere
# except for groups that contain a single zero, to avoid computing # except for groups that contain a single zero, to avoid computing
# multiplications on other groups # multiplications on other groups
...@@ -1640,13 +1726,17 @@ class Prod(CAReduceDtype): ...@@ -1640,13 +1726,17 @@ class Prod(CAReduceDtype):
# TODO: put lazy switch here, if it'd work # TODO: put lazy switch here, if it'd work
# this is pretty efficient already (no multiplication if 0), but # this is pretty efficient already (no multiplication if 0), but
# it'd be even better if we had a lazy if per element # it'd be even better if we had a lazy if per element
prod_without_zeros = ProdWithoutZeros(axis=self.axis)(prod_without_zeros_in) prod_without_zeros = ProdWithoutZeros(axis=self.axis)(
prod_without_zeros_in)
prod_without_zeros = prod_without_zeros.dimshuffle(new_dims) prod_without_zeros = prod_without_zeros.dimshuffle(new_dims)
groups_without_zeros = T.eq(sum_where_zeros, 0).dimshuffle(new_dims) groups_without_zeros = T.eq(sum_where_zeros, 0).dimshuffle(
new_dims)
final_grad = T.switch(groups_without_zeros, grad_case_without_zeros, final_grad = T.switch(
T.switch(where_single_zero, prod_without_zeros, 0.0) * gz) groups_without_zeros,
grad_case_without_zeros,
T.switch(where_single_zero, prod_without_zeros, 0.0) * gz)
return [final_grad] return [final_grad]
...@@ -1659,6 +1749,7 @@ class Prod(CAReduceDtype): ...@@ -1659,6 +1749,7 @@ class Prod(CAReduceDtype):
def c_code_cache_version(self): def c_code_cache_version(self):
return () return ()
class MulWithoutZeros(scalar.BinaryScalarOp): class MulWithoutZeros(scalar.BinaryScalarOp):
# "identity" here is zero, as in Reduce we don't want to start # "identity" here is zero, as in Reduce we don't want to start
# with reducing (1, something_else): this leads to the erronous # with reducing (1, something_else): this leads to the erronous
...@@ -1669,22 +1760,27 @@ class MulWithoutZeros(scalar.BinaryScalarOp): ...@@ -1669,22 +1760,27 @@ class MulWithoutZeros(scalar.BinaryScalarOp):
identity = 0. identity = 0.
commutative = True commutative = True
associative = True associative = True
def impl(self, x, y): def impl(self, x, y):
if x == 0: if x == 0:
return y return y
if y == 0: if y == 0:
return x return x
return x*y return x * y
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
x, y = inp x, y = inp
z, = out z, = out
return ("%(z)s = ((%(x)s == 0) ? (%(y)s) : " + \ return (("%(z)s = ((%(x)s == 0) ? (%(y)s) : "
"((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );") % locals() + "((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );")
% locals())
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (1,)
mul_without_zeros = MulWithoutZeros(scalar.upcast_out, name = 'mul_without_zeros')
mul_without_zeros = MulWithoutZeros(scalar.upcast_out,
name='mul_without_zeros')
class ProdWithoutZeros(CAReduceDtype): class ProdWithoutZeros(CAReduceDtype):
def __init__(self, axis=None, dtype=None): def __init__(self, axis=None, dtype=None):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论