提交 47d5b5a4，作者：Ian Goodfellow

merged after an hg pull

......@@ -1303,7 +1303,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
print >> infolog, 'trailing event in optimization', i, ':', j
print >> infolog, ' ', str(li[j])
elif li[j] != l0[j]:
print 'non-equal optimization events', i, ':', j
print >>infolog, 'non-equal optimization events', i, ':', j
print >>infolog, ' ', str(l0[j])
print >>infolog, ' ', str(li[j])
#print >> infolog, "* ", j,
......
......@@ -963,9 +963,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt_change = self.process_node(env, node, lopt)
if lopt_change:
process_count[lopt] += 1
else:
process_count[lopt] += 0
changed |= lopt_change
changed = True
if node not in env.nodes:
break# go to next node
finally:
self.detach_updater(env, u)
self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
......
......@@ -153,7 +153,7 @@ class PycudaElemwiseKernel(Op):
z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
i = inputs + z
sp = splay(i[0].mem_size)
self.pycuda_fct(*i, grid=sp[0], block=sp[1])
self.pycuda_fct(*i)#, grid=sp[0], block=sp[1])
pycuda_optimizer = EquilibriumDB()
gpu_seqopt.register("pycuda_optimizer", pycuda_optimizer, 1.5, "fast_run")
......
......@@ -113,6 +113,7 @@ if cuda_available:
GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4
, scalar, vector, matrix, row, col, tensor3, tensor4)
from basic_ops import host_from_gpu, gpu_from_host
import opt
import cuda_ndarray
......
......@@ -37,7 +37,7 @@ def get_str_list_logical_scalar(node, value_str='ii_i%i_value', data_str='ii_i%i
class NaiveAlgo(object):
verbose = 0 # 1, 2 or 3 for more verbose output.
cache_version = ()
cache_version = ('debug', 10, verbose)
cache_version = ('debug', 11, verbose)
def __init__(self, scalar_op, sync=True):
"""
......@@ -834,7 +834,14 @@ nd_collapse_[i]=0;
""" %locals()
#check that all inputs have valid dimensions
for iname in inputs:
for id,iname in enumerate(inputs):
broadcasts = ', '.join(map(str,map(int,node.inputs[id].broadcastable)))
nd = node.inputs[id].ndim
print >> sio, """
int broadcasts_%(iname)s[%(nd)s] = {%(broadcasts)s};
""" %locals()
#check that all inputs have valid dimensions
for id,iname in enumerate(inputs):
print >> sio, """
//std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n";
if (%(nd)s != %(iname)s->nd)
......@@ -845,7 +852,7 @@ nd_collapse_[i]=0;
for (int i = 0; i< %(nd)s; ++i)
{
dims[i] = (dims[i] == 1) ? CudaNdarray_HOST_DIMS(%(iname)s)[i] : dims[i];
if ((CudaNdarray_HOST_DIMS(%(iname)s)[i] != 1) && (dims[i] != CudaNdarray_HOST_DIMS(%(iname)s)[i]))
if ((!(broadcasts_%(iname)s[i] && CudaNdarray_HOST_DIMS(%(iname)s)[i] == 1))&& (dims[i] != CudaNdarray_HOST_DIMS(%(iname)s)[i]))
{
//std::cerr << "C_CODE %(opname)s checking input %(iname)s failed\\n";
PyErr_Format(PyExc_TypeError, "GpuElemwise input has incompatible dim[%%i] == %%i, where output has size %%i",
......
......@@ -54,7 +54,8 @@ class InputToGpuOptimizer(Optimizer):
try:
new_input = host_from_gpu(gpu_from_host(input))
env.replace_validate(input, new_input, "To allow further optimisation to move Ops to gpu")
if new_input.type==input.type:
env.replace_validate(input, new_input, "To allow further optimisation to move Ops to gpu")
except Exception, e:
#as we currently only support float32, this can fail.
#Using try except make that we won't need
......@@ -136,10 +137,7 @@ def local_gpu_dimshuffle_0(node):
# move the add to a GpuAdd
new_op = GpuDimShuffle(node.op.input_broadcastable,
node.op.new_order)
if node.op.inplace:
return [host_from_gpu(new_op(gpu_from_host(input)))]
else:
return [host_from_gpu(new_op(gpu_from_host(tensor.tensor_copy(input))))]
return [host_from_gpu(new_op(gpu_from_host(input)))]
if node.op == gpu_from_host:
host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, tensor.DimShuffle):
......
......@@ -27,6 +27,9 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def rand_cuda_ndarray(shape):
    """Return a CudaNdarray built from float32 numpy.random.rand samples.

    :param shape: sequence of dimensions, unpacked into numpy.random.rand.
    """
    host_samples = numpy.random.rand(*shape)
    host_float32 = theano._asarray(host_samples, dtype='float32')
    return cuda_ndarray.CudaNdarray(host_float32)
def tes_use():
tcn.use()
......@@ -206,6 +209,18 @@ def test_elemwise0():
assert numpy.all(a0 + 1.0 == a.value)
def test_elemwise_bad_broadcast():
    """Calling the compiled x * y with shapes (10, 3) and (10, 1) is
    expected to raise TypeError; any other outcome fails the test."""
    x = cuda.fmatrix('x')
    y = cuda.fmatrix('y')
    product_fn = theano.function([x, y], x * y)

    type_error_seen = False
    try:
        product_fn(rand_cuda_ndarray((10, 3)), rand_cuda_ndarray((10, 1)))
    except TypeError:
        type_error_seen = True

    if not type_error_seen:
        raise Exception("Theano should have raised an error")
def test_elemwise1():
""" Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """
......
......@@ -10,7 +10,7 @@ import numpy
from theano import Op, Apply, shared, config, Variable
from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast, opt
from theano.tensor import zeros_like, sqrt, log, sin, cos, join
from theano.tensor import zeros_like, sqrt, log, sin, cos, join, prod
from theano.compile import optdb
from theano.gof import local_optimizer
......@@ -556,6 +556,13 @@ class GPU_mrg_uniform(mrg_uniform_base):
class MRG_RandomStreams(object):
"""Module component with similar interface to numpy.random (numpy.random.RandomState)"""
state_updates = []
"""A list of pairs of the form (input_r, output_r), representing the
update rules of all the random states generated by this RandomStreams"""
def updates(self):
    """Return a fresh list containing the entries of self.state_updates.

    The caller gets a copy, so mutating the result does not touch the
    stored list.
    """
    return [pair for pair in self.state_updates]
def __init__(self, seed=12345, use_cuda=None):
"""
:type seed: int or list of 6 int.
......@@ -612,7 +619,7 @@ class MRG_RandomStreams(object):
def n_streams(self, size):
# TODO: a smart way of choosing the number of streams
if isinstance(size, (tuple, list)):
if isinstance(size, (tuple, list)) and all([isinstance(i,int) for i in size]):
r = 1
for s in size:
r *= s
......@@ -627,6 +634,7 @@ class MRG_RandomStreams(object):
def pretty_return(self, node_rstate, new_rstate, sample):
    """Attach the random-state bookkeeping to `sample` and return it.

    :param node_rstate: the state variable the sample was drawn from.
    :param new_rstate: the state variable that should replace it.
    :param sample: the sampled variable; it receives `rstate` and
        `update` attributes.
    :return: `sample` itself.

    The (node_rstate, new_rstate) pair is also recorded in
    `self.state_updates`, and `node_rstate.default_update` is set to
    `new_rstate`.
    """
    sample.rstate = node_rstate
    sample.update = (node_rstate, new_rstate)
    # `state_updates` is declared as a class-level list. Appending to it
    # directly would record the pair on the list shared by every instance,
    # so give this instance its own list the first time it records one.
    if 'state_updates' not in vars(self):
        self.state_updates = []
    self.state_updates.append((node_rstate, new_rstate))
    node_rstate.default_update = new_rstate
    return sample
......@@ -639,13 +647,12 @@ class MRG_RandomStreams(object):
ndim may be a plain integer to supplement the missing
information.
:param: size: Can be a list of integer or a Theano variable like the shape of some tensor.
The number of dimensions must be computable at compile time.
:param: size: Can be a list of integer or Theano variable(ex: the shape of other Theano Variable)
TODO: can size be None?
"""
if isinstance(size, tuple):
assert all([isinstance(i,int) for i in size]), "size must be a tuple of int or a Theano variable"
else: assert isinstance(size, Variable), "size must be a tuple of int or a Theano variable"
assert all([isinstance(i,int) or isinstance(i,Variable) for i in size]), "size must be a tuple of int or a Theano variable"
else: assert isinstance(size, Variable) and size.ndim==1, "size must be a tuple of int or a Theano variable"
if nstreams is None:
nstreams = self.n_streams(size)
......@@ -706,24 +713,33 @@ class MRG_RandomStreams(object):
raise NotImplementedError("MRG_RandomStreams.multinomial only implemented with n == 1 and pvals.ndim = 2")
def normal(self, size=None, avg=0.0, std=1.0, ndim=None, dtype=config.floatX):
"""
:param: size: Can be a list of integer or Theano variable(ex: the shape of other Theano Variable)
"""
# We need an even number of ]0,1[ samples. Then we split them
# in two halves. First half becomes our U1's for Box-Muller,
# second half our U2's. See Wikipedia page:
# http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
assert isinstance(size, tuple), "size must be a tuple"
assert all([isinstance(i,int) for i in size])
n_samples = numpy.prod(size)
evened = False
constant = False
if isinstance(size, tuple) and all([isinstance(i,int) for i in size]):
constant = True
n_samples = numpy.prod(size)
if n_samples % 2 == 1:
n_samples += 1
evened = True
if n_samples % 2 == 1:
n_samples += 1
evened = True
else:
n_samples = prod(size)+(prod(size)%2)#if even, don't change, if odd, +1
flattened = self.uniform(size=(n_samples,), dtype=dtype)
U1 = flattened[:n_samples/2]
U2 = flattened[n_samples/2:]
if constant:
U1 = flattened[:n_samples/2]
U2 = flattened[n_samples/2:]
else:
U1 = flattened[:prod(flattened.shape)/2]
U2 = flattened[prod(flattened.shape)/2:]
#normal_samples = zeros_like(flattened)
sqrt_ln_U1 = sqrt(-2.0*log(U1))
......@@ -740,8 +756,10 @@ class MRG_RandomStreams(object):
final_samples = None
if evened:
final_samples = normal_samples[:-1]
else:
elif constant:
final_samples = normal_samples
else:
final_samples = normal_samples[:prod(size)]
final_samples = avg + std * final_samples
......
......@@ -433,53 +433,55 @@ def test_normal0():
steps = 50
if mode in ['DEBUG_MODE','FAST_COMPILE']:
sample_size = (99,30)
sample_size = (25,30)
rtol=.02
else:
sample_size = (999,50)
rtol=.01
sample_size_odd = (sample_size[0],sample_size[1]-1)
x = tensor.matrix()
for size,const_size,var_input,input in [(sample_size,sample_size,[],[]), (x.shape,sample_size,[x],[numpy.zeros(sample_size)]),
(sample_size_odd,sample_size_odd,[],[]),#test odd value
(x.shape,sample_size_odd,[x],[numpy.zeros(sample_size_odd)]),#test odd value
]:
print ''
print 'ON CPU:'
print ''
print 'ON CPU:'
R = MRG_RandomStreams(234, use_cuda=False)
n = R.normal(size=sample_size, avg=-5.0, std=2.0)
f = theano.function([], n, mode=mode)
theano.printing.debugprint(f)
print 'random?[:10]\n', f()[0,0:10]
basictest(f, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='mrg ', allow_01=True, mean_rtol=rtol)
R = MRG_RandomStreams(234, use_cuda=False)
n = R.normal(size=size, avg=-5.0, std=2.0)
f = theano.function(var_input, n, mode=mode)
theano.printing.debugprint(f)
print 'random?[:10]\n', f(*input)[0,0:10]
basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
sys.stdout.flush()
sys.stdout.flush()
# now with odd number of samples
sample_size = (sample_size[0],sample_size[1]-1)
if mode!='FAST_COMPILE' and cuda_available:
print ''
print 'ON GPU:'
R = MRG_RandomStreams(234, use_cuda=True)
n = R.normal(size=size, avg=-5.0, std=2.0, dtype='float32')
assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
f = theano.function(var_input, theano.Out(
theano.sandbox.cuda.basic_ops.gpu_from_host(n),
borrow=True), mode=mode_with_gpu)
if mode!='FAST_COMPILE' and cuda_available:
print ''
print 'ON GPU:'
R = MRG_RandomStreams(234, use_cuda=True)
n = R.normal(size=sample_size, avg=-5.0, std=2.0, dtype='float32')
assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
f = theano.function([], theano.Out(
theano.sandbox.cuda.basic_ops.gpu_from_host(n),
borrow=True), mode=mode_with_gpu)
theano.printing.debugprint(f)
sys.stdout.flush()
print 'random?[:10]\n', numpy.asarray(f(*input))[0,0:10]
print '----'
sys.stdout.flush()
basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
theano.printing.debugprint(f)
sys.stdout.flush()
print 'random?[:10]\n', numpy.asarray(f())[0,0:10]
print '----'
sys.stdout.flush()
basictest(f, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True, mean_rtol=rtol)
print ''
print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234)
print ''
print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234)
nn = RR.normal(size=sample_size, avg=-5.0, std=2.0)
ff = theano.function([], nn)
nn = RR.normal(size=size, avg=-5.0, std=2.0)
ff = theano.function(var_input, nn)
basictest(ff, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True, mean_rtol=rtol)
basictest(ff, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04):
......
......@@ -2705,6 +2705,8 @@ class Rebroadcast(Op):
broadcast_pattern[k] = str(int(v))
return '%s{%s}' % (self.__class__.__name__, ','.join(broadcast_pattern))
def make_node(self, x):
if x.ndim <= numpy.max(self.axis.keys()):
raise ValueError('Trying to rebroadcast inexistant dimension')
t = x.type.__class__(dtype = x.type.dtype,
broadcastable = [self.axis.get(i, b)
for i, b in enumerate(x.type.broadcastable)])
......
......@@ -2010,6 +2010,42 @@ def check_for_x_over_absX(numerators, denominators):
return numerators, denominators
local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX')
@register_canonicalize
@gof.local_optimizer([T.abs_])
def local_abs_lift(node):
    """
    Move abs toward the inputs:

        abs(mul(a, b, ...))  -> mul(abs(a), abs(b), ...)
        abs(true_div(a, b))  -> true_div(abs(a), abs(b))

    This is needed for check_for_x_over_absX to apply in more cases.
    Returns a one-element replacement list, or None when nothing applies.
    """
    if not (node.op == T.abs_ and node.inputs[0].owner):
        return None
    assert node.nin == 1

    producer = node.inputs[0].owner
    if producer.op == T.mul:
        lifted = [T.abs_(operand) for operand in producer.inputs]
        return [T.mul(*lifted)]
    if producer.op == T.true_div:
        operands = producer.inputs
        return [T.true_div(T.abs_(operands[0]), T.abs_(operands[1]))]
    return None
@register_specialize
@gof.local_optimizer([])
def local_abs_merge(node):
    """
    Merge the abs generated by local_abs_lift once the canonizer does not
    need them anymore:

        mul(abs(a), abs(b), c, ...) -> abs(mul(a, b, c, ...))
        true_div(abs(a), abs(b))    -> abs(true_div(a, b))

    For mul, at least two inputs must be abs outputs, and every other
    input must be a constant whose values are all >= 0; otherwise the
    rewrite is declined by returning False.  Returns None when the node
    matches neither pattern.
    """
    if node.op == T.mul and sum([i.owner.op == T.abs_ for i in node.inputs if i.owner]) > 1:
        inputs = []
        for i in node.inputs:
            if i.owner and i.owner.op == T.abs_:
                inputs.append(i.owner.inputs[0])
            else:
                # A non-abs input may only be kept if it is a non-negative
                # constant.  get_constant_value is documented to raise
                # TypeError for a non-constant variable -- NOTE(review):
                # confirm against this tree's tensor.basic.  Decline the
                # rewrite in that case instead of letting it propagate.
                try:
                    const = get_constant_value(i)
                except TypeError:
                    return False
                if not (const >= 0).all():
                    return False
                inputs.append(i)
        return [T.abs_(T.mul(*inputs))]
    if node.op == T.true_div and sum([i.owner.op == T.abs_ for i in node.inputs if i.owner]) == 2:
        return [T.abs_(T.true_div(node.inputs[0].owner.inputs[0],
                                  node.inputs[1].owner.inputs[0]))]
@register_stabilize
@gof.local_optimizer([T.log])
def local_log1p(node):
......@@ -2279,7 +2315,12 @@ def local_elemwise_fusion_op(OP):
#if the scalar_op don't have a c implementation, we skip its fusion to allow the fusion of the other ops.
do_fusion=True
try:
s_input = [scalar.Scalar(x.dtype).make_variable() for x in i.owner.inputs]
s_input = []
for ii in i.owner.inputs:
if ii in inputs:
s_input.append(s_inputs[inputs.index(ii)])
else:
s_input.append(scalar.Scalar(ii.dtype).make_variable())
s_op=i.owner.op.scalar_op(*s_input)
i.owner.op.scalar_op.c_code(s_op.owner,"test_presence_of_c_code",
["x" for x in i.owner.inputs],
......@@ -2298,8 +2339,11 @@ def local_elemwise_fusion_op(OP):
s_inputs.extend(s_input)
s_g.append(s_op)
else:
if i in inputs:
s=s_inputs[inputs.index(i)]
else:
s=scalar.Scalar(i.dtype).make_variable()
inputs.append(i)
s=scalar.Scalar(i.dtype).make_variable()
s_inputs.append(s)
s_g.append(s)
......@@ -2308,6 +2352,21 @@ def local_elemwise_fusion_op(OP):
# print "local_elemwise_fusion: no elemwise in inputs. Nothing to fuse."
return False
#remove duplicate inputs, we most keep the order.
inputs2=[]
s_inputs2=[]
for i,si in zip(inputs,s_inputs):
if i not in inputs2:
inputs2.append(i)
s_inputs2.append(si)
else:
assert si in s_inputs2
inputs = inputs2
s_inputs = s_inputs2
del inputs2, s_inputs2
assert len(s_inputs)==len(inputs)
otype = node.outputs[0].type
s_new_out=node.op.scalar_op(*s_g)
try:
......
......@@ -27,7 +27,6 @@ utt.seed_rng()
def inplace_func(inputs, outputs, mode=get_default_mode()):
    """Call `function` on inputs/outputs with the given mode and
    accept_inplace=True, returning whatever `function` returns."""
    compile_options = dict(mode=mode, accept_inplace=True)
    return function(inputs, outputs, **compile_options)
def eval_outputs(outputs):
variables = inplace_func([], outputs)()
if len(variables) == 1:
......@@ -2611,48 +2610,55 @@ def test_autocast():
finally:
ac.__exit__()
def test_unbroadcast_addbroadcast():
"""
Test that unbroadcast/addbroadcast do not insert an unneeded
Rebroadcast op and that consecutive Rebroadcast ops are fused.
"""
# Case 1: x is a matrix().
x=matrix()
assert unbroadcast(x,0) is x
assert unbroadcast(x,1) is x
assert unbroadcast(x,1,0) is x
assert unbroadcast(x,0,1) is x
assert addbroadcast(x,0) is not x
assert addbroadcast(x,1) is not x
assert addbroadcast(x,1,0).owner.inputs[0] is x
assert unbroadcast(addbroadcast(x,0),0) is x
assert addbroadcast(unbroadcast(x,0),0) is not x
# Case 2: x is a row().
x=row()
assert unbroadcast(x,0) is not x
assert unbroadcast(x,1) is x
assert unbroadcast(x,1,0) is not x
assert unbroadcast(x,0,1) is not x
assert addbroadcast(x,0) is x
assert addbroadcast(x,1).owner.inputs[0] is x
assert addbroadcast(x,1,0).owner.inputs[0] is x
assert addbroadcast(x,0,1).owner.inputs[0] is x
assert unbroadcast(addbroadcast(x,1),1) is x
assert addbroadcast(unbroadcast(x,1),1) is not x
# The first unbroadcast already removes the broadcast, so the second
# one should not add another op: the result's owner takes x directly.
assert unbroadcast(unbroadcast(x,0),0).owner.inputs[0] is x
# Case 3: consecutive Rebroadcast ops are fused (both dims broadcastable).
x=TensorType(dtype = 'float64', broadcastable = (True,True))()
assert unbroadcast(unbroadcast(x,1),0).owner.inputs[0] is x
assert addbroadcast(unbroadcast(x,1),0).owner.inputs[0] is x
assert addbroadcast(unbroadcast(x,0),0) is x
# Tests for the addbroadcast / unbroadcast helpers.
class test_broadcast(unittest.TestCase):
def test_broadcast_bigdim(self):
# addbroadcast on axis 2 of a matrix() is expected to raise ValueError.
def f():
x = matrix()
addbroadcast(x,2)
self.failUnlessRaises(ValueError, f)
def test_unbroadcast_addbroadcast(self):
"""
Test that unbroadcast/addbroadcast do not insert an unneeded
Rebroadcast op and that consecutive Rebroadcast ops are fused.
"""
# Case 1: x is a matrix().
x=matrix()
assert unbroadcast(x,0) is x
assert unbroadcast(x,1) is x
assert unbroadcast(x,1,0) is x
assert unbroadcast(x,0,1) is x
assert addbroadcast(x,0) is not x
assert addbroadcast(x,1) is not x
assert addbroadcast(x,1,0).owner.inputs[0] is x
assert unbroadcast(addbroadcast(x,0),0) is x
assert addbroadcast(unbroadcast(x,0),0) is not x
# Case 2: x is a row().
x=row()
assert unbroadcast(x,0) is not x
assert unbroadcast(x,1) is x
assert unbroadcast(x,1,0) is not x
assert unbroadcast(x,0,1) is not x
assert addbroadcast(x,0) is x
assert addbroadcast(x,1).owner.inputs[0] is x
assert addbroadcast(x,1,0).owner.inputs[0] is x
assert addbroadcast(x,0,1).owner.inputs[0] is x
assert unbroadcast(addbroadcast(x,1),1) is x
assert addbroadcast(unbroadcast(x,1),1) is not x
# The first unbroadcast already removes the broadcast, so the second
# one should not add another op: the result's owner takes x directly.
assert unbroadcast(unbroadcast(x,0),0).owner.inputs[0] is x
# Case 3: consecutive Rebroadcast ops are fused (both dims broadcastable).
x=TensorType(dtype = 'float64', broadcastable = (True,True))()
assert unbroadcast(unbroadcast(x,1),0).owner.inputs[0] is x
assert addbroadcast(unbroadcast(x,1),0).owner.inputs[0] is x
assert addbroadcast(unbroadcast(x,0),0) is x
def test_mod():
"""
We add this test as not all language and C implementation give the same
......
......@@ -484,9 +484,67 @@ class test_canonize(unittest.TestCase):
assert numpy.all(numpy.isfinite(out))
assert numpy.allclose(out,numpy.sign(val_inputs[0]))
assert(out_dtype==out.dtype)
assert len(f.maker.env.toposort())==1
#test (2*x) / (3*abs(x)) -> sign(x)
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
((2*dx)/(3*abs(dx)),[dx],[0.5-dxv],'float64'),
((2*fx)/(3*abs(fx)),[fx],[0.5-fxv],'float32'),
((2*dx)/(3*abs(dx)),[dx],[0.0*dxv],'float64'),
((2*fx)/(3*abs(fx)),[fx],[0.0*fxv],'float32'),
((2*dv)/(3*abs(dv)),[dv],[0.5-dvv],'float64'),
((2*fv)/(3*abs(fv)),[fv],[0.5-fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
topo = f.maker.env.toposort()
out = f(*val_inputs)
assert numpy.all(numpy.isfinite(out))
assert numpy.allclose(out,numpy.sign(val_inputs[0])*2/3)
assert(out_dtype==out.dtype)
finally:
mode._optimizer = old_optimizer
def test_abs_mul_div(self):
"""
Test that expressions such as 4 * x / abs(2*x) and 4 * x / abs(x/2)
get simplified: each compiled graph is asserted to have exactly two
nodes, the first of which is T.sgn.
"""
x=T.dscalar()
a=T.abs_(x)
# Elemwise fusion is excluded from the mode used for compilation;
# FAST_COMPILE is replaced by FAST_RUN.
if theano.config.mode=='FAST_COMPILE':
mode = theano.compile.mode.get_mode('FAST_RUN').excluding("local_elemwise_fusion")
else:
mode = theano.compile.mode.get_default_mode().excluding("local_elemwise_fusion")
# First expression: (4*x)/abs(2*x)
f=theano.function([x],[(4*x)/abs(2*x)], mode = mode)
print f.maker.env.toposort()
print
f(.1)
f(-1)
# Some stabilization optimizations make the output finite instead of nan.
# DebugMode raises an error when it sees nan, so x == 0 is skipped there.
if not isinstance(mode,theano.compile.debugmode.DebugMode):
assert numpy.isfinite(f(0))
assert len(f.maker.env.toposort())==2
assert f.maker.env.toposort()[0].op==T.sgn
# Second expression: (4*x)/abs(x/2)
f=theano.function([x],[(4*x)/abs(x/2)], mode = mode)
print f.maker.env.toposort()
print
f(.1)
f(-1)
# Some stabilization optimizations make the output finite instead of nan.
# DebugMode raises an error when it sees nan, so x == 0 is skipped there.
if not isinstance(mode,theano.compile.debugmode.DebugMode):
assert numpy.isfinite(f(0))
assert len(f.maker.env.toposort())==2
assert f.maker.env.toposort()[0].op==T.sgn
def test_multiple_case_that_fail(self):
import theano.tensor, theano.compile
......@@ -553,6 +611,30 @@ class test_canonize(unittest.TestCase):
"""
raise SkipTest("Not implemented")
def test_local_merge_abs():
    """Compile abs(y*z*-2) and abs(x/y) with elemwise fusion excluded and
    assert each graph has exactly two nodes, the second one being an op
    whose scalar_op is scal.Abs."""
    x, y, z = T.matrices('xyz')
    x_val = numpy.random.rand(5, 5)
    y_val = numpy.random.rand(5, 5)
    z_val = numpy.random.rand(5, 5)

    # FAST_COMPILE is replaced by FAST_RUN before building the mode.
    mode_name = theano.config.mode
    if mode_name == "FAST_COMPILE":
        mode_name = "FAST_RUN"
    mode = theano.compile.mode.get_mode(mode_name).excluding("local_elemwise_fusion")

    def check_single_trailing_abs(expr):
        # Compile, run once, print the graph, then check its shape.
        f = theano.function([x, y, z], expr, mode=mode)
        f(x_val, y_val, z_val)
        theano.printing.debugprint(f)
        assert isinstance(f.maker.env.toposort()[1].op.scalar_op, scal.Abs)
        assert len(f.maker.env.toposort()) == 2

    check_single_trailing_abs(abs(y * z * -2))
    check_single_trailing_abs(abs(x / y))
def test_mixeddiv():
"""Test that int division is preserved"""
i = iscalar()
......@@ -692,7 +774,7 @@ class test_fusion(unittest.TestCase):
#TODO: BIT OP only with ints, xor, or, and, invert, cast
# (fx-theano.tensor.or_(fy,fz),(fx,fy,fz),(fxv,fyv,fzv),1,fxv-(fy|fz),'float32'),
# (fx-theano.tensor.xor(fy,fz),(fx,fy,fz),(fxv,fyv,fzv),1,fxv-(fy^fz),'float32'),
(theano.tensor.pow(fx*fy+fz,fx*fy),(fx,fy,fz),(fxv,fyv,fzv),2,numpy.power(fxv*fyv+fzv,fxv*fyv),'float32'),
(theano.tensor.pow(fx*fy+fz,fx*fy),(fx,fy,fz),(fxv,fyv,fzv),1,numpy.power(fxv*fyv+fzv,fxv*fyv),'float32'),
(fv+fy**fz,(fv,fy,fz),(fvv,fyv,fzv),2,fvv+fyv**fzv,'float32'),#fused with a dimshuffle
(fv-fy+tanh(fz),(fv,fy,fz),(fvv,fyv,fzv),2,fvv-fyv+numpy.tanh(fzv),'float32'),#fused with a dimshuffle
]
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论