Commit 47d5b5a4, authored by Ian Goodfellow

merged after an hg pull

...@@ -1303,7 +1303,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions ...@@ -1303,7 +1303,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
print >> infolog, 'trailing event in optimization', i, ':', j print >> infolog, 'trailing event in optimization', i, ':', j
print >> infolog, ' ', str(li[j]) print >> infolog, ' ', str(li[j])
elif li[j] != l0[j]: elif li[j] != l0[j]:
print 'non-equal optimization events', i, ':', j print >>infolog, 'non-equal optimization events', i, ':', j
print >>infolog, ' ', str(l0[j]) print >>infolog, ' ', str(l0[j])
print >>infolog, ' ', str(li[j]) print >>infolog, ' ', str(li[j])
#print >> infolog, "* ", j, #print >> infolog, "* ", j,
......
...@@ -963,9 +963,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -963,9 +963,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt_change = self.process_node(env, node, lopt) lopt_change = self.process_node(env, node, lopt)
if lopt_change: if lopt_change:
process_count[lopt] += 1 process_count[lopt] += 1
else: changed = True
process_count[lopt] += 0 if node not in env.nodes:
changed |= lopt_change break# go to next node
finally: finally:
self.detach_updater(env, u) self.detach_updater(env, u)
self.detach_updater(env, u) #TODO: erase this line, it's redundant at best self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
......
...@@ -153,7 +153,7 @@ class PycudaElemwiseKernel(Op): ...@@ -153,7 +153,7 @@ class PycudaElemwiseKernel(Op):
z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape) z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
i = inputs + z i = inputs + z
sp = splay(i[0].mem_size) sp = splay(i[0].mem_size)
self.pycuda_fct(*i, grid=sp[0], block=sp[1]) self.pycuda_fct(*i)#, grid=sp[0], block=sp[1])
pycuda_optimizer = EquilibriumDB() pycuda_optimizer = EquilibriumDB()
gpu_seqopt.register("pycuda_optimizer", pycuda_optimizer, 1.5, "fast_run") gpu_seqopt.register("pycuda_optimizer", pycuda_optimizer, 1.5, "fast_run")
......
...@@ -113,6 +113,7 @@ if cuda_available: ...@@ -113,6 +113,7 @@ if cuda_available:
GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc, GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4 GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4
, scalar, vector, matrix, row, col, tensor3, tensor4) , scalar, vector, matrix, row, col, tensor3, tensor4)
from basic_ops import host_from_gpu, gpu_from_host
import opt import opt
import cuda_ndarray import cuda_ndarray
......
...@@ -37,7 +37,7 @@ def get_str_list_logical_scalar(node, value_str='ii_i%i_value', data_str='ii_i%i ...@@ -37,7 +37,7 @@ def get_str_list_logical_scalar(node, value_str='ii_i%i_value', data_str='ii_i%i
class NaiveAlgo(object): class NaiveAlgo(object):
verbose = 0 # 1, 2 or 3 for more verbose output. verbose = 0 # 1, 2 or 3 for more verbose output.
cache_version = () cache_version = ()
cache_version = ('debug', 10, verbose) cache_version = ('debug', 11, verbose)
def __init__(self, scalar_op, sync=True): def __init__(self, scalar_op, sync=True):
""" """
...@@ -834,7 +834,14 @@ nd_collapse_[i]=0; ...@@ -834,7 +834,14 @@ nd_collapse_[i]=0;
""" %locals() """ %locals()
#check that all inputs have valid dimensions #check that all inputs have valid dimensions
for iname in inputs: for id,iname in enumerate(inputs):
broadcasts = ', '.join(map(str,map(int,node.inputs[id].broadcastable)))
nd = node.inputs[id].ndim
print >> sio, """
int broadcasts_%(iname)s[%(nd)s] = {%(broadcasts)s};
""" %locals()
#check that all inputs have valid dimensions
for id,iname in enumerate(inputs):
print >> sio, """ print >> sio, """
//std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n"; //std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n";
if (%(nd)s != %(iname)s->nd) if (%(nd)s != %(iname)s->nd)
...@@ -845,7 +852,7 @@ nd_collapse_[i]=0; ...@@ -845,7 +852,7 @@ nd_collapse_[i]=0;
for (int i = 0; i< %(nd)s; ++i) for (int i = 0; i< %(nd)s; ++i)
{ {
dims[i] = (dims[i] == 1) ? CudaNdarray_HOST_DIMS(%(iname)s)[i] : dims[i]; dims[i] = (dims[i] == 1) ? CudaNdarray_HOST_DIMS(%(iname)s)[i] : dims[i];
if ((CudaNdarray_HOST_DIMS(%(iname)s)[i] != 1) && (dims[i] != CudaNdarray_HOST_DIMS(%(iname)s)[i])) if ((!(broadcasts_%(iname)s[i] && CudaNdarray_HOST_DIMS(%(iname)s)[i] == 1))&& (dims[i] != CudaNdarray_HOST_DIMS(%(iname)s)[i]))
{ {
//std::cerr << "C_CODE %(opname)s checking input %(iname)s failed\\n"; //std::cerr << "C_CODE %(opname)s checking input %(iname)s failed\\n";
PyErr_Format(PyExc_TypeError, "GpuElemwise input has incompatible dim[%%i] == %%i, where output has size %%i", PyErr_Format(PyExc_TypeError, "GpuElemwise input has incompatible dim[%%i] == %%i, where output has size %%i",
......
...@@ -54,6 +54,7 @@ class InputToGpuOptimizer(Optimizer): ...@@ -54,6 +54,7 @@ class InputToGpuOptimizer(Optimizer):
try: try:
new_input = host_from_gpu(gpu_from_host(input)) new_input = host_from_gpu(gpu_from_host(input))
if new_input.type==input.type:
env.replace_validate(input, new_input, "To allow further optimisation to move Ops to gpu") env.replace_validate(input, new_input, "To allow further optimisation to move Ops to gpu")
except Exception, e: except Exception, e:
#as we currently only support float32, this can fail. #as we currently only support float32, this can fail.
...@@ -136,10 +137,7 @@ def local_gpu_dimshuffle_0(node): ...@@ -136,10 +137,7 @@ def local_gpu_dimshuffle_0(node):
# move the add to a GpuAdd # move the add to a GpuAdd
new_op = GpuDimShuffle(node.op.input_broadcastable, new_op = GpuDimShuffle(node.op.input_broadcastable,
node.op.new_order) node.op.new_order)
if node.op.inplace:
return [host_from_gpu(new_op(gpu_from_host(input)))] return [host_from_gpu(new_op(gpu_from_host(input)))]
else:
return [host_from_gpu(new_op(gpu_from_host(tensor.tensor_copy(input))))]
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, tensor.DimShuffle): if host_input.owner and isinstance(host_input.owner.op, tensor.DimShuffle):
......
...@@ -27,6 +27,9 @@ else: ...@@ -27,6 +27,9 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def rand_cuda_ndarray(shape):
    """Return a CudaNdarray built from numpy.random.rand(*shape) cast to float32."""
    host_values = numpy.random.rand(*shape)
    as_float32 = theano._asarray(host_values, dtype='float32')
    return cuda_ndarray.CudaNdarray(as_float32)
def tes_use(): def tes_use():
tcn.use() tcn.use()
...@@ -206,6 +209,18 @@ def test_elemwise0(): ...@@ -206,6 +209,18 @@ def test_elemwise0():
assert numpy.all(a0 + 1.0 == a.value) assert numpy.all(a0 + 1.0 == a.value)
def test_elemwise_bad_broadcast():
    """Check that x * y on inputs of shape (10, 3) and (10, 1) raises TypeError.

    x and y are both declared with cuda.fmatrix; the test passes only if
    calling the compiled function with those shapes raises TypeError.
    """
    x = cuda.fmatrix('x')
    y = cuda.fmatrix('y')

    f = theano.function([x, y], x * y)

    raised_type_error = False
    try:
        f(rand_cuda_ndarray((10, 3)), rand_cuda_ndarray((10, 1)))
    except TypeError:
        raised_type_error = True

    if not raised_type_error:
        raise Exception("Theano should have raised an error")
def test_elemwise1(): def test_elemwise1():
""" Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """ """ Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """
......
...@@ -10,7 +10,7 @@ import numpy ...@@ -10,7 +10,7 @@ import numpy
from theano import Op, Apply, shared, config, Variable from theano import Op, Apply, shared, config, Variable
from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast, opt from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast, opt
from theano.tensor import zeros_like, sqrt, log, sin, cos, join from theano.tensor import zeros_like, sqrt, log, sin, cos, join, prod
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer from theano.gof import local_optimizer
...@@ -556,6 +556,13 @@ class GPU_mrg_uniform(mrg_uniform_base): ...@@ -556,6 +556,13 @@ class GPU_mrg_uniform(mrg_uniform_base):
class MRG_RandomStreams(object): class MRG_RandomStreams(object):
"""Module component with similar interface to numpy.random (numpy.random.RandomState)""" """Module component with similar interface to numpy.random (numpy.random.RandomState)"""
# NOTE(review): this list is created once, at class level, and pretty_return
# appends to it through self.state_updates. Unless __init__ rebinds the
# attribute per instance (not visible here), every MRG_RandomStreams object
# shares the same list -- confirm, and initialise it per instance if so.
state_updates = []
"""A list of pairs of the form (input_r, output_r), representing the
update rules of all the random states generated by this RandomStreams"""
def updates(self):
    """Return a new list holding the current contents of state_updates."""
    return list(self.state_updates)
def __init__(self, seed=12345, use_cuda=None): def __init__(self, seed=12345, use_cuda=None):
""" """
:type seed: int or list of 6 int. :type seed: int or list of 6 int.
...@@ -612,7 +619,7 @@ class MRG_RandomStreams(object): ...@@ -612,7 +619,7 @@ class MRG_RandomStreams(object):
def n_streams(self, size): def n_streams(self, size):
# TODO: a smart way of choosing the number of streams # TODO: a smart way of choosing the number of streams
if isinstance(size, (tuple, list)): if isinstance(size, (tuple, list)) and all([isinstance(i,int) for i in size]):
r = 1 r = 1
for s in size: for s in size:
r *= s r *= s
...@@ -627,6 +634,7 @@ class MRG_RandomStreams(object): ...@@ -627,6 +634,7 @@ class MRG_RandomStreams(object):
def pretty_return(self, node_rstate, new_rstate, sample): def pretty_return(self, node_rstate, new_rstate, sample):
sample.rstate = node_rstate sample.rstate = node_rstate
sample.update = (node_rstate, new_rstate) sample.update = (node_rstate, new_rstate)
self.state_updates.append((node_rstate, new_rstate))
node_rstate.default_update = new_rstate node_rstate.default_update = new_rstate
return sample return sample
...@@ -639,13 +647,12 @@ class MRG_RandomStreams(object): ...@@ -639,13 +647,12 @@ class MRG_RandomStreams(object):
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing
information. information.
:param: size: Can be a list of integer or a Theano variable like the shape of some tensor. :param: size: Can be a list of integer or Theano variable(ex: the shape of other Theano Variable)
The number of dimensions must be computable at compile time.
TODO: can size be None? TODO: can size be None?
""" """
if isinstance(size, tuple): if isinstance(size, tuple):
assert all([isinstance(i,int) for i in size]), "size must be a tuple of int or a Theano variable" assert all([isinstance(i,int) or isinstance(i,Variable) for i in size]), "size must be a tuple of int or a Theano variable"
else: assert isinstance(size, Variable), "size must be a tuple of int or a Theano variable" else: assert isinstance(size, Variable) and size.ndim==1, "size must be a tuple of int or a Theano variable"
if nstreams is None: if nstreams is None:
nstreams = self.n_streams(size) nstreams = self.n_streams(size)
...@@ -706,24 +713,33 @@ class MRG_RandomStreams(object): ...@@ -706,24 +713,33 @@ class MRG_RandomStreams(object):
raise NotImplementedError("MRG_RandomStreams.multinomial only implemented with n == 1 and pvals.ndim = 2") raise NotImplementedError("MRG_RandomStreams.multinomial only implemented with n == 1 and pvals.ndim = 2")
def normal(self, size=None, avg=0.0, std=1.0, ndim=None, dtype=config.floatX): def normal(self, size=None, avg=0.0, std=1.0, ndim=None, dtype=config.floatX):
"""
:param: size: Can be a list of integers or a Theano variable (e.g. the shape of another Theano Variable)
"""
# We need an even number of ]0,1[ samples. Then we split them # We need an even number of ]0,1[ samples. Then we split them
# in two halves. First half becomes our U1's for Box-Muller, # in two halves. First half becomes our U1's for Box-Muller,
# second half our U2's. See Wikipedia page: # second half our U2's. See Wikipedia page:
# http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
assert isinstance(size, tuple), "size must be a tuple"
assert all([isinstance(i,int) for i in size])
n_samples = numpy.prod(size)
evened = False evened = False
constant = False
if isinstance(size, tuple) and all([isinstance(i,int) for i in size]):
constant = True
n_samples = numpy.prod(size)
if n_samples % 2 == 1: if n_samples % 2 == 1:
n_samples += 1 n_samples += 1
evened = True evened = True
else:
n_samples = prod(size)+(prod(size)%2)#if even, don't change, if odd, +1
flattened = self.uniform(size=(n_samples,), dtype=dtype) flattened = self.uniform(size=(n_samples,), dtype=dtype)
if constant:
U1 = flattened[:n_samples/2] U1 = flattened[:n_samples/2]
U2 = flattened[n_samples/2:] U2 = flattened[n_samples/2:]
else:
U1 = flattened[:prod(flattened.shape)/2]
U2 = flattened[prod(flattened.shape)/2:]
#normal_samples = zeros_like(flattened) #normal_samples = zeros_like(flattened)
sqrt_ln_U1 = sqrt(-2.0*log(U1)) sqrt_ln_U1 = sqrt(-2.0*log(U1))
...@@ -740,8 +756,10 @@ class MRG_RandomStreams(object): ...@@ -740,8 +756,10 @@ class MRG_RandomStreams(object):
final_samples = None final_samples = None
if evened: if evened:
final_samples = normal_samples[:-1] final_samples = normal_samples[:-1]
else: elif constant:
final_samples = normal_samples final_samples = normal_samples
else:
final_samples = normal_samples[:prod(size)]
final_samples = avg + std * final_samples final_samples = avg + std * final_samples
......
...@@ -433,53 +433,55 @@ def test_normal0(): ...@@ -433,53 +433,55 @@ def test_normal0():
steps = 50 steps = 50
if mode in ['DEBUG_MODE','FAST_COMPILE']: if mode in ['DEBUG_MODE','FAST_COMPILE']:
sample_size = (99,30) sample_size = (25,30)
rtol=.02 rtol=.02
else: else:
sample_size = (999,50) sample_size = (999,50)
rtol=.01 rtol=.01
sample_size_odd = (sample_size[0],sample_size[1]-1)
x = tensor.matrix()
for size,const_size,var_input,input in [(sample_size,sample_size,[],[]), (x.shape,sample_size,[x],[numpy.zeros(sample_size)]),
(sample_size_odd,sample_size_odd,[],[]),#test odd value
(x.shape,sample_size_odd,[x],[numpy.zeros(sample_size_odd)]),#test odd value
]:
print '' print ''
print 'ON CPU:' print 'ON CPU:'
R = MRG_RandomStreams(234, use_cuda=False) R = MRG_RandomStreams(234, use_cuda=False)
n = R.normal(size=sample_size, avg=-5.0, std=2.0) n = R.normal(size=size, avg=-5.0, std=2.0)
f = theano.function([], n, mode=mode) f = theano.function(var_input, n, mode=mode)
theano.printing.debugprint(f) theano.printing.debugprint(f)
print 'random?[:10]\n', f()[0,0:10] print 'random?[:10]\n', f(*input)[0,0:10]
basictest(f, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='mrg ', allow_01=True, mean_rtol=rtol) basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
sys.stdout.flush() sys.stdout.flush()
# now with odd number of samples
sample_size = (sample_size[0],sample_size[1]-1)
if mode!='FAST_COMPILE' and cuda_available: if mode!='FAST_COMPILE' and cuda_available:
print '' print ''
print 'ON GPU:' print 'ON GPU:'
R = MRG_RandomStreams(234, use_cuda=True) R = MRG_RandomStreams(234, use_cuda=True)
n = R.normal(size=sample_size, avg=-5.0, std=2.0, dtype='float32') n = R.normal(size=size, avg=-5.0, std=2.0, dtype='float32')
assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
f = theano.function([], theano.Out( f = theano.function(var_input, theano.Out(
theano.sandbox.cuda.basic_ops.gpu_from_host(n), theano.sandbox.cuda.basic_ops.gpu_from_host(n),
borrow=True), mode=mode_with_gpu) borrow=True), mode=mode_with_gpu)
theano.printing.debugprint(f) theano.printing.debugprint(f)
sys.stdout.flush() sys.stdout.flush()
print 'random?[:10]\n', numpy.asarray(f())[0,0:10] print 'random?[:10]\n', numpy.asarray(f(*input))[0,0:10]
print '----' print '----'
sys.stdout.flush() sys.stdout.flush()
basictest(f, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True, mean_rtol=rtol) basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
print '' print ''
print 'ON CPU w NUMPY:' print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234) RR = theano.tensor.shared_randomstreams.RandomStreams(234)
nn = RR.normal(size=sample_size, avg=-5.0, std=2.0) nn = RR.normal(size=size, avg=-5.0, std=2.0)
ff = theano.function([], nn) ff = theano.function(var_input, nn)
basictest(ff, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True, mean_rtol=rtol) basictest(ff, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04): def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04):
......
...@@ -2705,6 +2705,8 @@ class Rebroadcast(Op): ...@@ -2705,6 +2705,8 @@ class Rebroadcast(Op):
broadcast_pattern[k] = str(int(v)) broadcast_pattern[k] = str(int(v))
return '%s{%s}' % (self.__class__.__name__, ','.join(broadcast_pattern)) return '%s{%s}' % (self.__class__.__name__, ','.join(broadcast_pattern))
def make_node(self, x): def make_node(self, x):
if x.ndim <= numpy.max(self.axis.keys()):
raise ValueError('Trying to rebroadcast inexistant dimension')
t = x.type.__class__(dtype = x.type.dtype, t = x.type.__class__(dtype = x.type.dtype,
broadcastable = [self.axis.get(i, b) broadcastable = [self.axis.get(i, b)
for i, b in enumerate(x.type.broadcastable)]) for i, b in enumerate(x.type.broadcastable)])
......
...@@ -2010,6 +2010,42 @@ def check_for_x_over_absX(numerators, denominators): ...@@ -2010,6 +2010,42 @@ def check_for_x_over_absX(numerators, denominators):
return numerators, denominators return numerators, denominators
local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX') local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX')
@register_canonicalize
@gof.local_optimizer([T.abs_])
def local_abs_lift(node):
    """
    Move an abs toward the inputs of the mul or true_div it is applied to.

    abs(a * b * ...) is replaced by abs(a) * abs(b) * ... and abs(a / b)
    by abs(a) / abs(b). This is needed for check_for_x_over_absX to apply
    in more cases.

    Returns a one-element list holding the replacement, or None when the
    node is not an abs applied to the output of a mul or true_div.
    """
    if not (node.op == T.abs_):
        return None
    producer = node.inputs[0].owner
    if not producer:
        return None

    assert node.nin == 1

    if producer.op == T.mul:
        lifted = [T.abs_(factor) for factor in producer.inputs]
        return [T.mul(*lifted)]

    if producer.op == T.true_div:
        operands = producer.inputs
        abs_numerator = T.abs_(operands[0])
        abs_denominator = T.abs_(operands[1])
        return [T.true_div(abs_numerator, abs_denominator)]

    return None
@register_specialize
@gof.local_optimizer([])
def local_abs_merge(node):
    """
    Merge the abs generated by local_abs_lift when the canonizer doesn't
    need them anymore.

    mul(abs(a), abs(b), c, ...) becomes abs(mul(a, b, c, ...)), provided
    every input that is not the output of an abs is a constant without
    negative elements. true_div(abs(a), abs(b)) becomes abs(true_div(a, b)).

    Returns a one-element list holding the replacement, False when one of
    the mul inputs prevents the merge, or None when the node does not match
    either pattern.
    """
    if node.op == T.mul and sum([i.owner.op == T.abs_ for i in node.inputs if i.owner]) > 1:
        inputs = []
        for i in node.inputs:
            if i.owner and i.owner.op == T.abs_:
                inputs.append(i.owner.inputs[0])
            else:
                # An input that is not wrapped in abs can only be moved
                # inside the merged abs when it is known to be non-negative.
                try:
                    const = get_constant_value(i)
                except TypeError:
                    # NOTE(review): assumes get_constant_value reports a
                    # non-constant input by raising TypeError -- confirm
                    # against its definition. Such an input has an unknown
                    # sign, so decline the rewrite instead of letting the
                    # exception escape from the optimizer.
                    return False
                if not (const >= 0).all():
                    return False
                inputs.append(i)
        return [T.abs_(T.mul(*inputs))]
    if node.op == T.true_div and sum([i.owner.op == T.abs_ for i in node.inputs if i.owner]) == 2:
        return [T.abs_(T.true_div(node.inputs[0].owner.inputs[0],
                                  node.inputs[1].owner.inputs[0]))]
@register_stabilize @register_stabilize
@gof.local_optimizer([T.log]) @gof.local_optimizer([T.log])
def local_log1p(node): def local_log1p(node):
...@@ -2279,7 +2315,12 @@ def local_elemwise_fusion_op(OP): ...@@ -2279,7 +2315,12 @@ def local_elemwise_fusion_op(OP):
#if the scalar_op don't have a c implementation, we skip its fusion to allow the fusion of the other ops. #if the scalar_op don't have a c implementation, we skip its fusion to allow the fusion of the other ops.
do_fusion=True do_fusion=True
try: try:
s_input = [scalar.Scalar(x.dtype).make_variable() for x in i.owner.inputs] s_input = []
for ii in i.owner.inputs:
if ii in inputs:
s_input.append(s_inputs[inputs.index(ii)])
else:
s_input.append(scalar.Scalar(ii.dtype).make_variable())
s_op=i.owner.op.scalar_op(*s_input) s_op=i.owner.op.scalar_op(*s_input)
i.owner.op.scalar_op.c_code(s_op.owner,"test_presence_of_c_code", i.owner.op.scalar_op.c_code(s_op.owner,"test_presence_of_c_code",
["x" for x in i.owner.inputs], ["x" for x in i.owner.inputs],
...@@ -2298,8 +2339,11 @@ def local_elemwise_fusion_op(OP): ...@@ -2298,8 +2339,11 @@ def local_elemwise_fusion_op(OP):
s_inputs.extend(s_input) s_inputs.extend(s_input)
s_g.append(s_op) s_g.append(s_op)
else: else:
inputs.append(i) if i in inputs:
s=s_inputs[inputs.index(i)]
else:
s=scalar.Scalar(i.dtype).make_variable() s=scalar.Scalar(i.dtype).make_variable()
inputs.append(i)
s_inputs.append(s) s_inputs.append(s)
s_g.append(s) s_g.append(s)
...@@ -2308,6 +2352,21 @@ def local_elemwise_fusion_op(OP): ...@@ -2308,6 +2352,21 @@ def local_elemwise_fusion_op(OP):
# print "local_elemwise_fusion: no elemwise in inputs. Nothing to fuse." # print "local_elemwise_fusion: no elemwise in inputs. Nothing to fuse."
return False return False
#remove duplicate inputs, we must keep the order.
inputs2=[]
s_inputs2=[]
for i,si in zip(inputs,s_inputs):
if i not in inputs2:
inputs2.append(i)
s_inputs2.append(si)
else:
assert si in s_inputs2
inputs = inputs2
s_inputs = s_inputs2
del inputs2, s_inputs2
assert len(s_inputs)==len(inputs)
otype = node.outputs[0].type otype = node.outputs[0].type
s_new_out=node.op.scalar_op(*s_g) s_new_out=node.op.scalar_op(*s_g)
try: try:
......
...@@ -27,7 +27,6 @@ utt.seed_rng() ...@@ -27,7 +27,6 @@ utt.seed_rng()
def inplace_func(inputs, outputs, mode=get_default_mode()): def inplace_func(inputs, outputs, mode=get_default_mode()):
return function(inputs, outputs, mode=mode, accept_inplace=True) return function(inputs, outputs, mode=mode, accept_inplace=True)
def eval_outputs(outputs): def eval_outputs(outputs):
variables = inplace_func([], outputs)() variables = inplace_func([], outputs)()
if len(variables) == 1: if len(variables) == 1:
...@@ -2611,7 +2610,14 @@ def test_autocast(): ...@@ -2611,7 +2610,14 @@ def test_autocast():
finally: finally:
ac.__exit__() ac.__exit__()
def test_unbroadcast_addbroadcast(): class test_broadcast(unittest.TestCase):
def test_broadcast_bigdim(self):
    """Check that addbroadcast(x, 2) on a matrix x raises ValueError."""
    def add_broadcast_on_axis_2():
        addbroadcast(matrix(), 2)

    self.failUnlessRaises(ValueError, add_broadcast_on_axis_2)
def test_unbroadcast_addbroadcast(self):
""" """
test that the unbroadcast fct don't insert not needed broadcast test that the unbroadcast fct don't insert not needed broadcast
and fuse consecutive Rebroadcast op and fuse consecutive Rebroadcast op
......
...@@ -484,9 +484,67 @@ class test_canonize(unittest.TestCase): ...@@ -484,9 +484,67 @@ class test_canonize(unittest.TestCase):
assert numpy.all(numpy.isfinite(out)) assert numpy.all(numpy.isfinite(out))
assert numpy.allclose(out,numpy.sign(val_inputs[0])) assert numpy.allclose(out,numpy.sign(val_inputs[0]))
assert(out_dtype==out.dtype) assert(out_dtype==out.dtype)
assert len(f.maker.env.toposort())==1
#test (2*x) / (3*abs(x)) -> sign(x)
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
((2*dx)/(3*abs(dx)),[dx],[0.5-dxv],'float64'),
((2*fx)/(3*abs(fx)),[fx],[0.5-fxv],'float32'),
((2*dx)/(3*abs(dx)),[dx],[0.0*dxv],'float64'),
((2*fx)/(3*abs(fx)),[fx],[0.0*fxv],'float32'),
((2*dv)/(3*abs(dv)),[dv],[0.5-dvv],'float64'),
((2*fv)/(3*abs(fv)),[fv],[0.5-fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
topo = f.maker.env.toposort()
out = f(*val_inputs)
assert numpy.all(numpy.isfinite(out))
assert numpy.allclose(out,numpy.sign(val_inputs[0])*2/3)
assert(out_dtype==out.dtype)
finally: finally:
mode._optimizer = old_optimizer mode._optimizer = old_optimizer
def test_abs_mul_div(self):
"""
test that if we have
4 * x / abs(2*x) it get simplifier during canonicalisation.
"""
x=T.dscalar()
a=T.abs_(x)
if theano.config.mode=='FAST_COMPILE':
mode = theano.compile.mode.get_mode('FAST_RUN').excluding("local_elemwise_fusion")
else:
mode = theano.compile.mode.get_default_mode().excluding("local_elemwise_fusion")
f=theano.function([x],[(4*x)/abs(2*x)], mode = mode)
print f.maker.env.toposort()
print
f(.1)
f(-1)
#some stabilization optimization make the output be finite instead of nan
#debug_mode will raise an error when he see nan
if not isinstance(mode,theano.compile.debugmode.DebugMode):
assert numpy.isfinite(f(0))
assert len(f.maker.env.toposort())==2
assert f.maker.env.toposort()[0].op==T.sgn
f=theano.function([x],[(4*x)/abs(x/2)], mode = mode)
print f.maker.env.toposort()
print
f(.1)
f(-1)
#some stabilization optimization make the output be finite instead of nan
#debug_mode will raise an error when he see nan
if not isinstance(mode,theano.compile.debugmode.DebugMode):
assert numpy.isfinite(f(0))
assert len(f.maker.env.toposort())==2
assert f.maker.env.toposort()[0].op==T.sgn
def test_multiple_case_that_fail(self): def test_multiple_case_that_fail(self):
import theano.tensor, theano.compile import theano.tensor, theano.compile
...@@ -553,6 +611,30 @@ class test_canonize(unittest.TestCase): ...@@ -553,6 +611,30 @@ class test_canonize(unittest.TestCase):
""" """
raise SkipTest("Not implemented") raise SkipTest("Not implemented")
def test_local_merge_abs():
    """
    Check that abs(y*z*-2) and abs(x/y) each compile to a two-node graph
    whose second node applies a scal.Abs scalar op.
    """
    x, y, z = T.matrices('xyz')
    x_val, y_val, z_val = [numpy.random.rand(5, 5) for _ in range(3)]

    mode_name = theano.config.mode
    if mode_name == "FAST_COMPILE":
        mode_name = "FAST_RUN"
    mode = theano.compile.mode.get_mode(mode_name).excluding("local_elemwise_fusion")

    # Each graph is built lazily, right before it is compiled.
    for build in (lambda: abs(y*z*-2), lambda: abs(x/y)):
        f = theano.function([x, y, z], build(), mode=mode)
        f(x_val, y_val, z_val)
        theano.printing.debugprint(f)
        topo = f.maker.env.toposort()
        assert isinstance(topo[1].op.scalar_op, scal.Abs)
        assert len(topo) == 2
def test_mixeddiv(): def test_mixeddiv():
"""Test that int division is preserved""" """Test that int division is preserved"""
i = iscalar() i = iscalar()
...@@ -692,7 +774,7 @@ class test_fusion(unittest.TestCase): ...@@ -692,7 +774,7 @@ class test_fusion(unittest.TestCase):
#TODO: BIT OP only with ints, xor, or, and, invert, cast #TODO: BIT OP only with ints, xor, or, and, invert, cast
# (fx-theano.tensor.or_(fy,fz),(fx,fy,fz),(fxv,fyv,fzv),1,fxv-(fy|fz),'float32'), # (fx-theano.tensor.or_(fy,fz),(fx,fy,fz),(fxv,fyv,fzv),1,fxv-(fy|fz),'float32'),
# (fx-theano.tensor.xor(fy,fz),(fx,fy,fz),(fxv,fyv,fzv),1,fxv-(fy^fz),'float32'), # (fx-theano.tensor.xor(fy,fz),(fx,fy,fz),(fxv,fyv,fzv),1,fxv-(fy^fz),'float32'),
(theano.tensor.pow(fx*fy+fz,fx*fy),(fx,fy,fz),(fxv,fyv,fzv),2,numpy.power(fxv*fyv+fzv,fxv*fyv),'float32'), (theano.tensor.pow(fx*fy+fz,fx*fy),(fx,fy,fz),(fxv,fyv,fzv),1,numpy.power(fxv*fyv+fzv,fxv*fyv),'float32'),
(fv+fy**fz,(fv,fy,fz),(fvv,fyv,fzv),2,fvv+fyv**fzv,'float32'),#fused with a dimshuffle (fv+fy**fz,(fv,fy,fz),(fvv,fyv,fzv),2,fvv+fyv**fzv,'float32'),#fused with a dimshuffle
(fv-fy+tanh(fz),(fv,fy,fz),(fvv,fyv,fzv),2,fvv-fyv+numpy.tanh(fzv),'float32'),#fused with a dimshuffle (fv-fy+tanh(fz),(fv,fy,fz),(fvv,fyv,fzv),2,fvv-fyv+numpy.tanh(fzv),'float32'),#fused with a dimshuffle
] ]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论