提交 56267692 authored 作者: Simon Lemieux's avatar Simon Lemieux

merge

...@@ -150,38 +150,35 @@ def execs_timeit_2vector(exprs, fname=None): ...@@ -150,38 +150,35 @@ def execs_timeit_2vector(exprs, fname=None):
assert len(colors)>=len(times) assert len(colors)>=len(times)
fig = pylab.figure() fig = pylab.figure()
for idx,(time,expr) in enumerate(zip(times,str_expr)): for idx,(time,expr) in enumerate(zip(times,str_expr)):
###
###
###
# Creating each subplot
###
###
###
###
pylab.subplot(220+idx+1) pylab.subplot(220+idx+1)
pylab.subplots_adjust(wspace=0.25, hspace=0.25) pylab.subplots_adjust(wspace=0.25, hspace=0.25)
#legend=[] #legend=[]
#plot = fig.add_subplot(1,len(exprs),idx) #plot = fig.add_subplot(1,len(exprs),idx)
speedup = [t[0].min()/t[1].min() for t in time] speedup = [t[0].min()/t[1].min() for t in time]
pylab.semilogx(nb_calls, speedup, linewidth=1.0, linestyle = '--', color='r') pylab.semilogx(nb_calls, speedup, linewidth=1.0, linestyle = '--', color='r')
speedup = [t[0].min()/t[2].min() for t in time] speedup = [t[0].min()/t[2].min() for t in time]
pylab.semilogx(nb_calls, speedup, linewidth=1.0, color = 'b') pylab.semilogx(nb_calls, speedup, linewidth=1.0, color = 'b')
pylab.grid(True) pylab.grid(True)
if (idx == 2) or (idx == 3): if (idx == 2) or (idx == 3):
pylab.xlabel('Dimension of vectors a and b') pylab.xlabel('Dimension of vectors a and b', fontsize = 15)
if (idx == 0) or (idx == 2): if (idx == 0) or (idx == 2):
pylab.ylabel('Speed up vs NumPy') pylab.ylabel('Speed up vs NumPy', fontsize = 15)
pylab.axhline(y=1, linewidth=1.0, color='black') pylab.axhline(y=1, linewidth=1.0, color='black')
pylab.xlim(1e3,1e7) pylab.xlim(1e3,1e7)
pylab.xticks([1e3,1e5,1e7],['1e3','1e5','1e7']) pylab.xticks([1e3,1e5,1e7],['1e3','1e5','1e7'])
pylab.title(expr) pylab.title(expr)
#for time,expr,color in zip(times,str_expr,colors):
# speedup = [t[0].min()/t[1].min() for t in time]
# plot.semilogx(nb_calls, speedup, linewidth=1.0, linestyle='--', color=color)
# speedup = [t[0].min()/t[2].min() for t in time]
# plot.semilogx(nb_calls, speedup, linewidth=1.0, color=color)
#legend += ["Numexpr "+expr,"Theano "+expr]
#pylab.title('Speed up Numexpr and Theano vs NumPy')
#pylab.grid(True)
#pylab.xlabel('Nb element')
#pylab.ylabel('Speed up vs NumPy')
#pylab.legend(legend,loc='upper left')
# fig.legend(legend,loc='upper left')
if fname: if fname:
fig.savefig(fname) fig.savefig(fname)
......
...@@ -126,6 +126,18 @@ class AddDestroyHandler(gof.Optimizer): ...@@ -126,6 +126,18 @@ class AddDestroyHandler(gof.Optimizer):
super(AddDestroyHandler, self).add_requirements(env) super(AddDestroyHandler, self).add_requirements(env)
env.extend(gof.DestroyHandler()) env.extend(gof.DestroyHandler())
class PrintCurrentEnv(gof.Optimizer):
"""This optimizer is for debugging.
Toss it into the optimization pipeline to see the state of things at any given point.
"""
def __init__(self, header):
self.header =header
def apply(self, env):
import theano.printing
print "PrintCurrentEnv:", self.header
theano.printing.debugprint(env.outputs)
optdb = gof.SequenceDB() optdb = gof.SequenceDB()
optdb.register('merge1', gof.MergeOptimizer(), optdb.register('merge1', gof.MergeOptimizer(),
0, 'fast_run', 'fast_compile') 0, 'fast_run', 'fast_compile')
...@@ -133,10 +145,19 @@ optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwis ...@@ -133,10 +145,19 @@ optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwis
1, 'fast_run', 'fast_compile') 1, 'fast_run', 'fast_compile')
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=False), optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=False),
1.2, 'fast_run', 'fast_compile') 1.2, 'fast_run', 'fast_compile')
optdb.register('Print1.21', PrintCurrentEnv('Post-canonicalize'),
1.21,)# 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs
1.5, 'fast_run') 1.5, 'fast_run')
optdb.register('Print1.51', PrintCurrentEnv('Post-stabilize'),
1.51,) #'fast_run', 'fast_compile')
optdb.register('specialize', gof.EquilibriumDB(), # misc special cases for speed optdb.register('specialize', gof.EquilibriumDB(), # misc special cases for speed
2, 'fast_run') 2, 'fast_run')
optdb.register('Print2.01', PrintCurrentEnv('Post-specialize'),
2.01, )#'fast_run', 'fast_compile')
optdb.register('specialize_device', gof.EquilibriumDB(), # misc special cases for speed that are dependent on the device.
48.6, 'fast_run')#must be after gpu stuff at 48.5
optdb.register('merge2', gof.MergeOptimizer(), # especially constant merge optdb.register('merge2', gof.MergeOptimizer(), # especially constant merge
49, 'fast_run') 49, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(), optdb.register('add_destroy_handler', AddDestroyHandler(),
......
...@@ -341,6 +341,8 @@ class Value(Variable): ...@@ -341,6 +341,8 @@ class Value(Variable):
if value is not None: if value is not None:
raise ValueError("Value instances cannot have an owner.") raise ValueError("Value instances cannot have an owner.")
owner = property(lambda self: None, __set_owner) owner = property(lambda self: None, __set_owner)
value = property(lambda self: self.data,
doc='read-only data access method')
# index is not defined, because the `owner` attribute must necessarily be None # index is not defined, because the `owner` attribute must necessarily be None
......
...@@ -525,7 +525,8 @@ class PatternSub(LocalOptimizer): ...@@ -525,7 +525,8 @@ class PatternSub(LocalOptimizer):
(scrabble, 'x')) (scrabble, 'x'))
""" """
def __init__(self, in_pattern, out_pattern, allow_multiple_clients = False): def __init__(self, in_pattern, out_pattern, allow_multiple_clients = False,
skip_identities_fn=None):
""" """
Creates a PatternSub that replaces occurrences of Creates a PatternSub that replaces occurrences of
in_pattern by occurrences of out_pattern. in_pattern by occurrences of out_pattern.
...@@ -543,7 +544,12 @@ class PatternSub(LocalOptimizer): ...@@ -543,7 +544,12 @@ class PatternSub(LocalOptimizer):
raise TypeError("The pattern to search for must start with a specific Op instance.") raise TypeError("The pattern to search for must start with a specific Op instance.")
self.__doc__ = self.__class__.__doc__ + "\n\nThis instance does: " + str(self) + "\n" self.__doc__ = self.__class__.__doc__ + "\n\nThis instance does: " + str(self) + "\n"
self.allow_multiple_clients = allow_multiple_clients self.allow_multiple_clients = allow_multiple_clients
self.skip_identities_fn = skip_identities_fn
def skip_identities(self, expr):
if self.skip_identities_fn:
return self.skip_identities_fn(expr)
def op_key(self): def op_key(self):
return self.op return self.op
...@@ -568,13 +574,22 @@ class PatternSub(LocalOptimizer): ...@@ -568,13 +574,22 @@ class PatternSub(LocalOptimizer):
if node.op != self.op: if node.op != self.op:
return False return False
def match(pattern, expr, u, allow_multiple_clients = False): def match(pattern, expr, u, allow_multiple_clients = False):
def retry_with_equiv():
expr_equiv = self.skip_identities(expr)
if expr_equiv is None:
return False
#TODO: Not sure how to handle multiple_clients flag
###print 'retrying match', pattern, expr_equiv
return match(pattern, expr_equiv, u,
allow_multiple_clients=allow_multiple_clients)
if isinstance(pattern, (list, tuple)): if isinstance(pattern, (list, tuple)):
if expr.owner is None: if expr.owner is None:
return False return False
if not (expr.owner.op == pattern[0]) or (not allow_multiple_clients and len(expr.clients) > 1): if not (expr.owner.op == pattern[0]) or (not allow_multiple_clients and len(expr.clients) > 1):
return False return retry_with_equiv()
if len(pattern) - 1 != len(expr.owner.inputs): if len(pattern) - 1 != len(expr.owner.inputs):
return False return retry_with_equiv()
for p, v in zip(pattern[1:], expr.owner.inputs): for p, v in zip(pattern[1:], expr.owner.inputs):
u = match(p, v, u, self.allow_multiple_clients) u = match(p, v, u, self.allow_multiple_clients)
if not u: if not u:
...@@ -588,17 +603,17 @@ class PatternSub(LocalOptimizer): ...@@ -588,17 +603,17 @@ class PatternSub(LocalOptimizer):
if constraint(expr): if constraint(expr):
return match(real_pattern, expr, u, pattern.get('allow_multiple_clients', False)) return match(real_pattern, expr, u, pattern.get('allow_multiple_clients', False))
else: else:
return False return retry_with_equiv()
elif isinstance(pattern, str): elif isinstance(pattern, str):
v = unify.Var(pattern) v = unify.Var(pattern)
if u[v] is not v and u[v] is not expr: if u[v] is not v and u[v] is not expr:
return False return retry_with_equiv()
else: else:
u = u.merge(expr, v) u = u.merge(expr, v)
elif isinstance(pattern, graph.Constant) and isinstance(expr, graph.Constant) and pattern.equals(expr): elif isinstance(pattern, graph.Constant) and isinstance(expr, graph.Constant) and pattern.equals(expr):
return u return u
else: else:
return False return retry_with_equiv()
return u return u
def build(pattern, u): def build(pattern, u):
...@@ -614,6 +629,7 @@ class PatternSub(LocalOptimizer): ...@@ -614,6 +629,7 @@ class PatternSub(LocalOptimizer):
if u: if u:
p = self.out_pattern p = self.out_pattern
new = build(p, u) new = build(p, u)
####print "PatternSub matched:", new
return [new] return [new]
else: else:
return False return False
......
...@@ -359,7 +359,8 @@ pprint.assign(lambda pstate, r: hasattr(pstate, 'target') and pstate.target is n ...@@ -359,7 +359,8 @@ pprint.assign(lambda pstate, r: hasattr(pstate, 'target') and pstate.target is n
pp = pprint pp = pprint
def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.png'), compact=True, mode=None, format='png'): def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.png'),
compact=True, mode=None, format='png', with_ids=False):
""" """
print to a file in png format the graph of op of a compile theano fct. print to a file in png format the graph of op of a compile theano fct.
...@@ -390,14 +391,15 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -390,14 +391,15 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
g=pd.Dot() g=pd.Dot()
var_str={} var_str={}
all_strings = set()
def var_name(var): def var_name(var):
if var in var_str: if var in var_str:
return var_str[var] return var_str[var]
if var.name is not None: if var.name is not None:
varstr = var.name+" "+str(var.type) varstr = 'name='+var.name+" "+str(var.type)
elif isinstance(var,gof.Constant): elif isinstance(var,gof.Constant):
dstr = str(var.data) dstr = 'val='+str(var.data)
if '\n' in dstr: if '\n' in dstr:
dstr = dstr[:dstr.index('\n')] dstr = dstr[:dstr.index('\n')]
if len(dstr) > 30: if len(dstr) > 30:
...@@ -408,12 +410,17 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -408,12 +410,17 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
else: else:
#a var id is needed as otherwise var with the same type will be merged in the graph. #a var id is needed as otherwise var with the same type will be merged in the graph.
varstr = str(var.type) varstr = str(var.type)
varstr += ' ' + str(len(var_str)) if (varstr in all_strings) or with_ids:
varstr += ' id=' + str(len(var_str))
var_str[var]=varstr var_str[var]=varstr
all_strings.add(varstr)
return varstr return varstr
topo = fct.maker.env.toposort() topo = fct.maker.env.toposort()
apply_name_cache = {}
def apply_name(node): def apply_name(node):
if node in apply_name_cache:
return apply_name_cache[node]
prof_str='' prof_str=''
if mode: if mode:
time = mode.apply_time.get((topo.index(node),node),0) time = mode.apply_time.get((topo.index(node),node),0)
...@@ -425,7 +432,12 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -425,7 +432,12 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
pf=0 pf=0
else: pf = time*100/mode.fct_call_time[fct] else: pf = time*100/mode.fct_call_time[fct]
prof_str=' (%.3fs,%.3f%%,%.3f%%)'%(time,pt,pf) prof_str=' (%.3fs,%.3f%%,%.3f%%)'%(time,pt,pf)
return str(node.op).replace(':','_')+' '+str(topo.index(node))+prof_str applystr = str(node.op).replace(':','_')
if (applystr in all_strings) or with_ids:
applystr = applystr+' id='+str(topo.index(node))+prof_str
all_strings.add(applystr)
apply_name_cache[node] = applystr
return applystr
# Update the inputs that have an update function # Update the inputs that have an update function
input_update={} input_update={}
...@@ -434,16 +446,18 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -434,16 +446,18 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
if i.update is not None: if i.update is not None:
input_update[outputs.pop()] = i input_update[outputs.pop()] = i
apply_shape='ellipse'
var_shape='box'
for node_idx,node in enumerate(topo): for node_idx,node in enumerate(topo):
astr=apply_name(node) astr=apply_name(node)
g.add_node(pd.Node(astr,shape='box')) g.add_node(pd.Node(astr,shape=apply_shape))
for id,var in enumerate(node.inputs): for id,var in enumerate(node.inputs):
varstr=var_name(var) varstr=var_name(var)
label='' label=''
if len(node.inputs)>1: if len(node.inputs)>1:
label=str(id) label=str(id)
if var.owner is None: if var.owner is None:
g.add_node(pd.Node(varstr,color='green')) g.add_node(pd.Node(varstr,color='green',shape=var_shape))
g.add_edge(pd.Edge(varstr,astr, label=label)) g.add_edge(pd.Edge(varstr,astr, label=label))
elif var.name or not compact: elif var.name or not compact:
g.add_edge(pd.Edge(varstr,astr, label=label)) g.add_edge(pd.Edge(varstr,astr, label=label))
...@@ -460,10 +474,10 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -460,10 +474,10 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
label=str(id) label=str(id)
if out: if out:
g.add_edge(pd.Edge(astr, varstr, label=label)) g.add_edge(pd.Edge(astr, varstr, label=label))
g.add_node(pd.Node(varstr,color='blue')) g.add_node(pd.Node(varstr,color='blue',shape=var_shape))
elif len(var.clients)==0: elif len(var.clients)==0:
g.add_edge(pd.Edge(astr, varstr, label=label)) g.add_edge(pd.Edge(astr, varstr, label=label))
g.add_node(pd.Node(varstr,color='grey')) g.add_node(pd.Node(varstr,color='grey',shape=var_shape))
elif var.name or not compact: elif var.name or not compact:
g.add_edge(pd.Edge(astr, varstr, label=label)) g.add_edge(pd.Edge(astr, varstr, label=label))
# else: # else:
...@@ -495,9 +509,9 @@ def pydot_var(vars, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -495,9 +509,9 @@ def pydot_var(vars, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
return var_str[var] return var_str[var]
if var.name is not None: if var.name is not None:
varstr = var.name varstr = 'name='+var.name
elif isinstance(var,gof.Constant): elif isinstance(var,gof.Constant):
dstr = str(var.data) dstr = 'val='+str(var.data)
if '\n' in dstr: if '\n' in dstr:
dstr = dstr[:dstr.index('\n')] dstr = dstr[:dstr.index('\n')]
if len(dstr) > 30: if len(dstr) > 30:
......
...@@ -21,8 +21,8 @@ def debug(*msg): ...@@ -21,8 +21,8 @@ def debug(*msg):
# printed and this module will not be working properly (we set `cuda_available` # printed and this module will not be working properly (we set `cuda_available`
# to False). # to False).
# This variable is True by default, and set to False if something goes wrong # This variable is True by default, and set to False if nvcc is not available or
# when trying to initialize cuda. # their is no cuda card or something goes wrong when trying to initialize cuda.
cuda_available = True cuda_available = True
# Global variable to avoid displaying the same warning multiple times. # Global variable to avoid displaying the same warning multiple times.
...@@ -89,6 +89,9 @@ except Exception, e: ...@@ -89,6 +89,9 @@ except Exception, e:
error( "Failed to compile cuda_ndarray.cu: %s" % str(e)) error( "Failed to compile cuda_ndarray.cu: %s" % str(e))
set_cuda_disabled() set_cuda_disabled()
if cuda_available:
cuda_available=device_available()
if cuda_available: if cuda_available:
#check if their is an old cuda_ndarray that was loading instead of the one we compiled! #check if their is an old cuda_ndarray that was loading instead of the one we compiled!
import cuda_ndarray.cuda_ndarray import cuda_ndarray.cuda_ndarray
......
...@@ -1715,14 +1715,21 @@ class GpuSubtensor(tensor.Subtensor): ...@@ -1715,14 +1715,21 @@ class GpuSubtensor(tensor.Subtensor):
cdata = tuple(map(convert, self.idx_list)) cdata = tuple(map(convert, self.idx_list))
if len(cdata) == 1: if len(cdata) == 1:
cdata = cdata[0] cdata = cdata[0]
out[0] = x.__getitem__(cdata)
# some numpy installations don't expose the __index__() methods for if 0:
# numpy.int8/16/32/64. Casting to python's int instead # JSB: commenting this out because it breaks code and does not look right
start = int(cdata.start) if cdata.start!=None else None # Dumi could you try to run the examples in the DeepLearningBenchmarks
stop = int(cdata.stop) if cdata.stop!=None else None # for example? This logic doesn't seem right -- we just
step = int(cdata.step) if cdata.step!=None else None # cast cdata to a tuple, so it doesn't have a .start field.
newslice = slice(start,stop,step)
out[0] = x.__getitem__(newslice) # some numpy installations don't expose the __index__() methods for
# numpy.int8/16/32/64. Casting to python's int instead
start = int(cdata.start) if cdata.start!=None else None
stop = int(cdata.stop) if cdata.stop!=None else None
step = int(cdata.step) if cdata.step!=None else None
newslice = slice(start,stop,step)
out[0] = x.__getitem__(newslice)
class GpuIncSubtensor(tensor.IncSubtensor): class GpuIncSubtensor(tensor.IncSubtensor):
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
......
...@@ -722,7 +722,9 @@ conv_rows_stack2( float* img, float* kern, float* out, ...@@ -722,7 +722,9 @@ conv_rows_stack2( float* img, float* kern, float* out,
if(preload_full_kern) idx_kern=&d_kern[row*kern_wid]; if(preload_full_kern) idx_kern=&d_kern[row*kern_wid];
else idx_kern=d_kern; else idx_kern=d_kern;
const float* idx_in=&d_img[((shared_row+row)%nb_rows)*img_wid+out_col]; const float* idx_in=&d_img[((shared_row+row)%nb_rows)*img_wid+out_col];
convolutionRowNoFlip<KERN_WIDTH>(sum,idx_in,idx_kern,kern_wid); float sum_ =0.0f;
convolutionRowNoFlip<KERN_WIDTH>(sum_,idx_in,idx_kern,kern_wid);
sum+=sum_;//We pass by an intermediate variable to have more precission.
} }
} }
} }
......
...@@ -1604,6 +1604,35 @@ static PyTypeObject CudaNdarrayType = ...@@ -1604,6 +1604,35 @@ static PyTypeObject CudaNdarrayType =
CudaNdarray_new, /* tp_new */ CudaNdarray_new, /* tp_new */
}; };
//This fct return True it is able to find a cuda card and query its properti
//Otherwise we return False
PyObject *
device_available(PyObject* _unsed, PyObject * args)
{
int deviceCount;
cudaError err = cudaGetDeviceCount(&deviceCount);
if( cudaSuccess != err) {
Py_RETURN_FALSE;
}
if (deviceCount <= 0) {
Py_RETURN_FALSE;
}
cudaDeviceProp deviceProp;
err=cudaGetDeviceProperties(&deviceProp, 0);
if( cudaSuccess != err) {
Py_RETURN_FALSE;
}
if(deviceProp.major == 9999 && deviceProp.minor == 9999 ){
Py_RETURN_FALSE;
}
Py_RETURN_TRUE;
}
PyObject * PyObject *
CudaNdarray_gpu_init(PyObject* _unsed, PyObject * args) CudaNdarray_gpu_init(PyObject* _unsed, PyObject * args)
{ {
...@@ -1810,6 +1839,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -1810,6 +1839,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
static PyMethodDef module_methods[] = { static PyMethodDef module_methods[] = {
{"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."}, {"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."},
{"device_available", device_available, METH_VARARGS, "Return Py_True if a cuda card is available."},
{"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Allow to select the gpu card to use."}, {"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Allow to select the gpu card to use."},
{"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."}, {"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."},
{"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"}, {"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"},
......
...@@ -165,7 +165,9 @@ def exec_conv(version, shapes, verbose, random, mode, print_=None, rtol=1e-5, on ...@@ -165,7 +165,9 @@ def exec_conv(version, shapes, verbose, random, mode, print_=None, rtol=1e-5, on
ret = _params_allgood(ishape, kshape, mode, ret = _params_allgood(ishape, kshape, mode,
subsample=subshape, img_stride=istride, kern_stride=kstride, subsample=subshape, img_stride=istride, kern_stride=kstride,
version=ver, verbose=verbose, random=random, id=id,print_=print_,rtol=rtol,ones=ones) version=ver, verbose=verbose, random=random, id=id,print_=print_,rtol=rtol,ones=ones)
except: except Exception, e:
print ver, id,(ishape, kshape, subshape, istride, kstride)
print e
pass pass
if not ret: if not ret:
failed_version.add(ver) failed_version.add(ver)
......
...@@ -11,6 +11,7 @@ if cuda_available: ...@@ -11,6 +11,7 @@ if cuda_available:
import unittest import unittest
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from nose.plugins.skip import SkipTest
#TODO: test gpu #TODO: test gpu
# Done in test_consistency_GPU_{serial,parallel} # Done in test_consistency_GPU_{serial,parallel}
...@@ -22,7 +23,6 @@ from theano.tests import unittest_tools as utt ...@@ -22,7 +23,6 @@ from theano.tests import unittest_tools as utt
#TODO: make tests work when no flags gived. Now need: THEANO_FLAGS=device=gpu0,floatX=float32 #TODO: make tests work when no flags gived. Now need: THEANO_FLAGS=device=gpu0,floatX=float32
# Partly done, in test_consistency_GPU_{serial,parallel} # Partly done, in test_consistency_GPU_{serial,parallel}
#TODO: bug fix test_normal0, in normal() fct, n_samples currently need to be numpy.prod(size) not self.n_streams(size)
mode = config.mode mode = config.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
...@@ -287,6 +287,7 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=[], ...@@ -287,6 +287,7 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=[],
for i in xrange(steps): for i in xrange(steps):
t0 = time.time() t0 = time.time()
ival = f(*inputs) ival = f(*inputs)
assert ival.shape==sample_size
dt += time.time() - t0 dt += time.time() - t0
ival = numpy.asarray(ival) ival = numpy.asarray(ival)
if i == 0: if i == 0:
...@@ -324,7 +325,7 @@ def test_uniform(): ...@@ -324,7 +325,7 @@ def test_uniform():
sample_size = (10,100) sample_size = (10,100)
steps = 50 steps = 50
else: else:
sample_size = (500,100) sample_size = (500,50)
steps = int(1e3) steps = int(1e3)
x = tensor.matrix() x = tensor.matrix()
...@@ -381,9 +382,9 @@ def test_binomial(): ...@@ -381,9 +382,9 @@ def test_binomial():
if mode in ['DEBUG_MODE','FAST_COMPILE']: if mode in ['DEBUG_MODE','FAST_COMPILE']:
sample_size = (10,50) sample_size = (10,50)
steps = 70 steps = 50
else: else:
sample_size = (500,100) sample_size = (500,50)
steps = int(1e3) steps = int(1e3)
x = tensor.matrix() x = tensor.matrix()
...@@ -430,9 +431,9 @@ def test_normal0(): ...@@ -430,9 +431,9 @@ def test_normal0():
steps = 50 steps = 50
if mode in ['DEBUG_MODE','FAST_COMPILE']: if mode in ['DEBUG_MODE','FAST_COMPILE']:
sample_size = (99,100) sample_size = (99,30)
else: else:
sample_size = (999,100) sample_size = (999,50)
print '' print ''
print 'ON CPU:' print 'ON CPU:'
...@@ -464,8 +465,8 @@ def test_normal0(): ...@@ -464,8 +465,8 @@ def test_normal0():
print 'random?[:10]\n', numpy.asarray(f())[0,0:10] print 'random?[:10]\n', numpy.asarray(f())[0,0:10]
print '----' print '----'
sys.stdout.flush() sys.stdout.flush()
basictest(f, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True) basictest(f, steps, sample_size_odd, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True)
print '' print ''
print 'ON CPU w NUMPY:' print 'ON CPU w NUMPY:'
...@@ -476,7 +477,7 @@ def test_normal0(): ...@@ -476,7 +477,7 @@ def test_normal0():
basictest(ff, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True) basictest(ff, steps, sample_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True)
def basic_multinomialtest(f, steps, target_pvals, prefix="", mean_rtol=0.04): def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04):
dt = 0.0 dt = 0.0
avg_pvals = numpy.zeros(target_pvals.shape, dtype=config.floatX) avg_pvals = numpy.zeros(target_pvals.shape, dtype=config.floatX)
...@@ -484,6 +485,7 @@ def basic_multinomialtest(f, steps, target_pvals, prefix="", mean_rtol=0.04): ...@@ -484,6 +485,7 @@ def basic_multinomialtest(f, steps, target_pvals, prefix="", mean_rtol=0.04):
for i in xrange(steps): for i in xrange(steps):
t0 = time.time() t0 = time.time()
ival = f() ival = f()
assert ival.shape==sample_size
dt += time.time() - t0 dt += time.time() - t0
#ival = numpy.asarray(ival) #ival = numpy.asarray(ival)
avg_pvals += ival avg_pvals += ival
...@@ -518,7 +520,7 @@ def test_multinomial(): ...@@ -518,7 +520,7 @@ def test_multinomial():
f = theano.function([], m, mode=mode_) f = theano.function([], m, mode=mode_)
theano.printing.debugprint(f) theano.printing.debugprint(f)
basic_multinomialtest(f, steps, pvals, prefix='mrg ') basic_multinomialtest(f, steps, sample_size, pvals, prefix='mrg ')
sys.stdout.flush() sys.stdout.flush()
...@@ -535,4 +537,4 @@ def test_multinomial(): ...@@ -535,4 +537,4 @@ def test_multinomial():
theano.printing.debugprint(f) theano.printing.debugprint(f)
sys.stdout.flush() sys.stdout.flush()
basic_multinomialtest(f, steps, pvals, prefix='gpu mrg ') basic_multinomialtest(f, steps, sample_size, pvals, prefix='gpu mrg ')
...@@ -302,7 +302,7 @@ class Scalar(Type): ...@@ -302,7 +302,7 @@ class Scalar(Type):
return "" return ""
def c_code_cache_version(self): def c_code_cache_version(self):
return (8,) # put const around operators and added unary '-' operator return (8, numpy.__version__) # put const around operators and added unary '-' operator
# no need to put lib.amdlibm here as c_compile_args() are put in the key. # no need to put lib.amdlibm here as c_compile_args() are put in the key.
return (7,) # make complex c code optional return (7,) # make complex c code optional
return (6,) # added implemeentations of operators that work with scalar arguments return (6,) # added implemeentations of operators that work with scalar arguments
...@@ -932,6 +932,7 @@ class IntDiv(BinaryScalarOp): ...@@ -932,6 +932,7 @@ class IntDiv(BinaryScalarOp):
return [None] * len(inputs) return [None] * len(inputs)
int_div = IntDiv(upcast_out, name = 'int_div') int_div = IntDiv(upcast_out, name = 'int_div')
floor_div = int_div
class Mod(BinaryScalarOp): class Mod(BinaryScalarOp):
def impl(self, x, y): def impl(self, x, y):
......
...@@ -887,6 +887,11 @@ class _tensor_py_operators: ...@@ -887,6 +887,11 @@ class _tensor_py_operators:
except Exception, e: except Exception, e:
return NotImplemented return NotImplemented
def __truediv__(self,other): return true_div(self, other)
def __floordiv__(self,other): return floor_div(self, other)
def __rtruediv__(self,other): return true_div(other, self)
def __rfloordiv__(self,other): return floor_div(other, self)
# ##### DON"T USE THESE BECAUSE INPLACE OPS SHOULD BE INSERTED BY OPTIMIZATION ONLY # ##### DON"T USE THESE BECAUSE INPLACE OPS SHOULD BE INSERTED BY OPTIMIZATION ONLY
# #ARITHMETIC - INPLACE # #ARITHMETIC - INPLACE
# def __iadd__(self,other): return _add_inplace(self,other) # def __iadd__(self,other): return _add_inplace(self,other)
...@@ -2066,6 +2071,11 @@ def true_div(a, b): ...@@ -2066,6 +2071,11 @@ def true_div(a, b):
"""elementwise [true] division (inverse of multiplication)""" """elementwise [true] division (inverse of multiplication)"""
# see decorator for function body # see decorator for function body
@_scal_elemwise
def floor_div(a, b):
"""elementwise [floor] division (inverse of multiplication)"""
# see decorator for function body
@_scal_elemwise @_scal_elemwise
def int_div(a, b): def int_div(a, b):
"""elementwise integer-division""" """elementwise integer-division"""
...@@ -3607,8 +3617,12 @@ class Dot(Op): ...@@ -3607,8 +3617,12 @@ class Dot(Op):
nx = x.type.ndim nx = x.type.ndim
ny = y.type.ndim ny = y.type.ndim
if nx not in (1,2): raise TypeError('not matrix or vector', x) if nx not in (1,2):
if ny not in (1,2): raise TypeError('not matrix or vector', y) raise TypeError(('dot supports matrix and vector args: email theano-dev about'
' enabling numpy dot semantics if you want them'), x)
if ny not in (1,2):
raise TypeError(('dot supports matrix and vector args: email theano-dev about'
' enabling numpy dot semantics if you want them'), y)
if nx == 2 and ny == 2: if nx == 2 and ny == 2:
bz = [x.type.broadcastable[0], y.type.broadcastable[1]] bz = [x.type.broadcastable[0], y.type.broadcastable[1]]
......
差异被折叠。
...@@ -397,41 +397,41 @@ def local_softmax_with_bias(node): ...@@ -397,41 +397,41 @@ def local_softmax_with_bias(node):
return return
return [sm_bias] return [sm_bias]
if 0: def softmax_simplifier(numerators, denominators):
def softmax_simplifier(numerators, denominators): for numerator in list(numerators):
for numerator in list(numerators): #TODO: a single softmax'd vector??
#TODO: a single softmax'd vector?? if not numerator.type.dtype.startswith('float'):
if not numerator.type.dtype.startswith('float'): continue
continue
if not numerator.type.broadcastable == (False, False):
if not numerator.type.broadcastable == (False, False): continue
continue if numerator.owner and numerator.owner.op == tensor.exp:
if numerator.owner and numerator.owner.op == tensor.exp: x = numerator.owner.inputs[0]
x = numerator.owner.inputs[0] else:
else: continue
continue
matching_denom = None
matching_denom = None
for denominator in denominators:
for denominator in denominators: if denominator.owner and isinstance(denominator.owner.op, tensor.DimShuffle):
if denominator.owner and isinstance(denominator.owner.op, tensor.DimShuffle): if denominator.owner.op.new_order == (0,'x'):
if denominator.owner.op.new_order == (0,'x'): z = denominator.owner.inputs[0] # thing getting dimshuffled
z = denominator.owner.inputs[0] # thing getting dimshuffled if z.owner and isinstance(z.owner.op, tensor.Sum):
if z.owner and isinstance(z.owner.op, tensor.Sum): #print 'ASDF', denominator.owner.op.new_order
#print 'ASDF', denominator.owner.op.new_order #print z.owner.op.axis
#print z.owner.op.axis if z.owner.op.axis == (1,):
if z.owner.op.axis == (1,): #print "almost there.. softmax", x, z.owner.inputs[0]
#print "almost there.. softmax", x, z.owner.inputs[0] if z.owner.inputs[0] is numerator:
if z.owner.inputs[0] is numerator: matching_denom = denominator
matching_denom = denominator break
break if matching_denom:
if matching_denom: numerators.remove(numerator)
numerators.remove(numerator) denominators.remove(matching_denom)
denominators.remove(matching_denom) numerators.append(softmax(x))
numerators.append(softmax(x)) return numerators, denominators
return numerators, denominators opt.local_mul_canonizer.add_simplifier(softmax_simplifier, 'softmax_simplifier')
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, 'softmax_simplifier')
if 0:
def softmax_grad_simplifier(numerators, denominators): def softmax_grad_simplifier(numerators, denominators):
print "mul simplify numerators" print "mul simplify numerators"
printing.debugprint(numerators) printing.debugprint(numerators)
......
...@@ -8,7 +8,7 @@ from theano import gof ...@@ -8,7 +8,7 @@ from theano import gof
from theano import scalar from theano import scalar
from theano import printing from theano import printing
from theano.tensor import basic as tensor from theano.tensor import basic as tensor
from theano.printing import pprint from theano.printing import pprint, debugprint
from theano.tensor import elemwise from theano.tensor import elemwise
from theano.tensor import opt from theano.tensor import opt
from theano.compile import optdb from theano.compile import optdb
...@@ -95,10 +95,17 @@ softplus = elemwise.Elemwise(scalar_softplus, name='softplus') ...@@ -95,10 +95,17 @@ softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus')) pprint.assign(softplus, printing.FunctionPrinter('softplus'))
def _skip_mul_1(r):
if r.owner and r.owner.op == tensor.mul:
not_is_1 = [i for i in r.owner.inputs if not _is_1(i) ]
if len(not_is_1)==1:
return not_is_1[0]
logsigm_to_softplus = gof.PatternSub( logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')), (tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))), (tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True) allow_multiple_clients = True,
skip_identities_fn=_skip_mul_1)
def _is_1(expr): def _is_1(expr):
"""rtype bool. True iff expr is a constant close to 1 """rtype bool. True iff expr is a constant close to 1
...@@ -115,7 +122,8 @@ log1msigm_to_softplus = gof.PatternSub( ...@@ -115,7 +122,8 @@ log1msigm_to_softplus = gof.PatternSub(
dict(pattern='y', constraint = _is_1), dict(pattern='y', constraint = _is_1),
(sigmoid, 'x'))), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')), (tensor.neg, (softplus, 'x')),
allow_multiple_clients = True) allow_multiple_clients = True,
skip_identities_fn=_skip_mul_1)
log1pexp_to_softplus = gof.PatternSub( log1pexp_to_softplus = gof.PatternSub(
(tensor.log1p, (tensor.log1p,
...@@ -329,3 +337,48 @@ register_local_1msigmoid = False ...@@ -329,3 +337,48 @@ register_local_1msigmoid = False
if register_local_1msigmoid: if register_local_1msigmoid:
opt.register_canonicalize(local_1msigmoid) opt.register_canonicalize(local_1msigmoid)
if 0:
# This code is if'd out because it is not complete,
# and it isn't obviously a good idea anyway.
# The motivation here was to identify the last exp() node
# in the SciPy2010 article, which was not optimized away at the time of publication,
# so the example is actually not numerically stable, even though it should be.
@opt.register_stabilize
@gof.local_optimizer([tensor.mul])
def local_sigm_gest(node):
print "CANONICALIZE"
print sigm_canonicalize(node)
def sigm_canonicalize(node):
add = tensor.add
mul = tensor.mul
div = tensor.true_div
if node.op == tensor.add:
rval = []
for i in node.inputs:
rval += sigm_canonicalize(i)
return rval
if node.op == tensor.mul:
rval = sigm_canonicalize(node.inputs[0])
for i in node.inputs[1:]:
old_rval = rval
rval = []
for t1 in sigm_canonicalize(i):
for t0 in old_rval:
assert t1.owner.op == div
assert t0.owner.op == div
t0top, t0bot = t0.owner.inputs
t1top, t1bot = t1.owner.inputs
rval.append(div(mul(*(t0top+t1top)), mul(*(t0bot+t1bot))))
if len(rval) > 100:
# This loop can be exponentially long.
# aborting
return []
elif len(node.outputs)>1:
return []
else:
return [node.outputs[0]]
...@@ -924,6 +924,10 @@ class Test_softmax_opt(): ...@@ -924,6 +924,10 @@ class Test_softmax_opt():
assert softmax in f_ops assert softmax in f_ops
f(self.rng.rand(3,4)) f(self.rng.rand(3,4))
def test_grad(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
# test that function contains softmax and no div. # test that function contains softmax and no div.
w = T.matrix() w = T.matrix()
g = theano.function([c,w],T.grad((p_y*w).sum(), c)) g = theano.function([c,w],T.grad((p_y*w).sum(), c))
......
...@@ -144,6 +144,11 @@ def register_specialize(lopt, *tags, **kwargs): ...@@ -144,6 +144,11 @@ def register_specialize(lopt, *tags, **kwargs):
compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags) compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags)
return lopt return lopt
def register_specialize_device(lopt, *tags, **kwargs):
name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['specialize_device'].register(name, lopt, 'fast_run', *tags)
return lopt
def register_stabilize(lopt, *tags, **kwargs): def register_stabilize(lopt, *tags, **kwargs):
name = (kwargs and kwargs.pop('name')) or lopt.__name__ name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['stabilize'].register(name, lopt, 'fast_run', *tags) compile.optdb['stabilize'].register(name, lopt, 'fast_run', *tags)
...@@ -189,6 +194,11 @@ def local_dimshuffle_lift(node): ...@@ -189,6 +194,11 @@ def local_dimshuffle_lift(node):
register_canonicalize(local_dimshuffle_lift) register_canonicalize(local_dimshuffle_lift)
register_specialize(local_dimshuffle_lift) register_specialize(local_dimshuffle_lift)
@register_canonicalize
@gof.local_optimizer([])
def local_dimshuffle_no_inplace_at_canonicalize(node):
if isinstance(node.op, T.DimShuffle) and node.op.inplace:
return [T.DimShuffle(node.op.input_broadcastable, node.op.new_order, inplace=False)(node.inputs[0])]
##################################### #####################################
...@@ -1603,18 +1613,20 @@ def local_sum_mul_by_scalar(node): ...@@ -1603,18 +1613,20 @@ def local_sum_mul_by_scalar(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([])
def local_sum_div_dimshuffle(node): def local_sum_div_dimshuffle(node):
'''sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis=l) / b, '''sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b,
if dimension l of the DimShuffle is 'x'.''' if dimension l of the DimShuffle is 'x'.'''
# TODO: extend it to product, and quotient of products # TODO: extend it to product, and quotient of products
if isinstance(node.op, T.Sum): if isinstance(node.op, T.Sum):
axis = node.op.axis axis = node.op.axis
if axis is None:
axis = range(node.inputs[0].ndim)
#print 'axis =', axis #print 'axis =', axis
thing_summed = node.inputs[0] thing_summed = node.inputs[0]
dimshuffled = None dimshuffled = None
if thing_summed.owner and thing_summed.owner.op == T.true_div: if thing_summed.owner and thing_summed.owner.op == T.true_div:
numerator, denominator = thing_summed.owner.inputs numerator, denominator = thing_summed.owner.inputs
if isinstance(numerator.owner.op, T.DimShuffle): if numerator.owner and isinstance(numerator.owner.op, T.DimShuffle):
new_order = numerator.owner.op.new_order new_order = numerator.owner.op.new_order
#print 'new_order =', new_order #print 'new_order =', new_order
# check compatibility # check compatibility
...@@ -1630,7 +1642,7 @@ def local_sum_div_dimshuffle(node): ...@@ -1630,7 +1642,7 @@ def local_sum_div_dimshuffle(node):
#else: #else:
# print 'incompatible dims:', axis, new_order # print 'incompatible dims:', axis, new_order
if isinstance(denominator.owner.op, T.DimShuffle): if denominator.owner and isinstance(denominator.owner.op, T.DimShuffle):
new_order = denominator.owner.op.new_order new_order = denominator.owner.op.new_order
#print 'new_order =', new_order #print 'new_order =', new_order
# check compatibility # check compatibility
...@@ -1827,9 +1839,31 @@ def local_pow_specialize(node): ...@@ -1827,9 +1839,31 @@ def local_pow_specialize(node):
rval = [T.inv(xsym)] rval = [T.inv(xsym)]
if N.all(y == -2): if N.all(y == -2):
rval = [T.inv(T.sqr(xsym))] rval = [T.inv(T.sqr(xsym))]
if rval:
rval[0] = T.cast(rval[0], odtype)
assert rval[0].type == node.outputs[0].type, (rval, node.outputs)
return rval
else:
return False
register_specialize(local_pow_specialize)
# Optimize all integral powers in [-RANGE, RANGE] @register_specialize_device
if config.experimental.pow and rval is None and abs(y)==int(abs(y)) and abs(y) <= 512:# 512 is too small for the cpu and too big for some gpu! @gof.local_optimizer([T.pow])
def local_pow_specialize_device(node):
"""
This optimization is not the same on all device. We do it only on cpu here.
"""
if node.op == T.pow:
#the idea here is that we have pow(x, y)
odtype = node.outputs[0].dtype
xsym = node.inputs[0]
ysym = node.inputs[1]
y = local_mul_canonizer.get_constant(ysym)
if (y is not None) \
and encompasses_broadcastable(xsym.type.broadcastable, ysym.type.broadcastable):
rval = None
# 512 is too small for the cpu and too big for some gpu!
if abs(y)==int(abs(y)) and abs(y) <= 512:
pow2 = [xsym] pow2 = [xsym]
pow2_scal = [theano.scalar.Scalar(xsym.dtype)()] pow2_scal = [theano.scalar.Scalar(xsym.dtype)()]
y_to_do = abs(y) y_to_do = abs(y)
...@@ -1859,14 +1893,7 @@ def local_pow_specialize(node): ...@@ -1859,14 +1893,7 @@ def local_pow_specialize(node):
rval[0] = T.cast(rval[0], odtype) rval[0] = T.cast(rval[0], odtype)
assert rval[0].type == node.outputs[0].type, (rval, node.outputs) assert rval[0].type == node.outputs[0].type, (rval, node.outputs)
return rval return rval
else:
return False
register_specialize(local_pow_specialize)
theano.configparser.AddConfigVar('experimental.pow',
"Transform a pow to a constant integer to a graph of mul. Fast on cpu, but more work needed for gpu.",
theano.configparser.BoolParam(False),
)
@gof.local_optimizer([T.mul]) @gof.local_optimizer([T.mul])
def local_mul_specialize(node): def local_mul_specialize(node):
"""Remove special-case constants from mul arguments """Remove special-case constants from mul arguments
...@@ -1965,20 +1992,28 @@ register_specialize(local_add_specialize) ...@@ -1965,20 +1992,28 @@ register_specialize(local_add_specialize)
mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer, local_fill_cut, local_fill_sink)) mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer, local_fill_cut, local_fill_sink))
def check_for_x_over_absX(numerators, denominators): def check_for_x_over_absX(numerators, denominators):
"""Convert x/abs(x) into sign(x). """
# TODO: this function should dig/search through dimshuffles # TODO: this function should dig/search through dimshuffles
# This won't catch a dimshuffled absolute value # This won't catch a dimshuffled absolute value
for den in list(denominators): for den in list(denominators):
if den.owner and den.owner.op == T.abs_ and den.owner.inputs[0] in numerators: if den.owner and den.owner.op == T.abs_ and den.owner.inputs[0] in numerators:
denominators.remove(den) if den.owner.inputs[0].type.dtype.startswith('complex'):
numerators.remove(den.owner.inputs[0]) #TODO: Make an Op that projects a complex number to have unit length
numerators.append(T.sgn(den.owner.inputs[0])) # but projects 0 to 0. That would be a weird Op, but consistent with the
# special case below. I heard there's some convention in Matlab that is
# similar to this... but not sure.
pass
else:
denominators.remove(den)
numerators.remove(den.owner.inputs[0])
numerators.append(T.sgn(den.owner.inputs[0]))
return numerators, denominators return numerators, denominators
local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'teststest') local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'teststest')
@register_stabilize @register_stabilize
@gof.local_optimizer([T.log]) @gof.local_optimizer([T.log])
def local_log1p(node): def local_log1p(node):
# log(1+exp(x)) -> log1p(x) # log(1+x) -> log1p(x)
if node.op == T.log: if node.op == T.log:
log_arg, = node.inputs log_arg, = node.inputs
if log_arg.owner and log_arg.owner.op == T.add: if log_arg.owner and log_arg.owner.op == T.add:
...@@ -2207,7 +2242,7 @@ def local_elemwise_fusion_op(OP): ...@@ -2207,7 +2242,7 @@ def local_elemwise_fusion_op(OP):
""" """
def local_fuse(node): def local_fuse(node):
""" """
As part of specialisation, we fusion two consecutif elemwise op of the same shape. As part of specialisation, we fuse two consecutive elemwise op of the same shape.
For mixed dtype, we let the Compise op do the cast. It let the C compile do the cast. For mixed dtype, we let the Compise op do the cast. It let the C compile do the cast.
The number of dimension is validated at call time by theano itself. The number of dimension is validated at call time by theano itself.
...@@ -2240,7 +2275,7 @@ def local_elemwise_fusion_op(OP): ...@@ -2240,7 +2275,7 @@ def local_elemwise_fusion_op(OP):
for i in node.inputs: for i in node.inputs:
do_fusion = False do_fusion = False
catch = False catch = False
if i.owner and isinstance(i.owner.op, OP) and len(i.clients)<=1: if i.owner and isinstance(i.owner.op, OP) and len(i.clients)==1:
#if the scalar_op don't have a c implementation, we skip its fusion to allow the fusion of the other ops. #if the scalar_op don't have a c implementation, we skip its fusion to allow the fusion of the other ops.
do_fusion=True do_fusion=True
try: try:
...@@ -2296,7 +2331,7 @@ def local_elemwise_fusion_op(OP): ...@@ -2296,7 +2331,7 @@ def local_elemwise_fusion_op(OP):
# There is a hard limit of 256 bytes for the formal argument list to a GPU kernel function. # There is a hard limit of 256 bytes for the formal argument list to a GPU kernel function.
# Here, we estimate how many bytes the new Op will need, and abort if it needs too much. # Here, we estimate how many bytes the new Op will need, and abort if it needs too much.
if True: if OP != T.Elemwise:
argument_limit = 240 # 16 bytes are used for block and thread coords etc. argument_limit = 240 # 16 bytes are used for block and thread coords etc.
#TODO: read in from architecture to make this 4 or 8 #TODO: read in from architecture to make this 4 or 8
int_size = 8 int_size = 8
......
...@@ -1604,6 +1604,54 @@ class t_dot(unittest.TestCase): ...@@ -1604,6 +1604,54 @@ class t_dot(unittest.TestCase):
#utt.verify_grad(dot, [self.rand(), self.rand(2)]) #utt.verify_grad(dot, [self.rand(), self.rand(2)])
#utt.verify_grad(dot, [self.rand(), self.rand(2,5)]) #utt.verify_grad(dot, [self.rand(), self.rand(2,5)])
def test_broadcastable_patterns(self):
#
# These examples hsould all work because we broadcastable or no, all dimensions of all
# results have size 1.
#
def val_for(r):
if r.ndim == 0:
return numpy.asarray(1.1, dtype=r.dtype)
if r.ndim == 1:
return numpy.asarray([1.2], dtype=r.dtype)
elif r.ndim == 2:
return numpy.asarray([[1.3]], dtype=r.dtype)
raise ValueError()
failures = []
for dtype0 in ('float32', 'float64', 'complex64', 'complex128'):
for dtype1 in ('float32', 'float64', 'complex64', 'complex128'):
for bc0 in ((True,), (False,), (True, True), (True, False), (False, True),
(False,False)):
for bc1 in ((True,), (False,), (True, True), (True, False), (False, True),
(False,False)):
x = TensorType(dtype=dtype0, broadcastable=bc0)()
y = TensorType(dtype=dtype1, broadcastable=bc1)()
z = dot(x,y)
t = TensorType(dtype=dtype0, broadcastable=z.broadcastable)()
rval = z * 3 + 2*t
if rval.type.dtype.startswith('complex'):
# there is a problem with complex numbers right now
# Elemwise code doesn't compile when both precisions of complex
# numbers are used in the same file because the operators
# aren't declared properly.
failures.append((dtype0, dtype1, bc0, bc1))
continue
f = function([x,y,t], rval)
xval = val_for(x)
yval = val_for(y)
tval = val_for(t)
f(xval, yval, tval) #debugmode checks result
#if failures:
#print failures
assert not failures
class T_tensorfromscalar(unittest.TestCase): class T_tensorfromscalar(unittest.TestCase):
def test0(self): def test0(self):
s = scal.constant(56) s = scal.constant(56)
......
...@@ -416,7 +416,7 @@ def test_gemm_canonicalize(): ...@@ -416,7 +416,7 @@ def test_gemm_canonicalize():
can = [] can = []
_gemm_canonicalize(X + Y + u, 1.0, can, 0) _gemm_canonicalize(X + Y + u, 1.0, can, 0)
assert can == [(1.0, X), (1.0, Y), u] assert can == [(1.0, X), (1.0, Y), u], can
can = [] can = []
_gemm_canonicalize(a*X + Y - b*Z*c, 1.0, can, 0) _gemm_canonicalize(a*X + Y - b*Z*c, 1.0, can, 0)
......
from nose.plugins.skip import SkipTest
import unittest
import theano
import numpy
import random
import numpy.random
from theano.tests import unittest_tools as utt
'''
Different tests that are not connected to any particular Op, or functionality of
Theano. Here will go for example code that we will publish in papers, that we
should ensure that it will remain operational
'''
class T_diverse(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def scipy_paper_example1(self):
a = theano.tensor.vector('a') # declare variable
b = a + a**10 # build expression
f = theano.function([a], b) # compile function
assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026]))
def scipy_papaer_example2(self):
''' This just sees if things compile well and if they run '''
x = T.matrix()
y = T.vector()
w = shared(rng.randn(100))
b = shared(numpy.zeros(()))
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b))
xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
prediction = p_1 > 0.5
cost = xent.mean() + 0.01*(w**2).sum()
gw,gb = T.grad(cost, [w,b])
# Compile expressions to functions
train = function(
inputs=[x,y],
outputs=[prediction, xent],
updates={w:w-0.1*gw, b:b-0.1*gb})
predict = function(inputs=[x], outputs=prediction)
N = 4
feats = 100
D = (rng.randn(N, feats), rng.randint(size=4,low=0, high=2))
training_steps = 10
for i in range(training_steps):
pred, err = train(D[0], D[1])
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论