提交 a68c35b9 authored 作者: Frederic Bastien's avatar Frederic Bastien

merged modif in the trunk of theano.

...@@ -56,7 +56,7 @@ class OpFromGraph(gof.Op): ...@@ -56,7 +56,7 @@ class OpFromGraph(gof.Op):
def make_node(self, *inputs): def make_node(self, *inputs):
for input, type in zip(inputs, self.input_types): for input, type in zip(inputs, self.input_types):
if not type == input.type: if not type == input.type:
raise TypeError("Wrong type, expected %s but got %s" % type, input.type) raise TypeError("Wrong type, expected %s but got %s" % (type, input.type))
return gof.Apply(self, return gof.Apply(self,
inputs, inputs,
[type() for type in self.output_types]) [type() for type in self.output_types])
......
...@@ -15,7 +15,7 @@ import numpy ...@@ -15,7 +15,7 @@ import numpy
import theano.gof import theano.gof
#from theano import gof #from theano import gof
import copy import copy
import time
import mode as mode_module import mode as mode_module
from io import * from io import *
...@@ -840,6 +840,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False): ...@@ -840,6 +840,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
f[<kitname>] = seed #re-seed the elements of a RandomKit f[<kitname>] = seed #re-seed the elements of a RandomKit
""" """
t1 = time.time()
if mode is None: if mode is None:
mode = mode_module.default_mode mode = mode_module.default_mode
#backport #backport
...@@ -885,6 +886,10 @@ def function(inputs, outputs, mode=None, accept_inplace = False): ...@@ -885,6 +886,10 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
else: else:
Maker = getattr(mode, 'function_maker', FunctionMaker) Maker = getattr(mode, 'function_maker', FunctionMaker)
fn = Maker(inputs, outputs, mode, accept_inplace = accept_inplace).create(defaults) fn = Maker(inputs, outputs, mode, accept_inplace = accept_inplace).create(defaults)
t2 = time.time()
if hasattr(mode, 'compile_time'):
mode.compile_time+=t2-t1
return fn return fn
......
...@@ -9,28 +9,37 @@ class ProfileMode(Mode): ...@@ -9,28 +9,37 @@ class ProfileMode(Mode):
def __init__(self, linker=OpWiseCLinker(), optimizer=None): def __init__(self, linker=OpWiseCLinker(), optimizer=None):
local_time = [0.0] local_time = [0.0]
apply_time = {} apply_time = {}
apply_call = {}
op_time = {} op_time = {}
op_cimpl = {} op_cimpl = {}
op_call = {}
def blah(i, node, th): def blah(i, node, th):
if hasattr(th, 'cthunk'): if hasattr(th, 'cthunk'):
t0 = time.time() t0 = time.time()
run_cthunk(th.cthunk) failure = run_cthunk(th.cthunk)
dt = time.time() - t0 dt = time.time() - t0
if failure:
raise RuntimeError('A C Op raised an exception. PerformLinker cannot tell you what it was though. Use a standard mode such as FAST_RUN to correct the problem.')
else: else:
t0 = time.time() t0 = time.time()
th() th()
dt = time.time() - t0 dt = time.time() - t0
local_time[0] += dt local_time[0] += dt
apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt apply_time[(i,node.op, tuple(node.inputs))] = apply_time.get((i,node.op, tuple(node.inputs)), 0.0) + dt
apply_call[(i,node.op, tuple(node.inputs))] = apply_call.get((i,node.op, tuple(node.inputs)), 0) + 1
op_time[node.op] = op_time.get(node.op, 0.0) + dt op_time[node.op] = op_time.get(node.op, 0.0) + dt
op_cimpl[node.op] = hasattr(th, 'cthunk') op_cimpl[node.op] = hasattr(th, 'cthunk')
op_call[node.op] = op_call.get(node.op,0) + 1
self.local_time = local_time self.local_time = local_time
self.apply_time = apply_time self.apply_time = apply_time
self.apply_call = apply_call
self.op_time = op_time self.op_time = op_time
self.op_cimpl = op_cimpl self.op_cimpl = op_cimpl
self.op_call = op_call
self.compile_time = 0 #time passed in function()
if isinstance(linker, str): if isinstance(linker, str):
linker = predefined_linkers[linker] linker = predefined_linkers[linker]
...@@ -48,81 +57,98 @@ class ProfileMode(Mode): ...@@ -48,81 +57,98 @@ class ProfileMode(Mode):
The Op-wise summary print the execution time of all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there corresponding to the sum of the time spent in each of them). If two Op have different hash value, they will be separate. The Op-wise summary print the execution time of all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there corresponding to the sum of the time spent in each of them). If two Op have different hash value, they will be separate.
The type-Op-wise summary group the result by type of op. So event if two Op have different hash value, they will be merged. The type-Op-wise summary group the result by type of op. So event if two Op have different hash value, they will be merged.
Their is an hack with the Op-wise summary. Go see it if you want to know more.
param: n_apply_to_print the number of apply to print. Default 15. param: n_apply_to_print the number of apply to print. Default 15.
param: n_ops_to_print the number of ops to print. Default 20. param: n_ops_to_print the number of ops to print. Default 20.
""" """
local_time = self.local_time[0] local_time = self.local_time[0]
apply_time = self.apply_time apply_time = self.apply_time
apply_call = self.apply_call
op_time = self.op_time op_time = self.op_time
op_call = self.op_call
print '' print ''
print 'ProfileMode.print_summary()' print 'ProfileMode.print_summary()'
print '---------------------------' print '---------------------------'
print '' print ''
print 'local_time %fs (Time spent running thunks)'% local_time print 'local_time %fs (Time spent running thunks)'% local_time
print 'Apply-wise summary: <% of local_time spent at this position> <total of local_time spent at this position> (<Apply position>, <Apply Op name>)' print 'Apply-wise summary: <% of local_time spent at this position> <total of local_time spent at this position> <nb_call> <Apply position> <Apply Op name>'
atimes = [(t/local_time, t, (a[0], str(a[1]))) for a, t in apply_time.items()] atimes = [(t/local_time, t, (a[0], str(a[1])), apply_call[a]) for a, t in apply_time.items()]
atimes.sort() atimes.sort()
atimes.reverse() atimes.reverse()
tot=0 tot=0
for f,t,a in atimes[:n_apply_to_print]: for f,t,a,nb_call in atimes[:n_apply_to_print]:
tot+=t tot+=t
print ' %.2f%% %.3fs %.3fs %i %s' % (f*100, tot, t, a[0], a[1]) print ' %4.1f%% %.3fs %.3fs %i %i %s' % (f*100, tot, t, nb_call, a[0], a[1])
print ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\ print ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(atimes)-n_apply_to_print), %(max(0, len(atimes)-n_apply_to_print),
sum(f for f, t, a in atimes[n_apply_to_print:])*100, sum(f for f, t, a, nb_call in atimes[n_apply_to_print:])*100,
sum(t for f, t, a in atimes[n_apply_to_print:])) sum(t for f, t, a, nb_call in atimes[n_apply_to_print:]))
print '\nOp-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <Op name>' flops=False
otimes = [(t/local_time, t, a, self.op_cimpl[a]) for a, t in op_time.items()] flops_msg=''
for a,t in op_time.items():
if hasattr(a,'flops'):
flops=True
flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
break
print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <nb_call> <Op name>'%(flops_msg)
otimes = [(t/local_time, t, a, self.op_cimpl[a], op_call[a]) for a, t in op_time.items()]
otimes.sort() otimes.sort()
otimes.reverse() otimes.reverse()
tot=0 tot=0
for f,t,a,ci in otimes[:n_ops_to_print]: for f,t,a,ci,nb_call in otimes[:n_ops_to_print]:
tot+=t tot+=t
if ci: if ci:
msg = '*' msg = '*'
else: else:
msg = ' ' msg = ' '
print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, msg, a) m=-1
#backport if hasattr(a,'flops'):
#print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a) m=a.flops*self.op_call[a]/t/1e6
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\ if flops:
print ' %4.1f%% %.3fs %.3fs %s %7.1f %d %s' % (f*100, tot, t, msg, m, nb_call, a)
else:
print ' %4.1f%% %.3fs %.3fs %s %s' % (f*100, tot, t, msg, a)
print ' ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
%(max(0, len(otimes)-n_ops_to_print), %(max(0, len(otimes)-n_ops_to_print),
sum(f for f, t, a, ci in otimes[n_ops_to_print:])*100, sum(f for f, t, a, ci, nb_call in otimes[n_ops_to_print:])*100,
sum(t for f, t, a, ci in otimes[n_ops_to_print:])) sum(t for f, t, a, ci, nb_call in otimes[n_ops_to_print:]))
print '(*) Op is running a c implementation' print '(*) Op is running a c implementation'
sop_time={} sop_time={}
sop_call={}
sop_c={} #map each op class to Bool. True iff all applies were done in c. sop_c={} #map each op class to Bool. True iff all applies were done in c.
for a,t in op_time.items(): for a,t in op_time.items():
sop_time.setdefault(type(a),0) sop_time.setdefault(type(a),0)
sop_time[type(a)]+=t sop_time[type(a)]+=t
sop_c.setdefault(type(a),True) sop_c.setdefault(type(a),True)
sop_c[type(a)]=sop_c[type(a)] and self.op_cimpl[a] sop_c[type(a)]=sop_c[type(a)] and self.op_cimpl[a]
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <Op name>' sop_call[type(a)]=sop_call.get(type(a),0)+op_call[a]
sotimes = [(t/local_time, t, a, sop_c[a]) for a, t in sop_time.items()] print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <nb_call> <Op name>'
sotimes = [(t/local_time, t, a, sop_c[a], sop_call[a]) for a, t in sop_time.items()]
sotimes.sort() sotimes.sort()
sotimes.reverse() sotimes.reverse()
tot=0 tot=0
for f,t,a,ci in sotimes[:n_ops_to_print]: for f,t,a,ci, nb_call in sotimes[:n_ops_to_print]:
tot+=t tot+=t
if ci: if ci:
msg = '*' msg = '*'
else: else:
msg = ' ' msg = ' '
print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, msg, a) print ' %4.1f%% %.3fs %.3fs %s %d %s' % (f*100, tot, t, msg, nb_call, a)
#backport
#print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\ print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(sotimes)-n_ops_to_print), %(max(0, len(sotimes)-n_ops_to_print),
sum(f for f, t, a in sotimes[n_ops_to_print:])*100, sum(f for f, t, a, nb_call in sotimes[n_ops_to_print:])*100,
sum(t for f, t, a in sotimes[n_ops_to_print:])) sum(t for f, t, a, nb_call in sotimes[n_ops_to_print:]))
print '(*) Op is running a c implementation' print '(*) Op is running a c implementation'
print 'compile time: %.3fs'%self.compile_time
register_mode('PROFILE_MODE',ProfileMode()) register_mode('PROFILE_MODE',ProfileMode())
...@@ -138,3 +164,4 @@ def atexit_print_default_profile_mode(): ...@@ -138,3 +164,4 @@ def atexit_print_default_profile_mode():
#Register atexit_print_default_profile_mode to have the summary of the #Register atexit_print_default_profile_mode to have the summary of the
#predefined mode PROFILE_MODE if it is used printed when the program terminate. #predefined mode PROFILE_MODE if it is used printed when the program terminate.
atexit.register(atexit_print_default_profile_mode) atexit.register(atexit_print_default_profile_mode)
...@@ -279,7 +279,7 @@ class T_function(unittest.TestCase): ...@@ -279,7 +279,7 @@ class T_function(unittest.TestCase):
a = T.dmatrix() a = T.dmatrix()
f = function([a], Out(a, borrow=False)) f = function([a], Out(a, borrow=False))
o = N.ones((3,3)) o = N.ones((3,3))
assert o is not f(o) assert o is f(o) #borrow does not imply copy.
class T_picklefunction(unittest.TestCase): class T_picklefunction(unittest.TestCase):
......
...@@ -357,7 +357,6 @@ class CLinker(link.Linker): ...@@ -357,7 +357,6 @@ class CLinker(link.Linker):
self.env = env self.env = env
self.fetch_variables() self.fetch_variables()
self.no_recycling = no_recycling self.no_recycling = no_recycling
self.module_compile_str = cmodule.gcc_module_compile_str
return self return self
def fetch_variables(self): def fetch_variables(self):
...@@ -397,6 +396,8 @@ class CLinker(link.Linker): ...@@ -397,6 +396,8 @@ class CLinker(link.Linker):
self.consts = [] self.consts = []
c_support_code_apply = []
symbol = {} symbol = {}
# (init_)tasks contains a list of pairs (Op/Variable, task_name) # (init_)tasks contains a list of pairs (Op/Variable, task_name)
...@@ -477,7 +478,7 @@ class CLinker(link.Linker): ...@@ -477,7 +478,7 @@ class CLinker(link.Linker):
id += 2 id += 2
for node in self.node_order: for node_num, node in enumerate(self.node_order):
# We populate sub with a mapping from the variable names specified by the op's c_var_names # We populate sub with a mapping from the variable names specified by the op's c_var_names
# method to the actual variable names that we will use. # method to the actual variable names that we will use.
...@@ -486,7 +487,7 @@ class CLinker(link.Linker): ...@@ -486,7 +487,7 @@ class CLinker(link.Linker):
## for variable, vname in zip(op.inputs + op.outputs, ivnames + ovnames): ## for variable, vname in zip(op.inputs + op.outputs, ivnames + ovnames):
## sub[vname] = symbol[variable] ## sub[vname] = symbol[variable]
name = "<invalid_c_thing>" name = "node_%i" % node_num
isyms, osyms = [symbol[r] for r in node.inputs], [symbol[r] for r in node.outputs] isyms, osyms = [symbol[r] for r in node.inputs], [symbol[r] for r in node.outputs]
# c_validate_update is deprecated # c_validate_update is deprecated
...@@ -498,6 +499,11 @@ class CLinker(link.Linker): ...@@ -498,6 +499,11 @@ class CLinker(link.Linker):
sub['fail'] = failure_code(sub) sub['fail'] = failure_code(sub)
op = node.op op = node.op
# type-specific support code
try: c_support_code_apply.append(op.c_support_code_apply(node, name))
except utils.MethodNotDefined: pass
# emit c_code
try: behavior = op.c_code(node, name, isyms, osyms, sub) try: behavior = op.c_code(node, name, isyms, osyms, sub)
except utils.MethodNotDefined: except utils.MethodNotDefined:
raise NotImplementedError("%s cannot produce C code" % op) raise NotImplementedError("%s cannot produce C code" % op)
...@@ -539,6 +545,7 @@ class CLinker(link.Linker): ...@@ -539,6 +545,7 @@ class CLinker(link.Linker):
self.blocks = blocks self.blocks = blocks
self.tasks = tasks self.tasks = tasks
all = self.inputs + self.outputs + self.orphans all = self.inputs + self.outputs + self.orphans
self.c_support_code_apply = c_support_code_apply
if (self.init_tasks, self.tasks) != self.get_init_tasks(): if (self.init_tasks, self.tasks) != self.get_init_tasks():
print >> sys.stderr, "init_tasks\n", self.init_tasks print >> sys.stderr, "init_tasks\n", self.init_tasks
...@@ -561,6 +568,7 @@ class CLinker(link.Linker): ...@@ -561,6 +568,7 @@ class CLinker(link.Linker):
This might contain duplicates. This might contain duplicates.
""" """
ret = [] ret = []
# generic support code
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret.append(x.c_support_code()) try: ret.append(x.c_support_code())
except utils.MethodNotDefined: pass except utils.MethodNotDefined: pass
...@@ -599,28 +607,70 @@ class CLinker(link.Linker): ...@@ -599,28 +607,70 @@ class CLinker(link.Linker):
def headers(self): def headers(self):
"""WRITEME """WRITEME
Returns a list of headers that are needed by one Returns a list of headers that are needed by one
or more Variables or Ops. or more Types or Ops.
This might contain duplicates. The return value will not contain duplicates.
""" """
ret = [] ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_headers() try: ret += x.c_headers()
except utils.MethodNotDefined: pass except utils.MethodNotDefined: pass
return ret return list(set(ret))
def c_compiler(self):
c_compiler = None
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
if hasattr(x, 'c_compiler'):
x_compiler = x.c_compiler()
else:
continue
if c_compiler is None:
c_compiler = x_compiler
else:
if x_compiler and (x_compiler != c_compiler):
raise Exception('Nodes have requested specific different compilers',
(c_compiler, x_compiler))
return cmodule.gcc_module_compile_str if (c_compiler is None) else c_compiler
def header_dirs(self):
"""WRITEME
Returns a list of lib directories that are needed by one
or more Types or Ops.
The return value will not contain duplicates.
"""
ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_header_dirs()
except utils.MethodNotDefined: pass
return list(set(ret))
def libraries(self): def libraries(self):
"""WRITEME """WRITEME
Returns a list of libraries that are needed by one Returns a list of libraries that are needed by one
or more Variables or Ops. or more Types or Ops.
This might contain duplicates. The return value will not contain duplicates.
""" """
ret = [] ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_libraries() try: ret += x.c_libraries()
except utils.MethodNotDefined: pass except utils.MethodNotDefined: pass
return ret return list(set(ret))
def lib_dirs(self):
"""WRITEME
Returns a list of lib directories that are needed by one
or more Types or Ops.
The return value will not contain duplicates.
"""
ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_lib_dirs()
except utils.MethodNotDefined: pass
return list(set(ret))
def __compile__(self, input_storage = None, output_storage = None): def __compile__(self, input_storage = None, output_storage = None):
"""WRITEME """WRITEME
...@@ -787,11 +837,13 @@ class CLinker(link.Linker): ...@@ -787,11 +837,13 @@ class CLinker(link.Linker):
get_lock() get_lock()
try: try:
debug("LOCATION", location) debug("LOCATION", location)
module = self.module_compile_str( c_compiler = self.c_compiler()
module = c_compiler(
module_name=mod.name, module_name=mod.name,
src_code = mod.code(), src_code = mod.code(),
location=location, location=location,
include_dirs=[], include_dirs=self.header_dirs(),
lib_dirs=self.lib_dirs(),
libs=self.libraries(), libs=self.libraries(),
preargs=self.compile_args()) preargs=self.compile_args())
finally: finally:
...@@ -834,7 +886,7 @@ class CLinker(link.Linker): ...@@ -834,7 +886,7 @@ class CLinker(link.Linker):
""" % dict(struct_name = self.struct_name) """ % dict(struct_name = self.struct_name)
# We add all the support code, compile args, headers and libs we need. # We add all the support code, compile args, headers and libs we need.
for support_code in self.support_code(): for support_code in self.support_code() + self.c_support_code_apply:
mod.add_support_code(support_code) mod.add_support_code(support_code)
mod.add_support_code(self.struct_code) mod.add_support_code(self.struct_code)
mod.add_support_code(static) mod.add_support_code(static)
......
...@@ -93,7 +93,12 @@ class DynamicModule(object): ...@@ -93,7 +93,12 @@ class DynamicModule(object):
def code(self): def code(self):
sio = StringIO.StringIO() sio = StringIO.StringIO()
for inc in self.includes: for inc in self.includes:
print >> sio, "#include", inc if not inc:
continue
if inc[0] == '<' or inc[0] == '"':
print >> sio, "#include", inc
else:
print >> sio, '#include "%s"'%inc
print >> sio, "//////////////////////" print >> sio, "//////////////////////"
print >> sio, "//// Support Code" print >> sio, "//// Support Code"
...@@ -236,12 +241,8 @@ class ModuleCache(object): ...@@ -236,12 +241,8 @@ class ModuleCache(object):
self.module_from_name = dict(self.module_from_name) self.module_from_name = dict(self.module_from_name)
self.entry_from_key = dict(self.entry_from_key) self.entry_from_key = dict(self.entry_from_key)
self.stats = [0, 0, 0] self.stats = [0, 0, 0]
if force_fresh is None: if force_fresh is not None:
self.force_fresh = self.force_fresh
else:
self.force_fresh = force_fresh self.force_fresh = force_fresh
#backport
#self.force_fresh = self.force_fresh if force_fresh is None else force_fresh
self.loaded_key_pkl = set() self.loaded_key_pkl = set()
self.refresh() self.refresh()
...@@ -401,10 +402,13 @@ class ModuleCache(object): ...@@ -401,10 +402,13 @@ class ModuleCache(object):
key_broken = True key_broken = True
if not key_broken: if not key_broken:
key_from_file = cPickle.load(file(key_pkl)) try:
if key != key_from_file: key_from_file = cPickle.load(file(key_pkl))
raise Exception("key not equal to unpickled version (Hint: verify the __eq__ and __hash__ functions for your Ops", (key, key_from_file)) if key != key_from_file:
self.loaded_key_pkl.add(key_pkl) raise Exception("key not equal to unpickled version (Hint: verify the __eq__ and __hash__ functions for your Ops", (key, key_from_file))
self.loaded_key_pkl.add(key_pkl) # adding the key file to this set means it is a versioned key
except cPickle.UnpicklingError:
warning('Cache failure due to un-loadable key', key)
self.entry_from_key[key] = name self.entry_from_key[key] = name
self.module_from_name[name] = module self.module_from_name[name] = module
...@@ -426,8 +430,6 @@ class ModuleCache(object): ...@@ -426,8 +430,6 @@ class ModuleCache(object):
if age_thresh is None: if age_thresh is None:
age_thresh = self.age_thresh age_thresh = self.age_thresh
#backport
#age_thresh = self.age_thresh if age_thresh is None else age_thresh
compilelock.get_lock() compilelock.get_lock()
try: try:
# update the age of modules that have been accessed by other processes # update the age of modules that have been accessed by other processes
...@@ -516,30 +518,58 @@ def get_gcc_shared_library_arg(): ...@@ -516,30 +518,58 @@ def get_gcc_shared_library_arg():
else: else:
return '-shared' return '-shared'
def std_include_dirs():
return [distutils.sysconfig.get_python_inc()] + numpy.distutils.misc_util.get_numpy_include_dirs()
def std_lib_dirs_and_libs():
python_inc = distutils.sysconfig.get_python_inc()
if sys.platform == 'win32':
# Typical include directory: C:\Python26\include
libname = os.path.basename(os.path.dirname(python_inc)).lower()
# Also add directory containing the Python library to the library
# directories.
python_lib_dir = os.path.join(os.path.dirname(python_inc), 'libs')
lib_dirs = [python_lib_dir]
return [libname], [python_lib_dir]
else:
# Typical include directory: /usr/include/python2.6
libname = os.path.basename(python_inc)
return [libname], []
def std_libs():
return std_lib_dirs_and_libs()[0]
def std_lib_dirs():
return std_lib_dirs_and_libs()[1]
def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[], tmpdir=None): preargs=[]):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:returns: dynamically-imported python module of the compiled code.
"""
#TODO: don't to the dlimport in this function #TODO: don't to the dlimport in this function
if preargs is None: if preargs is None:
preargs = [] preargs = []
else: else:
preargs = list(preargs) preargs = list(preargs)
#backport
#preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC') preargs.append('-fPIC')
no_opt = False no_opt = False
include_dirs = std_include_dirs() + include_dirs
include_dirs = [distutils.sysconfig.get_python_inc()] + \ libs = std_libs() + libs
numpy.distutils.misc_util.get_numpy_include_dirs()\ lib_dirs = std_lib_dirs() + lib_dirs
+ include_dirs
python_inc = distutils.sysconfig.get_python_inc()
if sys.platform == 'win32': if sys.platform == 'win32':
python_inc = distutils.sysconfig.get_python_inc()
# Typical include directory: C:\Python26\include # Typical include directory: C:\Python26\include
libname = os.path.basename(os.path.dirname(python_inc)).lower() libname = os.path.basename(os.path.dirname(python_inc)).lower()
# Also add directory containing the Python library to the library # Also add directory containing the Python library to the library
...@@ -548,6 +578,7 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[] ...@@ -548,6 +578,7 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
lib_dirs = [python_lib_dir] + lib_dirs lib_dirs = [python_lib_dir] + lib_dirs
else: else:
# Typical include directory: /usr/include/python2.6 # Typical include directory: /usr/include/python2.6
python_inc = distutils.sysconfig.get_python_inc()
libname = os.path.basename(python_inc) libname = os.path.basename(python_inc)
...@@ -561,7 +592,8 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[] ...@@ -561,7 +592,8 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
workdir = location workdir = location
cppfilename = os.path.join(workdir, 'mod.cpp')
cppfilename = os.path.join(location, 'mod.cpp')
cppfile = file(cppfilename, 'w') cppfile = file(cppfilename, 'w')
debug('Writing module C++ code to', cppfilename) debug('Writing module C++ code to', cppfilename)
...@@ -571,7 +603,7 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[] ...@@ -571,7 +603,7 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
cppfile.write(src_code) cppfile.write(src_code)
cppfile.close() cppfile.close()
lib_filename = os.path.join(workdir, '%s.%s' % lib_filename = os.path.join(location, '%s.%s' %
(module_name, get_lib_extension())) (module_name, get_lib_extension()))
debug('Generating shared lib', lib_filename) debug('Generating shared lib', lib_filename)
...@@ -586,93 +618,21 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[] ...@@ -586,93 +618,21 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
cmd.extend(['-L%s'%ldir for ldir in lib_dirs]) cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
cmd.extend(['-l%s'%l for l in libs]) cmd.extend(['-l%s'%l for l in libs])
debug('Running cmd', ' '.join(cmd)) debug('Running cmd', ' '.join(cmd))
p = subprocess.Popen(cmd) p = subprocess.Popen(cmd)
status = p.wait() status = p.wait()
if status: if status:
error('g++ return status', status) print '==============================='
else: for i, l in enumerate(src_code.split('\n')):
#touch the __init__ file #gcc put its messages to stderr, so we add ours now
file(os.path.join(workdir, "__init__.py"),'w').close() print >> sys.stderr, '%05i\t%s'%(i+1, l)
print '==============================='
rval = dlimport(lib_filename) raise Exception('g++ return status', status)
return rval
#touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close()
def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], return dlimport(lib_filename)
preargs=[], tmpdir=None):
if preargs is None:
preargs = []
else:
preargs = list(preargs)
#backport
#preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC')
no_opt = False
raise NotImplementedError()
#TODO: -O preargs should be passed globally, not to -Xcompiler
#TODO: where to find these strings? sys? distutils?
include_dirs = ['/usr/include/python2.6'] + include_dirs
libs = ['python2.6', 'cudart'] + libs
lib_dirs = ['/usr/local/cuda/lib']+lib_dirs
workdir = tempfile.mkdtemp(dir=location)
cppfilename = os.path.join(workdir, 'mod.cpp') #.cpp to use g++
cppfilename = os.path.join(workdir, 'mod.cu') #.cu to use nvopencc
cppfile = file(cppfilename, 'w')
debug('Writing module C++ code to', cppfilename)
ofiles = []
rval = None
try:
cppfile.write(src_code)
cppfile.close()
lib_filename = os.path.join(workdir, '%s.%s' %
(module_name, get_lib_extension()))
debug('Generating shared lib', lib_filename)
cmd = ['nvcc', '-shared', '-g']
cmd.extend(['-Xcompiler', ','.join(preargs)])
cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename])
cmd.append(cppfilename)
cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
cmd.extend(['-l%s'%l for l in libs])
debug('Running cmd', ' '.join(cmd))
p = subprocess.Popen(cmd)
status = p.wait()
if status:
warning('nvcc return status', status)
else:
#touch the __init__ file
file(os.path.join(workdir, "__init__.py"),'w').close()
#load the module
sys.path.insert(0, workdir)
try:
rval = __import__(module_name, {}, {}, [module_name])
if not rval:
debug('__import__ failed')
finally:
del sys.path[0]
assert pathcopy == sys.path
finally:
warning("TODO: cleanup")
#os.remove(cppfilename)
for ofile in ofiles:
#os.remove(ofiles[0])
pass
return rval
def icc_module_compile_str(*args): def icc_module_compile_str(*args):
raise NotImplementedError() raise NotImplementedError()
......
...@@ -5,8 +5,6 @@ from type import Type ...@@ -5,8 +5,6 @@ from type import Type
import sys, traceback import sys, traceback
from copy import copy from copy import copy
from cutils import run_cthunk
__excepthook = sys.excepthook __excepthook = sys.excepthook
def thunk_hook(type, value, trace): def thunk_hook(type, value, trace):
......
...@@ -11,8 +11,138 @@ __docformat__ = "restructuredtext en" ...@@ -11,8 +11,138 @@ __docformat__ = "restructuredtext en"
import utils import utils
import traceback import traceback
class CLinkerObject(object):
"""Standard elements of an Op or Type used with the CLinker
"""
def c_headers(self):
"""Optional: Return a list of header files required by code returned by
this class.
For example: return ['<iostream>', '<math.h>', '/full/path/to/header.h']
These strings will be prefixed with "#include " and inserted at the beginning of the c
source code.
Strings in this list that start neither with '<' nor '"' will be enclosed in
double-quotes.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined("c_headers", type(self), self.__class__.__name__)
def c_header_dirs(self):
"""Optional: Return a list of header search paths required by code returned by
this class.
For example: return ['/usr/local/include', '/opt/weirdpath/src/include'].
Provide search paths for headers, in addition to those in any relevant environment
variables.
Hint: for unix compilers, these are the things that get '-I' prefixed in the compiler
cmdline.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined("c_lib_dirs", type(self), self.__class__.__name__)
def c_libraries(self):
"""Optional: Return a list of libraries required by code returned by
this class.
For example: return ['gsl', 'gslcblas', 'm', 'fftw3', 'g2c'].
The compiler will search the directories specified by the environment
variable LD_LIBRARY_PATH in addition to any returned by `c_lib_dirs`.
Hint: for unix compilers, these are the things that get '-l' prefixed in the compiler
cmdline.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined("c_libraries", type(self), self.__class__.__name__)
def c_lib_dirs(self):
"""Optional: Return a list of library search paths required by code returned by
this class.
For example: return ['/usr/local/lib', '/opt/weirdpath/build/libs'].
Provide search paths for libraries, in addition to those in any relevant environment
variables (e.g. LD_LIBRARY_PATH).
Hint: for unix compilers, these are the things that get '-L' prefixed in the compiler
cmdline.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined("c_lib_dirs", type(self), self.__class__.__name__)
def c_support_code(self):
"""Optional: Return utility code for use by a `Variable` or `Op` to be
included at global scope prior to the rest of the code for this class.
class CLinkerOp(object): QUESTION: How many times will this support code be emitted for a graph
with many instances of the same type?
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined("c_support_code", type(self), self.__class__.__name__)
def c_code_cache_version(self):
"""Return a tuple of integers indicating the version of this Op.
An empty tuple indicates an 'unversioned' Op that will not be cached between processes.
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
"""
return (1,)
def c_compile_args(self):
"""Optional: Return a list of compile args recommended to compile the
code returned by other methods in this class.
Example: return ['-ffast-math']
Compiler arguments related to headers, libraries and search paths should be provided
via the functions `c_headers`, `c_libraries`, `c_header_dirs`, and `c_lib_dirs`.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined("c_compile_args", type(self), self.__class__.__name__)
def c_no_compile_args(self):
"""Optional: Return a list of incompatible gcc compiler arguments.
We will remove those arguments from the command line of gcc. So if
another Op adds a compile arg in the graph that is incompatible
with this Op, the incompatible arg will not be used.
Useful for instance to remove -ffast-math.
EXAMPLE
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise utils.MethodNotDefined("c_no_compile_args", type(self), self.__class__.__name__)
class CLinkerOp(CLinkerObject):
""" """
Interface definition for `Op` subclasses compiled by `CLinker`. Interface definition for `Op` subclasses compiled by `CLinker`.
...@@ -83,94 +213,22 @@ class CLinkerOp(object): ...@@ -83,94 +213,22 @@ class CLinkerOp(object):
raise utils.MethodNotDefined('%s.c_code_cleanup' \ raise utils.MethodNotDefined('%s.c_code_cleanup' \
% self.__class__.__name__) % self.__class__.__name__)
def c_compile_args(self): def c_support_code_apply(self, node, name):
"""Optional: Return a list of recommended gcc compiler arguments. """Optional: Return utility code for use by an `Op` that will be inserted at global
scope, that can be specialized for the support of a particular `Apply` node.
QUESTION: is this function optional?
This is only a hint.
EXAMPLE
WRITEME
"""
raise utils.MethodNotDefined('%s.c_compile_args' \
% self.__class__.__name__)
def c_no_compile_args(self):
"""Optional: Return a list of incompatible gcc compiler arguments.
We will remove those arguments from the command line of gcc. So if
another Op adds a compile arg in the graph that is incompatible
with this Op, the incompatible arg will not be used.
Useful for instance to remove -ffast-math.
EXAMPLE :param node: an Apply instance in the graph being compiled
WRITEME :param node_id: a string or number that serves to uniquely identify this node.
Symbol names defined by this support code should include the node_id, so that they can
be called from the c_code, and so that they do not cause name collisions.
:Exceptions: :Exceptions:
- `MethodNotDefined`: the subclass does not override this method - `MethodNotDefined`: Subclass does not implement this method
""" """
raise utils.MethodNotDefined('%s.c_no_compile_args' \ raise utils.MethodNotDefined("c_support_code_apply", type(self), self.__class__.__name__)
% self.__class__.__name__)
def c_headers(self):
"""Optional: Return a list of header files that must be included to compile the C code.
A subclass should override this method.
EXAMPLE
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise utils.MethodNotDefined('%s.c_headers' \
% self.__class__.__name__)
def c_libraries(self):
"""Optional: Return a list of libraries to link against to manipulate this `Op`.
A subclass should override this method.
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise utils.MethodNotDefined('%s.c_libraries' \
% self.__class__.__name__)
def c_support_code(self):
"""Optional: Return support code for use by the code that is returned by `c_code`.
Support code is inserted into the C code at global scope.
A subclass should override this method.
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise utils.MethodNotDefined('%s.c_support_code' \
% self.__class__.__name__)
def c_code_cache_version(self):
"""Return a tuple of integers indicating the version of this Op.
An empty tuple indicates an 'unversioned' Op that will not be cached between processes.
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
"""
return (1,)
class PureOp(object): class PureOp(object):
""" """
......
...@@ -750,6 +750,7 @@ class NavigatorOptimizer(Optimizer): ...@@ -750,6 +750,7 @@ class NavigatorOptimizer(Optimizer):
raise raise
if replacements is False or replacements is None: if replacements is False or replacements is None:
return False return False
assert len(node.outputs) == len(replacements)
repl_pairs = zip(node.outputs, replacements) repl_pairs = zip(node.outputs, replacements)
try: try:
env.replace_all_validate(repl_pairs, reason=lopt) env.replace_all_validate(repl_pairs, reason=lopt)
......
import sys import sys, StringIO
if sys.version_info[:2] >= (2,5): if sys.version_info[:2] >= (2,5):
from collections import defaultdict from collections import defaultdict
...@@ -145,4 +145,14 @@ class SequenceDB(DB): ...@@ -145,4 +145,14 @@ class SequenceDB(DB):
opts.sort(key = lambda obj: self.__priority__[obj.name]) opts.sort(key = lambda obj: self.__priority__[obj.name])
return opt.SeqOptimizer(opts, failure_callback = self.failure_callback) return opt.SeqOptimizer(opts, failure_callback = self.failure_callback)
def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)"%id(self)
print >> stream, " priority", self.__priority__
print >> stream, " names", self._names
print >> stream, " db", self.__db__
def __str__(self):
sio = StringIO.StringIO()
self.print_summary(sio)
return sio.getvalue()
...@@ -127,7 +127,6 @@ class Div(Binary): ...@@ -127,7 +127,6 @@ class Div(Binary):
return x / y return x / y
div = Div() div = Div()
def inputs(): def inputs():
x = double('x') x = double('x')
y = double('y') y = double('y')
...@@ -250,3 +249,33 @@ def test_duallinker_mismatch(): ...@@ -250,3 +249,33 @@ def test_duallinker_mismatch():
raise Exception("An exception should have been raised here!") raise Exception("An exception should have been raised here!")
except MyExc, e: except MyExc, e:
pass pass
################################
# Test that failure code works #
################################
class AddFail(Binary):
def c_code(self, node, name, (x, y), (z, ), sub):
fail=sub['fail']
return """%(z)s = %(x)s + %(y)s;
PyErr_SetString(PyExc_RuntimeError, "failing here");
%(fail)s;""" % locals()
def impl(self, x, y):
return x + y
add_fail = AddFail()
def test_fail_error():
x, y, z = inputs()
x = Constant(tdouble, 7.2, name = 'x')
e = add_fail(mul(x, y), mul(y, z))
lnk = OpWiseCLinker().accept(Env([y, z], [e]))
fn = lnk.make_function()
try:
res = fn(1.5, 3.0)
except RuntimeError:
print 'Yay, TEST PASSED'
return #test passed
assert 0 #test failed
...@@ -12,8 +12,9 @@ import traceback ...@@ -12,8 +12,9 @@ import traceback
######## ########
# Type # # Type #
######## ########
from .op import CLinkerObject
class CLinkerType(object): class CLinkerType(CLinkerObject):
"""Interface specification for Types that can be arguments to a `CLinkerOp`. """Interface specification for Types that can be arguments to a `CLinkerOp`.
A CLinkerType instance is mainly reponsible for providing the C code that A CLinkerType instance is mainly reponsible for providing the C code that
...@@ -176,89 +177,8 @@ class CLinkerType(object): ...@@ -176,89 +177,8 @@ class CLinkerType(object):
""" """
raise MethodNotDefined("c_sync", type(self), self.__class__.__name__) raise MethodNotDefined("c_sync", type(self), self.__class__.__name__)
def c_compile_args(self):
"""Optional: Return a list of compile args recommended to compile the
code returned by other methods in this class.
WRITEME: example of formatting for -I, -L, -f args.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_compile_args", type(self), self.__class__.__name__)
def c_no_compile_args(self):
"""Optional: Return a list of incompatible gcc compiler arguments.
We will remove those arguments from the command line of gcc. So if
another Op adds a compile arg in the graph that is incompatible
with this Op, the incompatible arg will not be used.
Useful for instance to remove -ffast-math.
EXAMPLE
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise MethodNotDefined("c_no_compile_args", type(self), self.__class__.__name__)
def c_headers(self):
"""Optional: Return a list of header files required by code returned by
this class.
WRITEME: example of local file, standard file.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_headers", type(self), self.__class__.__name__)
def c_libraries(self):
"""Optional: Return a list of libraries required by code returned by
this class.
For example: return ['gsl', 'gslcblas', 'm', 'fftw3', 'g2c'].
The compiler will search the directories specified by the environment
variable LD_LIBRARY_PATH. No option is provided for an Op to provide an
extra library directory because this would change the linking path for
other Ops in a potentially disasterous way.
QUESTION: What about via the c_compile_args? a -L option is allowed no?
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_libraries", type(self), self.__class__.__name__)
def c_support_code(self):
"""Optional: Return utility code for use by a `Variable` or `Op` to be
included at global scope prior to the rest of the code for this class.
QUESTION: How many times will this support code be emitted for a graph
with many instances of the same type?
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_support_code", type(self), self.__class__.__name__)
def c_code_cache_version(self):
"""Return a tuple of integers indicating the version of this Op.
An empty tuple indicates an 'unversioned' Op that will not be cached between processes.
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
"""
return (1,)
class PureType(object): class PureType(object):
"""Interface specification for variable type instances. """Interface specification for variable type instances.
......
...@@ -10,26 +10,16 @@ from gof.python25 import all ...@@ -10,26 +10,16 @@ from gof.python25 import all
import gof.utils import gof.utils
import logging import logging
_logger=logging.getLogger("theano.gradient") _logger = logging.getLogger('theano.gradient')
_logger.setLevel(logging.WARN) def warning(*msg):
_logger.warning('WARNING theano.gradient: '+' '.join(msg))
def error(*args): def info(*msg):
#sys.stderr.write('ERROR:'+ ' '.join(str(a) for a in args)+'\n') _logger.info('INFO theano.gradient: '+' '.join(msg))
_logger.error("ERROR: "+' '.join(str(a) for a in args))
def warning(*args):
#sys.stderr.write('WARNING:'+ ' '.join(str(a) for a in args)+'\n')
_logger.warning("WARNING: "+' '.join(str(a) for a in args))
def info(*args):
#sys.stderr.write('INFO:'+ ' '.join(str(a) for a in args)+'\n')
_logger.info("INFO: "+' '.join(str(a) for a in args))
def debug(*args):
#sys.stderr.write('DEBUG:'+ ' '.join(str(a) for a in args)+'\n')
_logger.debug("DEBUG: "+' '.join(str(a) for a in args))
_msg_retType = 'op.grad(...) returned a non-list' _msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients' _msg_badlen = 'op.grad(...) returned wrong number of gradients'
def grad_sources_inputs(sources, graph_inputs): def grad_sources_inputs(sources, graph_inputs, warn_type=True):
""" """
A gradient source is a pair (``r``, ``g_r``), in which ``r`` is a `Variable`, and ``g_r`` is a A gradient source is a pair (``r``, ``g_r``), in which ``r`` is a `Variable`, and ``g_r`` is a
`Variable` that is a gradient wrt ``r``. `Variable` that is a gradient wrt ``r``.
...@@ -114,6 +104,11 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -114,6 +104,11 @@ def grad_sources_inputs(sources, graph_inputs):
len(g_inputs), len(g_inputs),
len(node.inputs)) len(node.inputs))
for ii, (r, g_r) in enumerate(zip(node.inputs, g_inputs)): for ii, (r, g_r) in enumerate(zip(node.inputs, g_inputs)):
if warn_type:
if g_r and (getattr(r,'type',0) != getattr(g_r,'type', 1)):
r_type = getattr(r,'type', None)
g_r_type = getattr(g_r,'type', None)
info('%s.grad returned a different type for input %i: %s vs. %s'%(node.op, ii, r_type, g_r_type))
if g_r and len(sources) == 1 and sources[0][0].name and r.name: if g_r and len(sources) == 1 and sources[0][0].name and r.name:
g_r.name = "(d%s/d%s)" % (sources[0][0].name, r.name) g_r.name = "(d%s/d%s)" % (sources[0][0].name, r.name)
if g_r is not None: if g_r is not None:
......
...@@ -47,7 +47,6 @@ class ConvOp(Op): ...@@ -47,7 +47,6 @@ class ConvOp(Op):
unroll_batch - c code generation option unroll_batch - c code generation option
unroll_kern - c code generation option unroll_kern - c code generation option
The reason that this op does the summation over convolutions within the 'stack' is that The reason that this op does the summation over convolutions within the 'stack' is that
it allows us to be memory-efficient about how gradients are calculated. If, for it allows us to be memory-efficient about how gradients are calculated. If, for
example, we had a convolution op that took a list of images, a list of kernels, and example, we had a convolution op that took a list of images, a list of kernels, and
...@@ -92,14 +91,25 @@ class ConvOp(Op): ...@@ -92,14 +91,25 @@ class ConvOp(Op):
if self.bsize<=self.unroll_batch: if self.bsize<=self.unroll_batch:
self.unroll_batch = self.bsize self.unroll_batch = self.bsize
else: else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a divisor of bsize(%s). We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize)) #find the maximum value under unroll_batch that would work
self.unroll_batch=1 new=self.unroll_batch
assert(new>=1)
while self.bsize % new!=0:
new-=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a divisor of bsize(%s). We revert it to %d. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize),new)
self.unroll_batch=new
if self.unroll_kern>0 and self.nkern % unroll_kern!=0: if self.unroll_kern>0 and self.nkern % unroll_kern!=0:
if self.nkern<=self.unroll_kern: if self.nkern<=self.unroll_kern:
self.unroll_kern = self.nkern self.unroll_kern = self.nkern
else: else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a divisor of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern)) #find the maximum value under unroll_kern that would work
self.unroll_kern=1 new=self.unroll_kern
assert(new>=1)
while self.nkern % new!=0:
new-=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a divisor of nkern(%s)We revert it to %d. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern),new)
self.unroll_kern=new
self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode) self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode) self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode)
self.out_mode = output_mode self.out_mode = output_mode
...@@ -137,6 +147,33 @@ class ConvOp(Op): ...@@ -137,6 +147,33 @@ class ConvOp(Op):
def __str__(self): def __str__(self):
return "ConvOp{" +",".join(str((a, getattr(self, a))) for a in self.__attrnames) + "}" return "ConvOp{" +",".join(str((a, getattr(self, a))) for a in self.__attrnames) + "}"
def set_flops(self):
""" Usefull with the hack in profilemode to print the MFlops"""
if self.out_mode=="valid":
self.flops=self.kshp[0]*self.kshp[1]*2#nb mul and add by output pixed
self.flops*=self.outshp[0]*self.outshp[1]#nb flops by output image
self.flops*=self.imshp[0]*self.nkern*self.bsize#for all outputs images#n_stack==self.imshp[0]
else: #full mode not implemented
self.flops=0
for out_row in range(self.outshp[0]):#loop over output row
for out_col in range(self.outshp[0]):#loop over output col
for row in range(self.kshp[0]):#loop over kern row
if row+out_row-self.kshp[0]+1<0 or row+out_row-self.kshp[0]+1>=self.imshp[1]: continue
col=0
max_col=self.kshp[1]
img_col=out_col-self.kshp[1]+1
max_col=min(max_col,self.imshp[2]-img_col)
if img_col<0:
col=-img_col
img_col+=col
while col < max_col: #loop over kern col
self.flops+=1
col+=1
self.flops*=self.imshp[0]*self.nkern*self.bsize#for all outputs images#n_stack==self.imshp[0]
def make_node(self, inputs, kerns): def make_node(self, inputs, kerns):
# TODO: find a way to make ConvOp work for N-D (after NIPS09) # TODO: find a way to make ConvOp work for N-D (after NIPS09)
""" """
...@@ -188,7 +225,6 @@ class ConvOp(Op): ...@@ -188,7 +225,6 @@ class ConvOp(Op):
buf = N.zeros((batchsize,)+ self.imshp_logical, dtype=img2d.dtype) buf = N.zeros((batchsize,)+ self.imshp_logical, dtype=img2d.dtype)
buf[:,:,::rstride, ::cstride] = img2d buf[:,:,::rstride, ::cstride] = img2d
img2d = buf img2d = buf
print 'A'
del buf, rstride, cstride del buf, rstride, cstride
if self.kshp != self.kshp_logical: if self.kshp != self.kshp_logical:
...@@ -204,7 +240,6 @@ class ConvOp(Op): ...@@ -204,7 +240,6 @@ class ConvOp(Op):
assert coffset >= 0 assert coffset >= 0
buf[:,:,roffset::rstride, coffset::cstride] = filtersflipped buf[:,:,roffset::rstride, coffset::cstride] = filtersflipped
filtersflipped = buf filtersflipped = buf
print 'B'
del buf, rstride, cstride del buf, rstride, cstride
for b in range(batchsize): for b in range(batchsize):
...@@ -293,7 +328,10 @@ class ConvOp(Op): ...@@ -293,7 +328,10 @@ class ConvOp(Op):
unroll_batch=un_b, unroll_kern=un_k, unroll_batch=un_b, unroll_kern=un_k,
imshp_logical=imshp_logical, imshp_logical=imshp_logical,
kshp_logical=kshp_logical, kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned)(img,filters) kshp_logical_top_aligned=kshp_logical_top_aligned)
if hasattr(self,'flops'):
dw.set_flops()
dw = dw(img,filters)
assert (dw.owner.op.outshp==self.kshp).all() assert (dw.owner.op.outshp==self.kshp).all()
if self.out_mode == 'valid': if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1] # before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
...@@ -311,7 +349,10 @@ class ConvOp(Op): ...@@ -311,7 +349,10 @@ class ConvOp(Op):
1,1, output_mode=mode, 1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k, unroll_batch=un_b, unroll_kern=un_k,
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1]), imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1]),
kshp_logical=None)(gz,filters) kshp_logical=None)
if hasattr(self,'flops'):
din.set_flops()
din = din(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all() assert (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw] return [din, dw]
......
""" Ops for downsampling images.
Planned:
DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax.
"""
#This file should move along with conv.py
from theano import sparse, gof, Op, tensor, Variable, Apply
from theano.printing import Print
class DownsampleFactorMaxGrad(Op):
def __init__(self, ds, ignore_border):
self.ds = tuple(ds)
self.ignore_border = ignore_border
def __eq__(self, other):
return type(self) == type(other) and self.ds == other.ds and self.ignore_border == other.ignore_border
def __hash__(self):
return hash(type(self)) ^ hash(self.ds) ^ hash(self.ignore_border)
def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of DownsampleFactorMax,
# so these asserts should not fail.
assert isinstance(x, Variable) and x.ndim==4
assert isinstance(maxout, Variable) and maxout.ndim==4
assert isinstance(gz, Variable) and gz.ndim==4
return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, (x, maxout, gz), (gx_stg,)):
gx = N.zeros_like(x)
ds0, ds1 = self.ds
shape2 = (x.shape[2] / ds0 * ds0) if self.ignore_border else x.shape[2]
shape3 = (x.shape[3] / ds1 * ds1) if self.ignore_border else x.shape[3]
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for i in xrange(shape2):
zi = i / ds0
for j in xrange(shape3):
zj = j / ds1
gx[n,k,i,j] = gz[n,k,zi,zj] if (maxout[n,k,zi,zj] == x[n,k,i,j]) else 0
gx_stg[0] = gx
def c_code(self, node, name, (x, z, gz), (gx,), sub):
fail = sub['fail']
self_ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
return """
int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)%(z)s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
int x_shp0_usable;
int x_shp1_usable;
int z_shp0, z_shp1;
if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)s;
}
if(%(x)s->nd!=4)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s;
}
if(%(z)s->nd!=4)
{
PyErr_SetString(PyExc_ValueError, "z must be a 4d ndarray");
%(fail)s;
}
if(%(gz)s->nd!=4)
{
PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray");
%(fail)s;
}
z_shp0 = %(z)s->dimensions[2];
z_shp1 = %(z)s->dimensions[3];
if (%(self_ignore_border)s)
{
x_shp0_usable = z_shp0 * %(ds0)s;
x_shp1_usable = z_shp1 * %(ds1)s;
}
else
{
x_shp0_usable = %(x)s->dimensions[2];
x_shp1_usable = %(x)s->dimensions[3];
}
if ((!%(gx)s)
|| *PyArray_DIMS(%(gx)s)!=4
||(%(gx)s->dimensions[0] != %(x)s->dimensions[0])
||(%(gx)s->dimensions[1] != %(x)s->dimensions[1])
||(%(gx)s->dimensions[2] != %(x)s->dimensions[2])
||(%(gx)s->dimensions[3] != %(x)s->dimensions[3])
)
{
Py_XDECREF(%(gx)s);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(4, %(x)s->dimensions, x_typenum,0);
}
for(int b=0;b<%(x)s->dimensions[0];b++){
for(int k=0;k<%(x)s->dimensions[1];k++){
int mini_i = 0;
int zi = 0;
for(int i=0;i< x_shp0_usable; i++){
int mini_j = 0;
int zj = 0;
for(int j=0; j< x_shp1_usable; j++){
dtype_%(x)s * __restrict__ xp = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,i,j)));
dtype_%(gx)s * __restrict__ gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
dtype_%(z)s * __restrict__ zp = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,zi,zj)));
dtype_%(gz)s * __restrict__ gzp = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s,b,k,zi,zj)));
gxp[0] = (zp[0] == xp[0]) ? gzp[0] : 0;
mini_j = (mini_j + 1 == %(ds1)s) ? 0 : mini_j+1;
zj += (mini_j == 0);
}//for j
mini_i = (mini_i + 1 == %(ds0)s) ? 0 : mini_i+1;
zi += (mini_i == 0);
for (int j = x_shp1_usable; j < %(x)s->dimensions[3]; ++j) {
dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
gxp[0] = 0;
}
}//for i
for(int i = x_shp0_usable; i < %(x)s->dimensions[2]; i++){
for (int j = 0; j < %(x)s->dimensions[3]; ++j) {
dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
gxp[0] = 0;
}
}
}//for k
}//for b
""" %locals()
class DownsampleFactorMax(Op):
"""
For N-dimensional tensors, consider that the last two dimensions span images.
This Op downsamples these images by taking the max over non-overlapping rectangular regions.
TODO: what ignore_border do?
"""
def out_shape(imgshape, ignore_border=False):
#old code not tested (not evenread)
#TODO:DON'T use ignore_border!!!
rval = [imgshape[0], imgshape[1], imgshape[2]/self.ds[0], imgshape[3]/self.ds[1]]
if imgshape[2] % self.ds[0]:
rval[2] += 1
if imgshape[3] % self.ds[1]:
rval[3] += 1
return tuple(rval)
def __init__(self, ds, ignore_border=False):
self.ds = tuple(ds)
self.ignore_border = ignore_border
def __eq__(self, other):
return type(self) == type(other) and self.ds == other.ds
def __hash__(self):
return hash(type(self)) ^ hash(self.ds)
def make_node(self, x):
dmatrix4 = tensor.TensorType(x.type.dtype, (False, False, False, False))
if x.type != dmatrix4:
raise NotImplementedError()
return gof.Apply(self, [x], [dmatrix4()])
def perform(self, node, (x,), (z,)):
"""
"""
if len(x.shape)!=4:
raise NotImplementedError('DownsampleFactorMax requires 4D input for now')
if z[0] is None:
z[0] = N.zeros(self.out_shape(x.shape, self.ignore_border)) -float('inf')
zz=z[0]
ds0, ds1 = self.ds
x_usable2 = (x.shape[2] / ds0 * ds0) if self.ignore_border else x.shape[2]
x_usable3 = (x.shape[3] / ds1 * ds1) if self.ignore_border else x.shape[3]
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for i in xrange(x_usable2):
zi = i / ds0
for j in xrange(x_usable3):
zj = j / ds1
zz[n,k,zi,zj] = __builtin__.max(zz[n,k,zi,zj], x[n,k,i,j])
def grad(self,(x,), (gz,)):
maxout = self(x)
return [DownsampleFactorMaxGrad(self.ds, ignore_border=self.ignore_border)(x, maxout, gz)]
def c_code(self, node, name, (x,), (z, ), sub):
fail=sub['fail']
self_ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
return """
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int x_shp0_usable;
int x_shp1_usable;
int z_shp0, z_shp1;
if(%(x)s->nd!=4)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s;
}
z_shp0 = %(x)s->dimensions[2] / %(ds0)s;
z_shp1 = %(x)s->dimensions[3] / %(ds1)s;
if (%(self_ignore_border)s)
{
x_shp0_usable = z_shp0 * %(ds0)s;
x_shp1_usable = z_shp1 * %(ds1)s;
}
else
{
z_shp0 += (%(x)s->dimensions[2] %% %(ds0)s) ? 1 : 0;
z_shp1 += (%(x)s->dimensions[3] %% %(ds1)s) ? 1 : 0;
x_shp0_usable = %(x)s->dimensions[2];
x_shp1_usable = %(x)s->dimensions[3];
}
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(x)s->dimensions[0])
||(%(z)s->dimensions[1] != %(x)s->dimensions[1])
||(%(z)s->dimensions[2] != z_shp0)
||(%(z)s->dimensions[3] != z_shp1)
)
{
if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(x)s->dimensions[0];
dims[1]=%(x)s->dimensions[1];
dims[2]=z_shp0;
dims[3]=z_shp1;
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0); //TODO: zeros not necessary
}
for(int b=0;b<%(x)s->dimensions[0];b++){
for(int k=0;k<%(x)s->dimensions[1];k++){
int mini_i = 0;
int zi = 0;
for(int i=0;i< x_shp0_usable; i++){
int mini_j = 0;
int zj = 0;
for(int j=0; j<x_shp1_usable; j++){
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,i,j)))[0];
dtype_%(z)s * __restrict__ z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,zi,zj)));
z[0] = (((mini_j|mini_i) == 0) || z[0] < a) ? a : z[0];
mini_j = ((mini_j + 1) == %(ds1)s) ? 0 : mini_j+1;
zj += (mini_j == 0);
}
mini_i = ((mini_i + 1) == %(ds0)s) ? 0 : mini_i+1;
zi += (mini_i == 0);
}
}
}
""" % locals()
...@@ -31,10 +31,25 @@ def as_scalar(x, name = None): ...@@ -31,10 +31,25 @@ def as_scalar(x, name = None):
def constant(x): def constant(x):
if isinstance(x, float): if isinstance(x, float):
return ScalarConstant(float64, x) for dtype in ['float32', 'float64']:
x_ = numpy.asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
if isinstance(x, int): if isinstance(x, int):
return ScalarConstant(int64, x) for dtype in ['int8', 'int16', 'int32', 'int64']:
return ScalarConstant(float64, float(x)) x_ = numpy.asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
if isinstance(x, complex):
raise NotImplementedError()
raise TypeError(x)
#return ScalarConstant(float64, float(x))
class Scalar(Type): class Scalar(Type):
...@@ -206,9 +221,9 @@ class _scalar_py_operators: ...@@ -206,9 +221,9 @@ class _scalar_py_operators:
def __neg__(self): return neg(self) def __neg__(self): return neg(self)
#CASTS #CASTS
def __int__(self): return AsInt(self).out #def __int__(self): return AsInt(self).out
def __float__(self): return AsInt(self).out #def __float__(self): return AsDouble(self).out
def __complex__(self): return AsComplex(self).out #def __complex__(self): return AsComplex(self).out
#BITWISE #BITWISE
def __invert__(self): return invert(self) def __invert__(self): return invert(self)
......
差异被折叠。
差异被折叠。
...@@ -136,7 +136,8 @@ class DimShuffle(Op): ...@@ -136,7 +136,8 @@ class DimShuffle(Op):
self.__dict__.update(d) self.__dict__.update(d)
self._rehash() self._rehash()
def make_node(self, input): def make_node(self, _input):
input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable) ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable: if not ib == self.input_broadcastable:
raise TypeError("The number of dimensions and/or broadcastable pattern of the input is incorrect for this op. Expected %s, got %s." % (self.input_broadcastable, ib)) raise TypeError("The number of dimensions and/or broadcastable pattern of the input is incorrect for this op. Expected %s, got %s." % (self.input_broadcastable, ib))
...@@ -659,7 +660,7 @@ class Elemwise(Op): ...@@ -659,7 +660,7 @@ class Elemwise(Op):
task_code = self.scalar_op.c_code(Apply(self.scalar_op, task_code = self.scalar_op.c_code(Apply(self.scalar_op,
[Scalar(dtype = input.type.dtype)() for input in node.inputs], [Scalar(dtype = input.type.dtype)() for input in node.inputs],
[Scalar(dtype = output.type.dtype)() for input in node.outputs]), [Scalar(dtype = output.type.dtype)() for output in node.outputs]),
name + '_scalar_', name + '_scalar_',
["%s_i" % s for s in _inames], ["%s_i" % s for s in _inames],
["%s_i" % s for s in onames], ["%s_i" % s for s in onames],
...@@ -687,6 +688,9 @@ class Elemwise(Op): ...@@ -687,6 +688,9 @@ class Elemwise(Op):
def c_support_code(self): def c_support_code(self):
return self.scalar_op.c_support_code() return self.scalar_op.c_support_code()
def c_code_cache_version(self):
return (4,)
# def elemwise_to_scal(env): # def elemwise_to_scal(env):
# mapping = {} # mapping = {}
# inputs = [] # inputs = []
...@@ -886,11 +890,12 @@ class Sum(CAReduce): ...@@ -886,11 +890,12 @@ class Sum(CAReduce):
CAReduce.__init__(self, scalar.add, axis) CAReduce.__init__(self, scalar.add, axis)
def _output_dtype(self, idtype): def _output_dtype(self, idtype):
if idtype.startswith('int'): # we want to protect against overflow
return 'int64' #we want to protect against overflow return dict(
else: int8='int32',
return idtype int16='int32',
int32='int64',
).get(idtype, idtype)
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
gz = as_tensor_variable(gz) gz = as_tensor_variable(gz)
......
...@@ -57,20 +57,48 @@ def make_checks(loop_orders, dtypes, sub): ...@@ -57,20 +57,48 @@ def make_checks(loop_orders, dtypes, sub):
""" % locals() """ % locals()
adjust = [] adjust = []
check = "" check = ""
for matches in zip(*loop_orders): if 0:
to_compare = [(j, x) for j, x in enumerate(matches) if x != "x"] # original dimension-checking loop builds a single if condition, and if it is true, it
if len(to_compare) < 2: # raises a generic error message
continue for matches in zip(*loop_orders):
j, x = to_compare[0] to_compare = [(j, x) for j, x in enumerate(matches) if x != "x"]
first = "%%(lv%(j)s)s_n%(x)s" % locals() if len(to_compare) < 2:
cond = " || ".join(["%(first)s != %%(lv%(j)s)s_n%(x)s" % locals() for j, x in to_compare[1:]]) continue
if cond: j, x = to_compare[0]
check += """ first = "%%(lv%(j)s)s_n%(x)s" % locals()
if (%(cond)s) { cond = " || ".join(["%(first)s != %%(lv%(j)s)s_n%(x)s" % locals() for j, x in to_compare[1:]])
PyErr_SetString(PyExc_ValueError, "Input dimensions do not match (Try re-running with py linker for more information)."); if cond:
%%(fail)s check += """
} if (%(cond)s) {
""" % locals() PyErr_SetString(PyExc_ValueError, "Input dimensions do not match (Try re-running with py linker for more information).");
%%(fail)s
}
""" % locals()
else:
# revised dimension-checking loop build multiple if conditions, and the first one that
# is true raises a more informative error message
for matches in zip(*loop_orders):
to_compare = [(j, x) for j, x in enumerate(matches) if x != "x"]
#elements of to_compare are pairs ( input_variable_idx, input_variable_dim_idx )
if len(to_compare) < 2:
continue
j0, x0 = to_compare[0]
for (j, x) in to_compare[1:]:
check += """
if (%%(lv%(j0)s)s_n%(x0)s != %%(lv%(j)s)s_n%(x)s)
{
PyErr_Format(PyExc_ValueError, "Input dimension mis-match. (input[%%%%i].shape[%%%%i] = %%%%i, input[%%%%i].shape[%%%%i] = %%%%i)",
%(j0)s,
%(x0)s,
%%(lv%(j0)s)s_n%(x0)s,
%(j)s,
%(x)s,
%%(lv%(j)s)s_n%(x)s
);
%%(fail)s
}
""" % locals()
return init % sub + check % sub return init % sub + check % sub
......
...@@ -77,7 +77,6 @@ pprint.assign(softplus, printing.FunctionPrinter('softplus')) ...@@ -77,7 +77,6 @@ pprint.assign(softplus, printing.FunctionPrinter('softplus'))
# TENSOR OPS # TENSOR OPS
# #
class SoftmaxWithBias(gof.Op): class SoftmaxWithBias(gof.Op):
""" """
An L{Op} for the output of neural-net multiclass classifiers. An L{Op} for the output of neural-net multiclass classifiers.
...@@ -133,6 +132,8 @@ class SoftmaxWithBias(gof.Op): ...@@ -133,6 +132,8 @@ class SoftmaxWithBias(gof.Op):
def c_headers(self): def c_headers(self):
return ['<iostream>','<cmath>'] return ['<iostream>','<cmath>']
def c_code_cache_version(self):
return ()
@staticmethod @staticmethod
def c_code_template(): def c_code_template():
# this implementation was lifted from # this implementation was lifted from
...@@ -157,14 +158,14 @@ class SoftmaxWithBias(gof.Op): ...@@ -157,14 +158,14 @@ class SoftmaxWithBias(gof.Op):
PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s; %(fail)s;
} }
if (%(x)s->descr->type_num != PyArray_DOUBLE) if ((%(x)s->descr->type_num != PyArray_DOUBLE)&&(%(x)s->descr->type_num != PyArray_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "a not float64"); PyErr_SetString(PyExc_TypeError, "a not float");
%(fail)s; %(fail)s;
} }
if (%(b)s->descr->type_num != PyArray_DOUBLE) if ((%(b)s->descr->type_num != PyArray_DOUBLE) && (%(b)s->descr->type_num != PyArray_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "b not float64"); PyErr_SetString(PyExc_TypeError, "b not float");
%(fail)s; %(fail)s;
} }
if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])) if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
...@@ -193,22 +194,22 @@ class SoftmaxWithBias(gof.Op): ...@@ -193,22 +194,22 @@ class SoftmaxWithBias(gof.Op):
double sum = 0.0; double sum = 0.0;
bool discount_max = false; bool discount_max = false;
const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * i);
const double* __restrict__ b_i = (double*)(%(b)s->data); const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(%(b)s->data);
double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
""" """
inside_row_loop = """ inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(double); npy_intp Sx = %(x)s->strides[1]/sizeof(dtype_%(x)s);
npy_intp Sb = %(b)s->strides[0]/sizeof(double); npy_intp Sb = %(b)s->strides[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0; size_t row_max_j=0;
double row_max = x_i[0] + b_i[0]; dtype_%(sm)s row_max = x_i[0] + b_i[0];
// Get the maximum value of the row // Get the maximum value of the row
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
double row_ij = x_i[j * Sx] + b_i[j * Sb]; dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
// std::cout << "1" << row_ij << "\\n"; // std::cout << "1" << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j; row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max; row_max = (row_ij > row_max) ? row_ij : row_max;
...@@ -216,9 +217,9 @@ class SoftmaxWithBias(gof.Op): ...@@ -216,9 +217,9 @@ class SoftmaxWithBias(gof.Op):
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
double row_ij = x_i[j * Sx] + b_i[j * Sb]; dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
// std::cout << "2" << row_ij << "\\n"; // std::cout << "2" << row_ij << "\\n";
double sm_ij = exp(row_ij - row_max); dtype_%(sm)s sm_ij = exp(row_ij - row_max);
// std::cout << "3" << sm_ij << "\\n"; // std::cout << "3" << sm_ij << "\\n";
sum += sm_ij; sum += sm_ij;
sm_i[j * Ssm] = sm_ij; sm_i[j * Ssm] = sm_ij;
...@@ -292,12 +293,18 @@ class SoftmaxGrad(gof.Op): ...@@ -292,12 +293,18 @@ class SoftmaxGrad(gof.Op):
def grad(self, *args): def grad(self, *args):
raise NotImplementedError() raise NotImplementedError()
def c_code_cache_version(self):
return ()
def c_code(self, node, name, (dy, sm), (dx,), sub): def c_code(self, node, name, (dy, sm), (dx,), sub):
return ''' return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && (%(dy)s->descr->type_num != PyArray_FLOAT))
|| (%(sm)s->descr->type_num != PyArray_DOUBLE)) {
PyErr_SetString(PyExc_TypeError, "types should be float or float64");
%(fail)s;
}
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) && (%(sm)s->descr->type_num != PyArray_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "types should be float64, float64"); PyErr_SetString(PyExc_TypeError, "types should be float or float64");
%(fail)s; %(fail)s;
} }
if ((%(dy)s->nd != 2) if ((%(dy)s->nd != 2)
...@@ -327,12 +334,12 @@ class SoftmaxGrad(gof.Op): ...@@ -327,12 +334,12 @@ class SoftmaxGrad(gof.Op):
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
{ {
const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i); const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + %(dy)s->strides[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(double); npy_intp Sdy = %(dy)s->strides[1]/sizeof(dtype_%(dy)s);
const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i); const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i); dtype_%(dx)s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
double sum_dy_times_sm = 0.; double sum_dy_times_sm = 0.;
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
...@@ -505,7 +512,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -505,7 +512,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
raise ValueError('y_idx must have same number of rows as x') raise ValueError('y_idx must have same number of rows as x')
sm = numpy.zeros_like(x) # softmax sm = numpy.zeros_like(x) # softmax
nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) nll = numpy.zeros(x.shape[0], dtype=node.outputs[0].type.dtype) #nll(y | softmax(x))
am = numpy.zeros_like(y_idx) am = numpy.zeros_like(y_idx)
for i in xrange(sm.shape[0]): for i in xrange(sm.shape[0]):
#add the bias vector to the i'th row of x #add the bias vector to the i'th row of x
...@@ -600,7 +607,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -600,7 +607,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
begin_row_loop, begin_row_loop,
""" """
const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); dtype_%(nll)s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + %(nll)s->strides[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i); %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
""", """,
inside_row_loop, inside_row_loop,
...@@ -619,6 +626,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -619,6 +626,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
end_row_loop) end_row_loop)
def c_code_cache_version(self):
return ()
def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub): def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type am_type = y_idx_type
...@@ -649,15 +658,20 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -649,15 +658,20 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
output_storage[0][0] = dx output_storage[0][0] = dx
def grad(self, *args): def grad(self, *args):
raise NotImplementedError() raise NotImplementedError()
def c_code_cache_version(self):
return ()
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """ return """
if ((%(dnll)s->descr->type_num != PyArray_DOUBLE) if ((%(dnll)s->descr->type_num != PyArray_DOUBLE) && (%(dnll)s->descr->type_num != PyArray_FLOAT))
|| (%(sm)s->descr->type_num != PyArray_DOUBLE) {
) PyErr_SetString(PyExc_TypeError, "dnll type should be float32 or float64");
%(fail)s;
}
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) && (%(sm)s->descr->type_num != PyArray_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "types should be float64, float64, int64"); PyErr_SetString(PyExc_TypeError, "sm type should be float32 or float64");
%(fail)s; %(fail)s;
} }
if ((%(y_idx)s->descr->type_num != PyArray_INT64) if ((%(y_idx)s->descr->type_num != PyArray_INT64)
...@@ -699,15 +713,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -699,15 +713,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
{ {
const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0]; const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
double* __restrict__ dx_i = (double*)(%(dx)s->data + %(dx)s->strides[0] * i); dtype_%(dx)s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
{ {
...@@ -803,7 +817,8 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -803,7 +817,8 @@ class CrossentropyCategorical1Hot(gof.Op):
'(got type: %s instead of: %s)' % (_true_one_of_n.type, '(got type: %s instead of: %s)' % (_true_one_of_n.type,
tensor.lvector)) tensor.lvector))
return gof.Apply(self, [_coding_dist, _true_one_of_n], [tensor.dvector()]) return gof.Apply(self, [_coding_dist, _true_one_of_n],
[tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()])
def perform(self, node, (coding, one_of_n), (y_out,)): def perform(self, node, (coding, one_of_n), (y_out,)):
y = numpy.zeros_like(coding[:,0]) y = numpy.zeros_like(coding[:,0])
......
...@@ -114,7 +114,7 @@ def local_dimshuffle_lift(node): ...@@ -114,7 +114,7 @@ def local_dimshuffle_lift(node):
input = node.inputs[0] input = node.inputs[0]
inode = input.owner inode = input.owner
if inode and isinstance(inode.op, Elemwise): if inode and isinstance(inode.op, Elemwise) and (len(input.clients)==1):
return inode.op.make_node(*[DimShuffle(input.type.broadcastable, return inode.op.make_node(*[DimShuffle(input.type.broadcastable,
op.new_order, op.new_order,
op.inplace)(input) for input in inode.inputs]).outputs op.inplace)(input) for input in inode.inputs]).outputs
......
...@@ -143,7 +143,11 @@ class RandomFunction(gof.Op): ...@@ -143,7 +143,11 @@ class RandomFunction(gof.Op):
# build the inputs to this Apply by overlaying args on self.args # build the inputs to this Apply by overlaying args on self.args
inputs = [] inputs = []
for arg, default in zip(args, self.args): for arg, default in zip(args, self.args):
assert arg is None or default.type.dtype == arg.type.dtype # The NAACL test is failing because of this assert.
# I am commenting out the requirement that the dtypes match because it doesn't seem
# to me to be necessary (although I agree it is typically true).
# -JB 20090819
#assert arg is None or default.type.dtype == arg.type.dtype
if arg is None: if arg is None:
input = default input = default
else: else:
......
...@@ -4,14 +4,11 @@ from theano.gof import Env ...@@ -4,14 +4,11 @@ from theano.gof import Env
from theano.printing import pp from theano.printing import pp
import numpy import numpy
from theano.tensor.blas import * from theano.tensor.blas import *
from theano.tensor.blas import _dot22, res_is_a from theano.tensor.blas import _dot22, res_is_a, _as_scalar, _is_real_matrix
from unittest import TestCase from unittest import TestCase
from theano.tests import unittest_tools from theano.tests import unittest_tools
from copy import copy from copy import copy
_as_scalar = GemmLocalOptimizer._as_scalar
_is_real_matrix = GemmLocalOptimizer._is_real_matrix
from theano import In, Out from theano import In, Out
from test_basic import (_approx_eq, as_tensor_variable, inplace_func, from test_basic import (_approx_eq, as_tensor_variable, inplace_func,
compile, value, constant, inplace, eval_outputs) compile, value, constant, inplace, eval_outputs)
......
...@@ -206,18 +206,35 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -206,18 +206,35 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
print 'BEFORE' print 'BEFORE'
for node in env.toposort(): for node in env.toposort():
print node.op print node.op, node.inputs
print '----' print '----'
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env) theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER' print 'AFTER'
for node in env.toposort(): for node in env.toposort():
print node.op print node.op, node.inputs
assert env.toposort()[3].op == crossentropy_softmax_argmax_1hot_with_bias # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
assert env.toposort()[5].op == crossentropy_softmax_1hot_with_bias_dx # cleaned up as well as we'd like.
assert len(env.toposort()) == 6 #shorthand for actually checking what I really has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
has_softmaxdx = False
for node in env.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
assert not has_softmaxdx
def test_argmax_pushdown(): def test_argmax_pushdown():
x = tensor.dmatrix() x = tensor.dmatrix()
......
...@@ -10,7 +10,9 @@ from theano.gradient import * ...@@ -10,7 +10,9 @@ from theano.gradient import *
from theano import gradient from theano import gradient
_grad_sources_inputs = grad_sources_inputs def _grad_sources_inputs(*args):
# warn_type was introduced after this code, it complains throughout for nothing.
return grad_sources_inputs(warn_type=False, *args)
class test_grad_sources_inputs(unittest.TestCase): class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self): def test_retNone1(self):
...@@ -148,7 +150,7 @@ class test_grad_sources_inputs(unittest.TestCase): ...@@ -148,7 +150,7 @@ class test_grad_sources_inputs(unittest.TestCase):
return [1] return [1]
i = gof.generic() i = gof.generic()
a1 = O(self).make_node(i) a1 = O(self).make_node(i)
g = grad_sources_inputs([(a1.outputs[0], 1)], None) g = grad_sources_inputs([(a1.outputs[0], 1)], None, warn_type=False)
self.failUnless(g[i] is 1) self.failUnless(g[i] is 1)
def test_some_None_igrads(self): def test_some_None_igrads(self):
...@@ -170,7 +172,7 @@ class test_grad_sources_inputs(unittest.TestCase): ...@@ -170,7 +172,7 @@ class test_grad_sources_inputs(unittest.TestCase):
k = gof.generic() k = gof.generic()
a1 = O(self, True).make_node(i,j) a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(a1.outputs[1], k) a2 = O(self, True).make_node(a1.outputs[1], k)
g = grad_sources_inputs([(a2.outputs[0], 1)], None) g = grad_sources_inputs([(a2.outputs[0], 1)], None, warn_type=False)
self.failUnless(g[i] is 1 and j not in g and k not in g) self.failUnless(g[i] is 1 and j not in g and k not in g)
a1 = O(self, True).make_node(i,j) a1 = O(self, True).make_node(i,j)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论