Commit 041e23b1 authored by Frederic Bastien

merge changeset 9b9f8dd051ed

......@@ -12,6 +12,7 @@ import numpy
from .. import gof
import sys
import copy
import time
import mode as mode_module
from io import *
......@@ -788,6 +789,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
f[<kitname>] = seed #re-seed the elements of a RandomKit
"""
t1 = time.time()
mode = mode if mode is not None else mode_module.default_mode
......@@ -819,6 +821,9 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
Maker = getattr(mode, 'function_maker', FunctionMaker)
fn = Maker(inputs, outputs, mode, accept_inplace = accept_inplace).create(defaults)
t2 = time.time()
if hasattr(mode, 'compile_time'):
mode.compile_time+=t2-t1
return fn
......
......@@ -9,8 +9,10 @@ class ProfileMode(Mode):
def __init__(self, linker=OpWiseCLinker(), optimizer=None):
local_time = [0.0]
apply_time = {}
apply_call = {}
op_time = {}
op_cimpl = {}
op_call = {}
def blah(i, node, th):
if hasattr(th, 'cthunk'):
......@@ -24,13 +26,18 @@ class ProfileMode(Mode):
local_time[0] += dt
apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt
apply_call[(i,node.op)] = apply_call.get((i,node.op), 0) + 1
op_time[node.op] = op_time.get(node.op, 0.0) + dt
op_cimpl[node.op] = hasattr(th, 'cthunk')
op_call[node.op] = op_call.get(node.op,0) + 1
self.local_time = local_time
self.apply_time = apply_time
self.apply_call = apply_call
self.op_time = op_time
self.op_cimpl = op_cimpl
self.op_call = op_call
self.compile_time = 0 #time passed in function()
if isinstance(linker, str):
linker = predefined_linkers[linker]
......@@ -48,6 +55,8 @@ class ProfileMode(Mode):
The Op-wise summary print the execution time of all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there corresponding to the sum of the time spent in each of them). If two Op have different hash value, they will be separate.
The type-Op-wise summary groups the results by type of op. So even if two Ops have different hash values, they will be merged.
There is a hack with the Op-wise summary. Go see it if you want to know more.
param: n_apply_to_print the number of apply to print. Default 15.
param: n_ops_to_print the number of ops to print. Default 20.
......@@ -68,22 +77,37 @@ class ProfileMode(Mode):
tot=0
for f,t,a in atimes[:n_apply_to_print]:
tot+=t
print ' %.2f%% %.3fs %.3fs %i %s' % (f*100, tot, t, a[0], a[1])
print ' %4.1f%% %.3fs %.3fs %i %s' % (f*100, tot, t, a[0], a[1])
print ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(atimes)-n_apply_to_print),
sum(f for f, t, a in atimes[n_apply_to_print:])*100,
sum(t for f, t, a in atimes[n_apply_to_print:]))
print '\nOp-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <Op name>'
flops=False
flops_msg=''
for a,t in op_time.items():
if hasattr(a,'flops'):
flops=True
flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
break
print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <Op name>'%(flops_msg)
otimes = [(t/local_time, t, a, self.op_cimpl[a]) for a, t in op_time.items()]
otimes.sort()
otimes.reverse()
tot=0
for f,t,a,ci in otimes[:n_ops_to_print]:
tot+=t
print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
m=-1
if hasattr(a,'flops'):
m=a.flops*self.op_call[a]/t/1e6
if flops:
print ' %4.1f%% %.3fs %.3fs %s %7.1f %s' % (f*100, tot, t, '*' if ci else ' ', m,a)
else:
print ' %4.1f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
%(max(0, len(otimes)-n_ops_to_print),
sum(f for f, t, a, ci in otimes[n_ops_to_print:])*100,
sum(t for f, t, a, ci in otimes[n_ops_to_print:]))
......@@ -104,13 +128,13 @@ class ProfileMode(Mode):
tot=0
for f,t,a,ci in sotimes[:n_ops_to_print]:
tot+=t
print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a)
print ' %4.1f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(sotimes)-n_ops_to_print),
sum(f for f, t, a in sotimes[n_ops_to_print:])*100,
sum(t for f, t, a in sotimes[n_ops_to_print:]))
print '(*) Op is running a c implementation'
print 'compile time: %.3fs'%self.compile_time
register_mode('PROFILE_MODE',ProfileMode())
......
......@@ -352,7 +352,6 @@ class CLinker(link.Linker):
self.env = env
self.fetch_variables()
self.no_recycling = no_recycling
self.module_compile_str = cmodule.gcc_module_compile_str
return self
def fetch_variables(self):
......@@ -392,6 +391,8 @@ class CLinker(link.Linker):
self.consts = []
c_support_code_apply = []
symbol = {}
# (init_)tasks contains a list of pairs (Op/Variable, task_name)
......@@ -472,7 +473,7 @@ class CLinker(link.Linker):
id += 2
for node in self.node_order:
for node_num, node in enumerate(self.node_order):
# We populate sub with a mapping from the variable names specified by the op's c_var_names
# method to the actual variable names that we will use.
......@@ -481,7 +482,7 @@ class CLinker(link.Linker):
## for variable, vname in zip(op.inputs + op.outputs, ivnames + ovnames):
## sub[vname] = symbol[variable]
name = "<invalid_c_thing>"
name = "node_%i" % node_num
isyms, osyms = [symbol[r] for r in node.inputs], [symbol[r] for r in node.outputs]
# c_validate_update is deprecated
......@@ -493,6 +494,11 @@ class CLinker(link.Linker):
sub['fail'] = failure_code(sub)
op = node.op
# type-specific support code
try: c_support_code_apply.append(op.c_support_code_apply(node, name))
except utils.MethodNotDefined: pass
# emit c_code
try: behavior = op.c_code(node, name, isyms, osyms, sub)
except utils.MethodNotDefined:
raise NotImplementedError("%s cannot produce C code" % op)
......@@ -529,6 +535,7 @@ class CLinker(link.Linker):
self.blocks = blocks
self.tasks = tasks
all = self.inputs + self.outputs + self.orphans
self.c_support_code_apply = c_support_code_apply
if (self.init_tasks, self.tasks) != self.get_init_tasks():
print >> sys.stderr, "init_tasks\n", self.init_tasks
......@@ -551,6 +558,7 @@ class CLinker(link.Linker):
This might contain duplicates.
"""
ret = []
# generic support code
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret.append(x.c_support_code())
except utils.MethodNotDefined: pass
......@@ -589,28 +597,70 @@ class CLinker(link.Linker):
def headers(self):
    """Return the list of header files needed by one or more Types or Ops
    in the graph.

    The return value contains no duplicates.
    """
    ret = []
    # Collect c_headers() from every Type and every Op in the graph;
    # graph elements that do not implement the method are skipped.
    for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
        try: ret += x.c_headers()
        except utils.MethodNotDefined: pass
    # An unreachable duplicate ``return ret`` (diff leftover) used to
    # precede this line; return the deduplicated list as intended.
    return list(set(ret))
def c_compiler(self):
    """Return the module-compile callable requested by the graph.

    Every Type and Op may request a specific compiler via a
    ``c_compiler`` method.  All non-empty requests must agree, otherwise
    an Exception is raised.  When nothing is requested, the default
    ``cmodule.gcc_module_compile_str`` is returned.
    """
    chosen = None
    for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
        if not hasattr(x, 'c_compiler'):
            continue
        candidate = x.c_compiler()
        if chosen is None:
            chosen = candidate
        elif candidate and (candidate != chosen):
            # Two graph elements demand different compilers: bail out.
            raise Exception('Nodes have requested specific different compilers',
                    (chosen, candidate))
    return cmodule.gcc_module_compile_str if (chosen is None) else chosen
def header_dirs(self):
    """Return the list of header search directories needed by one or
    more Types or Ops in the graph.

    The return value contains no duplicates.
    """
    dirs = []
    for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
        # Graph elements that do not implement c_header_dirs are skipped.
        try:
            dirs += x.c_header_dirs()
        except utils.MethodNotDefined:
            pass
    return list(set(dirs))
def libraries(self):
    """Return the list of libraries needed by one or more Types or Ops
    in the graph.

    The return value contains no duplicates.
    """
    ret = []
    # Collect c_libraries() from every Type and every Op in the graph;
    # graph elements that do not implement the method are skipped.
    for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
        try: ret += x.c_libraries()
        except utils.MethodNotDefined: pass
    # An unreachable duplicate ``return ret`` (diff leftover) used to
    # precede this line; return the deduplicated list as intended.
    return list(set(ret))
def lib_dirs(self):
    """Return the list of library search directories needed by one or
    more Types or Ops in the graph.

    The return value contains no duplicates.
    """
    found = []
    for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
        # Graph elements that do not implement c_lib_dirs are skipped.
        try:
            found += x.c_lib_dirs()
        except utils.MethodNotDefined:
            pass
    return list(set(found))
def __compile__(self, input_storage = None, output_storage = None):
"""WRITEME
......@@ -774,11 +824,13 @@ class CLinker(link.Linker):
get_lock()
try:
debug("LOCATION", location)
module = self.module_compile_str(
c_compiler = self.c_compiler()
module = c_compiler(
module_name=mod.name,
src_code = mod.code(),
location=location,
include_dirs=[],
include_dirs=self.header_dirs(),
lib_dirs=self.lib_dirs(),
libs=self.libraries(),
preargs=self.compile_args())
finally:
......@@ -821,7 +873,7 @@ class CLinker(link.Linker):
""" % dict(struct_name = self.struct_name)
# We add all the support code, compile args, headers and libs we need.
for support_code in self.support_code():
for support_code in self.support_code() + self.c_support_code_apply:
mod.add_support_code(support_code)
mod.add_support_code(self.struct_code)
mod.add_support_code(static)
......
......@@ -93,7 +93,12 @@ class DynamicModule(object):
def code(self):
sio = StringIO.StringIO()
for inc in self.includes:
print >> sio, "#include", inc
if not inc:
continue
if inc[0] == '<' or inc[0] == '"':
print >> sio, "#include", inc
else:
print >> sio, '#include "%s"'%inc
print >> sio, "//////////////////////"
print >> sio, "//// Support Code"
......@@ -464,23 +469,11 @@ def get_lib_extension():
else:
return 'so'
def get_gcc_shared_library_arg():
    """Return the platform-dependent GCC argument for shared libraries."""
    # OS X's linker spells the shared-library flag differently from
    # every other supported platform.
    return '-dynamiclib' if sys.platform == 'darwin' else '-shared'
def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[], tmpdir=None):
#TODO: don't to the dlimport in this function
preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC')
no_opt = False
def std_include_dirs():
    """Return the standard include dirs: Python's C-API headers plus numpy's."""
    return [distutils.sysconfig.get_python_inc()] + numpy.distutils.misc_util.get_numpy_include_dirs()
include_dirs = [distutils.sysconfig.get_python_inc()] + \
numpy.distutils.misc_util.get_numpy_include_dirs()\
+ include_dirs
def std_lib_dirs_and_libs():
python_inc = distutils.sysconfig.get_python_inc()
if sys.platform == 'win32':
# Typical include directory: C:\Python26\include
......@@ -488,15 +481,43 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
# Also add directory containing the Python library to the library
# directories.
python_lib_dir = os.path.join(os.path.dirname(python_inc), 'libs')
lib_dirs = [python_lib_dir] + lib_dirs
lib_dirs = [python_lib_dir]
return [libname], [python_lib_dir]
else:
# Typical include directory: /usr/include/python2.6
libname = os.path.basename(python_inc)
libs = [libname] + libs
return [libname], []
def std_libs():
    """Return the standard libraries to link with (platform dependent)."""
    return std_lib_dirs_and_libs()[0]
def std_lib_dirs():
    """Return the standard library search directories (platform dependent)."""
    return std_lib_dirs_and_libs()[1]
def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[]):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:returns: dynamically-imported python module of the compiled code.
"""
#TODO: don't to the dlimport in this function
preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC')
no_opt = False
workdir = location
include_dirs = std_include_dirs() + include_dirs
libs = std_libs() + libs
lib_dirs = std_lib_dirs() + lib_dirs
cppfilename = os.path.join(workdir, 'mod.cpp')
cppfilename = os.path.join(location, 'mod.cpp')
cppfile = file(cppfilename, 'w')
debug('Writing module C++ code to', cppfilename)
......@@ -506,11 +527,11 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
cppfile.write(src_code)
cppfile.close()
lib_filename = os.path.join(workdir, '%s.%s' %
lib_filename = os.path.join(location, '%s.%s' %
(module_name, get_lib_extension()))
debug('Generating shared lib', lib_filename)
cmd = ['g++', get_gcc_shared_library_arg(), '-g']
cmd = ['g++', '-shared', '-g']
if no_opt:
cmd.extend(p for p in preargs if not p.startswith('-O'))
else:
......@@ -526,83 +547,18 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
status = p.wait()
if status:
error('g++ return status', status)
else:
#touch the __init__ file
file(os.path.join(workdir, "__init__.py"),'w').close()
print '==============================='
for i, l in enumerate(src_code.split('\n')):
#gcc put its messages to stderr, so we add ours now
print >> sys.stderr, '%05i\t%s'%(i+1, l)
print '==============================='
raise Exception('g++ return status', status)
rval = dlimport(lib_filename)
return rval
#touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close()
return dlimport(lib_filename)
def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[], tmpdir=None):
preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC')
no_opt = False
raise NotImplementedError()
#TODO: -O preargs should be passed globally, not to -Xcompiler
#TODO: where to find these strings? sys? distutils?
include_dirs = ['/usr/include/python2.6'] + include_dirs
libs = ['python2.6', 'cudart'] + libs
lib_dirs = ['/usr/local/cuda/lib']+lib_dirs
workdir = tempfile.mkdtemp(dir=location)
cppfilename = os.path.join(workdir, 'mod.cpp') #.cpp to use g++
cppfilename = os.path.join(workdir, 'mod.cu') #.cu to use nvopencc
cppfile = file(cppfilename, 'w')
debug('Writing module C++ code to', cppfilename)
ofiles = []
rval = None
try:
cppfile.write(src_code)
cppfile.close()
lib_filename = os.path.join(workdir, '%s.%s' %
(module_name, get_lib_extension()))
debug('Generating shared lib', lib_filename)
cmd = ['nvcc', '-shared', '-g']
cmd.extend(['-Xcompiler', ','.join(preargs)])
cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename])
cmd.append(cppfilename)
cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
cmd.extend(['-l%s'%l for l in libs])
debug('Running cmd', ' '.join(cmd))
p = subprocess.Popen(cmd)
status = p.wait()
if status:
warning('nvcc return status', status)
else:
#touch the __init__ file
file(os.path.join(workdir, "__init__.py"),'w').close()
#load the module
sys.path.insert(0, workdir)
try:
rval = __import__(module_name, {}, {}, [module_name])
if not rval:
debug('__import__ failed')
finally:
del sys.path[0]
assert pathcopy == sys.path
finally:
warning("TODO: cleanup")
#os.remove(cppfilename)
for ofile in ofiles:
#os.remove(ofiles[0])
pass
return rval
def icc_module_compile_str(*args):
    """Placeholder for compiling modules with Intel's icc; not implemented."""
    raise NotImplementedError()
......
......@@ -11,8 +11,138 @@ __docformat__ = "restructuredtext en"
import utils
import traceback
class CLinkerObject(object):
    """Standard elements of an Op or Type used with the CLinker."""

    def c_headers(self):
        """Optional: Return a list of header files required by code returned by
        this class.

        For example: return ['<iostream>', '<math.h>', '/full/path/to/header.h']

        These strings will be prefixed with "#include " and inserted at the
        beginning of the c source code.

        Strings in this list that start neither with '<' nor '"' will be
        enclosed in double-quotes.

        :Exceptions:
         - `MethodNotDefined`: Subclass does not implement this method
        """
        raise utils.MethodNotDefined("c_headers", type(self), self.__class__.__name__)

    def c_header_dirs(self):
        """Optional: Return a list of header search paths required by code
        returned by this class.

        For example: return ['/usr/local/include', '/opt/weirdpath/src/include'].

        Provide search paths for headers, in addition to those in any relevant
        environment variables.

        Hint: for unix compilers, these are the things that get '-I' prefixed
        in the compiler cmdline.

        :Exceptions:
         - `MethodNotDefined`: Subclass does not implement this method
        """
        # Bug fix: the reported method name used to be "c_lib_dirs"
        # (copy-paste error); report the actual method name.
        raise utils.MethodNotDefined("c_header_dirs", type(self), self.__class__.__name__)

    def c_libraries(self):
        """Optional: Return a list of libraries required by code returned by
        this class.

        For example: return ['gsl', 'gslcblas', 'm', 'fftw3', 'g2c'].

        The compiler will search the directories specified by the environment
        variable LD_LIBRARY_PATH in addition to any returned by `c_lib_dirs`.

        Hint: for unix compilers, these are the things that get '-l' prefixed
        in the compiler cmdline.

        :Exceptions:
         - `MethodNotDefined`: Subclass does not implement this method
        """
        raise utils.MethodNotDefined("c_libraries", type(self), self.__class__.__name__)

    def c_lib_dirs(self):
        """Optional: Return a list of library search paths required by code
        returned by this class.

        For example: return ['/usr/local/lib', '/opt/weirdpath/build/libs'].

        Provide search paths for libraries, in addition to those in any
        relevant environment variables (e.g. LD_LIBRARY_PATH).

        Hint: for unix compilers, these are the things that get '-L' prefixed
        in the compiler cmdline.

        :Exceptions:
         - `MethodNotDefined`: Subclass does not implement this method
        """
        raise utils.MethodNotDefined("c_lib_dirs", type(self), self.__class__.__name__)

    def c_support_code(self):
        """Optional: Return utility code for use by a `Variable` or `Op` to be
        included at global scope prior to the rest of the code for this class.

        QUESTION: How many times will this support code be emitted for a graph
        with many instances of the same type?

        :Exceptions:
         - `MethodNotDefined`: Subclass does not implement this method
        """
        raise utils.MethodNotDefined("c_support_code", type(self), self.__class__.__name__)

    def c_code_cache_version(self):
        """Return a tuple of integers indicating the version of this Op.

        An empty tuple indicates an 'unversioned' Op that will not be cached
        between processes.

        The cache mechanism may erase cached modules that have been superseded
        by newer versions. See `ModuleCache` for details.
        """
        return (1,)

    def c_compile_args(self):
        """Optional: Return a list of compile args recommended to compile the
        code returned by other methods in this class.

        Example: return ['-ffast-math']

        Compiler arguments related to headers, libraries and search paths
        should be provided via the functions `c_headers`, `c_libraries`,
        `c_header_dirs`, and `c_lib_dirs`.

        :Exceptions:
         - `MethodNotDefined`: Subclass does not implement this method
        """
        raise utils.MethodNotDefined("c_compile_args", type(self), self.__class__.__name__)

    def c_no_compile_args(self):
        """Optional: Return a list of incompatible gcc compiler arguments.

        We will remove those arguments from the command line of gcc. So if
        another Op adds a compile arg in the graph that is incompatible
        with this Op, the incompatible arg will not be used.
        Useful for instance to remove -ffast-math.

        :Exceptions:
         - `MethodNotDefined`: the subclass does not override this method
        """
        raise utils.MethodNotDefined("c_no_compile_args", type(self), self.__class__.__name__)
class CLinkerOp(CLinkerObject):
"""
Interface definition for `Op` subclasses compiled by `CLinker`.
......@@ -83,94 +213,22 @@ class CLinkerOp(object):
raise utils.MethodNotDefined('%s.c_code_cleanup' \
% self.__class__.__name__)
def c_compile_args(self):
"""Optional: Return a list of recommended gcc compiler arguments.
QUESTION: is this function optional?
This is only a hint.
EXAMPLE
WRITEME
"""
raise utils.MethodNotDefined('%s.c_compile_args' \
% self.__class__.__name__)
def c_no_compile_args(self):
"""Optional: Return a list of incompatible gcc compiler arguments.
We will remove those arguments from the command line of gcc. So if
another Op adds a compile arg in the graph that is incompatible
with this Op, the incompatible arg will not be used.
Useful for instance to remove -ffast-math.
def c_support_code_apply(self, node, name):
"""Optional: Return utility code for use by an `Op` that will be inserted at global
scope, that can be specialized for the support of a particular `Apply` node.
EXAMPLE
:param node: an Apply instance in the graph being compiled
WRITEME
:param node_id: a string or number that serves to uniquely identify this node.
Symbol names defined by this support code should include the node_id, so that they can
be called from the c_code, and so that they do not cause name collisions.
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
- `MethodNotDefined`: Subclass does not implement this method
"""
raise utils.MethodNotDefined('%s.c_no_compile_args' \
% self.__class__.__name__)
def c_headers(self):
"""Optional: Return a list of header files that must be included to compile the C code.
A subclass should override this method.
EXAMPLE
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
raise utils.MethodNotDefined("c_support_code_apply", type(self), self.__class__.__name__)
"""
raise utils.MethodNotDefined('%s.c_headers' \
% self.__class__.__name__)
def c_libraries(self):
"""Optional: Return a list of libraries to link against to manipulate this `Op`.
A subclass should override this method.
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise utils.MethodNotDefined('%s.c_libraries' \
% self.__class__.__name__)
def c_support_code(self):
"""Optional: Return support code for use by the code that is returned by `c_code`.
Support code is inserted into the C code at global scope.
A subclass should override this method.
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise utils.MethodNotDefined('%s.c_support_code' \
% self.__class__.__name__)
def c_code_cache_version(self):
"""Return a tuple of integers indicating the version of this Op.
An empty tuple indicates an 'unversioned' Op that will not be cached between processes.
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
"""
return (1,)
class PureOp(object):
"""
......
......@@ -745,6 +745,7 @@ class NavigatorOptimizer(Optimizer):
raise
if replacements is False or replacements is None:
return False
assert len(node.outputs) == len(replacements)
repl_pairs = zip(node.outputs, replacements)
try:
env.replace_all_validate(repl_pairs, reason=lopt)
......
import sys, StringIO
from collections import defaultdict
import opt
......@@ -140,4 +140,14 @@ class SequenceDB(DB):
opts.sort(key = lambda obj: self.__priority__[obj.name])
return opt.SeqOptimizer(opts, failure_callback = self.failure_callback)
def print_summary(self, stream=sys.stdout):
    """Write a human-readable dump of this SequenceDB's registrations to `stream`."""
    print >> stream, "SequenceDB (id %i)"%id(self)
    # Internal registries: per-optimizer priorities, registered names,
    # and the underlying name -> optimizer mapping.
    print >> stream, " priority", self.__priority__
    print >> stream, " names", self._names
    print >> stream, " db", self.__db__
def __str__(self):
    """Return the output of print_summary() as a string."""
    sio = StringIO.StringIO()
    self.print_summary(sio)
    return sio.getvalue()
......@@ -12,8 +12,9 @@ import traceback
########
# Type #
########
from .op import CLinkerObject
class CLinkerType(object):
class CLinkerType(CLinkerObject):
"""Interface specification for Types that can be arguments to a `CLinkerOp`.
A CLinkerType instance is mainly reponsible for providing the C code that
......@@ -176,89 +177,8 @@ class CLinkerType(object):
"""
raise MethodNotDefined("c_sync", type(self), self.__class__.__name__)
def c_compile_args(self):
"""Optional: Return a list of compile args recommended to compile the
code returned by other methods in this class.
WRITEME: example of formatting for -I, -L, -f args.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_compile_args", type(self), self.__class__.__name__)
def c_no_compile_args(self):
"""Optional: Return a list of incompatible gcc compiler arguments.
We will remove those arguments from the command line of gcc. So if
another Op adds a compile arg in the graph that is incompatible
with this Op, the incompatible arg will not be used.
Useful for instance to remove -ffast-math.
EXAMPLE
WRITEME
:Exceptions:
- `MethodNotDefined`: the subclass does not override this method
"""
raise MethodNotDefined("c_no_compile_args", type(self), self.__class__.__name__)
def c_headers(self):
"""Optional: Return a list of header files required by code returned by
this class.
WRITEME: example of local file, standard file.
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_headers", type(self), self.__class__.__name__)
def c_libraries(self):
"""Optional: Return a list of libraries required by code returned by
this class.
For example: return ['gsl', 'gslcblas', 'm', 'fftw3', 'g2c'].
The compiler will search the directories specified by the environment
variable LD_LIBRARY_PATH. No option is provided for an Op to provide an
extra library directory because this would change the linking path for
other Ops in a potentially disasterous way.
QUESTION: What about via the c_compile_args? a -L option is allowed no?
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_libraries", type(self), self.__class__.__name__)
def c_support_code(self):
"""Optional: Return utility code for use by a `Variable` or `Op` to be
included at global scope prior to the rest of the code for this class.
QUESTION: How many times will this support code be emitted for a graph
with many instances of the same type?
:Exceptions:
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_support_code", type(self), self.__class__.__name__)
def c_code_cache_version(self):
"""Return a tuple of integers indicating the version of this Op.
An empty tuple indicates an 'unversioned' Op that will not be cached between processes.
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
"""
return (1,)
class PureType(object):
"""Interface specification for variable type instances.
......
......@@ -9,9 +9,12 @@ import numpy #for numeric_grad
from gof.python25 import all
import gof.utils
def warning(msg):
# replace this with logger.warning when adding logging support
print >> sys.stderr, 'WARNING', msg
import logging
_logger = logging.getLogger('theano.gradient')
def warning(*msg):
    """Log a warning through the 'theano.gradient' logger.

    Accepts any number of arguments.  Non-string arguments are converted
    with str() — the print-based implementation this replaces accepted
    arbitrary objects, and ' '.join() alone would raise TypeError on them.
    """
    _logger.warning('WARNING theano.gradient: ' + ' '.join(str(m) for m in msg))
def info(*msg):
    """Log an info message through the 'theano.gradient' logger.

    Accepts any number of arguments.  Non-string arguments are converted
    with str() — the print-based implementation this replaces accepted
    arbitrary objects, and ' '.join() alone would raise TypeError on them.
    """
    _logger.info('INFO theano.gradient: ' + ' '.join(str(m) for m in msg))
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
......@@ -103,7 +106,9 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
for ii, (r, g_r) in enumerate(zip(node.inputs, g_inputs)):
if warn_type:
if g_r and (getattr(r,'type',0) != getattr(g_r,'type', 1)):
warning('%s.grad returned a different type for input %i: %s vs. %s'%(node.op, ii, r, g_r))
r_type = getattr(r,'type', None)
g_r_type = getattr(g_r,'type', None)
info('%s.grad returned a different type for input %i: %s vs. %s'%(node.op, ii, r_type, g_r_type))
if g_r and len(sources) == 1 and sources[0][0].name and r.name:
g_r.name = "(d%s/d%s)" % (sources[0][0].name, r.name)
if g_r is not None:
......
......@@ -92,14 +92,25 @@ class ConvOp(Op):
if self.bsize<=self.unroll_batch:
self.unroll_batch = self.bsize
else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a divisor of bsize(%s). We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize))
self.unroll_batch=1
#find the maximum value under unroll_batch that would work
new=self.unroll_batch
assert(new>=1)
while self.bsize % new!=0:
new-=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a divisor of bsize(%s). We revert it to %d. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize),new)
self.unroll_batch=mew
if self.unroll_kern>0 and self.nkern % unroll_kern!=0:
if self.nkern<=self.unroll_kern:
self.unroll_kern = self.nkern
else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a divisor of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
self.unroll_kern=1
#find the maximum value under unroll_kern that would work
new=self.unroll_kern
assert(new>=1)
while self.nkern % new!=0:
new-=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a divisor of nkern(%s)We revert it to %d. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern),new)
self.unroll_kern=new
self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode)
self.out_mode = output_mode
......@@ -137,6 +148,16 @@ class ConvOp(Op):
def __str__(self):
    """Render the op with all its configuration attributes, e.g. ConvOp{('bsize', 1), ...}."""
    return "ConvOp{" +",".join(str((a, getattr(self, a))) for a in self.__attrnames) + "}"
def set_flops(self):
    """Estimate and store self.flops.

    Used with the hack in ProfileMode to report MFlops.  Only the
    "valid" convolution mode is handled; any other mode stores -1
    (full mode not implemented).
    """
    if self.out_mode != "valid":
        # full mode not implemented
        self.flops = -1
        return
    # one multiply + one add per kernel element, per output pixel ...
    per_pixel = self.kshp[0] * self.kshp[1] * 2
    # ... for every pixel of one output image ...
    per_image = per_pixel * self.outshp[0] * self.outshp[1]
    # ... for all output images (n_stack == self.imshp[0]).
    self.flops = per_image * self.imshp[0] * self.nkern * self.bsize
def make_node(self, inputs, kerns):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
"""
......@@ -188,7 +209,6 @@ class ConvOp(Op):
buf = N.zeros((batchsize,)+ self.imshp_logical, dtype=img2d.dtype)
buf[:,:,::rstride, ::cstride] = img2d
img2d = buf
print 'A'
del buf, rstride, cstride
if self.kshp != self.kshp_logical:
......@@ -204,7 +224,6 @@ class ConvOp(Op):
assert coffset >= 0
buf[:,:,roffset::rstride, coffset::cstride] = filtersflipped
filtersflipped = buf
print 'B'
del buf, rstride, cstride
for b in range(batchsize):
......@@ -293,7 +312,10 @@ class ConvOp(Op):
unroll_batch=un_b, unroll_kern=un_k,
imshp_logical=imshp_logical,
kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned)(img,filters)
kshp_logical_top_aligned=kshp_logical_top_aligned)
if hasattr(self,'flops'):
dw.set_flops()
dw = dw(img,filters)
assert (dw.owner.op.outshp==self.kshp).all()
if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
......@@ -311,7 +333,10 @@ class ConvOp(Op):
1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k,
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1]),
kshp_logical=None)(gz,filters)
kshp_logical=None)
if hasattr(self,'flops'):
din.set_flops()
din = din(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw]
......
......@@ -31,10 +31,25 @@ def as_scalar(x, name = None):
def constant(x):
if isinstance(x, float):
return ScalarConstant(float64, x)
for dtype in ['float32', 'float64']:
x_ = numpy.asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
if isinstance(x, int):
return ScalarConstant(int64, x)
return ScalarConstant(float64, float(x))
for dtype in ['int8', 'int16', 'int32', 'int64']:
x_ = numpy.asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
if isinstance(x, complex):
raise NotImplementedError()
raise TypeError(x)
#return ScalarConstant(float64, float(x))
class Scalar(Type):
......@@ -192,9 +207,9 @@ class _scalar_py_operators:
def __neg__(self): return neg(self)
#CASTS
def __int__(self): return AsInt(self).out
def __float__(self): return AsInt(self).out
def __complex__(self): return AsComplex(self).out
#def __int__(self): return AsInt(self).out
#def __float__(self): return AsDouble(self).out
#def __complex__(self): return AsComplex(self).out
#BITWISE
def __invert__(self): return invert(self)
......
......@@ -140,11 +140,26 @@ def constant_or_value(x, rtype, name=None, ndim=None):
- `ValueError`: `x` could not be expanded to have ndim dimensions
"""
if isinstance(x, numpy.ndarray):
x_ = None
if rtype is TensorConstant and isinstance(x, int):
for dtype in ['int8', 'int16', 'int32', 'int64']:
x_ = numpy.asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
elif rtype is TensorConstant and isinstance(x, float):
for dtype in ['float32', 'float64']:
x_ = numpy.asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
elif isinstance(x, numpy.ndarray):
x_ = x
else:
x_ = numpy.asarray(x)
assert type(x_) == numpy.ndarray
bcastable = [d == 1 for d in x_.shape]
if ndim is not None:
if len(bcastable) < ndim:
......@@ -261,7 +276,8 @@ class TensorType(Type):
"""Compare True iff other is the same kind of TensorType"""
return type(self) == type(other) and other.dtype == self.dtype and other.broadcastable == self.broadcastable
def values_eq_approx(self, a, b):
@staticmethod
def values_eq_approx(a, b):
if type(a) is numpy.ndarray and type(b) is numpy.ndarray:
if a.shape != b.shape:
return False
......@@ -653,18 +669,22 @@ class _tensor_py_operators:
"""
return reshape(self, shape, ndim=ndim)
def dimshuffle(self, pattern):
def dimshuffle(self, *pattern):
"""Reorder the dimensions of this variable, optionally inserting broadcasted dimensions.
:param pattern: list of int mixed with 'x' for broadcastable dimensions
:param pattern: list/tuple of int mixed with 'x' for broadcastable dimensions
For example, to create a 3D view of a [2D] matrix, call ``dimshuffle([0,'x',1])``. This
will create a 3D view such that the middle dimension is an implicit broadcasted
dimension. To do the same thing on the transpose of that matrix, call ``dimshuffle([1,
'x', 0])``.
This function supports the pattern passed as a tuple, or as a variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints mixed with 'x' characters).
For more information, see `DimShuffle`.
"""
if (len(pattern) == 1) and (isinstance(pattern[0], (list, tuple))):
pattern = pattern[0]
op = DimShuffle(list(self.type.broadcastable), pattern)
return op(self)
......@@ -683,7 +703,8 @@ class _tensor_py_operators:
return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Variable)))
#COPYING
def copy(self): return tensor_copy(self)
def copy(self):
return tensor_copy(self)
def __iter__(self):
try:
......@@ -949,11 +970,11 @@ class MaxAndArgmax(Op):
inputs = [x, axis]
broadcastable = [False] * (x.type.ndim - 1) #TODO: be less conservative
outputs = [tensor(x.type.dtype, broadcastable),
tensor(axis.type.dtype, broadcastable)]
tensor('int32', broadcastable)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, axis), (max, max_idx)):
max[0] = numpy.asarray(numpy.max(x, axis))
max_idx[0] = numpy.asarray(numpy.argmax(x, axis))
max_idx[0] = numpy.asarray(numpy.argmax(x, axis), dtype='int32')
def grad(self, (x, axis), (g_max, g_max_idx)):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
......@@ -1542,6 +1563,7 @@ class Subtensor(Op):
return type(self) == type(other) and self.idx_list == other.idx_list
def __hash__(self):
#TODO: optimize by cache this hash value
idx_list = tuple((entry.start, entry.stop, entry.step)
if isinstance(entry, slice)
else entry
......@@ -2096,9 +2118,13 @@ class Reshape(Op):
self.name = name
def __eq__(self, other):
return (type(other) is Reshape) and (other.ndim == self.ndim) and self.name == other.name
# .name does not participate because it doesn't affect computations
return (type(other) == type(self)) and (other.ndim == self.ndim)
def __hash__(self):
return hash(Reshape) ^ hash(self.ndim) ^ hash(self.name)
# .name does not participate because it doesn't affect computations
return hash(type(self)) ^ hash(self.ndim)
def __str__(self):
return '%s{%i}' % (self.__class__.__name__, self.ndim)
def make_node(self, x, shp):
x = as_tensor_variable(x)
shp = as_tensor_variable(shp)
......
差异被折叠。
......@@ -133,7 +133,8 @@ class DimShuffle(Op):
self.__dict__.update(d)
self._rehash()
def make_node(self, input):
def make_node(self, _input):
input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable:
raise TypeError("The number of dimensions and/or broadcastable pattern of the input is incorrect for this op. Expected %s, got %s." % (self.input_broadcastable, ib))
......@@ -624,7 +625,7 @@ class Elemwise(Op):
task_code = self.scalar_op.c_code(Apply(self.scalar_op,
[Scalar(dtype = input.type.dtype)() for input in node.inputs],
[Scalar(dtype = output.type.dtype)() for input in node.outputs]),
[Scalar(dtype = output.type.dtype)() for output in node.outputs]),
name + '_scalar_',
["%s_i" % s for s in _inames],
["%s_i" % s for s in onames],
......@@ -847,11 +848,12 @@ class Sum(CAReduce):
CAReduce.__init__(self, scalar.add, axis)
def _output_dtype(self, idtype):
if idtype.startswith('int'):
return 'int64' #we want to protect against overflow
else:
return idtype
# we want to protect against overflow
return dict(
int8='int32',
int16='int32',
int32='int64',
).get(idtype, idtype)
def grad(self, (x, ), (gz, )):
gz = as_tensor_variable(gz)
......
......@@ -133,6 +133,8 @@ class SoftmaxWithBias(gof.Op):
def c_headers(self):
return ['<iostream>','<cmath>']
def c_code_cache_version(self):
return ()
@staticmethod
def c_code_template():
# this implementation was lifted from
......@@ -157,14 +159,14 @@ class SoftmaxWithBias(gof.Op):
PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s;
}
if (%(x)s->descr->type_num != PyArray_DOUBLE)
if ((%(x)s->descr->type_num != PyArray_DOUBLE)&&(%(x)s->descr->type_num != PyArray_DOUBLE))
{
PyErr_SetString(PyExc_TypeError, "a not float64");
PyErr_SetString(PyExc_TypeError, "a not float");
%(fail)s;
}
if (%(b)s->descr->type_num != PyArray_DOUBLE)
if ((%(b)s->descr->type_num != PyArray_DOUBLE) && (%(b)s->descr->type_num != PyArray_DOUBLE))
{
PyErr_SetString(PyExc_TypeError, "b not float64");
PyErr_SetString(PyExc_TypeError, "b not float");
%(fail)s;
}
if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
......@@ -193,22 +195,22 @@ class SoftmaxWithBias(gof.Op):
double sum = 0.0;
bool discount_max = false;
const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
const double* __restrict__ b_i = (double*)(%(b)s->data);
double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
const REAL* __restrict__ x_i = (REAL*)(%(x)s->data + %(x)s->strides[0] * i);
const REAL* __restrict__ b_i = (REAL*)(%(b)s->data);
REAL* __restrict__ sm_i = (REAL*)(%(sm)s->data + %(sm)s->strides[0] * i);
"""
inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(double);
npy_intp Sb = %(b)s->strides[0]/sizeof(double);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
npy_intp Sx = %(x)s->strides[1]/sizeof(REAL);
npy_intp Sb = %(b)s->strides[0]/sizeof(REAL);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(REAL);
size_t row_max_j=0;
double row_max = x_i[0] + b_i[0];
REAL row_max = x_i[0] + b_i[0];
// Get the maximum value of the row
for (j = 0; j < Nx[1]; ++j)
{
double row_ij = x_i[j * Sx] + b_i[j * Sb];
REAL row_ij = x_i[j * Sx] + b_i[j * Sb];
// std::cout << "1" << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
......@@ -216,9 +218,9 @@ class SoftmaxWithBias(gof.Op):
for (j = 0; j < Nx[1]; ++j)
{
double row_ij = x_i[j * Sx] + b_i[j * Sb];
REAL row_ij = x_i[j * Sx] + b_i[j * Sb];
// std::cout << "2" << row_ij << "\\n";
double sm_ij = exp(row_ij - row_max);
REAL sm_ij = exp(row_ij - row_max);
// std::cout << "3" << sm_ij << "\\n";
sum += sm_ij;
sm_i[j * Ssm] = sm_ij;
......@@ -292,12 +294,24 @@ class SoftmaxGrad(gof.Op):
def grad(self, *args):
raise NotImplementedError()
def c_code_cache_version(self):
return ()
def c_code(self, node, name, (dy, sm), (dx,), sub):
if node.inputs[1].type.dtype != node.inputs[0].type.dtype:
raise NotImplementedError()
if node.inputs[0].type.dtype == 'float32':
REAL = 'float'
else:
REAL = 'double'
return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE)
|| (%(sm)s->descr->type_num != PyArray_DOUBLE))
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && ((%(dy)s->descr->type_num != PyArray_FLOAT)
{
PyErr_SetString(PyExc_TypeError, "types should be float or float64");
%(fail)s;
}
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) && ((%(sm)s->descr->type_num != PyArray_FLOAT)
{
PyErr_SetString(PyExc_TypeError, "types should be float64, float64");
PyErr_SetString(PyExc_TypeError, "types should be float or float64");
%(fail)s;
}
if ((%(dy)s->nd != 2)
......@@ -327,12 +341,12 @@ class SoftmaxGrad(gof.Op):
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
{
const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(double);
const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);
const REAL* __restrict__ dy_i = (REAL*) (%(dy)s->data + %(dy)s->strides[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(REAL);
const REAL* __restrict__ sm_i = (REAL*) (%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(REAL);
REAL* __restrict__ dx_i = (REAL*) (%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(REAL);
double sum_dy_times_sm = 0.;
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
......@@ -587,7 +601,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
begin_row_loop,
"""
const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
REAL* __restrict__ nll_i = (REAL*)(%(nll)s->data + %(nll)s->strides[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
""",
inside_row_loop,
......@@ -610,6 +624,10 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type
code_template = ''.join(self.c_code_template())
if node.inputs[0].type.dtype == 'float32':
REAL = 'float'
else:
REAL = 'double'
return code_template % dict(locals(), **sub)
class CrossentropySoftmax1HotWithBiasDx (gof.Op):
......@@ -686,15 +704,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
{
const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
const REAL dnll_i = ((REAL*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
const REAL* __restrict__ sm_i = (REAL*)(%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(REAL);
double* __restrict__ dx_i = (double*)(%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);
REAL* __restrict__ dx_i = (REAL*)(%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(REAL);
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
{
......@@ -790,7 +808,8 @@ class CrossentropyCategorical1Hot(gof.Op):
'(got type: %s instead of: %s)' % (_true_one_of_n.type,
tensor.lvector))
return gof.Apply(self, [_coding_dist, _true_one_of_n], [tensor.dvector()])
return gof.Apply(self, [_coding_dist, _true_one_of_n],
[tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()])
def perform(self, node, (coding, one_of_n), (y_out,)):
y = numpy.zeros_like(coding[:,0])
......
......@@ -114,7 +114,7 @@ def local_dimshuffle_lift(node):
input = node.inputs[0]
inode = input.owner
if inode and isinstance(inode.op, Elemwise):
if inode and isinstance(inode.op, Elemwise) and (len(input.clients)==1):
return inode.op.make_node(*[DimShuffle(input.type.broadcastable,
op.new_order,
op.inplace)(input) for input in inode.inputs]).outputs
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论