提交 b01c7960 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1552 from nouiz/mixed2

Mixed2
......@@ -19,7 +19,7 @@ def print_help(exit_status):
print 'Type "theano-cache clear" to erase the cache'
print 'Type "theano-cache list" to print the cache content'
print 'Type "theano-cache unlock" to unlock the cache directory'
print 'Type "theano-cache cleanup" to delete keys in the old format'
print 'Type "theano-cache cleanup" to delete keys in the old format/code version'
print 'Type "theano-cache purge" to force deletion of the cache directory'
print ('Type "theano-cache basecompiledir" '
'to print the parent of the cache directory')
......@@ -60,6 +60,8 @@ elif len(sys.argv) == 2:
theano.gof.compiledir.print_compiledir_content()
elif sys.argv[1] == 'cleanup':
theano.gof.compiledir.cleanup()
cache = get_module_cache(init_args=dict(do_refresh=False))
cache.clear_old()
elif sys.argv[1] == 'unlock':
theano.gof.compilelock.force_unlock()
print 'Lock successfully removed!'
......
......@@ -1067,6 +1067,18 @@ The six usual equality and inequality operators share the same interface.
Returns a variable representing the result of logical inequality (a!=b).
.. function:: isnan(a)
Returns a variable representing the comparison of ``a`` elements with nan.
This is equivalent to ``numpy.isnan``.
.. function:: isinf(a)
Returns a variable representing the comparison of ``a`` elements
with inf or -inf.
This is equivalent to ``numpy.isinf``.
Condition
---------
......
......@@ -566,13 +566,13 @@ class ProfileStats(object):
sh = self.variable_shape.get(var, 'no shape')
st = self.variable_strides.get(var, 'no strides')
dtype = getattr(var, 'dtype', 'no dtype')
print " input %d: dtype=%s, shape=%s, strides=%s " % (
print >> file, " input %d: dtype=%s, shape=%s, strides=%s " % (
idx, dtype, sh, st)
for idx, var in enumerate(a.outputs):
sh = self.variable_shape.get(var, 'no shape')
st = self.variable_strides.get(var, 'no strides')
dtype = getattr(var, 'dtype', 'no dtype')
print " output %d: dtype=%s, shape=%s, strides=%s " % (
print >> file, " output %d: dtype=%s, shape=%s, strides=%s " % (
idx, dtype, sh, st)
# Same as before: I've sacrificed some information to make
# the output more readable
......
import cPickle
import errno
import logging
import os
import platform
import re
......@@ -17,6 +18,9 @@ from theano.configparser import config, AddConfigVar, ConfigParam, StrParam
from theano.gof.utils import flatten
from theano.misc.windows import call_subprocess_Popen
_logger = logging.getLogger("theano.gof.compiledir")
# Using the dummy file descriptors below is a workaround for a crash
# experienced in an unusual Python 2.4.4 Windows environment with the default
# None values.
......@@ -181,7 +185,7 @@ def cleanup():
"""
Delete keys in old format from the compiledir.
Old clean up include key in old format:
The clean-up removes keys in the old format, or keys generated with an old version of the c_code:
1) keys that have an ndarray in them.
Now we use a hash in the keys of the constant data.
2) key that don't have the numpy ABI version in them
......@@ -204,24 +208,46 @@ def cleanup():
have_c_compiler = False
for obj in flatten(key):
if isinstance(obj, numpy.ndarray):
keydata.remove_key(key)
have_npy_abi_version = False
break
elif isinstance(obj, basestring):
if obj.startswith('NPY_ABI_VERSION=0x'):
have_npy_abi_version = True
elif obj.startswith('c_compiler_str='):
have_c_compiler = True
elif (isinstance(obj, (theano.gof.Op, theano.gof.Type)) and
hasattr(obj, 'c_code_cache_version')):
v = obj.c_code_cache_version()
if v not in [(), None] and v not in key[0]:
have_npy_abi_version = False
break
if not have_npy_abi_version or not have_c_compiler:
keydata.remove_key(key)
try:
# This can happen when the compiledir has been moved.
if keydata.key_pkl != filename:
keydata.key_pkl = filename
keydata.remove_key(key)
except IOError, e:
_logger.error(
"Could not remove file '%s'. To complete "
"the clean-up, please remove manually "
"the directory containing it.",
filename)
if len(keydata.keys) == 0:
shutil.rmtree(os.path.join(compiledir, directory))
except EOFError:
print ("ERROR while reading this key file '%s'."
" Delete its directory" % filename)
_logger.error(
"Could not read key file '%s'. To complete "
"the clean-up, please remove manually "
"the directory containing it.",
filename)
except IOError:
pass
_logger.error(
"Could not clean up this directory: '%s'. To complete "
"the clean-up, please remove it manually.",
directory)
finally:
if file is not None:
file.close()
......
......@@ -1424,15 +1424,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
loop_timing = []
global_opt_timing = []
time_lopts = {}
time_opts = {}
io_toposort_timing = []
nb_nodes = []
for gopt in self.global_optimizers:
process_count.setdefault(gopt, 0)
for lopt in self.local_optimizers:
process_count.setdefault(lopt, 0)
time_lopts.setdefault(lopt, 0)
for opt in self.global_optimizers + self.local_optimizers:
process_count.setdefault(opt, 0)
time_opts.setdefault(opt, 0)
while changed and not max_use_abort:
t0 = time.time()
......@@ -1441,7 +1438,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
#apply global optimizers
for gopt in self.global_optimizers:
fgraph.change_tracker.reset()
t_opt = time.time()
gopt.apply(fgraph)
time_opts[gopt] += time.time() - t_opt
if fgraph.change_tracker.changed:
process_count[gopt] += 1
changed = True
......@@ -1482,9 +1481,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
current_node = node
for lopt in self.local_optimizers:
t_lopt = time.time()
t_opt = time.time()
lopt_change = self.process_node(fgraph, node, lopt)
time_lopts[lopt] += time.time() - t_lopt
time_opts[lopt] += time.time() - t_opt
if lopt_change:
process_count[lopt] += 1
changed = True
......@@ -1507,7 +1506,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
config.optdb.max_use_ratio)
return (self, loop_timing, process_count, max_nb_nodes,
global_opt_timing, nb_nodes, time_lopts, io_toposort_timing)
global_opt_timing, nb_nodes, time_opts, io_toposort_timing)
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
name = getattr(self, 'name', None)
......@@ -1521,7 +1520,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
@staticmethod
def print_profile(stream, prof, level=0):
(opt, loop_timing, process_count, max_nb_nodes,
global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof
global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof
blanc = (' ' * level)
print >> stream, blanc, "EquilibriumOptimizer",
print >> stream, blanc, getattr(opt, "name",
......@@ -1540,7 +1539,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
count_opt = []
for opt, count in process_count.iteritems():
if count > 0:
count_opt.append((time_lopts[opt], count, opt))
count_opt.append((time_opts[opt], count, opt))
if count_opt:
print >> stream, blanc, \
......@@ -1554,7 +1553,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
@staticmethod
def merge_profile(prof1, prof2):
#(opt, loop_timing, process_count, max_nb_nodes,
# global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof1
# global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1
local_optimizers = set(prof1[0].local_optimizers).union(
prof2[0].local_optimizers)
......@@ -1588,12 +1587,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
nb_nodes = merge_list(prof1[5], prof2[5])
time_lopts = prof1[6].copy()
time_opts = prof1[6].copy()
for opt, t in prof2[6].iteritems():
if opt in time_lopts:
time_lopts[opt] += t
if opt in time_opts:
time_opts[opt] += t
else:
time_lopts[opt] = t
time_opts[opt] = t
io_toposort_timing = merge_list(prof1[7], prof2[7])
......@@ -1606,7 +1605,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
max_nb_nodes,
global_opt_timing,
nb_nodes,
time_lopts,
time_opts,
io_toposort_timing)
#################
......
......@@ -29,6 +29,7 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
*/
#ifndef CONV_KERNEL_CU
#define CONV_KERNEL_CU
#include <stdint.h>
/*
#define CHECK_BANK_CONFLICTS 0
......@@ -44,7 +45,9 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
#define MIN(a, b) ((a) < (b) ? (a) : (b) )
#define MAX(a, b) ((a) < (b) ? (b) : (a) )
const unsigned long int COALESCED_ALIGN = 0xFFFFFFFFFFFFFF00; // zero-out the trailing bits of pointers
//Must be the same size as a pointer. We can't use unsigned long because
//on 64-bit Windows it is only 32 bits wide.
const uintptr_t COALESCED_ALIGN = 0xFFFFFFFFFFFFFF00; // zero-out the trailing bits of pointers
__device__ void load_to_shared(float * dst, const float * src, const int thread_id, int nb_thread, const int N, const bool flipped=false){
if (nb_thread < 64)
......@@ -73,7 +76,7 @@ __device__ void load_to_shared(float * dst, const float * src, const int thread_
if (thread_id < nb_thread)
{
const float * my_src_ptr = (const float *)(
((unsigned long int)src) & COALESCED_ALIGN);
((uintptr_t)src) & COALESCED_ALIGN);
my_src_ptr += thread_id;
while (my_src_ptr < src + N)
{
......
......@@ -47,13 +47,13 @@ static PyObject *CudaNdarray_get_shape(CudaNdarray *self, void *closure);
int _outstanding_mallocs[] = {0,0};
#if COMPUTE_GPU_MEM_USED
int _allocated_size = 0;
int _max_allocated_size = 0;
size_t _allocated_size = 0;
size_t _max_allocated_size = 0;
const int TABLE_SIZE = 10000;
struct table_struct{
void* ptr;
int size;
size_t size;
};
table_struct _alloc_size_table[TABLE_SIZE];
#endif
......@@ -92,26 +92,26 @@ void * device_malloc(size_t size, int verbose)
if (err2 != cudaSuccess){
cudaGetLastError();
fprintf(stderr,
"Error when tring to find the memory information"
"Error when trying to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
}
#if COMPUTE_GPU_MEM_USED
fprintf(stderr,
"Error allocating %li bytes of device memory (%s)."
"Error allocating %zd bytes of device memory (%s)."
" new total bytes allocated: %d."
" Driver report %d bytes free and %d bytes total \n",
(long)size, cudaGetErrorString(err), _allocated_size,
" Driver report %zd bytes free and %zd bytes total \n",
size, cudaGetErrorString(err), _allocated_size,
free, total);
#else
fprintf(stderr,
"Error allocating %li bytes of device memory (%s)."
" Driver report %d bytes free and %d bytes total \n",
(long)size, cudaGetErrorString(err), free, total);
"Error allocating %zd bytes of device memory (%s)."
" Driver report %zd bytes free and %zd bytes total \n",
size, cudaGetErrorString(err), free, total);
#endif
}
PyErr_Format(PyExc_MemoryError,
"Error allocating %li bytes of device memory (%s).",
(long)size, cudaGetErrorString(err));
"Error allocating %zd bytes of device memory (%s).",
size, cudaGetErrorString(err));
return NULL;
}
if (rval != NULL){
......@@ -227,15 +227,15 @@ int device_free(void *ptr)
}
assert(i<TABLE_SIZE);
fprintf(stderr,
"Error freeing device pointer %p (%s) of size %d. %d byte already allocated."
" Driver report %d bytes free and %d bytes total \n",
"Error freeing device pointer %p (%s) of size %d. %zd byte already allocated."
" Driver report %zd bytes free and %zd bytes total \n",
ptr, cudaGetErrorString(err),
_alloc_size_table[i].size, _allocated_size, free, total);
}
#else
fprintf(stderr,
"Error freeing device pointer %p (%s)."
" Driver report %d bytes free and %d bytes total \n",
" Driver report %zd bytes free and %zd bytes total \n",
ptr,
cudaGetErrorString(err), free, total);
#endif
......
......@@ -3564,11 +3564,10 @@ class T_Scan(unittest.TestCase):
assert not opt_obj.belongs_to_set(scan_node1, [scan_node2])
assert not opt_obj.belongs_to_set(scan_node2, [scan_node1])
def test_remove_constants_and_unused_inputs_scan(self):
"""
Test the opt remove_constants_and_unused_inputs_scan
def test_remove_constants_and_unused_inputs_scan_non_seqs(self):
"""Test the opt remove_constants_and_unused_inputs_scan for
non sequences.
TODO: currently we only test non_seqs, should test
"""
W = theano.tensor.matrix(name='W')
v = theano.tensor.ivector(name='v')
......@@ -3594,17 +3593,61 @@ class T_Scan(unittest.TestCase):
f(numpy.zeros((3, 3), dtype=theano.config.floatX), [1, 2])
scan_node = f.maker.fgraph.toposort()[-1]
# TODO: Why this assert always fail?
# assert (len(scan_node.inputs) ==
# len(set(scan_node.inputs)))
# The first input is the number of iterations.
assert (len(scan_node.inputs[1:]) ==
len(set(scan_node.inputs[1:])))
inp = scan_node.op.inner_non_seqs(scan_node.op.inputs)
assert len(inp) == 1
assert (len(inp) == len(set(inp)))
inp = scan_node.op.outer_non_seqs(scan_node)
assert len(inp) == 1
assert (len(inp) == len(set(inp)))
#import pdb;pdb.set_trace()
#utt.assert_allclose(f([1, 2]), [[0, 0, 0], [1, 1, 1], [1, 1, 1]])
def test_remove_constants_and_unused_inputs_scan_seqs(self):
"""
Test the opt remove_constants_and_unused_inputs_scan for sequences.
"""
W = theano.tensor.matrix(name='W')
v = theano.tensor.ivector(name='v')
vv = theano.tensor.matrix(name='vv')
y1, _ = theano.scan(lambda i, W: W[i], sequences=v,
outputs_info=None, non_sequences=[W])
y2, _ = theano.scan(lambda i, _, W: W[i], sequences=[v, v],
outputs_info=None, non_sequences=W)
y3, _ = theano.scan(lambda i, _, W: W[i], sequences=[v, vv[0]],
outputs_info=None, non_sequences=W)
y4, _ = theano.scan(lambda _, i, W: W[i], sequences=[vv[0], v],
outputs_info=None, non_sequences=W)
y5, _ = theano.scan(lambda _, i, _2, W: W[i], sequences=[vv, v, vv[0]],
outputs_info=None, non_sequences=W)
y6, _ = theano.scan(lambda _, _2, i, W: W[i], sequences=[vv[0], vv, v],
outputs_info=None, non_sequences=W)
y7, _ = theano.scan(lambda i, _, _2, W: W[i],
sequences=[v, vv[0], vv[0]],
outputs_info=None, non_sequences=W)
y8, _ = theano.scan(lambda _, i, W, _2, _3: W[i], sequences=[vv[0], v],
outputs_info=None, non_sequences=[W, W[0], W[0]])
for out in [y1, y2, y3, y4, y5, y6, y7, y8]:
#This used to raise an exception
f = theano.function([W, v, vv], out, on_unused_input='ignore',
mode=mode_with_opt)
f(numpy.zeros((3, 3), theano.config.floatX),
[1, 2],
numpy.zeros((3, 3), theano.config.floatX))
scan_node = f.maker.fgraph.toposort()[-1]
# The first input is the number of iterations.
assert (len(scan_node.inputs[1:]) ==
len(set(scan_node.inputs[1:])))
inp = scan_node.op.inner_seqs(scan_node.op.inputs)
assert len(inp) == 1
inp = scan_node.op.outer_seqs(scan_node)
assert len(inp) == 1
inp = scan_node.op.inner_non_seqs(scan_node.op.inputs)
assert len(inp) == 1
inp = scan_node.op.outer_non_seqs(scan_node)
assert len(inp) == 1
def test_speed():
......
......@@ -2,7 +2,7 @@ import theano
from theano.tensor import basic as T
import numpy as N
#from util import strutil
from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas_headers import blas_header_text, blas_header_version
from theano.tensor.blas import ldflags
from theano.misc import strutil
from theano.gradient import grad_undefined
......@@ -51,8 +51,7 @@ class Conv3D(theano.Op):
return "Conv3D"
def c_code_cache_version(self):
return (3, blas_header_text.version)
return (3, blas_header_version())
def make_node(self, V, W, b, d):
"""
......
......@@ -543,8 +543,8 @@ class _tensor_py_operators:
def get_scalar_constant_value(self):
return theano.tensor.basic.get_scalar_constant_value(self)
def zeros_like(model, dtype=None):
return theano.tensor.basic.zeros_like(model, dtype=dtype)
def zeros_like(self, dtype=None):
return theano.tensor.basic.zeros_like(self, dtype=dtype)
class TensorVariable(_tensor_py_operators, Variable):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论