Commit 560ad497, authored by lamblin

Merge pull request #623 from nouiz/mixed

Mixed
@@ -83,6 +83,7 @@ New Features
 * C code reuses preallocated outputs (only done by Scan) (Pascal L.)
 * Garbage collection of intermediate results during Theano function calls
   for Ops with C code (Pascal L.)
+* The Theano flag compiledir_format now supports the parameter numpy_version.
 Sparse
 * Implement theano.sparse.mul(sparse1, sparse2) when both inputs don't
...
@@ -915,7 +915,8 @@ MKL library included in EPD, so you should not need to compile your own BLAS.
 <https://github.com/xianyi/OpenBLAS>`_ is a new project that
 continues GotoBLAS: it has a better installation process and implements
 additional functions (not currently used by Theano).
-We did not try OpenBLAS on Windows.
+We did not try OpenBLAS on Windows. When it is installed, you probably
+need to set the Theano flag: ``theano.config.blas.ldflags = "-lopenblas"``.
 .. note::
...
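For reference, the flag can be set either from Python or from the environment; a minimal sketch (the library path is an assumption that depends on where OpenBLAS is installed):

    # From Python, before any function is compiled:
    import theano
    theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas"  # path is hypothetical

    # Or from the shell:
    #   THEANO_FLAGS='blas.ldflags=-lopenblas' python my_script.py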
@@ -17,10 +17,10 @@ purpose of it is to hack it to investigate what your own particular program is d
             predefined_optimizers)
 class StepMode(Mode):
-    def __init__(self, linker=None, optimizer=None):
+    def __init__(self, linker=None, optimizer='default'):
         if linker is None:
             linker = config.linker
-        if optimizer is None:
+        if optimizer is 'default':
             optimizer = config.optimizer
     def blah(i, node, th):
         # This function will be run for each node in your compiled program.
...
@@ -286,10 +286,10 @@ class Mode(object):
     predefined_modes.
     """
-    def __init__(self, linker=None, optimizer=None):
+    def __init__(self, linker=None, optimizer='default'):
         if linker is None:
             linker = config.linker
-        if optimizer is None:
+        if optimizer is 'default':
             optimizer = config.optimizer
         self.__setstate__((linker, optimizer))
...
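The 'default' sentinel distinguishes "fall back to config.optimizer" from an explicit optimizer=None, which now means "apply no optimizer at all". A minimal sketch of the pattern (not the committed code; note that comparing a string with ``is`` relies on CPython interning, so ``==`` is the safer spelling):

    from theano import config

    class MyMode(object):
        def __init__(self, linker=None, optimizer='default'):
            if linker is None:
                linker = config.linker
            if optimizer == 'default':  # '==' avoids depending on string interning
                optimizer = config.optimizer
            # optimizer may legitimately be None here: no optimization at all
            self.linker, self.optimizer = linker, optimizer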
@@ -17,6 +17,7 @@ compiledir_format_dict = {"platform": platform.platform(),
                           "processor": platform.processor(),
                           "python_version": platform.python_version(),
                           "theano_version": theano.__version__,
+                          "numpy_version": numpy.__version__,
                           }
 compiledir_format_keys = ", ".join(compiledir_format_dict.keys())
 default_compiledir_format =\
...
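With the new key, the compilation cache directory can be made to vary with the installed NumPy version, so cached modules are not reused across NumPy upgrades. A usage sketch (the exact format string is illustrative):

    # From the shell:
    #   THEANO_FLAGS='compiledir_format=compiledir_%(platform)s-%(python_version)s-%(numpy_version)s'
    # Or in ~/.theanorc:
    #   [global]
    #   compiledir_format = compiledir_%(platform)s-%(python_version)s-%(numpy_version)s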
@@ -28,6 +28,7 @@ from theano.sandbox.cuda import GpuElemwise, CudaNdarrayType, GpuOp
 from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                            gpu_contiguous)
 from theano.sandbox.cuda.opt import gpu_seqopt
+from theano.tensor.utils import hash_from_dict
 import pycuda_init
 if not pycuda_init.pycuda_available:
@@ -116,7 +117,7 @@ class PycudaElemwiseKernelOp(GpuOp):
     def __hash__(self):
         return (hash(type(self)) ^ hash(self.scalar_op) ^
-                hash(self.inplace_pattern))
+                hash_from_dict(self.inplace_pattern))
     def make_node(self, *inputs):
         _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
@@ -202,7 +203,7 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
     def __hash__(self):
         return (hash(type(self)) ^ hash(self.scalar_op) ^
-                hash(self.inplace_pattern))
+                hash_from_dict(self.inplace_pattern))
     def make_node(self, *inputs):
         _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
...
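The switch to hash_from_dict matters because a plain dict is not hashable: hash(self.inplace_pattern) raises TypeError whenever inplace_pattern is an actual dict. A small illustration using the helper added in this commit:

    from theano.tensor.utils import hash_from_dict

    pattern = {0: [0]}            # inplace pattern: output 0 overwrites input 0
    # hash(pattern)               # would raise TypeError: unhashable type: 'dict'
    h = hash_from_dict(pattern)   # deterministic hash built from the sorted items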
@@ -92,6 +92,10 @@ class Images2Neibs(Op):
         fail = sub['fail']
         mode = self.mode
         return """
+        #ifndef CEIL_INTDIV
+        #define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
+        #endif
         int grid_c = -1; //number of patch in height
         int grid_d = -1; //number of patch in width
         {
@@ -141,10 +145,9 @@ class Images2Neibs(Op):
                 (long int)c, (long int)d, (long int)(%(ten4)s->dimensions[2]), (long int)(%(ten4)s->dimensions[3]));
             %(fail)s;
         }
-        //grid_c = CEIL_INTDIV(((%(ten4)s->dimensions)[2]),step_x)
-        //grid_d = CEIL_INTDIV(((%(ten4)s->dimensions)[3]),step_y)
-        grid_c = ((%(ten4)s->dimensions)[2])/step_x + ((((%(ten4)s->dimensions)[2])%%step_x)? 1:0);
-        grid_d = ((%(ten4)s->dimensions)[3])/step_y + ((((%(ten4)s->dimensions)[3])%%step_y)? 1:0);
+        grid_c = CEIL_INTDIV(((%(ten4)s->dimensions)[2]), step_x);
+        grid_d = CEIL_INTDIV(((%(ten4)s->dimensions)[3]), step_y);
     }else if ( "%(mode)s" == "valid") {
         if ( ((%(ten4)s->dimensions)[2] < c) ||( (((%(ten4)s->dimensions)[2]-c) %% step_x)!=0))
         {
@@ -454,6 +457,10 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
         fail = sub['fail']
         mode = self.mode
         return """
+        #ifndef CEIL_INTDIV
+        #define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
+        #endif
         int grid_c = -1;
         int grid_d = -1;
@@ -491,10 +498,12 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
                 c, d, CudaNdarray_HOST_DIMS(%(ten4)s)[2], CudaNdarray_HOST_DIMS(%(ten4)s)[3]);
             %(fail)s;
         }
-        //grid_c = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]),step_x)
-        //grid_d = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]),step_y)
-        grid_c = ((CudaNdarray_HOST_DIMS(%(ten4)s))[2])/step_x + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[2])%%step_x)? 1:0);
-        grid_d = ((CudaNdarray_HOST_DIMS(%(ten4)s))[3])/step_y + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[3])%%step_y)? 1:0);
+        grid_c = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]),
+                             step_x);
+        grid_d = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]),
+                             step_y);
     }else if ( "%(mode)s" == "valid") {
         if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[2] < c) ||( (((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
         {
...
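CEIL_INTDIV(a, b) is plain ceiling integer division (the %% in the macro is just % escaped for Python string substitution). For intuition, the same computation in Python (a sketch, not part of the commit; dimensions and steps are nonnegative, so C truncating division and Python floor division agree here):

    def ceil_intdiv(a, b):
        # a // b, plus one more patch when b does not divide a exactly
        return a // b + (1 if a % b else 0)

    assert ceil_intdiv(10, 5) == 2  # exact fit
    assert ceil_intdiv(11, 5) == 3  # remainder rounds the patch grid up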
@@ -515,7 +515,21 @@ csr_fmatrix = SparseType(format='csr', dtype='float32')
 # CONSTRUCTION
 class CSMProperties(gof.Op):
-    """Extract all of .data .indices and .indptr"""
+    """Extract all of .data, .indices and .indptr
+
+    :note: We do not implement infer_shape for this op for now. It
+        would require a GetNNZ op, and that op would keep a dependence
+        on this op's input, so it would not help remove computations
+        from the graph. Removing computations would instead need an
+        infer_sparse_pattern feature, which is trickier than
+        infer_shape. For example, how do we handle an op that creates
+        some zero values? There, the result depends on the values
+        themselves. We could write an infer_shape for the last output
+        (the shape), but I doubt it would get used.
+    """
     # we don't return a view of the shape, we create a new ndarray from the
     # shape tuple.
...
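For orientation, this op backs theano.sparse.csm_properties, which symbolically decomposes a compressed sparse matrix into its four underlying arrays; a minimal usage sketch:

    import theano.sparse as sparse

    x = sparse.csc_matrix(name='x', dtype='float64')
    # Extract the CSC internals: values, row indices,
    # column pointers, and the matrix shape.
    data, indices, indptr, shape = sparse.csm_properties(x)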
@@ -13,6 +13,8 @@ from theano import scalar
 from theano.scalar import Scalar
 from theano.printing import min_informative_str, pprint
 from theano.gof.python25 import all, any
+from theano.tensor.utils import hash_from_dict
 config = theano.config
@@ -563,17 +565,8 @@ class Elemwise(Op):
         return False
     def _rehash(self):
-        items = self.inplace_pattern.items()
-        items.sort()
-        first_part = [k for k, v in items]
-        second_part = []
-        for k, v in items:
-            if isinstance(v, (tuple, list)):
-                second_part += [tuple(v)]
-            else:
-                second_part += [v]
-        tuple_items = tuple(first_part + second_part)
-        h = hash('Elemwise') ^ hash(self.scalar_op) ^ hash(tuple_items)
+        inplace_pattern_hash = hash_from_dict(self.inplace_pattern)
+        h = hash('Elemwise') ^ hash(self.scalar_op) ^ inplace_pattern_hash
         assert h == getattr(self, '_hashval', h)
         self._hashval = h
...
 import numpy
-from theano.tensor.utils import hash_from_ndarray
+from theano.tensor.utils import hash_from_ndarray, hash_from_dict
 def test_hash_from_ndarray():
@@ -31,3 +31,18 @@ def test_hash_from_ndarray():
     assert hash_from_ndarray(rng[:4]) == hash_from_ndarray(rng[:4].copy())
     assert hash_from_ndarray(rng[::2]) == hash_from_ndarray(rng[::2].copy())
     assert hash_from_ndarray(rng[::-1]) == hash_from_ndarray(rng[::-1].copy())
+
+def test_hash_from_dict():
+    dicts = [{}, {0: 0}, {0: 1}, {1: 0}, {1: 1},
+             {0: (0,)}, {0: [1]},
+             {0: (0, 1)}, {0: [1, 0]},
+             ]
+    hashs = []
+    for idx, d in enumerate(dicts):
+        h = hash_from_dict(d)
+        assert h not in hashs
+        hashs.append(h)
+
+    # Lists are not hashable, so they are converted to tuples.
+    assert hash_from_dict({0: (0,)}) == hash_from_dict({0: [0]})
@@ -18,3 +18,28 @@ def hash_from_ndarray(data):
             hash_from_code(str(data.shape)) +
             hash_from_code(str(data.strides)) +
             hash_from_code(str(data.dtype)))
+
+def hash_from_dict(d):
+    """Work around the fact that dicts are not hashable in Python.
+
+    This requires that every object in the dict has a sort order that
+    depends only on its value (true for integers, floats and strings);
+    we do not verify that the objects in the dict have this property.
+
+    Values that are lists are converted to tuples, as lists are not
+    hashable.
+    """
+    items = d.items()
+    items.sort()
+    first_part = [k for k, v in items]
+    second_part = []
+    for k, v in items:
+        if isinstance(v, (tuple, list)):
+            second_part += [tuple(v)]
+        else:
+            second_part += [v]
+    tuple_items = tuple(first_part + second_part)
+    return hash(tuple_items)
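Note that items = d.items() followed by items.sort() is Python 2 idiom (dict.items returned a list there). A hypothetical modern-Python spelling of the same scheme, assuming mutually sortable keys:

    def hash_from_dict_py3(d):
        # Sort by key, hash all keys first and then all values,
        # converting list values to tuples so they are hashable.
        items = sorted(d.items())
        keys = tuple(k for k, v in items)
        values = tuple(tuple(v) if isinstance(v, (tuple, list)) else v
                       for k, v in items)
        return hash(keys + values)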