提交 fd7875ad authored 作者: bscellier's avatar bscellier 提交者: GitHub

Merge branch 'master' into import_numpy_gpuarray

...@@ -13,5 +13,5 @@ echo "===== Testing theano core" ...@@ -13,5 +13,5 @@ echo "===== Testing theano core"
# Test theano core # Test theano core
PARTS="theano -e cuda -e gpuarray" PARTS="theano -e cuda -e gpuarray"
THEANO_PARAM="${PARTS} --with-timer --timer-top-n 10 --with-xunit --xunit-file=theanocore_tests.xml" THEANO_PARAM="${PARTS} --with-timer --timer-top-n 10 --with-xunit --xunit-file=theanocore_tests.xml"
FLAGS="mode=FAST_RUN,floatX=float32" FLAGS="mode=FAST_RUN,floatX=float32,on_opt_error=raise,on_shape_error=raise"
THEANO_FLAGS=${FLAGS} bin/theano-nose ${THEANO_PARAM} THEANO_FLAGS=${FLAGS} bin/theano-nose ${THEANO_PARAM}
...@@ -76,5 +76,5 @@ THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \ ...@@ -76,5 +76,5 @@ THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \
theano/sandbox/tests/test_rng_mrg.py:test_GPUA_full_fill \ theano/sandbox/tests/test_rng_mrg.py:test_GPUA_full_fill \
theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray" theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray"
FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN" FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN,on_opt_error=raise,on_shape_error=raise"
THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS} THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS}
...@@ -5,11 +5,11 @@ import os ...@@ -5,11 +5,11 @@ import os
import sys import sys
if sys.platform == 'win32': if sys.platform == 'win32':
config_cxx = 'cxx=' config_for_theano_cache_script = 'cxx=,device=cpu'
theano_flags = os.environ['THEANO_FLAGS'] if 'THEANO_FLAGS' in os.environ else '' theano_flags = os.environ['THEANO_FLAGS'] if 'THEANO_FLAGS' in os.environ else ''
if theano_flags: if theano_flags:
theano_flags += ',' theano_flags += ','
theano_flags += config_cxx theano_flags += config_for_theano_cache_script
os.environ['THEANO_FLAGS'] = theano_flags os.environ['THEANO_FLAGS'] = theano_flags
import theano import theano
......
...@@ -64,11 +64,18 @@ The documentation will be automatically regenerated in the next few hours. ...@@ -64,11 +64,18 @@ The documentation will be automatically regenerated in the next few hours.
Generate and upload the package Generate and upload the package
=============================== ===============================
For release candidates, only upload on PyPI.
On PyPI On PyPI
------- -------
Set your umask to ``0022`` to ensure that the package file will be readable by other people.
To check your umask::
umask
To set your umask::
umask 0022
Now change ``ISRELEASED`` in ``setup.py`` to ``True``. Now change ``ISRELEASED`` in ``setup.py`` to ``True``.
Finally, use setuptools to register and upload the release:: Finally, use setuptools to register and upload the release::
...@@ -84,8 +91,8 @@ UnicodeDecodeError if there are non-ASCII characters in NEWS.txt. You ...@@ -84,8 +91,8 @@ UnicodeDecodeError if there are non-ASCII characters in NEWS.txt. You
would need to change NEWS.txt so it contains only ASCII characters (the would need to change NEWS.txt so it contains only ASCII characters (the
problem usually comes from diacritics in people's names). problem usually comes from diacritics in people's names).
On mloss.org On mloss.org (for final releases only)
------------ --------------------------------------
Project page is at http://mloss.org/software/view/241/. Project page is at http://mloss.org/software/view/241/.
Account jaberg is listed as submitter. Account jaberg is listed as submitter.
...@@ -138,8 +145,10 @@ then run the script. ...@@ -138,8 +145,10 @@ then run the script.
Announce the release Announce the release
==================== ====================
Generate an e-mail from the template in in ``EMAIL.txt``, including content Generate an e-mail from the template in ``EMAIL.txt``, including content
from ``NEWS.txt``, and send it to the following mailing lists: from ``NEWS.txt``.
For final releases, send the e-mail to the following mailing lists:
* theano-users * theano-users
* theano-announce * theano-announce
...@@ -152,3 +161,8 @@ For release candidates, only e-mail: ...@@ -152,3 +161,8 @@ For release candidates, only e-mail:
* theano-announce * theano-announce
* theano-dev * theano-dev
* theano-users * theano-users
For alpha and beta releases, only e-mail:
* theano-dev
* theano-users
...@@ -19,11 +19,34 @@ ...@@ -19,11 +19,34 @@
The user-friendly constructor is :func:`shared` The user-friendly constructor is :func:`shared`
.. attribute:: value .. method:: get_value(self, borrow=False, return_internal_type=False)
Read/write access to the [non-symbolic] value/data associated with this SharedVariable. :param borrow: True to permit returning of an object aliased to internal memory.
:type borrow: bool
Changes to this value will be visible to all functions using this SharedVariable.
:param return_internal_type: True to permit the returning of an arbitrary type object used
internally to store the shared variable.
:type return_internal_type: bool
By default, return a copy of the data. If ``borrow=True`` (and
``return_internal_type=False``), it may still return a copy.
For tensors, it will always return an ndarray by default, so if
the data is on the GPU, it will return a copy, but if the data
is on the CPU, it will return the original data. If you set
``borrow=True`` and ``return_internal_type=True``, it will
always return the original data, not a copy, but this can be a
GPU object.
.. method:: set_value(self, new_value, borrow=False)
:param new_value: The new value.
:type new_value: A compatible type for this shared variable.
:param borrow: True to use the new_value directly, potentially creating problems
related to aliased memory.
:type borrow: bool
The new value will be seen by all functions using this SharedVariable.
.. method:: __init__(self, name, type, value, strict, container=None) .. method:: __init__(self, name, type, value, strict, container=None)
......
...@@ -10,6 +10,9 @@ ...@@ -10,6 +10,9 @@
.. moduleauthor:: LISA .. moduleauthor:: LISA
.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test`. They must be added manually as they do not have the same user interface. .. autofunction:: theano.tensor.nnet.bn.batch_normalization_train
.. autofunction:: theano.tensor.nnet.bn.batch_normalization_test
.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test`.
.. autofunction:: theano.tensor.nnet.bn.batch_normalization .. autofunction:: theano.tensor.nnet.bn.batch_normalization
...@@ -59,11 +59,11 @@ class OpFromGraph(gof.Op): ...@@ -59,11 +59,11 @@ class OpFromGraph(gof.Op):
.. code-block:: python .. code-block:: python
import numpy import numpy as np
import theano import theano
from theano import config, function, OpFromGraph, tensor from theano import config, function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz') x, y, z = tensor.scalars('xyz')
s = theano.shared(numpy.random.rand(2, 2).astype(config.floatX)) s = theano.shared(np.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s e = x + y * z + s
op = OpFromGraph([x, y, z], [e]) op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op # op behaves like a normal theano op
......
...@@ -14,7 +14,7 @@ import six.moves.copyreg as copyreg ...@@ -14,7 +14,7 @@ import six.moves.copyreg as copyreg
from itertools import chain, product as itertools_product from itertools import chain, product as itertools_product
from theano.compat import izip from theano.compat import izip
import numpy import numpy as np
import theano import theano
from theano import gof, config from theano import gof, config
...@@ -270,15 +270,15 @@ class BadOptimization(DebugModeError): ...@@ -270,15 +270,15 @@ class BadOptimization(DebugModeError):
print(" New Value: ", str(self.new_r_val), file=sio) print(" New Value: ", str(self.new_r_val), file=sio)
try: try:
ov = numpy.asarray(self.old_r_val) ov = np.asarray(self.old_r_val)
nv = numpy.asarray(self.new_r_val) nv = np.asarray(self.new_r_val)
ssio = StringIO() ssio = StringIO()
abs_diff = numpy.absolute(nv - ov) abs_diff = np.absolute(nv - ov)
print(" Max Abs Diff: ", numpy.max(abs_diff), file=ssio) print(" Max Abs Diff: ", np.max(abs_diff), file=ssio)
print(" Mean Abs Diff: ", numpy.mean(abs_diff), file=ssio) print(" Mean Abs Diff: ", np.mean(abs_diff), file=ssio)
print(" Median Abs Diff: ", numpy.median(abs_diff), file=ssio) print(" Median Abs Diff: ", np.median(abs_diff), file=ssio)
print(" Std Abs Diff: ", numpy.std(abs_diff), file=ssio) print(" Std Abs Diff: ", np.std(abs_diff), file=ssio)
arg_max_val = numpy.argmax(abs_diff) arg_max_val = np.argmax(abs_diff)
values_at_max = (nv.flatten()[arg_max_val], values_at_max = (nv.flatten()[arg_max_val],
ov.flatten()[arg_max_val]) ov.flatten()[arg_max_val])
print(" Value at Max Diff: ", values_at_max, file=ssio) print(" Value at Max Diff: ", values_at_max, file=ssio)
...@@ -286,13 +286,13 @@ class BadOptimization(DebugModeError): ...@@ -286,13 +286,13 @@ class BadOptimization(DebugModeError):
# N.B. the maximum(..., 1e-8) protects against div by 0 when # N.B. the maximum(..., 1e-8) protects against div by 0 when
# nv == ov == 0 # nv == ov == 0
reldiff = (abs_diff / reldiff = (abs_diff /
numpy.maximum(numpy.absolute(nv) + numpy.absolute(ov), np.maximum(np.absolute(nv) + np.absolute(ov),
1e-8)) 1e-8))
print(" Max Rel Diff: ", numpy.max(reldiff), file=ssio) print(" Max Rel Diff: ", np.max(reldiff), file=ssio)
print(" Mean Rel Diff: ", numpy.mean(reldiff), file=ssio) print(" Mean Rel Diff: ", np.mean(reldiff), file=ssio)
print(" Median Rel Diff: ", numpy.median(reldiff), file=ssio) print(" Median Rel Diff: ", np.median(reldiff), file=ssio)
print(" Std Rel Diff: ", numpy.std(reldiff), file=ssio) print(" Std Rel Diff: ", np.std(reldiff), file=ssio)
arg_max_val = numpy.argmax(reldiff) arg_max_val = np.argmax(reldiff)
values_at_max = (nv.flatten()[arg_max_val], values_at_max = (nv.flatten()[arg_max_val],
ov.flatten()[arg_max_val]) ov.flatten()[arg_max_val])
print(" Value at Max Diff: ", values_at_max, file=ssio) print(" Value at Max Diff: ", values_at_max, file=ssio)
...@@ -342,8 +342,8 @@ class BadDestroyMap(DebugModeError): ...@@ -342,8 +342,8 @@ class BadDestroyMap(DebugModeError):
print(" repr (old val):", repr(self.old_val), file=sio) print(" repr (old val):", repr(self.old_val), file=sio)
print(" repr (new val):", repr(self.new_val), file=sio) print(" repr (new val):", repr(self.new_val), file=sio)
try: try:
npy_old_val = numpy.asarray(self.old_val) npy_old_val = np.asarray(self.old_val)
npy_new_val = numpy.asarray(self.new_val) npy_new_val = np.asarray(self.new_val)
print(" value dtype (new <space> old):", npy_new_val.dtype, print(" value dtype (new <space> old):", npy_new_val.dtype,
npy_old_val.dtype, file=sio) npy_old_val.dtype, file=sio)
print(" value shape (new <space> old):", npy_new_val.shape, print(" value shape (new <space> old):", npy_new_val.shape,
...@@ -356,13 +356,13 @@ class BadDestroyMap(DebugModeError): ...@@ -356,13 +356,13 @@ class BadDestroyMap(DebugModeError):
print(" value min (new-old):", delta.min(), file=sio) print(" value min (new-old):", delta.min(), file=sio)
print(" value max (new-old):", delta.max(), file=sio) print(" value max (new-old):", delta.max(), file=sio)
print(" value argmin (new-old):", print(" value argmin (new-old):",
numpy.unravel_index(delta.argmin(), npy_new_val.shape), np.unravel_index(delta.argmin(), npy_new_val.shape),
file=sio) file=sio)
print(" value argmax (new-old):", print(" value argmax (new-old):",
numpy.unravel_index(delta.argmax(), npy_new_val.shape), np.unravel_index(delta.argmax(), npy_new_val.shape),
file=sio) file=sio)
print(" location of first 10 mismatches:", print(" location of first 10 mismatches:",
numpy.transpose(numpy.nonzero(delta))[:10], file=sio) np.transpose(np.nonzero(delta))[:10], file=sio)
print("", file=sio) print("", file=sio)
except Exception as e: except Exception as e:
print("(Numpy-hints failed with: %s)" % str(e), file=sio) print("(Numpy-hints failed with: %s)" % str(e), file=sio)
...@@ -453,7 +453,7 @@ class InvalidValueError(DebugModeError): ...@@ -453,7 +453,7 @@ class InvalidValueError(DebugModeError):
v_dtype = v.dtype v_dtype = v.dtype
v_min = v.min() v_min = v.min()
v_max = v.max() v_max = v.max()
v_isfinite = numpy.all(numpy.isfinite(v)) v_isfinite = np.all(np.isfinite(v))
except Exception: except Exception:
pass pass
client_node = self.client_node client_node = self.client_node
...@@ -1025,7 +1025,7 @@ def _lessbroken_deepcopy(a): ...@@ -1025,7 +1025,7 @@ def _lessbroken_deepcopy(a):
# this exists because copy.deepcopy on numpy arrays is broken # this exists because copy.deepcopy on numpy arrays is broken
# This logic is also in link.py # This logic is also in link.py
from theano.gof.type import _cdata_type from theano.gof.type import _cdata_type
if type(a) in (numpy.ndarray, numpy.memmap): if type(a) in (np.ndarray, np.memmap):
rval = a.copy() rval = a.copy()
elif type(a) is _cdata_type: elif type(a) is _cdata_type:
# This is not copyable (and should be used for constant data). # This is not copyable (and should be used for constant data).
...@@ -1034,7 +1034,7 @@ def _lessbroken_deepcopy(a): ...@@ -1034,7 +1034,7 @@ def _lessbroken_deepcopy(a):
rval = copy.deepcopy(a) rval = copy.deepcopy(a)
assert type(rval) == type(a), (type(rval), type(a)) assert type(rval) == type(a), (type(rval), type(a))
if isinstance(rval, numpy.ndarray): if isinstance(rval, np.ndarray):
assert rval.dtype == a.dtype assert rval.dtype == a.dtype
return rval return rval
...@@ -1241,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1241,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# There is no risk to overwrite inputs, since r does not work # There is no risk to overwrite inputs, since r does not work
# inplace. # inplace.
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, CudaNdarrayType)):
reuse_outputs[r][...] = numpy.asarray( reuse_outputs[r][...] = np.asarray(
def_val).astype(r.type.dtype) def_val).astype(r.type.dtype)
if reuse_outputs: if reuse_outputs:
...@@ -1259,7 +1259,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1259,7 +1259,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
new_buf = r.type.value_zeros(r_vals[r].shape) new_buf = r.type.value_zeros(r_vals[r].shape)
# CudaNdarray don't have flags field # CudaNdarray don't have flags field
# assert new_buf.flags["C_CONTIGUOUS"] # assert new_buf.flags["C_CONTIGUOUS"]
new_buf[...] = numpy.asarray(def_val).astype(r.type.dtype) new_buf[...] = np.asarray(def_val).astype(r.type.dtype)
c_cont_outputs[r] = new_buf c_cont_outputs[r] = new_buf
...@@ -1273,7 +1273,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1273,7 +1273,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
f_cont_outputs = {} f_cont_outputs = {}
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, CudaNdarrayType)):
new_buf = numpy.zeros( new_buf = np.zeros(
shape=r_vals[r].shape, shape=r_vals[r].shape,
dtype=r_vals[r].dtype, dtype=r_vals[r].dtype,
order='F') order='F')
...@@ -1331,7 +1331,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1331,7 +1331,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
else: else:
buf_shape.append(s * 2) buf_shape.append(s * 2)
new_buf = r.type.value_zeros(buf_shape) new_buf = r.type.value_zeros(buf_shape)
new_buf[...] = numpy.asarray(def_val).astype(r.type.dtype) new_buf[...] = np.asarray(def_val).astype(r.type.dtype)
init_strided[r] = new_buf init_strided[r] = new_buf
# The number of combinations is exponential in the number of # The number of combinations is exponential in the number of
...@@ -1377,7 +1377,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1377,7 +1377,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
r_buf = r_buf[tuple(strides)][tuple(shapes)] r_buf = r_buf[tuple(strides)][tuple(shapes)]
assert r_buf.shape == r_vals[r].shape assert r_buf.shape == r_vals[r].shape
r_buf[...] = numpy.asarray(def_val).astype(r_buf.dtype) r_buf[...] = np.asarray(def_val).astype(r_buf.dtype)
strided[r] = r_buf strided[r] = r_buf
if strided: if strided:
...@@ -1405,7 +1405,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1405,7 +1405,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for s, sd in zip(r_vals[r].shape, for s, sd in zip(r_vals[r].shape,
r_shape_diff)] r_shape_diff)]
new_buf = r.type.value_zeros(out_shape) new_buf = r.type.value_zeros(out_shape)
new_buf[...] = numpy.asarray( new_buf[...] = np.asarray(
def_val).astype(r.type.dtype) def_val).astype(r.type.dtype)
wrong_size[r] = new_buf wrong_size[r] = new_buf
...@@ -2261,7 +2261,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -2261,7 +2261,7 @@ class _Linker(gof.link.LocalLinker):
# HACK TO LOOK LIKE A REAL DESTRUCTIVE ACTION # HACK TO LOOK LIKE A REAL DESTRUCTIVE ACTION
# TOOK PLACE # TOOK PLACE
if ((type(dr_vals[r][0]) in if ((type(dr_vals[r][0]) in
(numpy.ndarray, numpy.memmap)) and (np.ndarray, np.memmap)) and
(dr_vals[r][0].dtype == (dr_vals[r][0].dtype ==
storage_map[r][0].dtype) and storage_map[r][0].dtype) and
(dr_vals[r][0].shape == (dr_vals[r][0].shape ==
......
...@@ -13,7 +13,7 @@ from six import string_types ...@@ -13,7 +13,7 @@ from six import string_types
from theano.compile.io import In from theano.compile.io import In
from theano.compile.function_module import orig_function from theano.compile.function_module import orig_function
from theano.compile.pfunc import pfunc from theano.compile.pfunc import pfunc
from numpy import any import numpy as np
import warnings import warnings
from theano import compat from theano import compat
...@@ -286,7 +286,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -286,7 +286,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
"input.") "input.")
# compute some features of the arguments: # compute some features of the arguments:
uses_tuple = any([isinstance(i, (list, tuple)) for i in inputs]) uses_tuple = np.any([isinstance(i, (list, tuple)) for i in inputs])
uses_updates = bool(updates) uses_updates = bool(updates)
uses_givens = bool(givens) uses_givens = bool(givens)
......
...@@ -12,13 +12,14 @@ import six.moves.cPickle as pickle ...@@ -12,13 +12,14 @@ import six.moves.cPickle as pickle
from itertools import chain from itertools import chain
import time import time
import warnings import warnings
import numpy import numpy as np
import theano import theano
from theano import config, gof from theano import config, gof
from theano.compat import izip from theano.compat import izip
from theano.gof import graph from theano.gof import graph
import theano.compile.mode import theano.compile.mode
import theano.compile.profiling
from theano.compile.io import ( from theano.compile.io import (
In, SymbolicInput, SymbolicOutput) In, SymbolicInput, SymbolicOutput)
from theano.compile.ops import deep_copy_op, view_op from theano.compile.ops import deep_copy_op, view_op
...@@ -663,7 +664,7 @@ class Function(object): ...@@ -663,7 +664,7 @@ class Function(object):
input_storage = [i.value for i in ins] input_storage = [i.value for i in ins]
# reinitialize new maker and create new function # reinitialize new maker and create new function
if profile is None: if profile is None:
profile = config.profile profile = config.profile or config.print_global_stats
# profile -> True or False # profile -> True or False
if profile is True: if profile is True:
if name: if name:
...@@ -749,6 +750,12 @@ class Function(object): ...@@ -749,6 +750,12 @@ class Function(object):
List of outputs on indices/keys from ``output_subset`` or all of them, List of outputs on indices/keys from ``output_subset`` or all of them,
if ``output_subset`` is not passed. if ``output_subset`` is not passed.
""" """
def restore_defaults():
    """Write the refeed-able default values back into the inputs.

    Walks ``self.defaults`` and, for every entry whose ``refeed`` flag
    is set, assigns its value to the input at the same index. A default
    held in a ``gof.Container`` is unwrapped to the stored object first.
    """
    for index, default in enumerate(self.defaults):
        _required, refeed, value = default
        if not refeed:
            continue
        # Containers hold the actual value in their one-slot storage.
        if isinstance(value, gof.Container):
            value = value.storage[0]
        self[index] = value
profile = self.profile profile = self.profile
t0 = time.time() t0 = time.time()
...@@ -804,6 +811,7 @@ class Function(object): ...@@ -804,6 +811,7 @@ class Function(object):
e.args = ("Bad input " + argument_name + " to " + e.args = ("Bad input " + argument_name + " to " +
function_name + " at index %d (0-based). %s" function_name + " at index %d (0-based). %s"
% (i, where),) + e.args % (i, where),) + e.args
restore_defaults()
raise raise
s.provided += 1 s.provided += 1
i += 1 i += 1
...@@ -829,9 +837,9 @@ class Function(object): ...@@ -829,9 +837,9 @@ class Function(object):
in args_share_memory[j]], in args_share_memory[j]],
[self.input_storage[k].storage[0] for k [self.input_storage[k].storage[0] for k
in args_share_memory[j]]) in args_share_memory[j]])
if numpy.any([(var.type is i_var.type and if np.any([(var.type is i_var.type and
var.type.may_share_memory(val, i_val)) var.type.may_share_memory(val, i_val))
for (var, val) in group_j]): for (var, val) in group_j]):
is_aliased = True is_aliased = True
args_share_memory[j].append(i) args_share_memory[j].append(i)
...@@ -853,14 +861,17 @@ class Function(object): ...@@ -853,14 +861,17 @@ class Function(object):
if not self.trust_input: if not self.trust_input:
for c in self.input_storage: for c in self.input_storage:
if c.required and not c.provided: if c.required and not c.provided:
restore_defaults()
raise TypeError("Missing required input: %s" % raise TypeError("Missing required input: %s" %
getattr(self.inv_finder[c], 'variable', getattr(self.inv_finder[c], 'variable',
self.inv_finder[c])) self.inv_finder[c]))
if c.provided > 1: if c.provided > 1:
restore_defaults()
raise TypeError("Multiple values for input: %s" % raise TypeError("Multiple values for input: %s" %
getattr(self.inv_finder[c], 'variable', getattr(self.inv_finder[c], 'variable',
self.inv_finder[c])) self.inv_finder[c]))
if c.implicit and c.provided > 0: if c.implicit and c.provided > 0:
restore_defaults()
raise TypeError( raise TypeError(
'Tried to provide value for implicit input: %s' 'Tried to provide value for implicit input: %s'
% getattr(self.inv_finder[c], 'variable', % getattr(self.inv_finder[c], 'variable',
...@@ -873,6 +884,7 @@ class Function(object): ...@@ -873,6 +884,7 @@ class Function(object):
self.fn() if output_subset is None else\ self.fn() if output_subset is None else\
self.fn(output_subset=output_subset) self.fn(output_subset=output_subset)
except Exception: except Exception:
restore_defaults()
if hasattr(self.fn, 'position_of_error'): if hasattr(self.fn, 'position_of_error'):
# this is a new vm-provided function or c linker # this is a new vm-provided function or c linker
# they need this because the exception manipulation # they need this because the exception manipulation
...@@ -925,11 +937,7 @@ class Function(object): ...@@ -925,11 +937,7 @@ class Function(object):
outputs = outputs[:self.n_returned_outputs] outputs = outputs[:self.n_returned_outputs]
# Put default values back in the storage # Put default values back in the storage
for i, (required, refeed, value) in enumerate(self.defaults): restore_defaults()
if refeed:
if isinstance(value, gof.Container):
value = value.storage[0]
self[i] = value
# #
# NOTE: This logic needs to be replicated in # NOTE: This logic needs to be replicated in
# scan. # scan.
...@@ -937,6 +945,7 @@ class Function(object): ...@@ -937,6 +945,7 @@ class Function(object):
# #
dt_call = time.time() - t0 dt_call = time.time() - t0
theano.compile.profiling.total_fct_exec_time += dt_call
self.maker.mode.call_time += dt_call self.maker.mode.call_time += dt_call
if profile: if profile:
profile.fct_callcount += 1 profile.fct_callcount += 1
...@@ -1019,9 +1028,9 @@ def _pickle_Function(f): ...@@ -1019,9 +1028,9 @@ def _pickle_Function(f):
all_data = input_storage + inputs_data all_data = input_storage + inputs_data
for i, d_i in enumerate(all_data): for i, d_i in enumerate(all_data):
for j, d_j in enumerate(all_data): for j, d_j in enumerate(all_data):
if ((i < j) and isinstance(d_i, numpy.ndarray) and if ((i < j) and isinstance(d_i, np.ndarray) and
isinstance(d_j, numpy.ndarray)): isinstance(d_j, np.ndarray)):
if numpy.may_share_memory(d_i, d_j): if np.may_share_memory(d_i, d_j):
if f.pickle_aliased_memory_strategy == 'warn': if f.pickle_aliased_memory_strategy == 'warn':
_logger.warning('aliased relationship between ' _logger.warning('aliased relationship between '
'Function arguments %s, %s ' 'Function arguments %s, %s '
...@@ -1041,7 +1050,7 @@ def _constructor_Function(maker, input_storage, inputs_data): ...@@ -1041,7 +1050,7 @@ def _constructor_Function(maker, input_storage, inputs_data):
assert len(f.input_storage) == len(inputs_data) assert len(f.input_storage) == len(inputs_data)
for container, x in zip(f.input_storage, inputs_data): for container, x in zip(f.input_storage, inputs_data):
assert (container.data is x) or \ assert (container.data is x) or \
(isinstance(x, numpy.ndarray) and (container.data == x).all()) or \ (isinstance(x, np.ndarray) and (container.data == x).all()) or \
(container.data == x) (container.data == x)
return f return f
...@@ -1466,6 +1475,7 @@ class FunctionMaker(object): ...@@ -1466,6 +1475,7 @@ class FunctionMaker(object):
end_optimizer = time.time() end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer opt_time = end_optimizer - start_optimizer
theano.compile.profiling.total_graph_opt_time += opt_time
if profile: if profile:
profile.optimizer_time += opt_time profile.optimizer_time += opt_time
if theano.config.profile_optimizer: if theano.config.profile_optimizer:
...@@ -1655,6 +1665,7 @@ class FunctionMaker(object): ...@@ -1655,6 +1665,7 @@ class FunctionMaker(object):
end_linker = time.time() end_linker = time.time()
linker_time = end_linker - start_linker linker_time = end_linker - start_linker
theano.compile.profiling.total_time_linker += linker_time
_logger.debug('Linker took %f seconds', linker_time) _logger.debug('Linker took %f seconds', linker_time)
if self.profile: if self.profile:
self.profile.linker_time += linker_time self.profile.linker_time += linker_time
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
# Note: this code was initially copied from the 'pyutools' package by its # Note: this code was initially copied from the 'pyutools' package by its
# original author, and re-licensed under Theano's license. # original author, and re-licensed under Theano's license.
import numpy import numpy as np
import theano import theano
from theano.compile.mode import Mode from theano.compile.mode import Mode
...@@ -93,8 +93,8 @@ class MonitorMode(Mode): ...@@ -93,8 +93,8 @@ class MonitorMode(Mode):
def detect_nan(i, node, fn): def detect_nan(i, node, fn):
for output in fn.outputs: for output in fn.outputs:
if (not isinstance(output[0], numpy.random.RandomState) and if (not isinstance(output[0], np.random.RandomState) and
numpy.isnan(output[0]).any()): np.isnan(output[0]).any()):
print('*** NaN detected ***') print('*** NaN detected ***')
theano.printing.debugprint(node) theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs]) print('Inputs : %s' % [input[0] for input in fn.inputs])
......
...@@ -17,7 +17,7 @@ from six import iteritems, integer_types ...@@ -17,7 +17,7 @@ from six import iteritems, integer_types
from six.moves import xrange from six.moves import xrange
import numpy import numpy as np
def register_view_op_c_code(type, code, version=()): def register_view_op_c_code(type, code, version=()):
...@@ -338,7 +338,7 @@ class Shape_i(gof.Op): ...@@ -338,7 +338,7 @@ class Shape_i(gof.Op):
def __init__(self, i): def __init__(self, i):
# As i will be used in the hash and that ndarray are not hashable, # As i will be used in the hash and that ndarray are not hashable,
# we need to convert it to an int as it is hashable. # we need to convert it to an int as it is hashable.
if isinstance(i, numpy.ndarray): if isinstance(i, np.ndarray):
assert i.dtype in theano.tensor.integer_dtypes assert i.dtype in theano.tensor.integer_dtypes
assert i == int(i) assert i == int(i)
i = int(i) i = int(i)
...@@ -665,11 +665,11 @@ class Rebroadcast(gof.Op): ...@@ -665,11 +665,11 @@ class Rebroadcast(gof.Op):
items = sorted(axis) items = sorted(axis)
self.axis = OrderedDict(items) self.axis = OrderedDict(items)
for axis, broad in iteritems(self.axis): for axis, broad in iteritems(self.axis):
if not isinstance(axis, (numpy.integer, integer_types)): if not isinstance(axis, (np.integer, integer_types)):
raise TypeError("Rebroadcast needs integer axes. " raise TypeError("Rebroadcast needs integer axes. "
"Got {}".format(axis)) "Got {}".format(axis))
if not isinstance(broad, (numpy.bool_, bool)): if not isinstance(broad, (np.bool_, bool)):
raise TypeError("Rebroadcast needs bool for new broadcast " raise TypeError("Rebroadcast needs bool for new broadcast "
"pattern. Got {}".format(broad)) "pattern. Got {}".format(broad))
...@@ -835,8 +835,8 @@ class SpecifyShape(gof.Op): ...@@ -835,8 +835,8 @@ class SpecifyShape(gof.Op):
x, shape = inp x, shape = inp
out, = out_ out, = out_
assert x.ndim == shape.size assert x.ndim == shape.size
assert numpy.all(x.shape == shape), ("got shape", x.shape, assert np.all(x.shape == shape), ("got shape", x.shape,
"expected", shape) "expected", shape)
out[0] = x out[0] = x
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
......
...@@ -364,7 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None, ...@@ -364,7 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if givens is None: if givens is None:
givens = [] givens = []
if profile is None: if profile is None:
profile = config.profile profile = config.profile or config.print_global_stats
# profile -> True or False # profile -> True or False
if profile is False: if profile is False:
profile = None profile = None
......
...@@ -27,7 +27,7 @@ import sys ...@@ -27,7 +27,7 @@ import sys
import time import time
from collections import defaultdict from collections import defaultdict
import numpy import numpy as np
import theano import theano
from six import iteritems from six import iteritems
...@@ -36,6 +36,9 @@ from theano.gof import graph ...@@ -36,6 +36,9 @@ from theano.gof import graph
logger = logging.getLogger('theano.compile.profiling') logger = logging.getLogger('theano.compile.profiling')
theano_imported_time = time.time() theano_imported_time = time.time()
total_fct_exec_time = 0.
total_graph_opt_time = 0.
total_time_linker = 0.
config = theano.config config = theano.config
_atexit_print_list = [] _atexit_print_list = []
...@@ -47,7 +50,80 @@ def _atexit_print_fn(): ...@@ -47,7 +50,80 @@ def _atexit_print_fn():
Print ProfileStat objects in _atexit_print_list to _atexit_print_file. Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
""" """
to_sum = [] if config.profile:
to_sum = []
if config.profiling.destination == 'stderr':
destination_file = sys.stderr
elif config.profiling.destination == 'stdout':
destination_file = sys.stdout
else:
destination_file = open(config.profiling.destination, 'w')
# Reverse sort in the order of compile+exec time
for ps in sorted(_atexit_print_list,
key=lambda a:a.compile_time + a.fct_call_time)[::-1]:
if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if not isinstance(ps, ScanProfileStats):
to_sum.append(ps)
else:
# TODO print the name if there is one!
print('Skipping empty Profile')
if len(to_sum) > 1:
# Make a global profile
cum = copy.copy(to_sum[0])
msg = ("Sum of all(%d) printed profiles at exit excluding Scan op"
" profile." % len(to_sum))
cum.message = msg
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictionaries
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e:
print("Got an exception while merging profile")
print(e)
cum.optimizer_profile = None
else:
cum.optimizer_profile = None
cum.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if config.print_global_stats:
print_global_stats()
def print_global_stats():
"""
Print the following stats:
-- Time elapsed since Theano was imported
-- Time spent inside Theano functions
-- Time spent in compiling Theano functions
-- on graph optimization
-- on linker
"""
if config.profiling.destination == 'stderr': if config.profiling.destination == 'stderr':
destination_file = sys.stderr destination_file = sys.stderr
...@@ -56,57 +132,18 @@ def _atexit_print_fn(): ...@@ -56,57 +132,18 @@ def _atexit_print_fn():
else: else:
destination_file = open(config.profiling.destination, 'w') destination_file = open(config.profiling.destination, 'w')
# Reverse sort in the order of compile+exec time print('='*50, file=destination_file)
for ps in sorted(_atexit_print_list, print('Global stats: ',
key=lambda a:a.compile_time + a.fct_call_time)[::-1]: 'Time elasped since Theano import = %6.3fs, '
if ps.fct_callcount >= 1 or ps.compile_time > 1: 'Time spent in Theano functions = %6.3fs, '
ps.summary(file=destination_file, 'Time spent compiling Theano functions: '
n_ops_to_print=config.profiling.n_ops, ' optimzation = %6.3fs, linker = %6.3fs ' %
n_apply_to_print=config.profiling.n_apply) (time.time() - theano_imported_time,
if not isinstance(ps, ScanProfileStats): total_fct_exec_time,
to_sum.append(ps) total_graph_opt_time,
else: total_time_linker),
# TODO print the name if there is one! file=destination_file)
print('Skipping empty Profile') print('='*50, file=destination_file)
if len(to_sum) > 1:
# Make a global profile
cum = copy.copy(to_sum[0])
msg = ("Sum of all(%d) printed profiles at exit excluding Scan op"
" profile." % len(to_sum))
cum.message = msg
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictonary
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e:
print("Got an exception while merging profile")
print(e)
cum.optimizer_profile = None
else:
cum.optimizer_profile = None
cum.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
class ProfileStats(object): class ProfileStats(object):
...@@ -440,7 +477,7 @@ class ProfileStats(object): ...@@ -440,7 +477,7 @@ class ProfileStats(object):
hs += ['<#apply>'] hs += ['<#apply>']
es += [' %4d '] es += [' %4d ']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs) upto_length = np.sum([len(x) for x in hs]) + len(hs)
maxlen = max(self.line_width - upto_length, 0) maxlen = max(self.line_width - upto_length, 0)
hs += ['<Class name>'] hs += ['<Class name>']
es += ['%s'] es += ['%s']
...@@ -522,7 +559,7 @@ class ProfileStats(object): ...@@ -522,7 +559,7 @@ class ProfileStats(object):
hs += ['<#apply>'] hs += ['<#apply>']
es += [' %4d '] es += [' %4d ']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs) upto_length = np.sum([len(x) for x in hs]) + len(hs)
maxlen = max(self.line_width - upto_length, 0) maxlen = max(self.line_width - upto_length, 0)
hs += ['<Op name>'] hs += ['<Op name>']
es += ['%s'] es += ['%s']
...@@ -590,7 +627,7 @@ class ProfileStats(object): ...@@ -590,7 +627,7 @@ class ProfileStats(object):
if self.variable_shape: if self.variable_shape:
hs += ['<Mflops>', '<Gflops/s>'] hs += ['<Mflops>', '<Gflops/s>']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs) upto_length = np.sum([len(x) for x in hs]) + len(hs)
maxlen = max(self.line_width - upto_length, 0) maxlen = max(self.line_width - upto_length, 0)
hs += ['<Apply name>'] hs += ['<Apply name>']
es += ['%s'] es += ['%s']
...@@ -892,7 +929,7 @@ class ProfileStats(object): ...@@ -892,7 +929,7 @@ class ProfileStats(object):
node_list = list(node_list) node_list = list(node_list)
mem_count = 0 mem_count = 0
max_mem_count = 0 max_mem_count = 0
mem_bound = numpy.inf mem_bound = np.inf
# This take only the inputs/outputs dependencies. # This take only the inputs/outputs dependencies.
dependencies = fgraph.profile.dependencies dependencies = fgraph.profile.dependencies
done_set = set([]) done_set = set([])
......
...@@ -9,7 +9,7 @@ import copy ...@@ -9,7 +9,7 @@ import copy
import logging import logging
# Third-party imports # Third-party imports
import numpy import numpy as np
# Theano imports # Theano imports
from theano.gof import Container, Variable, generic, utils from theano.gof import Container, Variable, generic, utils
...@@ -120,6 +120,31 @@ class SharedVariable(Variable): ...@@ -120,6 +120,31 @@ class SharedVariable(Variable):
Changes to this value will be visible to all functions using Changes to this value will be visible to all functions using
this SharedVariable. this SharedVariable.
Notes
-----
``set_value`` will work in-place on the GPU if
the following conditions are met:
* The destination on the GPU must be c_contiguous.
* The source is on the CPU.
* The old value must have the same dtype as the new value
(which is a given for now, since only float32 is
supported).
* The old and new value must have the same shape.
* The old value is being completely replaced by the new
value (not partially modified, e.g. by replacing some
subtensor of it).
* You change the value of the shared variable via
set_value, not via the .value accessors. You should not
use the .value accessors anyway, since they will soon be
deprecated and removed.
It is also worth mentioning that, for efficient transfer to the GPU,
Theano will make the new data ``c_contiguous``. This can require an
extra copy of the data on the host.
The in-place update of GPU memory works whether borrow is True or False.
""" """
if borrow: if borrow:
self.container.value = new_value self.container.value = new_value
...@@ -162,7 +187,7 @@ class SharedVariable(Variable): ...@@ -162,7 +187,7 @@ class SharedVariable(Variable):
# implemented at all, but with a more explicit error message to help # implemented at all, but with a more explicit error message to help
# Theano users figure out the root of the problem more easily. # Theano users figure out the root of the problem more easily.
value = self.get_value(borrow=True) value = self.get_value(borrow=True)
if isinstance(value, numpy.ndarray): if isinstance(value, np.ndarray):
# Array probably had an unknown dtype. # Array probably had an unknown dtype.
msg = ("a Numpy array with dtype: '%s'. This data type is not " msg = ("a Numpy array with dtype: '%s'. This data type is not "
"currently recognized by Theano tensors: please cast " "currently recognized by Theano tensors: please cast "
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
from theano import config, shared from theano import config, shared
...@@ -23,14 +23,14 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -23,14 +23,14 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
f = op(x, y, z) - op(y, z, x) f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5 zv = np.ones((2, 2), dtype=config.floatX) * 5
# print function, function.__module__ # print function, function.__module__
# print fn.maker.fgraph.toposort() # print fn.maker.fgraph.toposort()
fn(xv, yv, zv) fn(xv, yv, zv)
assert numpy.all(8.0 == fn(xv, yv, zv)) assert np.all(8.0 == fn(xv, yv, zv))
assert numpy.all(8.0 == fn(xv, yv, zv)) assert np.all(8.0 == fn(xv, yv, zv))
def test_size_changes(self): def test_size_changes(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
...@@ -38,15 +38,15 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -38,15 +38,15 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
op = OpFromGraph([x, y], [e]) op = OpFromGraph([x, y], [e])
f = op(x, op(y, z)) f = op(x, op(y, z))
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 3), dtype=config.floatX) xv = np.ones((2, 3), dtype=config.floatX)
yv = numpy.ones((3, 4), dtype=config.floatX) * 3 yv = np.ones((3, 4), dtype=config.floatX) * 3
zv = numpy.ones((4, 5), dtype=config.floatX) * 5 zv = np.ones((4, 5), dtype=config.floatX) * 5
res = fn(xv, yv, zv) res = fn(xv, yv, zv)
assert res.shape == (2, 5) assert res.shape == (2, 5)
assert numpy.all(180.0 == res) assert np.all(180.0 == res)
res = fn(xv, yv, zv) res = fn(xv, yv, zv)
assert res.shape == (2, 5) assert res.shape == (2, 5)
assert numpy.all(180.0 == res) assert np.all(180.0 == res)
def test_grad(self): def test_grad(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
...@@ -55,10 +55,10 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -55,10 +55,10 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
f = op(x, y, z) f = op(x, y, z)
f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5 zv = np.ones((2, 2), dtype=config.floatX) * 5
assert numpy.all(11.0 == fn(xv, yv, zv)) assert np.all(11.0 == fn(xv, yv, zv))
def test_grad_grad(self): def test_grad_grad(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
...@@ -68,47 +68,47 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -68,47 +68,47 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y)
f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5 zv = np.ones((2, 2), dtype=config.floatX) * 5
assert numpy.allclose(6.0, fn(xv, yv, zv)) assert np.allclose(6.0, fn(xv, yv, zv))
def test_shared(self): def test_shared(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
s = shared(numpy.random.rand(2, 2).astype(config.floatX)) s = shared(np.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s e = x + y * z + s
op = OpFromGraph([x, y, z], [e]) op = OpFromGraph([x, y, z], [e])
# (1+3*5=array of 16) - (3+1*5=array of 8) # (1+3*5=array of 16) - (3+1*5=array of 8)
f = op(x, y, z) - op(y, z, x) f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5 zv = np.ones((2, 2), dtype=config.floatX) * 5
# print function, function.__module__ # print function, function.__module__
# print fn.maker.fgraph.toposort() # print fn.maker.fgraph.toposort()
assert numpy.allclose(8.0, fn(xv, yv, zv)) assert np.allclose(8.0, fn(xv, yv, zv))
assert numpy.allclose(8.0, fn(xv, yv, zv)) assert np.allclose(8.0, fn(xv, yv, zv))
def test_shared_grad(self): def test_shared_grad(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
s = shared(numpy.random.rand(2, 2).astype(config.floatX)) s = shared(np.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s e = x + y * z + s
op = OpFromGraph([x, y, z], [e]) op = OpFromGraph([x, y, z], [e])
f = op(x, y, z) f = op(x, y, z)
f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX) xv = np.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3 yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5 zv = np.ones((2, 2), dtype=config.floatX) * 5
assert numpy.allclose(11.0 + s.get_value(), fn(xv, yv, zv)) assert np.allclose(11.0 + s.get_value(), fn(xv, yv, zv))
# grad again the shared variable # grad again the shared variable
f = op(x, y, z) f = op(x, y, z)
f = f - T.grad(T.sum(f), s) f = f - T.grad(T.sum(f), s)
fn = function([x, y, z], f) fn = function([x, y, z], f)
assert numpy.allclose(15.0 + s.get_value(), assert np.allclose(15.0 + s.get_value(),
fn(xv, yv, zv)) fn(xv, yv, zv))
def test_connection_pattern(self): def test_connection_pattern(self):
# Basic case # Basic case
...@@ -163,6 +163,6 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -163,6 +163,6 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
p = T.matrix('p') p = T.matrix('p')
self._compile_and_check([q, p], self._compile_and_check([q, p],
op_graph(q, p), op_graph(q, p),
[numpy.ones([3, 4], dtype=config.floatX), [np.ones([3, 4], dtype=config.floatX),
numpy.ones([3, 4], dtype=config.floatX)], np.ones([3, 4], dtype=config.floatX)],
OpFromGraph) OpFromGraph)
...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division ...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import unittest import unittest
import numpy import numpy as np
from theano import config from theano import config
from theano import gof from theano import gof
...@@ -316,7 +316,7 @@ def test_just_c_code(): ...@@ -316,7 +316,7 @@ def test_just_c_code():
x = theano.tensor.dvector() x = theano.tensor.dvector()
f = theano.function([x], wb2(x), f = theano.function([x], wb2(x),
mode=debugmode.DebugMode(check_py_code=False)) mode=debugmode.DebugMode(check_py_code=False))
assert numpy.all(f([1, 2]) == [2, 4]) assert np.all(f([1, 2]) == [2, 4])
def test_baddestroymap(): def test_baddestroymap():
...@@ -349,7 +349,7 @@ def test_baddestroymap_c(): ...@@ -349,7 +349,7 @@ def test_baddestroymap_c():
f = theano.function([x], wb2i(x), f = theano.function([x], wb2i(x),
mode=debugmode.DebugMode(check_py_code=False)) mode=debugmode.DebugMode(check_py_code=False))
try: try:
assert numpy.all(f([1, 2]) == [2, 4]) assert np.all(f([1, 2]) == [2, 4])
assert False # failed to raise error assert False # failed to raise error
except debugmode.BadDestroyMap: except debugmode.BadDestroyMap:
pass pass
...@@ -445,8 +445,8 @@ class Test_ViewMap(unittest.TestCase): ...@@ -445,8 +445,8 @@ class Test_ViewMap(unittest.TestCase):
r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8]) r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [1, 2, 3, 4]) assert np.all(r0 == [1, 2, 3, 4])
assert numpy.all(r1 == [2, 3, 4]) assert np.all(r1 == [2, 3, 4])
def test_aliased_outputs_ok_output(self): def test_aliased_outputs_ok_output(self):
# here aliased outputs is ok because they are both outputs of the # here aliased outputs is ok because they are both outputs of the
...@@ -470,8 +470,8 @@ class Test_ViewMap(unittest.TestCase): ...@@ -470,8 +470,8 @@ class Test_ViewMap(unittest.TestCase):
r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8]) r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [2, 4, 6, 8]) assert np.all(r0 == [2, 4, 6, 8])
assert numpy.all(r1 == [4, 6, 8]) assert np.all(r1 == [4, 6, 8])
def test_aliased_outputs_ok_shadow(self): def test_aliased_outputs_ok_shadow(self):
# here the alias between outputs is ok because one of them is not used # here the alias between outputs is ok because one of them is not used
...@@ -496,7 +496,7 @@ class Test_ViewMap(unittest.TestCase): ...@@ -496,7 +496,7 @@ class Test_ViewMap(unittest.TestCase):
r0 = f([1, 2, 3, 4], [5, 6, 7, 8]) r0 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [2, 4, 6, 8]) assert np.all(r0 == [2, 4, 6, 8])
def test_aliased_outputs_bad(self): def test_aliased_outputs_bad(self):
# here the alias between outputs is not ok because destroying one # here the alias between outputs is not ok because destroying one
...@@ -555,31 +555,31 @@ class Test_check_isfinite(unittest.TestCase): ...@@ -555,31 +555,31 @@ class Test_check_isfinite(unittest.TestCase):
g = theano.function([x], theano.tensor.log(x), mode='DEBUG_MODE') g = theano.function([x], theano.tensor.log(x), mode='DEBUG_MODE')
# this should work # this should work
f(numpy.log([3, 4, 5]).astype(config.floatX)) f(np.log([3, 4, 5]).astype(config.floatX))
# if TensorType.filter_checks_isfinite were true, these would raise # if TensorType.filter_checks_isfinite were true, these would raise
# ValueError # ValueError
# if not, DebugMode will check internally, and raise InvalidValueError # if not, DebugMode will check internally, and raise InvalidValueError
# passing an invalid value as an input should trigger ValueError # passing an invalid value as an input should trigger ValueError
self.assertRaises(debugmode.InvalidValueError, f, self.assertRaises(debugmode.InvalidValueError, f,
numpy.log([3, -4, 5]).astype(config.floatX)) np.log([3, -4, 5]).astype(config.floatX))
self.assertRaises(debugmode.InvalidValueError, f, self.assertRaises(debugmode.InvalidValueError, f,
(numpy.asarray([0, 1.0, 0]) / 0).astype(config.floatX)) (np.asarray([0, 1.0, 0]) / 0).astype(config.floatX))
self.assertRaises(debugmode.InvalidValueError, f, self.assertRaises(debugmode.InvalidValueError, f,
(numpy.asarray([1.0, 1.0, 1.0]) / 0).astype(config.floatX)) (np.asarray([1.0, 1.0, 1.0]) / 0).astype(config.floatX))
# generating an invalid value internally should trigger # generating an invalid value internally should trigger
# InvalidValueError # InvalidValueError
self.assertRaises(debugmode.InvalidValueError, g, self.assertRaises(debugmode.InvalidValueError, g,
numpy.asarray([3, -4, 5], dtype=config.floatX)) np.asarray([3, -4, 5], dtype=config.floatX))
# this should disable the exception # this should disable the exception
theano.tensor.TensorType.filter_checks_isfinite = False theano.tensor.TensorType.filter_checks_isfinite = False
theano.compile.mode.predefined_modes[ theano.compile.mode.predefined_modes[
'DEBUG_MODE'].check_isfinite = False 'DEBUG_MODE'].check_isfinite = False
# insert several Inf # insert several Inf
f(numpy.asarray(numpy.asarray([1.0, 1.0, 1.0]) / 0, f(np.asarray(np.asarray([1.0, 1.0, 1.0]) / 0,
dtype=config.floatX)) dtype=config.floatX))
def test_check_isfinite_disabled(self): def test_check_isfinite_disabled(self):
x = theano.tensor.dvector() x = theano.tensor.dvector()
...@@ -587,10 +587,10 @@ class Test_check_isfinite(unittest.TestCase): ...@@ -587,10 +587,10 @@ class Test_check_isfinite(unittest.TestCase):
mode=debugmode.DebugMode(check_isfinite=False)) mode=debugmode.DebugMode(check_isfinite=False))
# nan should go through # nan should go through
f(numpy.log([3, -4, 5])) f(np.log([3, -4, 5]))
# inf should go through # inf should go through
infs = numpy.asarray([1.0, 1., 1.]) / 0 infs = np.asarray([1.0, 1., 1.]) / 0
# print infs # print infs
f(infs) f(infs)
return return
...@@ -721,14 +721,14 @@ class VecAsRowAndCol(gof.Op): ...@@ -721,14 +721,14 @@ class VecAsRowAndCol(gof.Op):
class Test_preallocated_output(unittest.TestCase): class Test_preallocated_output(unittest.TestCase):
def setUp(self): def setUp(self):
self.rng = numpy.random.RandomState(seed=utt.fetch_seed()) self.rng = np.random.RandomState(seed=utt.fetch_seed())
def test_f_contiguous(self): def test_f_contiguous(self):
a = theano.tensor.fmatrix('a') a = theano.tensor.fmatrix('a')
b = theano.tensor.fmatrix('b') b = theano.tensor.fmatrix('b')
z = BrokenCImplementationAdd()(a, b) z = BrokenCImplementationAdd()(a, b)
# In this test, we do not want z to be an output of the graph. # In this test, we do not want z to be an output of the graph.
out = theano.tensor.dot(z, numpy.eye(7)) out = theano.tensor.dot(z, np.eye(7))
a_val = self.rng.randn(7, 7).astype('float32') a_val = self.rng.randn(7, 7).astype('float32')
b_val = self.rng.randn(7, 7).astype('float32') b_val = self.rng.randn(7, 7).astype('float32')
......
...@@ -5,7 +5,7 @@ import shutil ...@@ -5,7 +5,7 @@ import shutil
import tempfile import tempfile
import unittest import unittest
import numpy import numpy as np
import theano import theano
from theano.compile.io import In from theano.compile.io import In
...@@ -27,7 +27,7 @@ def test_function_dump(): ...@@ -27,7 +27,7 @@ def test_function_dump():
fct2 = theano.function(**l) fct2 = theano.function(**l)
x = [1, 2, 3] x = [1, 2, 3]
assert numpy.allclose(fct1(x), fct2(x)) assert np.allclose(fct1(x), fct2(x))
class TestFunctionIn(unittest.TestCase): class TestFunctionIn(unittest.TestCase):
...@@ -40,14 +40,14 @@ class TestFunctionIn(unittest.TestCase): ...@@ -40,14 +40,14 @@ class TestFunctionIn(unittest.TestCase):
f = theano.function([In(a, strict=False)], out) f = theano.function([In(a, strict=False)], out)
# works, rand generates float64 by default # works, rand generates float64 by default
f(numpy.random.rand(8)) f(np.random.rand(8))
# works, casting is allowed # works, casting is allowed
f(numpy.array([1, 2, 3, 4], dtype='int32')) f(np.array([1, 2, 3, 4], dtype='int32'))
f = theano.function([In(a, strict=True)], out) f = theano.function([In(a, strict=True)], out)
try: try:
# fails, f expects float64 # fails, f expects float64
f(numpy.array([1, 2, 3, 4], dtype='int32')) f(np.array([1, 2, 3, 4], dtype='int32'))
except TypeError: except TypeError:
pass pass
...@@ -70,17 +70,17 @@ class TestFunctionIn(unittest.TestCase): ...@@ -70,17 +70,17 @@ class TestFunctionIn(unittest.TestCase):
# using mutable=True will let f change the value in aval # using mutable=True will let f change the value in aval
f = theano.function([In(a, mutable=True)], a_out, mode='FAST_RUN') f = theano.function([In(a, mutable=True)], a_out, mode='FAST_RUN')
aval = numpy.random.rand(10) aval = np.random.rand(10)
aval2 = aval.copy() aval2 = aval.copy()
assert numpy.all(f(aval) == (aval2 * 2)) assert np.all(f(aval) == (aval2 * 2))
assert not numpy.all(aval == aval2) assert not np.all(aval == aval2)
# using mutable=False should leave the input untouched # using mutable=False should leave the input untouched
f = theano.function([In(a, mutable=False)], a_out, mode='FAST_RUN') f = theano.function([In(a, mutable=False)], a_out, mode='FAST_RUN')
aval = numpy.random.rand(10) aval = np.random.rand(10)
aval2 = aval.copy() aval2 = aval.copy()
assert numpy.all(f(aval) == (aval2 * 2)) assert np.all(f(aval) == (aval2 * 2))
assert numpy.all(aval == aval2) assert np.all(aval == aval2)
def test_in_update(self): def test_in_update(self):
a = theano.tensor.dscalar('a') a = theano.tensor.dscalar('a')
...@@ -115,7 +115,7 @@ class TestFunctionIn(unittest.TestCase): ...@@ -115,7 +115,7 @@ class TestFunctionIn(unittest.TestCase):
# changes occur at the same time and one doesn't overwrite the other. # changes occur at the same time and one doesn't overwrite the other.
for i in range(5): for i in range(5):
f() f()
assert numpy.allclose(shared_var.get_value(), i % 2) assert np.allclose(shared_var.get_value(), i % 2)
def test_in_allow_downcast_int(self): def test_in_allow_downcast_int(self):
a = theano.tensor.wvector('a') # int16 a = theano.tensor.wvector('a') # int16
...@@ -128,16 +128,16 @@ class TestFunctionIn(unittest.TestCase): ...@@ -128,16 +128,16 @@ class TestFunctionIn(unittest.TestCase):
# Both values are in range. Since they're not ndarrays (but lists), # Both values are in range. Since they're not ndarrays (but lists),
# they will be converted, and their value checked. # they will be converted, and their value checked.
assert numpy.all(f([3], [6], 1) == 10) assert np.all(f([3], [6], 1) == 10)
# Values are in range, but a dtype too large has explicitly been given # Values are in range, but a dtype too large has explicitly been given
# For performance reasons, no check of the data is explicitly performed # For performance reasons, no check of the data is explicitly performed
# (It might be OK to change this in the future.) # (It might be OK to change this in the future.)
self.assertRaises(TypeError, f, [3], numpy.array([6], dtype='int16'), self.assertRaises(TypeError, f, [3], np.array([6], dtype='int16'),
1) 1)
# Value too big for a, silently ignored # Value too big for a, silently ignored
assert numpy.all(f([2 ** 20], numpy.ones(1, dtype='int8'), 1) == 2) assert np.all(f([2 ** 20], np.ones(1, dtype='int8'), 1) == 2)
# Value too big for b, raises TypeError # Value too big for b, raises TypeError
self.assertRaises(TypeError, f, [3], [312], 1) self.assertRaises(TypeError, f, [3], [312], 1)
...@@ -156,17 +156,17 @@ class TestFunctionIn(unittest.TestCase): ...@@ -156,17 +156,17 @@ class TestFunctionIn(unittest.TestCase):
(a + b + c)) (a + b + c))
# If the values can be accurately represented, everything is OK # If the values can be accurately represented, everything is OK
assert numpy.all(f(0, 0, 0) == 0) assert np.all(f(0, 0, 0) == 0)
# If allow_downcast is True, idem # If allow_downcast is True, idem
assert numpy.allclose(f(0.1, 0, 0), 0.1) assert np.allclose(f(0.1, 0, 0), 0.1)
# If allow_downcast is False, nope # If allow_downcast is False, nope
self.assertRaises(TypeError, f, 0, 0.1, 0) self.assertRaises(TypeError, f, 0, 0.1, 0)
# If allow_downcast is None, it should work iff floatX=float32 # If allow_downcast is None, it should work iff floatX=float32
if theano.config.floatX == 'float32': if theano.config.floatX == 'float32':
assert numpy.allclose(f(0, 0, 0.1), 0.1) assert np.allclose(f(0, 0, 0.1), 0.1)
else: else:
self.assertRaises(TypeError, f, 0, 0, 0.1) self.assertRaises(TypeError, f, 0, 0, 0.1)
...@@ -182,10 +182,10 @@ class TestFunctionIn(unittest.TestCase): ...@@ -182,10 +182,10 @@ class TestFunctionIn(unittest.TestCase):
# If the values can be accurately represented, everything is OK # If the values can be accurately represented, everything is OK
z = [0] z = [0]
assert numpy.all(f(z, z, z) == 0) assert np.all(f(z, z, z) == 0)
# If allow_downcast is True, idem # If allow_downcast is True, idem
assert numpy.allclose(f([0.1], z, z), 0.1) assert np.allclose(f([0.1], z, z), 0.1)
# If allow_downcast is False, nope # If allow_downcast is False, nope
self.assertRaises(TypeError, f, z, [0.1], z) self.assertRaises(TypeError, f, z, [0.1], z)
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import copy import copy
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
import numpy import numpy as np
import unittest import unittest
...@@ -18,8 +18,6 @@ from theano import tensor ...@@ -18,8 +18,6 @@ from theano import tensor
from theano import tensor as T from theano import tensor as T
import theano import theano
import numpy as N
def PatternOptimizer(p1, p2, ign=True): def PatternOptimizer(p1, p2, ign=True):
return gof.OpKeyOptimizer(gof.PatternSub(p1, p2), ignore_newtrees=ign) return gof.OpKeyOptimizer(gof.PatternSub(p1, p2), ignore_newtrees=ign)
...@@ -281,7 +279,7 @@ class T_function(unittest.TestCase): ...@@ -281,7 +279,7 @@ class T_function(unittest.TestCase):
def test_swap_SharedVariable(self): def test_swap_SharedVariable(self):
i = T.iscalar() i = T.iscalar()
x_list = theano.shared(value=numpy.random.rand(10).astype(config.floatX)) x_list = theano.shared(value=np.random.rand(10).astype(config.floatX))
x = T.scalar('x') x = T.scalar('x')
# SharedVariable for tests, one of them has update # SharedVariable for tests, one of them has update
...@@ -343,11 +341,11 @@ class T_function(unittest.TestCase): ...@@ -343,11 +341,11 @@ class T_function(unittest.TestCase):
A special testcase for logistic_sgd.py in Deep Learning Tutorial A special testcase for logistic_sgd.py in Deep Learning Tutorial
This test assert that SharedVariable in different function have same storage This test assert that SharedVariable in different function have same storage
""" """
train_x = theano.shared(value=numpy.random.rand(10, 10).astype(config.floatX)) train_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))
test_x = theano.shared(value=numpy.random.rand(10, 10).astype(config.floatX)) test_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))
train_y = theano.shared(value=numpy.random.rand(10, 1).astype(config.floatX)) train_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))
test_y = theano.shared(value=numpy.random.rand(10, 1).astype(config.floatX)) test_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))
i = T.iscalar('index') i = T.iscalar('index')
x = T.vector('x') x = T.vector('x')
...@@ -500,42 +498,42 @@ class T_function(unittest.TestCase): ...@@ -500,42 +498,42 @@ class T_function(unittest.TestCase):
when borrow=True is implemented. when borrow=True is implemented.
""" """
a = T.dmatrix() a = T.dmatrix()
aval = numpy.random.rand(3, 3) aval = np.random.rand(3, 3)
# when borrow=False, test that a destroy map cannot alias output to input # when borrow=False, test that a destroy map cannot alias output to input
f = theano.function([In(a, borrow=False)], Out(a + 1, borrow=True)) f = theano.function([In(a, borrow=False)], Out(a + 1, borrow=True))
assert numpy.all(f(aval) == aval + 1) assert np.all(f(aval) == aval + 1)
assert not numpy.may_share_memory(aval, f(aval)) assert not np.may_share_memory(aval, f(aval))
# when borrow=False, test that a viewmap cannot alias output to input # when borrow=False, test that a viewmap cannot alias output to input
f = theano.function([In(a, borrow=False)], Out(a[0, :], borrow=True)) f = theano.function([In(a, borrow=False)], Out(a[0, :], borrow=True))
assert numpy.all(f(aval) == aval[0, :]) assert np.all(f(aval) == aval[0, :])
assert not numpy.may_share_memory(aval, f(aval)) assert not np.may_share_memory(aval, f(aval))
def test_borrow_output(self): def test_borrow_output(self):
a = T.dmatrix() a = T.dmatrix()
f = function([a], Out(a, borrow=False)) f = function([a], Out(a, borrow=False))
o = N.ones((3, 3)) o = np.ones((3, 3))
assert o is not f(o) # function no longer permits aliasing outputs to inputs assert o is not f(o) # function no longer permits aliasing outputs to inputs
f = function([a], Out(a * 4, borrow=False)) f = function([a], Out(a * 4, borrow=False))
o = N.ones((3, 3)) o = np.ones((3, 3))
four = f(o) four = f(o)
assert numpy.all(four == 4) assert np.all(four == 4)
f(o + .1) # should not clobber the memory used to store four f(o + .1) # should not clobber the memory used to store four
assert numpy.all(four == 4) assert np.all(four == 4)
f = function([a], Out(a * 4, borrow=True), mode=theano.Mode('c|py_nogc', 'fast_run')) f = function([a], Out(a * 4, borrow=True), mode=theano.Mode('c|py_nogc', 'fast_run'))
o = N.ones((3, 3)) o = np.ones((3, 3))
four = f(o) four = f(o)
assert numpy.all(four == 4) assert np.all(four == 4)
f(o + .1) # should clobber the memory used to store four f(o + .1) # should clobber the memory used to store four
if theano.config.cxx: if theano.config.cxx:
assert not numpy.all(four == 4) assert not np.all(four == 4)
else: else:
# The Elemwise.perform method don't reuse memory # The Elemwise.perform method don't reuse memory
# as some numpy version don't support that correctly. # as some numpy version don't support that correctly.
assert numpy.all(four == 4) assert np.all(four == 4)
def test_disconnected_input(self): def test_disconnected_input(self):
a = T.scalar('a') a = T.scalar('a')
...@@ -579,6 +577,20 @@ class T_function(unittest.TestCase): ...@@ -579,6 +577,20 @@ class T_function(unittest.TestCase):
if not isinstance(key, theano.gof.Constant): if not isinstance(key, theano.gof.Constant):
assert (val[0] is None) assert (val[0] is None)
def test_default_values(self):
    """
    Check that default values are restored
    when an exception occurs in interactive mode.

    The function is built with a required input ``first`` and an input
    ``second`` whose default value is 1.  Calling it without ``first``
    while overriding ``second`` must raise ``TypeError``; afterwards a
    normal call must give the same result as before the failed call.
    """
    a, b = T.dscalars('a', 'b')
    c = a + b
    func = theano.function([theano.In(a, name='first'),
                            theano.In(b, value=1, name='second')], c)
    expected = func(first=1)
    # The failing call is mandatory: with a bare try/except the test would
    # pass without checking anything if no exception were raised.
    self.assertRaises(TypeError, func, second=2)
    # The override second=2 from the failed call must not have leaked into
    # the stored default.
    assert func(first=1) == expected
class T_picklefunction(unittest.TestCase): class T_picklefunction(unittest.TestCase):
...@@ -753,7 +765,7 @@ class T_picklefunction(unittest.TestCase): ...@@ -753,7 +765,7 @@ class T_picklefunction(unittest.TestCase):
assert f2.container[s].storage is f1.container[s].storage assert f2.container[s].storage is f1.container[s].storage
# now put in a function with non-scalar # now put in a function with non-scalar
v_value = numpy.asarray([2, 3, 4.], dtype=config.floatX) v_value = np.asarray([2, 3, 4.], dtype=config.floatX)
f3 = function([x, In(v, value=v_value)], x + v) f3 = function([x, In(v, value=v_value)], x + v)
list_of_things.append(f3) list_of_things.append(f3)
...@@ -800,13 +812,13 @@ class T_picklefunction(unittest.TestCase): ...@@ -800,13 +812,13 @@ class T_picklefunction(unittest.TestCase):
assert nl[5](3) == ol[5](3) assert nl[5](3) == ol[5](3)
assert nl[4].value[nl[0]] == 6 assert nl[4].value[nl[0]] == 6
assert numpy.all(nl[6][nl[2]] == numpy.asarray([2, 3., 4])) assert np.all(nl[6][nl[2]] == np.asarray([2, 3., 4]))
def test_broken_pickle_with_shared(self): def test_broken_pickle_with_shared(self):
saves = [] saves = []
def pers_save(obj): def pers_save(obj):
if isinstance(obj, numpy.ndarray): if isinstance(obj, np.ndarray):
saves.append(obj) saves.append(obj)
return len(saves) - 1 return len(saves) - 1
else: else:
...@@ -815,7 +827,7 @@ class T_picklefunction(unittest.TestCase): ...@@ -815,7 +827,7 @@ class T_picklefunction(unittest.TestCase):
def pers_load(id): def pers_load(id):
return saves[id] return saves[id]
b = numpy.random.rand(5, 4) b = np.random.rand(5, 4)
x = theano.tensor.matrix() x = theano.tensor.matrix()
y = theano.shared(b) y = theano.shared(b)
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import unittest import unittest
from theano.compile.pfunc import pfunc from theano.compile.pfunc import pfunc
...@@ -20,8 +20,8 @@ class NNet(object): ...@@ -20,8 +20,8 @@ class NNet(object):
self.input = input self.input = input
self.target = target self.target = target
self.lr = shared(lr, 'learning_rate') self.lr = shared(lr, 'learning_rate')
self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1') self.w1 = shared(np.zeros((n_hidden, n_input)), 'w1')
self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2') self.w2 = shared(np.zeros((n_output, n_hidden)), 'w2')
# print self.lr.type # print self.lr.type
self.hidden = sigmoid(tensor.dot(self.w1, self.input)) self.hidden = sigmoid(tensor.dot(self.w1, self.input))
...@@ -45,7 +45,7 @@ class NNet(object): ...@@ -45,7 +45,7 @@ class NNet(object):
class TestNnet(unittest.TestCase): class TestNnet(unittest.TestCase):
def test_nnet(self): def test_nnet(self):
rng = numpy.random.RandomState(1827) rng = np.random.RandomState(1827)
data = rng.rand(10, 4) data = rng.rand(10, 4)
nnet = NNet(n_input=3, n_hidden=10) nnet = NNet(n_input=3, n_hidden=10)
for epoch in range(3): for epoch in range(3):
...@@ -60,4 +60,4 @@ class TestNnet(unittest.TestCase): ...@@ -60,4 +60,4 @@ class TestNnet(unittest.TestCase):
self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6) self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6)
# Just call functions to make sure they do not crash. # Just call functions to make sure they do not crash.
nnet.compute_output(input) nnet.compute_output(input)
nnet.output_from_hidden(numpy.ones(10)) nnet.output_from_hidden(np.ones(10))
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano import theano
...@@ -12,7 +12,7 @@ def test_detect_nan(): ...@@ -12,7 +12,7 @@ def test_detect_nan():
def detect_nan(i, node, fn): def detect_nan(i, node, fn):
for output in fn.outputs: for output in fn.outputs:
if numpy.isnan(output[0]).any(): if np.isnan(output[0]).any():
print('*** NaN detected ***') print('*** NaN detected ***')
theano.printing.debugprint(node) theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs]) print('Inputs : %s' % [input[0] for input in fn.inputs])
...@@ -36,7 +36,7 @@ def test_optimizer(): ...@@ -36,7 +36,7 @@ def test_optimizer():
def detect_nan(i, node, fn): def detect_nan(i, node, fn):
for output in fn.outputs: for output in fn.outputs:
if numpy.isnan(output[0]).any(): if np.isnan(output[0]).any():
print('*** NaN detected ***') print('*** NaN detected ***')
theano.printing.debugprint(node) theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs]) print('Inputs : %s' % [input[0] for input in fn.inputs])
...@@ -65,7 +65,7 @@ def test_not_inplace(): ...@@ -65,7 +65,7 @@ def test_not_inplace():
def detect_nan(i, node, fn): def detect_nan(i, node, fn):
for output in fn.outputs: for output in fn.outputs:
if numpy.isnan(output[0]).any(): if np.isnan(output[0]).any():
print('*** NaN detected ***') print('*** NaN detected ***')
theano.printing.debugprint(node) theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs]) print('Inputs : %s' % [input[0] for input in fn.inputs])
......
...@@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, division ...@@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, division
import logging import logging
from nose.tools import assert_raises from nose.tools import assert_raises
import numpy import numpy as np
from theano.compile.nanguardmode import NanGuardMode from theano.compile.nanguardmode import NanGuardMode
import theano import theano
...@@ -18,20 +18,20 @@ def test_NanGuardMode(): ...@@ -18,20 +18,20 @@ def test_NanGuardMode():
# intentionally. A working implementation should be able to capture all # intentionally. A working implementation should be able to capture all
# the abnormalties. # the abnormalties.
x = T.matrix() x = T.matrix()
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX)) w = theano.shared(np.random.randn(5, 7).astype(theano.config.floatX))
y = T.dot(x, w) y = T.dot(x, w)
fun = theano.function( fun = theano.function(
[x], y, [x], y,
mode=NanGuardMode(nan_is_error=True, inf_is_error=True) mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
) )
a = numpy.random.randn(3, 5).astype(theano.config.floatX) a = np.random.randn(3, 5).astype(theano.config.floatX)
infa = numpy.tile( infa = np.tile(
(numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5)) (np.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
nana = numpy.tile( nana = np.tile(
numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 5)) np.asarray(np.nan).astype(theano.config.floatX), (3, 5))
biga = numpy.tile( biga = np.tile(
numpy.asarray(1e20).astype(theano.config.floatX), (3, 5)) np.asarray(1e20).astype(theano.config.floatX), (3, 5))
fun(a) # normal values fun(a) # normal values
...@@ -46,14 +46,14 @@ def test_NanGuardMode(): ...@@ -46,14 +46,14 @@ def test_NanGuardMode():
_logger.propagate = True _logger.propagate = True
# slices # slices
a = numpy.random.randn(3, 4, 5).astype(theano.config.floatX) a = np.random.randn(3, 4, 5).astype(theano.config.floatX)
infa = numpy.tile( infa = np.tile(
(numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (np.asarray(100.) ** 1000000).astype(theano.config.floatX),
(3, 4, 5)) (3, 4, 5))
nana = numpy.tile( nana = np.tile(
numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 4, 5)) np.asarray(np.nan).astype(theano.config.floatX), (3, 4, 5))
biga = numpy.tile( biga = np.tile(
numpy.asarray(1e20).astype(theano.config.floatX), (3, 4, 5)) np.asarray(1e20).astype(theano.config.floatX), (3, 4, 5))
x = T.tensor3() x = T.tensor3()
y = x[:, T.arange(2), T.arange(2)] y = x[:, T.arange(2), T.arange(2)]
......
...@@ -9,7 +9,6 @@ from theano.tests import unittest_tools as utt ...@@ -9,7 +9,6 @@ from theano.tests import unittest_tools as utt
from theano import function from theano import function
import theano import theano
from theano.tensor import dmatrix, dvector from theano.tensor import dmatrix, dvector
from numpy import allclose
from theano.compile import as_op from theano.compile import as_op
import pickle import pickle
...@@ -34,7 +33,7 @@ class OpDecoratorTests(utt.InferShapeTester): ...@@ -34,7 +33,7 @@ class OpDecoratorTests(utt.InferShapeTester):
r = fn([[1.5, 5], [2, 2]]) r = fn([[1.5, 5], [2, 2]])
r0 = np.array([1.5, 7.5, 15., 30.]) r0 = np.array([1.5, 7.5, 15., 30.])
assert allclose(r, r0), (r, r0) assert np.allclose(r, r0), (r, r0)
def test_2arg(self): def test_2arg(self):
x = dmatrix('x') x = dmatrix('x')
...@@ -50,7 +49,7 @@ class OpDecoratorTests(utt.InferShapeTester): ...@@ -50,7 +49,7 @@ class OpDecoratorTests(utt.InferShapeTester):
r = fn([[1.5, 5], [2, 2]], [1, 100, 2, 200]) r = fn([[1.5, 5], [2, 2]], [1, 100, 2, 200])
r0 = np.array([2.5, 107.5, 17., 230.]) r0 = np.array([2.5, 107.5, 17., 230.])
assert allclose(r, r0), (r, r0) assert np.allclose(r, r0), (r, r0)
def test_infer_shape(self): def test_infer_shape(self):
x = dmatrix('x') x = dmatrix('x')
......
...@@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, division ...@@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, division
import unittest import unittest
import numpy import numpy as np
import theano import theano
from six.moves import StringIO from six.moves import StringIO
...@@ -45,7 +45,7 @@ class Test_profiling(unittest.TestCase): ...@@ -45,7 +45,7 @@ class Test_profiling(unittest.TestCase):
f = theano.function(x, z, profile=p, name="test_profiling", f = theano.function(x, z, profile=p, name="test_profiling",
mode=m) mode=m)
inp = [numpy.arange(1024, dtype='float32') + 1 for i in range(len(x))] inp = [np.arange(1024, dtype='float32') + 1 for i in range(len(x))]
f(*inp) f(*inp)
buf = StringIO() buf = StringIO()
......
...@@ -126,6 +126,12 @@ AddConfigVar( ...@@ -126,6 +126,12 @@ AddConfigVar(
BoolParam(False, allow_override=False), BoolParam(False, allow_override=False),
in_c_key=False) in_c_key=False)
# Register the boolean configuration flag 'print_global_stats'.
# BoolParam(False): the flag is presumably off unless the user enables it.
# NOTE(review): in_c_key=False presumably keeps this flag out of the key used
# to cache compiled C modules -- confirm against AddConfigVar's documentation.
AddConfigVar(
'print_global_stats',
"Print some global statistics (time spent) at the end",
BoolParam(False),
in_c_key=False)
class ContextsParam(ConfigParam): class ContextsParam(ConfigParam):
def __init__(self): def __init__(self):
...@@ -1111,7 +1117,7 @@ AddConfigVar('optdb.position_cutoff', ...@@ -1111,7 +1117,7 @@ AddConfigVar('optdb.position_cutoff',
AddConfigVar('optdb.max_use_ratio', AddConfigVar('optdb.max_use_ratio',
'A ratio that prevent infinite loop in EquilibriumOptimizer.', 'A ratio that prevent infinite loop in EquilibriumOptimizer.',
FloatParam(5), FloatParam(8),
in_c_key=False) in_c_key=False)
AddConfigVar('gcc.cxxflags', AddConfigVar('gcc.cxxflags',
......
...@@ -2510,10 +2510,14 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -2510,10 +2510,14 @@ class EquilibriumOptimizer(NavigatorOptimizer):
end_nb_nodes = len(fgraph.apply_nodes) end_nb_nodes = len(fgraph.apply_nodes)
if max_use_abort: if max_use_abort:
_logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name + msg = ("EquilibriumOptimizer max'ed out by '%s'" % opt_name +
". You can safely raise the current threshold of " + ". You can safely raise the current threshold of " +
"%f with the theano flag 'optdb.max_use_ratio'." % "%f with the theano flag 'optdb.max_use_ratio'." %
config.optdb.max_use_ratio) config.optdb.max_use_ratio)
if theano.config.on_opt_error == 'raise':
raise AssertionError(msg)
else:
_logger.error(msg)
fgraph.remove_feature(change_tracker) fgraph.remove_feature(change_tracker)
assert len(loop_process_count) == len(loop_timing) assert len(loop_process_count) == len(loop_timing)
assert len(loop_process_count) == len(global_opt_timing) assert len(loop_process_count) == len(global_opt_timing)
......
...@@ -571,6 +571,7 @@ class TestEquilibrium(object): ...@@ -571,6 +571,7 @@ class TestEquilibrium(object):
opt.optimize(g) opt.optimize(g)
assert str(g) == '[Op2(x, y)]' assert str(g) == '[Op2(x, y)]'
@theano.configparser.change_flags(on_opt_error='ignore')
def test_low_use_ratio(self): def test_low_use_ratio(self):
x, y, z = map(MyVariable, 'xyz') x, y, z = map(MyVariable, 'xyz')
e = op3(op4(x, y)) e = op3(op4(x, y))
......
...@@ -503,6 +503,8 @@ def hist(coll): ...@@ -503,6 +503,8 @@ def hist(coll):
return counts return counts
@deprecated("theano.gof.utils",
msg="Use a_theano_variable.auto_name instead")
def give_variables_names(variables): def give_variables_names(variables):
""" """
Gives unique names to an iterable of variables. Modifies input. Gives unique names to an iterable of variables. Modifies input.
......
...@@ -482,7 +482,7 @@ class Stack(VM): ...@@ -482,7 +482,7 @@ class Stack(VM):
try: try:
_, dt = self.run_thunk_of_node(current_apply) _, dt = self.run_thunk_of_node(current_apply)
del _ del _
if config.profile: if config.profile or config.print_global_stats:
current_idx = self.node_idx[current_apply] current_idx = self.node_idx[current_apply]
self.call_counts[current_idx] += 1 self.call_counts[current_idx] += 1
self.call_times[current_idx] += dt self.call_times[current_idx] += dt
...@@ -596,7 +596,7 @@ class Stack(VM): ...@@ -596,7 +596,7 @@ class Stack(VM):
if current_apply.inputs[r].owner: if current_apply.inputs[r].owner:
apply_stack.append(current_apply.inputs[r].owner) apply_stack.append(current_apply.inputs[r].owner)
else: else:
if config.profile: if config.profile or config.print_global_stats:
for (idx, o) in enumerate(thunks[ for (idx, o) in enumerate(thunks[
self.node_idx[current_apply]].outputs): self.node_idx[current_apply]].outputs):
var = self.nodes[ var = self.nodes[
...@@ -757,7 +757,7 @@ class VM_Linker(link.LocalLinker): ...@@ -757,7 +757,7 @@ class VM_Linker(link.LocalLinker):
associated to self, else, a new VM_Linker associated to fgraph. associated to self, else, a new VM_Linker associated to fgraph.
""" """
if (config.profile and if ((config.profile or config.print_global_stats) and
((hasattr(theano, 'sandbox') and ((hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled) or theano.sandbox.cuda.cuda_enabled) or
...@@ -856,7 +856,7 @@ class VM_Linker(link.LocalLinker): ...@@ -856,7 +856,7 @@ class VM_Linker(link.LocalLinker):
pre_call_clear = [storage_map[v] for v in self.no_recycling] pre_call_clear = [storage_map[v] for v in self.no_recycling]
if (self.callback is not None or self.callback_input is not None or if (self.callback is not None or self.callback_input is not None or
(config.profile and config.profile_memory) or ((config.profile or config.print_global_stats) and config.profile_memory) or
(self.allow_partial_eval and not self.use_cloop)): (self.allow_partial_eval and not self.use_cloop)):
if self.use_cloop and (self.callback is not None or if self.use_cloop and (self.callback is not None or
...@@ -1086,7 +1086,7 @@ class VM_Linker(link.LocalLinker): ...@@ -1086,7 +1086,7 @@ class VM_Linker(link.LocalLinker):
lazy = config.vm.lazy lazy = config.vm.lazy
if lazy is None: if lazy is None:
lazy = not all([(not th.lazy) for th in thunks]) lazy = not all([(not th.lazy) for th in thunks])
if not (lazy or (config.profile and config.profile_memory) or if not (lazy or ((config.profile or config.print_global_stats) and config.profile_memory) or
self.use_cloop or self.callback or self.callback_input): self.use_cloop or self.callback or self.callback_input):
for pair in itervalues(reallocated_info): for pair in itervalues(reallocated_info):
storage_map[pair[1]] = storage_map[pair[0]] storage_map[pair[1]] = storage_map[pair[0]]
......
差异被折叠。
...@@ -2,8 +2,19 @@ ...@@ -2,8 +2,19 @@
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, npy_float64 epsilon, PyGpuArrayObject *bias, npy_float64 epsilon,
PyGpuArrayObject **outp, PyGpuArrayObject **x_mean, npy_float64 running_average_factor,
PyGpuArrayObject **x_invstd, cudnnHandle_t _handle) { #ifdef RUNNING_AVERAGES
PyGpuArrayObject *in_running_mean,
PyGpuArrayObject *in_running_var,
#endif
PyGpuArrayObject **outp,
PyGpuArrayObject **x_mean,
PyGpuArrayObject **x_invstd,
#ifdef RUNNING_AVERAGES
PyGpuArrayObject **out_running_mean,
PyGpuArrayObject **out_running_var,
#endif
cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context; PyGpuContextObject *c = inp->context;
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
...@@ -11,11 +22,19 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -11,11 +22,19 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
return 1; return 1;
if (epsilon < 1e-5) if (epsilon < 1e-5) {
PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
return 1; return 1;
}
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
#endif
if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
...@@ -24,6 +43,31 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -24,6 +43,31 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(*outp, bn_output) != 0) if (c_set_tensorNd(*outp, bn_output) != 0)
return 1; return 1;
#ifdef RUNNING_AVERAGES
  /* Select the buffers that receive the updated running mean / variance.
     out_running_mean and out_running_var are PyGpuArrayObject ** output
     slots: reference counting must be applied to the object stored in the
     slot (*out_...), never to the slot address itself. */
#ifdef INPLACE_RUNNING_MEAN
  /* In-place: the output aliases the input buffer.  Take the new reference
     before dropping the old one so the sequence is safe even if the slot
     already holds the same object, and store it in the slot right away so
     the slot never holds a stale pointer on an early error return. */
  Py_INCREF(in_running_mean);
  Py_XDECREF(*out_running_mean);
  *out_running_mean = in_running_mean;
  PyGpuArrayObject *running_mean = in_running_mean;
#else
  /* Not in-place: work on a copy of the input obtained from
     theano_try_copy, which is handed the current content of the output
     slot.  NOTE(review): presumably it reuses that buffer when it is
     suitable -- confirm against theano_try_copy's definition. */
  PyGpuArrayObject *running_mean = *out_running_mean;
  running_mean = theano_try_copy(running_mean, in_running_mean);
  if (running_mean == NULL) {
    return 1;
  }
#endif
#ifdef INPLACE_RUNNING_VAR
  /* Same handling as for the running mean above. */
  Py_INCREF(in_running_var);
  Py_XDECREF(*out_running_var);
  *out_running_var = in_running_var;
  PyGpuArrayObject *running_var = in_running_var;
#else
  PyGpuArrayObject *running_var = *out_running_var;
  running_var = theano_try_copy(running_var, in_running_var);
  if (running_var == NULL) {
    return 1;
  }
#endif
#endif
{ {
const float falpha = 1.; const float falpha = 1.;
const float fbeta = 0.; const float fbeta = 0.;
...@@ -50,9 +94,15 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -50,9 +94,15 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
bn_params, bn_params,
PyGpuArray_DEV_DATA(scale), PyGpuArray_DEV_DATA(scale),
PyGpuArray_DEV_DATA(bias), PyGpuArray_DEV_DATA(bias),
#ifdef RUNNING_AVERAGES
running_average_factor,
PyGpuArray_DEV_DATA(running_mean),
PyGpuArray_DEV_DATA(running_var),
#else
0, 0,
NULL, // running mean, deliberately unused NULL, // running mean, deliberately unused
NULL, // running var, deliberately unused NULL, // running var, deliberately unused
#endif
epsilon, epsilon,
PyGpuArray_DEV_DATA(*x_mean), PyGpuArray_DEV_DATA(*x_mean),
PyGpuArray_DEV_DATA(*x_invstd) PyGpuArray_DEV_DATA(*x_invstd)
...@@ -62,6 +112,10 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -62,6 +112,10 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
cudnnGetErrorString(err)); cudnnGetErrorString(err));
return 1; return 1;
} }
#ifdef RUNNING_AVERAGES
*out_running_mean = running_mean;
*out_running_var = running_var;
#endif
} }
return 0; return 0;
} }
...@@ -34,8 +34,10 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, ...@@ -34,8 +34,10 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
return 1; return 1;
if (epsilon < 1e-5) if (epsilon < 1e-5) {
PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
return 1; return 1;
}
if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
......
...@@ -11,11 +11,19 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -11,11 +11,19 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
return 1; return 1;
if (epsilon < 1e-5) if (epsilon < 1e-5) {
PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
return 1; return 1;
}
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
#endif
if (c_set_tensorNd(*outp, bn_output) != 0) if (c_set_tensorNd(*outp, bn_output) != 0)
return 1; return 1;
......
...@@ -252,3 +252,7 @@ class TestDnnConvTypes(test_abstract_conv.TestConvTypes): ...@@ -252,3 +252,7 @@ class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
self.constant_tensor = gpuarray.array( self.constant_tensor = gpuarray.array(
np.zeros((3, 5, 7, 11), dtype='float32'), np.zeros((3, 5, 7, 11), dtype='float32'),
context=get_context(test_ctx_name)) context=get_context(test_ctx_name))
# Subclass of test_abstract_conv.TestConv2dTranspose whose only change is
# setting the class attribute `mode` to mode_with_gpu.
class TestConv2dTranspose(test_abstract_conv.TestConv2dTranspose):
mode = mode_with_gpu
...@@ -13,7 +13,7 @@ import time ...@@ -13,7 +13,7 @@ import time
from optparse import OptionParser from optparse import OptionParser
import subprocess import subprocess
import numpy import numpy as np
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -47,10 +47,10 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, ...@@ -47,10 +47,10 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
print() print()
print('Numpy config: (used when the Theano flag' print('Numpy config: (used when the Theano flag'
' "blas.ldflags" is empty)') ' "blas.ldflags" is empty)')
numpy.show_config() np.show_config()
print('Numpy dot module:', numpy.dot.__module__) print('Numpy dot module:', np.dot.__module__)
print('Numpy location:', numpy.__file__) print('Numpy location:', np.__file__)
print('Numpy version:', numpy.__version__) print('Numpy version:', np.__version__)
if (theano.config.device.startswith("gpu") or if (theano.config.device.startswith("gpu") or
theano.config.init_gpu_device.startswith("gpu")): theano.config.init_gpu_device.startswith("gpu")):
print('nvcc version:') print('nvcc version:')
...@@ -58,12 +58,12 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, ...@@ -58,12 +58,12 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
"--version")) "--version"))
print() print()
a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX, a = theano.shared(np.ones((M, N), dtype=theano.config.floatX,
order=order)) order=order))
b = theano.shared(numpy.ones((N, K), dtype=theano.config.floatX, b = theano.shared(np.ones((N, K), dtype=theano.config.floatX,
order=order)) order=order))
c = theano.shared(numpy.ones((M, K), dtype=theano.config.floatX, c = theano.shared(np.ones((M, K), dtype=theano.config.floatX,
order=order)) order=order))
f = theano.function([], updates=[(c, 0.4 * c + .8 * T.dot(a, b))]) f = theano.function([], updates=[(c, 0.4 * c + .8 * T.dot(a, b))])
if any([x.op.__class__.__name__ == 'Gemm' for x in if any([x.op.__class__.__name__ == 'Gemm' for x in
......
...@@ -9,7 +9,7 @@ from __future__ import absolute_import, print_function, division ...@@ -9,7 +9,7 @@ from __future__ import absolute_import, print_function, division
import threading import threading
import time import time
import numpy import numpy as np
import theano import theano
from theano.gpuarray import init_dev from theano.gpuarray import init_dev
...@@ -21,7 +21,7 @@ def main(dev1, dev2): ...@@ -21,7 +21,7 @@ def main(dev1, dev2):
init_dev(dev2, 'ctx2') init_dev(dev2, 'ctx2')
size = 1024 * 16 size = 1024 * 16
data = numpy.random.randn(size, size).astype('float32') data = np.random.randn(size, size).astype('float32')
val1a = theano.shared(data, target='ctx1') val1a = theano.shared(data, target='ctx1')
val1b = theano.shared(data, target='ctx1') val1b = theano.shared(data, target='ctx1')
val1c = theano.shared(data, target='ctx1') val1c = theano.shared(data, target='ctx1')
......
...@@ -2,18 +2,18 @@ from __future__ import absolute_import, print_function, division ...@@ -2,18 +2,18 @@ from __future__ import absolute_import, print_function, division
import time import time
import numpy import numpy as np
import theano import theano
y = theano.tensor.fvector() y = theano.tensor.fvector()
x = theano.shared(numpy.zeros(1, dtype='float32')) x = theano.shared(np.zeros(1, dtype='float32'))
f1 = theano.function([y], updates={x: y}) f1 = theano.function([y], updates={x: y})
f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x)) f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x))
print(f1.maker.fgraph.toposort()) print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort()) print(f2.maker.fgraph.toposort())
for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]: for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
o = numpy.zeros(i, dtype='float32') o = np.zeros(i, dtype='float32')
t0 = time.time() t0 = time.time()
f1(o) f1(o)
t1 = time.time() t1 = time.time()
......
...@@ -4,7 +4,7 @@ numpy version support only ndarray. ...@@ -4,7 +4,7 @@ numpy version support only ndarray.
""" """
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
from theano.tensor.basic import TensorType from theano.tensor.basic import TensorType
try: try:
...@@ -42,8 +42,8 @@ else: ...@@ -42,8 +42,8 @@ else:
def may_share_memory(a, b, raise_other_type=True): def may_share_memory(a, b, raise_other_type=True):
a_ndarray = isinstance(a, numpy.ndarray) a_ndarray = isinstance(a, np.ndarray)
b_ndarray = isinstance(b, numpy.ndarray) b_ndarray = isinstance(b, np.ndarray)
if a_ndarray and b_ndarray: if a_ndarray and b_ndarray:
return TensorType.may_share_memory(a, b) return TensorType.may_share_memory(a, b)
a_cuda = _is_cuda(a) a_cuda = _is_cuda(a)
......
...@@ -5,7 +5,7 @@ These pickled graphs can be used, for instance, as cases for ...@@ -5,7 +5,7 @@ These pickled graphs can be used, for instance, as cases for
unit tests or regression tests. unit tests or regression tests.
""" """
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import os import os
import pickle import pickle
import sys import sys
...@@ -188,10 +188,10 @@ class PersistentNdarrayID(object): ...@@ -188,10 +188,10 @@ class PersistentNdarrayID(object):
return name return name
def __call__(self, obj): def __call__(self, obj):
if type(obj) is numpy.ndarray: if type(obj) is np.ndarray:
if id(obj) not in self.seen: if id(obj) not in self.seen:
def write_array(f): def write_array(f):
numpy.lib.format.write_array(f, obj) np.lib.format.write_array(f, obj)
name = self._resolve_name(obj) name = self._resolve_name(obj)
zipadd(write_array, self.zip_file, name) zipadd(write_array, self.zip_file, name)
self.seen[id(obj)] = 'ndarray.{0}'.format(name) self.seen[id(obj)] = 'ndarray.{0}'.format(name)
...@@ -204,7 +204,7 @@ class PersistentCudaNdarrayID(PersistentNdarrayID): ...@@ -204,7 +204,7 @@ class PersistentCudaNdarrayID(PersistentNdarrayID):
type(obj) is cuda_ndarray.cuda_ndarray.CudaNdarray): type(obj) is cuda_ndarray.cuda_ndarray.CudaNdarray):
if id(obj) not in self.seen: if id(obj) not in self.seen:
def write_array(f): def write_array(f):
numpy.lib.format.write_array(f, numpy.asarray(obj)) np.lib.format.write_array(f, np.asarray(obj))
name = self._resolve_name(obj) name = self._resolve_name(obj)
zipadd(write_array, self.zip_file, name) zipadd(write_array, self.zip_file, name)
self.seen[id(obj)] = 'cuda_ndarray.{0}'.format(name) self.seen[id(obj)] = 'cuda_ndarray.{0}'.format(name)
...@@ -283,7 +283,7 @@ class PersistentNdarrayLoad(object): ...@@ -283,7 +283,7 @@ class PersistentNdarrayLoad(object):
if name in self.cache: if name in self.cache:
return self.cache[name] return self.cache[name]
ret = None ret = None
array = numpy.lib.format.read_array(self.zip_file.open(name)) array = np.lib.format.read_array(self.zip_file.open(name))
if array_type == 'cuda_ndarray': if array_type == 'cuda_ndarray':
if config.experimental.unpickle_gpu_on_cpu: if config.experimental.unpickle_gpu_on_cpu:
# directly return numpy array # directly return numpy array
...@@ -335,10 +335,10 @@ def dump(obj, file_handler, protocol=DEFAULT_PROTOCOL, ...@@ -335,10 +335,10 @@ def dump(obj, file_handler, protocol=DEFAULT_PROTOCOL,
>>> foo_1 = theano.shared(0, name='foo') >>> foo_1 = theano.shared(0, name='foo')
>>> foo_2 = theano.shared(1, name='foo') >>> foo_2 = theano.shared(1, name='foo')
>>> with open('model.zip', 'wb') as f: >>> with open('model.zip', 'wb') as f:
... dump((foo_1, foo_2, numpy.array(2)), f) ... dump((foo_1, foo_2, np.array(2)), f)
>>> numpy.load('model.zip').keys() >>> np.load('model.zip').keys()
['foo', 'foo_2', 'array_0', 'pkl'] ['foo', 'foo_2', 'array_0', 'pkl']
>>> numpy.load('model.zip')['foo'] >>> np.load('model.zip')['foo']
array(0) array(0)
>>> with open('model.zip', 'rb') as f: >>> with open('model.zip', 'rb') as f:
... foo_1, foo_2, array = load(f) ... foo_1, foo_2, array = load(f)
......
...@@ -22,7 +22,7 @@ TheanoElementwiseKernel. ...@@ -22,7 +22,7 @@ TheanoElementwiseKernel.
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from itertools import chain from itertools import chain
import numpy import numpy as np
import theano import theano
from six.moves import xrange from six.moves import xrange
...@@ -257,13 +257,13 @@ class PycudaElemwiseSourceModuleOp(GpuOp): ...@@ -257,13 +257,13 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
" inputs don't have the same shape!") " inputs don't have the same shape!")
if inputs[0].size > 512: if inputs[0].size > 512:
grid = (int(numpy.ceil(inputs[0].size / 512.)), 1) grid = (int(np.ceil(inputs[0].size / 512.)), 1)
block = (512, 1, 1) block = (512, 1, 1)
else: else:
grid = (1, 1) grid = (1, 1)
block = (inputs[0].shape[0], inputs[0].shape[1], 1) block = (inputs[0].shape[0], inputs[0].shape[1], 1)
self.pycuda_fct(inputs[0], inputs[1], z[0], self.pycuda_fct(inputs[0], inputs[1], z[0],
numpy.intc(inputs[1].size), block=block, grid=grid) np.intc(inputs[1].size), block=block, grid=grid)
class PycudaElemwiseSourceModuleMakeThunkOp(Op): class PycudaElemwiseSourceModuleMakeThunkOp(Op):
...@@ -349,13 +349,13 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op): ...@@ -349,13 +349,13 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
" inputs don't have the same shape!") " inputs don't have the same shape!")
if inputs[0][0].size > 512: if inputs[0][0].size > 512:
grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1) grid = (int(np.ceil(inputs[0][0].size / 512.)), 1)
block = (512, 1, 1) block = (512, 1, 1)
else: else:
grid = (1, 1) grid = (1, 1)
block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1) block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1)
pycuda_fct(inputs[0][0], inputs[1][0], z[0], pycuda_fct(inputs[0][0], inputs[1][0], z[0],
numpy.intc(inputs[1][0].size), block=block, np.intc(inputs[1][0].size), block=block,
grid=grid) grid=grid)
thunk.inputs = inputs thunk.inputs = inputs
thunk.outputs = outputs thunk.outputs = outputs
......
...@@ -3,7 +3,7 @@ Helper function to safely convert an array to a new data type. ...@@ -3,7 +3,7 @@ Helper function to safely convert an array to a new data type.
""" """
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano import theano
...@@ -30,8 +30,8 @@ def _asarray(a, dtype, order=None): ...@@ -30,8 +30,8 @@ def _asarray(a, dtype, order=None):
""" """
if str(dtype) == 'floatX': if str(dtype) == 'floatX':
dtype = theano.config.floatX dtype = theano.config.floatX
dtype = numpy.dtype(dtype) # Convert into dtype object. dtype = np.dtype(dtype) # Convert into dtype object.
rval = numpy.asarray(a, dtype=dtype, order=order) rval = np.asarray(a, dtype=dtype, order=order)
# Note that dtype comparison must be done by comparing their `num` # Note that dtype comparison must be done by comparing their `num`
# attribute. One cannot assume that two identical data types are pointers # attribute. One cannot assume that two identical data types are pointers
# towards the same object (e.g. under Windows this appears not to be the # towards the same object (e.g. under Windows this appears not to be the
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano import theano
from theano.misc.cudamat_utils import cudamat_available from theano.misc.cudamat_utils import cudamat_available
...@@ -20,7 +20,7 @@ def test(shape=(3, 4)): ...@@ -20,7 +20,7 @@ def test(shape=(3, 4)):
U = gpu(theano.tensor.fmatrix('U')) U = gpu(theano.tensor.fmatrix('U'))
ii = theano.function([U], gpu(U + 1)) ii = theano.function([U], gpu(U + 1))
A_cpu = numpy.asarray(numpy.random.rand(*shape), dtype="float32") A_cpu = np.asarray(np.random.rand(*shape), dtype="float32")
A_cnd = theano.sandbox.cuda.CudaNdarray(A_cpu) A_cnd = theano.sandbox.cuda.CudaNdarray(A_cpu)
A_cmat = cudandarray_to_cudamat(A_cnd) A_cmat = cudandarray_to_cudamat(A_cnd)
...@@ -28,9 +28,9 @@ def test(shape=(3, 4)): ...@@ -28,9 +28,9 @@ def test(shape=(3, 4)):
B_cnd = ii(A_cnd) B_cnd = ii(A_cnd)
u = A_cnd.copy() u = A_cnd.copy()
u += theano.sandbox.cuda.CudaNdarray(numpy.asarray([[1]], dtype='float32')) u += theano.sandbox.cuda.CudaNdarray(np.asarray([[1]], dtype='float32'))
u = numpy.asarray(u) u = np.asarray(u)
v = numpy.asarray(B_cnd) v = np.asarray(B_cnd)
w = A_cmat.add(1).asarray() w = A_cmat.add(1).asarray()
assert abs(u - v).max() == 0 assert abs(u - v).max() == 0
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano import theano
from theano.misc.gnumpy_utils import gnumpy_available from theano.misc.gnumpy_utils import gnumpy_available
...@@ -31,11 +31,10 @@ def test(shape=(3, 4, 5)): ...@@ -31,11 +31,10 @@ def test(shape=(3, 4, 5)):
B_cnd = ii(A_cnd) B_cnd = ii(A_cnd)
B = cudandarray_to_garray(B_cnd) B = cudandarray_to_garray(B_cnd)
assert A_cnd.shape == A.shape assert A_cnd.shape == A.shape
from numpy import array
u = (A + 1).asarray() u = (A + 1).asarray()
v = B.asarray() v = B.asarray()
w = array(B_cnd) w = np.array(B_cnd)
assert (u == v).all() assert (u == v).all()
assert (u == w).all() assert (u == w).all()
...@@ -49,7 +48,7 @@ def test2(shape=(3, 4, 5)): ...@@ -49,7 +48,7 @@ def test2(shape=(3, 4, 5)):
U = gpu(theano.tensor.ftensor3('U')) U = gpu(theano.tensor.ftensor3('U'))
theano.function([U], gpu(U + 1)) theano.function([U], gpu(U + 1))
A = numpy.random.rand(*shape).astype('float32') A = np.random.rand(*shape).astype('float32')
A_cnd = theano.sandbox.cuda.CudaNdarray(A) A_cnd = theano.sandbox.cuda.CudaNdarray(A)
A_gar = cudandarray_to_garray(A_cnd) A_gar = cudandarray_to_garray(A_cnd)
assert A_cnd.shape == A_gar.shape assert A_cnd.shape == A_gar.shape
...@@ -62,7 +61,7 @@ def test2(shape=(3, 4, 5)): ...@@ -62,7 +61,7 @@ def test2(shape=(3, 4, 5)):
# dtype always float32 # dtype always float32
assert A_cnd._strides == B._strides assert A_cnd._strides == B._strides
assert A_cnd.gpudata == B.gpudata assert A_cnd.gpudata == B.gpudata
v = numpy.asarray(B) v = np.asarray(B)
assert (v == A).all() assert (v == A).all()
......
...@@ -3,7 +3,7 @@ test the tensor and sparse type. The CudaNdarray type is tested in ...@@ -3,7 +3,7 @@ test the tensor and sparse type. The CudaNdarray type is tested in
sandbox/cuda/tests/test_tensor_op.py.test_may_share_memory_cuda sandbox/cuda/tests/test_tensor_op.py.test_may_share_memory_cuda
""" """
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano import theano
try: try:
...@@ -16,8 +16,8 @@ from theano.misc.may_share_memory import may_share_memory ...@@ -16,8 +16,8 @@ from theano.misc.may_share_memory import may_share_memory
def test_may_share_memory(): def test_may_share_memory():
a = numpy.random.rand(5, 4) a = np.random.rand(5, 4)
b = numpy.random.rand(5, 4) b = np.random.rand(5, 4)
va = a.view() va = a.view()
vb = b.view() vb = b.view()
ra = a.reshape((4, 5)) ra = a.reshape((4, 5))
......
...@@ -4,8 +4,7 @@ import shutil ...@@ -4,8 +4,7 @@ import shutil
import unittest import unittest
from tempfile import mkdtemp from tempfile import mkdtemp
import numpy import numpy as np
from numpy.testing import assert_allclose
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano import theano
...@@ -44,7 +43,7 @@ class T_dump_load(unittest.TestCase): ...@@ -44,7 +43,7 @@ class T_dump_load(unittest.TestCase):
x = load(f) x = load(f)
assert x.name == 'x' assert x.name == 'x'
assert_allclose(x.get_value(), [[1]]) np.testing.assert_allclose(x.get_value(), [[1]])
def test_dump_load_mrg(self): def test_dump_load_mrg(self):
rng = MRG_RandomStreams(use_cuda=cuda_ndarray.cuda_enabled) rng = MRG_RandomStreams(use_cuda=cuda_ndarray.cuda_enabled)
...@@ -62,14 +61,14 @@ class T_dump_load(unittest.TestCase): ...@@ -62,14 +61,14 @@ class T_dump_load(unittest.TestCase):
foo_2 = theano.shared(1, name='foo') foo_2 = theano.shared(1, name='foo')
foo_3 = theano.shared(2, name='foo') foo_3 = theano.shared(2, name='foo')
with open('model.zip', 'wb') as f: with open('model.zip', 'wb') as f:
dump((foo_1, foo_2, foo_3, numpy.array(3)), f) dump((foo_1, foo_2, foo_3, np.array(3)), f)
keys = list(numpy.load('model.zip').keys()) keys = list(np.load('model.zip').keys())
assert keys == ['foo', 'foo_2', 'foo_3', 'array_0', 'pkl'] assert keys == ['foo', 'foo_2', 'foo_3', 'array_0', 'pkl']
foo_3 = numpy.load('model.zip')['foo_3'] foo_3 = np.load('model.zip')['foo_3']
assert foo_3 == numpy.array(2) assert foo_3 == np.array(2)
with open('model.zip', 'rb') as f: with open('model.zip', 'rb') as f:
foo_1, foo_2, foo_3, array = load(f) foo_1, foo_2, foo_3, array = load(f)
assert array == numpy.array(3) assert array == np.array(3)
class TestStripPickler(unittest.TestCase): class TestStripPickler(unittest.TestCase):
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano import theano
import theano.misc.pycuda_init import theano.misc.pycuda_init
...@@ -58,11 +58,11 @@ def test_pycuda_elemwise_source_module(): ...@@ -58,11 +58,11 @@ def test_pycuda_elemwise_source_module():
PycudaElemwiseSourceModuleMakeThunkOp) PycudaElemwiseSourceModuleMakeThunkOp)
for node in f4.maker.fgraph.toposort()]) for node in f4.maker.fgraph.toposort()])
val1 = numpy.asarray(numpy.random.rand(*shape), dtype='float32') val1 = np.asarray(np.random.rand(*shape), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(*shape), dtype='float32') val2 = np.asarray(np.random.rand(*shape), dtype='float32')
assert numpy.allclose(f(val1, val2), f2(val1, val2)) assert np.allclose(f(val1, val2), f2(val1, val2))
assert numpy.allclose(f(val1, val2), f3(val1, val2)) assert np.allclose(f(val1, val2), f3(val1, val2))
assert numpy.allclose(f(val1, val2), f4(val1, val2)) assert np.allclose(f(val1, val2), f4(val1, val2))
# print f(val1,val2) # print f(val1,val2)
# print f2(val1,val2) # print f2(val1,val2)
...@@ -82,10 +82,10 @@ def test_pycuda_elemwise_kernel(): ...@@ -82,10 +82,10 @@ def test_pycuda_elemwise_kernel():
assert any([isinstance(node.op, PycudaElemwiseKernelOp) assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f2.maker.fgraph.toposort()]) for node in f2.maker.fgraph.toposort()])
val1 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32') val1 = np.asarray(np.random.rand(5, 5), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32') val2 = np.asarray(np.random.rand(5, 5), dtype='float32')
#val1 = numpy.ones((5,5)) #val1 = np.ones((5,5))
#val2 = numpy.arange(25).reshape(5,5) #val2 = np.arange(25).reshape(5,5)
assert (f(val1, val2) == f2(val1, val2)).all() assert (f(val1, val2) == f2(val1, val2)).all()
print(f(val1, val2)) print(f(val1, val2))
print(f2(val1, val2)) print(f2(val1, val2))
...@@ -99,8 +99,8 @@ def test_pycuda_elemwise_kernel(): ...@@ -99,8 +99,8 @@ def test_pycuda_elemwise_kernel():
assert any([isinstance(node.op, PycudaElemwiseKernelOp) assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f4.maker.fgraph.toposort()]) for node in f4.maker.fgraph.toposort()])
val1 = numpy.random.rand(2, 2, 2) val1 = np.random.rand(2, 2, 2)
print(val1) print(val1)
print(f4(val1, val1, val1)) print(f4(val1, val1, val1))
assert numpy.allclose(f4(val1, val1, val1), val1 * val1 + val1) assert np.allclose(f4(val1, val1, val1), val1 * val1 + val1)
""" """
...@@ -8,7 +8,7 @@ from __future__ import absolute_import, print_function, division ...@@ -8,7 +8,7 @@ from __future__ import absolute_import, print_function, division
import sys import sys
import numpy import numpy as np
import theano import theano
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
...@@ -42,9 +42,9 @@ __global__ void multiply_them(float *dest, float *a, float *b) ...@@ -42,9 +42,9 @@ __global__ void multiply_them(float *dest, float *a, float *b)
multiply_them = mod.get_function("multiply_them") multiply_them = mod.get_function("multiply_them")
# Test with pycuda in/out of numpy.ndarray # Test with pycuda in/out of numpy.ndarray
a = numpy.random.randn(100).astype(numpy.float32) a = np.random.randn(100).astype(np.float32)
b = numpy.random.randn(100).astype(numpy.float32) b = np.random.randn(100).astype(np.float32)
dest = numpy.zeros_like(a) dest = np.zeros_like(a)
multiply_them( multiply_them(
drv.Out(dest), drv.In(a), drv.In(b), drv.Out(dest), drv.In(a), drv.In(b),
block=(400, 1, 1), grid=(1, 1)) block=(400, 1, 1), grid=(1, 1))
...@@ -64,8 +64,8 @@ __global__ void multiply_them(float *dest, float *a, float *b) ...@@ -64,8 +64,8 @@ __global__ void multiply_them(float *dest, float *a, float *b)
multiply_them = mod.get_function("multiply_them") multiply_them = mod.get_function("multiply_them")
a = numpy.random.randn(100).astype(numpy.float32) a = np.random.randn(100).astype(np.float32)
b = numpy.random.randn(100).astype(numpy.float32) b = np.random.randn(100).astype(np.float32)
# Test with Theano object # Test with Theano object
ga = cuda_ndarray.CudaNdarray(a) ga = cuda_ndarray.CudaNdarray(a)
...@@ -73,7 +73,7 @@ __global__ void multiply_them(float *dest, float *a, float *b) ...@@ -73,7 +73,7 @@ __global__ void multiply_them(float *dest, float *a, float *b)
dest = cuda_ndarray.CudaNdarray.zeros(a.shape) dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
multiply_them(dest, ga, gb, multiply_them(dest, ga, gb,
block=(400, 1, 1), grid=(1, 1)) block=(400, 1, 1), grid=(1, 1))
assert (numpy.asarray(dest) == a * b).all() assert (np.asarray(dest) == a * b).all()
def test_pycuda_memory_to_theano(): def test_pycuda_memory_to_theano():
...@@ -87,7 +87,7 @@ def test_pycuda_memory_to_theano(): ...@@ -87,7 +87,7 @@ def test_pycuda_memory_to_theano():
print("gpuarray ref count before creating a CudaNdarray", end=' ') print("gpuarray ref count before creating a CudaNdarray", end=' ')
print(sys.getrefcount(y)) print(sys.getrefcount(y))
assert sys.getrefcount(y) == initial_refcount assert sys.getrefcount(y) == initial_refcount
rand = numpy.random.randn(*y.shape).astype(numpy.float32) rand = np.random.randn(*y.shape).astype(np.float32)
cuda_rand = cuda_ndarray.CudaNdarray(rand) cuda_rand = cuda_ndarray.CudaNdarray(rand)
strides = [1] strides = [1]
...@@ -102,7 +102,7 @@ def test_pycuda_memory_to_theano(): ...@@ -102,7 +102,7 @@ def test_pycuda_memory_to_theano():
z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y) z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
print("gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y)) print("gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y))
assert sys.getrefcount(y) == initial_refcount + 1 assert sys.getrefcount(y) == initial_refcount + 1
assert (numpy.asarray(z) == 0).all() assert (np.asarray(z) == 0).all()
assert z.base is y assert z.base is y
# Test that we can take a view from this cuda view on pycuda memory # Test that we can take a view from this cuda view on pycuda memory
...@@ -112,17 +112,17 @@ def test_pycuda_memory_to_theano(): ...@@ -112,17 +112,17 @@ def test_pycuda_memory_to_theano():
del zz del zz
assert sys.getrefcount(y) == initial_refcount + 1 assert sys.getrefcount(y) == initial_refcount + 1
cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]], cuda_ones = cuda_ndarray.CudaNdarray(np.asarray([[[1]]],
dtype='float32')) dtype='float32'))
z += cuda_ones z += cuda_ones
assert (numpy.asarray(z) == numpy.ones(y.shape)).all() assert (np.asarray(z) == np.ones(y.shape)).all()
assert (numpy.asarray(z) == 1).all() assert (np.asarray(z) == 1).all()
assert cuda_rand.shape == z.shape assert cuda_rand.shape == z.shape
assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides) assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
assert (numpy.asarray(cuda_rand) == rand).all() assert (np.asarray(cuda_rand) == rand).all()
z += cuda_rand z += cuda_rand
assert (numpy.asarray(z) == (rand + 1)).all() assert (np.asarray(z) == (rand + 1)).all()
# Check that the ref count to the gpuarray is right. # Check that the ref count to the gpuarray is right.
del z del z
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
import theano.misc.pycuda_init import theano.misc.pycuda_init
...@@ -22,30 +22,30 @@ def test_to_gpuarray(): ...@@ -22,30 +22,30 @@ def test_to_gpuarray():
px = to_gpuarray(cx) px = to_gpuarray(cx)
assert isinstance(px, pycuda.gpuarray.GPUArray) assert isinstance(px, pycuda.gpuarray.GPUArray)
cx[0, 0] = numpy.asarray(1, dtype="float32") cx[0, 0] = np.asarray(1, dtype="float32")
# Check that they share the same memory space # Check that they share the same memory space
assert px.gpudata == cx.gpudata assert px.gpudata == cx.gpudata
assert numpy.asarray(cx[0, 0]) == 1 assert np.asarray(cx[0, 0]) == 1
assert numpy.allclose(numpy.asarray(cx), px.get()) assert np.allclose(np.asarray(cx), px.get())
assert px.dtype == cx.dtype assert px.dtype == cx.dtype
assert px.shape == cx.shape assert px.shape == cx.shape
assert all(numpy.asarray(cx._strides) * 4 == px.strides) assert all(np.asarray(cx._strides) * 4 == px.strides)
# Test when the CudaNdarray is strided # Test when the CudaNdarray is strided
cx = cx[::2, ::] cx = cx[::2, ::]
px = to_gpuarray(cx, copyif=True) px = to_gpuarray(cx, copyif=True)
assert isinstance(px, pycuda.gpuarray.GPUArray) assert isinstance(px, pycuda.gpuarray.GPUArray)
cx[0, 0] = numpy.asarray(2, dtype="float32") cx[0, 0] = np.asarray(2, dtype="float32")
# Check that they do not share the same memory space # Check that they do not share the same memory space
assert px.gpudata != cx.gpudata assert px.gpudata != cx.gpudata
assert numpy.asarray(cx[0, 0]) == 2 assert np.asarray(cx[0, 0]) == 2
assert not numpy.allclose(numpy.asarray(cx), px.get()) assert not np.allclose(np.asarray(cx), px.get())
assert px.dtype == cx.dtype assert px.dtype == cx.dtype
assert px.shape == cx.shape assert px.shape == cx.shape
assert not all(numpy.asarray(cx._strides) * 4 == px.strides) assert not all(np.asarray(cx._strides) * 4 == px.strides)
# Test that we return an error # Test that we return an error
try: try:
...@@ -59,11 +59,11 @@ def test_to_cudandarray(): ...@@ -59,11 +59,11 @@ def test_to_cudandarray():
px = pycuda.gpuarray.zeros((3, 4, 5), 'float32') px = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
cx = to_cudandarray(px) cx = to_cudandarray(px)
assert isinstance(cx, cuda.CudaNdarray) assert isinstance(cx, cuda.CudaNdarray)
assert numpy.allclose(px.get(), assert np.allclose(px.get(),
numpy.asarray(cx)) np.asarray(cx))
assert px.dtype == cx.dtype assert px.dtype == cx.dtype
assert px.shape == cx.shape assert px.shape == cx.shape
assert all(numpy.asarray(cx._strides) * 4 == px.strides) assert all(np.asarray(cx._strides) * 4 == px.strides)
try: try:
px = pycuda.gpuarray.zeros((3, 4, 5), 'float64') px = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
...@@ -73,7 +73,7 @@ def test_to_cudandarray(): ...@@ -73,7 +73,7 @@ def test_to_cudandarray():
pass pass
try: try:
to_cudandarray(numpy.zeros(4)) to_cudandarray(np.zeros(4))
assert False assert False
except ValueError: except ValueError:
pass pass
...@@ -12,7 +12,7 @@ import warnings ...@@ -12,7 +12,7 @@ import warnings
import theano import theano
from theano.compat import get_unbound_function from theano.compat import get_unbound_function
from theano.compile import optdb from theano.compile import optdb
from theano.gof import EquilibriumDB, SequenceDB from theano.gof import EquilibriumDB, SequenceDB, TopoOptimizer
from theano.gof.cmodule import get_lib_extension from theano.gof.cmodule import get_lib_extension
from theano.gof.compilelock import get_lock, release_lock from theano.gof.compilelock import get_lock, release_lock
from theano import config from theano import config
...@@ -40,6 +40,17 @@ def register_opt(*tags, **kwargs): ...@@ -40,6 +40,17 @@ def register_opt(*tags, **kwargs):
return f return f
def register_inplace(*tags, **kwargs):
    """
    Return a decorator that registers a local optimizer in `optdb`
    as an inplace GPU optimization.

    The decorated local optimizer is wrapped in a `TopoOptimizer` whose
    failure callback is `TopoOptimizer.warn_inplace`, and registered with
    the tags 'fast_run', 'inplace' and 'gpu' followed by any extra `tags`.

    Parameters
    ----------
    tags
        Additional tags forwarded to `optdb.register`.
    kwargs
        Only the optional ``name`` key is used: the name under which the
        optimizer is registered. When it is missing or falsy, the
        decorated function's ``__name__`` is used instead.

    Returns
    -------
    callable
        A decorator that registers its argument and returns it unchanged.

    """
    def f(local_opt):
        # Use a default for pop(): the previous
        # `(kwargs and kwargs.pop('name'))` form raised KeyError when
        # keyword arguments were given without a 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        optdb.register(
            name, TopoOptimizer(
                local_opt, failure_callback=TopoOptimizer.warn_inplace),
            # NOTE(review): 60 is presumably the position of this
            # optimizer in optdb -- confirm against optdb.register.
            60, 'fast_run', 'inplace', 'gpu', *tags)
        return local_opt
    return f
_logger_name = 'theano.sandbox.cuda' _logger_name = 'theano.sandbox.cuda'
_logger = logging.getLogger(_logger_name) _logger = logging.getLogger(_logger_name)
......
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论