Commit fd7875ad authored by bscellier, committed by GitHub

Merge branch 'master' into import_numpy_gpuarray

......@@ -13,5 +13,5 @@ echo "===== Testing theano core"
# Test theano core
PARTS="theano -e cuda -e gpuarray"
THEANO_PARAM="${PARTS} --with-timer --timer-top-n 10 --with-xunit --xunit-file=theanocore_tests.xml"
FLAGS="mode=FAST_RUN,floatX=float32"
FLAGS="mode=FAST_RUN,floatX=float32,on_opt_error=raise,on_shape_error=raise"
THEANO_FLAGS=${FLAGS} bin/theano-nose ${THEANO_PARAM}
......@@ -76,5 +76,5 @@ THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \
theano/sandbox/tests/test_rng_mrg.py:test_GPUA_full_fill \
theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray"
FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN"
FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN,on_opt_error=raise,on_shape_error=raise"
THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS}
......@@ -5,11 +5,11 @@ import os
import sys
if sys.platform == 'win32':
config_cxx = 'cxx='
config_for_theano_cache_script = 'cxx=,device=cpu'
theano_flags = os.environ['THEANO_FLAGS'] if 'THEANO_FLAGS' in os.environ else ''
if theano_flags:
theano_flags += ','
theano_flags += config_cxx
theano_flags += config_for_theano_cache_script
os.environ['THEANO_FLAGS'] = theano_flags
import theano
......
......@@ -64,11 +64,18 @@ The documentation will be automatically regenerated in the next few hours.
Generate and upload the package
===============================
For release candidates, only upload on PyPI.
On PyPI
-------
Set your umask to ``0022`` to ensure that the package file will be readable by other people.
To check your umask::
umask
To set your umask::
umask 0022
Now change ``ISRELEASED`` in ``setup.py`` to ``True``.
Finally, use setuptools to register and upload the release::
......@@ -84,8 +91,8 @@ UnicodeDecodeError if there are non-ASCII characters in NEWS.txt. You
would need to change NEWS.txt so it contains only ASCII characters (the
problem usually comes from diacritics in people's names).
On mloss.org
------------
On mloss.org (for final releases only)
--------------------------------------
Project page is at http://mloss.org/software/view/241/.
Account jaberg is listed as submitter.
......@@ -138,8 +145,10 @@ then run the script.
Announce the release
====================
Generate an e-mail from the template in in ``EMAIL.txt``, including content
from ``NEWS.txt``, and send it to the following mailing lists:
Generate an e-mail from the template in ``EMAIL.txt``, including content
from ``NEWS.txt``.
For final releases, send the e-mail to the following mailing lists:
* theano-users
* theano-announce
......@@ -152,3 +161,8 @@ For release candidates, only e-mail:
* theano-announce
* theano-dev
* theano-users
For alpha and beta releases, only e-mail:
* theano-dev
* theano-users
......@@ -19,11 +19,34 @@
The user-friendly constructor is :func:`shared`
.. attribute:: value
Read/write access to the [non-symbolic] value/data associated with this SharedVariable.
Changes to this value will be visible to all functions using this SharedVariable.
.. method:: get_value(self, borrow=False, return_internal_type=False)
:param borrow: True to permit returning of an object aliased to internal memory.
:type borrow: bool
:param return_internal_type: True to permit the returning of an arbitrary type object used
internally to store the shared variable.
:type return_internal_type: bool
By default, return a copy of the data. If ``borrow=True`` (and
``return_internal_type=False``), maybe it will return a copy.
For tensor, it will always return a ndarray by default, so if
the data is on the GPU, it will return a copy, but if the data
is on the CPU, it will return the original data. If you do
``borrow=True`` and ``return_internal_type=True``, it will
always return the original data, not a copy, but this can be a
GPU object.
.. method:: set_value(self, new_value, borrow=False)
:param new_value: The new value.
:type new_value: A compatible type for this shared variable.
:param borrow: True to use the new_value directly, potentially creating problems
related to aliased memory.
:type borrow: bool
The new value will be seen by all functions using this SharedVariable.
.. method:: __init__(self, name, type, value, strict, container=None)
......
......@@ -10,6 +10,9 @@
.. moduleauthor:: LISA
.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test`. They must be added manually as they do not have the same user interface.
.. autofunction:: theano.tensor.nnet.bn.batch_normalization_train
.. autofunction:: theano.tensor.nnet.bn.batch_normalization_test
.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test`.
.. autofunction:: theano.tensor.nnet.bn.batch_normalization
......@@ -59,11 +59,11 @@ class OpFromGraph(gof.Op):
.. code-block:: python
import numpy
import numpy as np
import theano
from theano import config, function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz')
s = theano.shared(numpy.random.rand(2, 2).astype(config.floatX))
s = theano.shared(np.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op
......
......@@ -14,7 +14,7 @@ import six.moves.copyreg as copyreg
from itertools import chain, product as itertools_product
from theano.compat import izip
import numpy
import numpy as np
import theano
from theano import gof, config
......@@ -270,15 +270,15 @@ class BadOptimization(DebugModeError):
print(" New Value: ", str(self.new_r_val), file=sio)
try:
ov = numpy.asarray(self.old_r_val)
nv = numpy.asarray(self.new_r_val)
ov = np.asarray(self.old_r_val)
nv = np.asarray(self.new_r_val)
ssio = StringIO()
abs_diff = numpy.absolute(nv - ov)
print(" Max Abs Diff: ", numpy.max(abs_diff), file=ssio)
print(" Mean Abs Diff: ", numpy.mean(abs_diff), file=ssio)
print(" Median Abs Diff: ", numpy.median(abs_diff), file=ssio)
print(" Std Abs Diff: ", numpy.std(abs_diff), file=ssio)
arg_max_val = numpy.argmax(abs_diff)
abs_diff = np.absolute(nv - ov)
print(" Max Abs Diff: ", np.max(abs_diff), file=ssio)
print(" Mean Abs Diff: ", np.mean(abs_diff), file=ssio)
print(" Median Abs Diff: ", np.median(abs_diff), file=ssio)
print(" Std Abs Diff: ", np.std(abs_diff), file=ssio)
arg_max_val = np.argmax(abs_diff)
values_at_max = (nv.flatten()[arg_max_val],
ov.flatten()[arg_max_val])
print(" Value at Max Diff: ", values_at_max, file=ssio)
......@@ -286,13 +286,13 @@ class BadOptimization(DebugModeError):
# N.B. the maximum(..., 1e-8) protects against div by 0 when
# nv == ov == 0
reldiff = (abs_diff /
numpy.maaximum(numpy.absolute(nv) + numpy.absolute(ov),
1e-8))
print(" Max Rel Diff: ", numpy.max(reldiff), file=ssio)
print(" Mean Rel Diff: ", numpy.mean(reldiff), file=ssio)
print(" Median Rel Diff: ", numpy.median(reldiff), file=ssio)
print(" Std Rel Diff: ", numpy.std(reldiff), file=ssio)
arg_max_val = numpy.argmax(reldiff)
np.maximum(np.absolute(nv) + np.absolute(ov),
1e-8))
print(" Max Rel Diff: ", np.max(reldiff), file=ssio)
print(" Mean Rel Diff: ", np.mean(reldiff), file=ssio)
print(" Median Rel Diff: ", np.median(reldiff), file=ssio)
print(" Std Rel Diff: ", np.std(reldiff), file=ssio)
arg_max_val = np.argmax(reldiff)
values_at_max = (nv.flatten()[arg_max_val],
ov.flatten()[arg_max_val])
print(" Value at Max Diff: ", values_at_max, file=ssio)
......@@ -342,8 +342,8 @@ class BadDestroyMap(DebugModeError):
print(" repr (old val):", repr(self.old_val), file=sio)
print(" repr (new val):", repr(self.new_val), file=sio)
try:
npy_old_val = numpy.asarray(self.old_val)
npy_new_val = numpy.asarray(self.new_val)
npy_old_val = np.asarray(self.old_val)
npy_new_val = np.asarray(self.new_val)
print(" value dtype (new <space> old):", npy_new_val.dtype,
npy_old_val.dtype, file=sio)
print(" value shape (new <space> old):", npy_new_val.shape,
......@@ -356,13 +356,13 @@ class BadDestroyMap(DebugModeError):
print(" value min (new-old):", delta.min(), file=sio)
print(" value max (new-old):", delta.max(), file=sio)
print(" value argmin (new-old):",
numpy.unravel_index(delta.argmin(), npy_new_val.shape),
np.unravel_index(delta.argmin(), npy_new_val.shape),
file=sio)
print(" value argmax (new-old):",
numpy.unravel_index(delta.argmax(), npy_new_val.shape),
np.unravel_index(delta.argmax(), npy_new_val.shape),
file=sio)
print(" location of first 10 mismatches:",
numpy.transpose(numpy.nonzero(delta))[:10], file=sio)
np.transpose(np.nonzero(delta))[:10], file=sio)
print("", file=sio)
except Exception as e:
print("(Numpy-hints failed with: %s)" % str(e), file=sio)
......@@ -453,7 +453,7 @@ class InvalidValueError(DebugModeError):
v_dtype = v.dtype
v_min = v.min()
v_max = v.max()
v_isfinite = numpy.all(numpy.isfinite(v))
v_isfinite = np.all(np.isfinite(v))
except Exception:
pass
client_node = self.client_node
......@@ -1025,7 +1025,7 @@ def _lessbroken_deepcopy(a):
# this exists because copy.deepcopy on numpy arrays is broken
# This logic is also in link.py
from theano.gof.type import _cdata_type
if type(a) in (numpy.ndarray, numpy.memmap):
if type(a) in (np.ndarray, np.memmap):
rval = a.copy()
elif type(a) is _cdata_type:
# This is not copyable (and should be used for constant data).
......@@ -1034,7 +1034,7 @@ def _lessbroken_deepcopy(a):
rval = copy.deepcopy(a)
assert type(rval) == type(a), (type(rval), type(a))
if isinstance(rval, numpy.ndarray):
if isinstance(rval, np.ndarray):
assert rval.dtype == a.dtype
return rval
......@@ -1241,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# There is no risk to overwrite inputs, since r does not work
# inplace.
if isinstance(r.type, (TensorType, CudaNdarrayType)):
reuse_outputs[r][...] = numpy.asarray(
reuse_outputs[r][...] = np.asarray(
def_val).astype(r.type.dtype)
if reuse_outputs:
......@@ -1259,7 +1259,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
new_buf = r.type.value_zeros(r_vals[r].shape)
# CudaNdarray don't have flags field
# assert new_buf.flags["C_CONTIGUOUS"]
new_buf[...] = numpy.asarray(def_val).astype(r.type.dtype)
new_buf[...] = np.asarray(def_val).astype(r.type.dtype)
c_cont_outputs[r] = new_buf
......@@ -1273,7 +1273,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
f_cont_outputs = {}
for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)):
new_buf = numpy.zeros(
new_buf = np.zeros(
shape=r_vals[r].shape,
dtype=r_vals[r].dtype,
order='F')
......@@ -1331,7 +1331,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
else:
buf_shape.append(s * 2)
new_buf = r.type.value_zeros(buf_shape)
new_buf[...] = numpy.asarray(def_val).astype(r.type.dtype)
new_buf[...] = np.asarray(def_val).astype(r.type.dtype)
init_strided[r] = new_buf
# The number of combinations is exponential in the number of
......@@ -1377,7 +1377,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
r_buf = r_buf[tuple(strides)][tuple(shapes)]
assert r_buf.shape == r_vals[r].shape
r_buf[...] = numpy.asarray(def_val).astype(r_buf.dtype)
r_buf[...] = np.asarray(def_val).astype(r_buf.dtype)
strided[r] = r_buf
if strided:
......@@ -1405,7 +1405,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for s, sd in zip(r_vals[r].shape,
r_shape_diff)]
new_buf = r.type.value_zeros(out_shape)
new_buf[...] = numpy.asarray(
new_buf[...] = np.asarray(
def_val).astype(r.type.dtype)
wrong_size[r] = new_buf
......@@ -2261,7 +2261,7 @@ class _Linker(gof.link.LocalLinker):
# HACK TO LOOK LIKE A REAL DESTRUCTIVE ACTION
# TOOK PLACE
if ((type(dr_vals[r][0]) in
(numpy.ndarray, numpy.memmap)) and
(np.ndarray, np.memmap)) and
(dr_vals[r][0].dtype ==
storage_map[r][0].dtype) and
(dr_vals[r][0].shape ==
......
......@@ -13,7 +13,7 @@ from six import string_types
from theano.compile.io import In
from theano.compile.function_module import orig_function
from theano.compile.pfunc import pfunc
from numpy import any
import numpy as np
import warnings
from theano import compat
......@@ -286,7 +286,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
"input.")
# compute some features of the arguments:
uses_tuple = any([isinstance(i, (list, tuple)) for i in inputs])
uses_tuple = np.any([isinstance(i, (list, tuple)) for i in inputs])
uses_updates = bool(updates)
uses_givens = bool(givens)
......
......@@ -12,13 +12,14 @@ import six.moves.cPickle as pickle
from itertools import chain
import time
import warnings
import numpy
import numpy as np
import theano
from theano import config, gof
from theano.compat import izip
from theano.gof import graph
import theano.compile.mode
import theano.compile.profiling
from theano.compile.io import (
In, SymbolicInput, SymbolicOutput)
from theano.compile.ops import deep_copy_op, view_op
......@@ -663,7 +664,7 @@ class Function(object):
input_storage = [i.value for i in ins]
# reinitialize new maker and create new function
if profile is None:
profile = config.profile
profile = config.profile or config.print_global_stats
# profile -> True or False
if profile is True:
if name:
......@@ -749,6 +750,12 @@ class Function(object):
List of outputs on indices/keys from ``output_subset`` or all of them,
if ``output_subset`` is not passed.
"""
def restore_defaults():
for i, (required, refeed, value) in enumerate(self.defaults):
if refeed:
if isinstance(value, gof.Container):
value = value.storage[0]
self[i] = value
profile = self.profile
t0 = time.time()
......@@ -804,6 +811,7 @@ class Function(object):
e.args = ("Bad input " + argument_name + " to " +
function_name + " at index %d (0-based). %s"
% (i, where),) + e.args
restore_defaults()
raise
s.provided += 1
i += 1
......@@ -829,9 +837,9 @@ class Function(object):
in args_share_memory[j]],
[self.input_storage[k].storage[0] for k
in args_share_memory[j]])
if numpy.any([(var.type is i_var.type and
var.type.may_share_memory(val, i_val))
for (var, val) in group_j]):
if np.any([(var.type is i_var.type and
var.type.may_share_memory(val, i_val))
for (var, val) in group_j]):
is_aliased = True
args_share_memory[j].append(i)
......@@ -853,14 +861,17 @@ class Function(object):
if not self.trust_input:
for c in self.input_storage:
if c.required and not c.provided:
restore_defaults()
raise TypeError("Missing required input: %s" %
getattr(self.inv_finder[c], 'variable',
self.inv_finder[c]))
if c.provided > 1:
restore_defaults()
raise TypeError("Multiple values for input: %s" %
getattr(self.inv_finder[c], 'variable',
self.inv_finder[c]))
if c.implicit and c.provided > 0:
restore_defaults()
raise TypeError(
'Tried to provide value for implicit input: %s'
% getattr(self.inv_finder[c], 'variable',
......@@ -873,6 +884,7 @@ class Function(object):
self.fn() if output_subset is None else\
self.fn(output_subset=output_subset)
except Exception:
restore_defaults()
if hasattr(self.fn, 'position_of_error'):
# this is a new vm-provided function or c linker
# they need this because the exception manipulation
......@@ -925,11 +937,7 @@ class Function(object):
outputs = outputs[:self.n_returned_outputs]
# Put default values back in the storage
for i, (required, refeed, value) in enumerate(self.defaults):
if refeed:
if isinstance(value, gof.Container):
value = value.storage[0]
self[i] = value
restore_defaults()
#
# NOTE: This logic needs to be replicated in
# scan.
......@@ -937,6 +945,7 @@ class Function(object):
#
dt_call = time.time() - t0
theano.compile.profiling.total_fct_exec_time += dt_call
self.maker.mode.call_time += dt_call
if profile:
profile.fct_callcount += 1
......@@ -1019,9 +1028,9 @@ def _pickle_Function(f):
all_data = input_storage + inputs_data
for i, d_i in enumerate(all_data):
for j, d_j in enumerate(all_data):
if ((i < j) and isinstance(d_i, numpy.ndarray) and
isinstance(d_j, numpy.ndarray)):
if numpy.may_share_memory(d_i, d_j):
if ((i < j) and isinstance(d_i, np.ndarray) and
isinstance(d_j, np.ndarray)):
if np.may_share_memory(d_i, d_j):
if f.pickle_aliased_memory_strategy == 'warn':
_logger.warning('aliased relationship between '
'Function arguments %s, %s '
......@@ -1041,7 +1050,7 @@ def _constructor_Function(maker, input_storage, inputs_data):
assert len(f.input_storage) == len(inputs_data)
for container, x in zip(f.input_storage, inputs_data):
assert (container.data is x) or \
(isinstance(x, numpy.ndarray) and (container.data == x).all()) or \
(isinstance(x, np.ndarray) and (container.data == x).all()) or \
(container.data == x)
return f
......@@ -1466,6 +1475,7 @@ class FunctionMaker(object):
end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer
theano.compile.profiling.total_graph_opt_time += opt_time
if profile:
profile.optimizer_time += opt_time
if theano.config.profile_optimizer:
......@@ -1655,6 +1665,7 @@ class FunctionMaker(object):
end_linker = time.time()
linker_time = end_linker - start_linker
theano.compile.profiling.total_time_linker += linker_time
_logger.debug('Linker took %f seconds', linker_time)
if self.profile:
self.profile.linker_time += linker_time
......
from __future__ import absolute_import, print_function, division
# Note: this code was initially copied from the 'pyutools' package by its
# original author, and re-licensed under Theano's license.
import numpy
import numpy as np
import theano
from theano.compile.mode import Mode
......@@ -93,8 +93,8 @@ class MonitorMode(Mode):
def detect_nan(i, node, fn):
for output in fn.outputs:
if (not isinstance(output[0], numpy.random.RandomState) and
numpy.isnan(output[0]).any()):
if (not isinstance(output[0], np.random.RandomState) and
np.isnan(output[0]).any()):
print('*** NaN detected ***')
theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs])
......
......@@ -17,7 +17,7 @@ from six import iteritems, integer_types
from six.moves import xrange
import numpy
import numpy as np
def register_view_op_c_code(type, code, version=()):
......@@ -338,7 +338,7 @@ class Shape_i(gof.Op):
def __init__(self, i):
# As i will be used in the hash and that ndarray are not hashable,
# we need to convert it to an int as it is hashable.
if isinstance(i, numpy.ndarray):
if isinstance(i, np.ndarray):
assert i.dtype in theano.tensor.integer_dtypes
assert i == int(i)
i = int(i)
......@@ -665,11 +665,11 @@ class Rebroadcast(gof.Op):
items = sorted(axis)
self.axis = OrderedDict(items)
for axis, broad in iteritems(self.axis):
if not isinstance(axis, (numpy.integer, integer_types)):
if not isinstance(axis, (np.integer, integer_types)):
raise TypeError("Rebroadcast needs integer axes. "
"Got {}".format(axis))
if not isinstance(broad, (numpy.bool_, bool)):
if not isinstance(broad, (np.bool_, bool)):
raise TypeError("Rebroadcast needs bool for new broadcast "
"pattern. Got {}".format(broad))
......@@ -835,8 +835,8 @@ class SpecifyShape(gof.Op):
x, shape = inp
out, = out_
assert x.ndim == shape.size
assert numpy.all(x.shape == shape), ("got shape", x.shape,
"expected", shape)
assert np.all(x.shape == shape), ("got shape", x.shape,
"expected", shape)
out[0] = x
def infer_shape(self, node, shapes):
......
......@@ -364,7 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if givens is None:
givens = []
if profile is None:
profile = config.profile
profile = config.profile or config.print_global_stats
# profile -> True or False
if profile is False:
profile = None
......
......@@ -27,7 +27,7 @@ import sys
import time
from collections import defaultdict
import numpy
import numpy as np
import theano
from six import iteritems
......@@ -36,6 +36,9 @@ from theano.gof import graph
logger = logging.getLogger('theano.compile.profiling')
theano_imported_time = time.time()
total_fct_exec_time = 0.
total_graph_opt_time = 0.
total_time_linker = 0.
config = theano.config
_atexit_print_list = []
......@@ -47,7 +50,80 @@ def _atexit_print_fn():
Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
"""
to_sum = []
if config.profile:
to_sum = []
if config.profiling.destination == 'stderr':
destination_file = sys.stderr
elif config.profiling.destination == 'stdout':
destination_file = sys.stdout
else:
destination_file = open(config.profiling.destination, 'w')
# Reverse sort in the order of compile+exec time
for ps in sorted(_atexit_print_list,
key=lambda a:a.compile_time + a.fct_call_time)[::-1]:
if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if not isinstance(ps, ScanProfileStats):
to_sum.append(ps)
else:
# TODO print the name if there is one!
print('Skipping empty Profile')
if len(to_sum) > 1:
# Make a global profile
cum = copy.copy(to_sum[0])
msg = ("Sum of all(%d) printed profiles at exit excluding Scan op"
" profile." % len(to_sum))
cum.message = msg
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictonary
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e:
print("Got an exception while merging profile")
print(e)
cum.optimizer_profile = None
else:
cum.optimizer_profile = None
cum.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if config.print_global_stats:
print_global_stats()
def print_global_stats():
"""
Print the following stats:
-- Time elapsed since Theano was imported
-- Time spent inside Theano functions
-- Time spent in compiling Theano functions
-- on graph optimization
-- on linker
"""
if config.profiling.destination == 'stderr':
destination_file = sys.stderr
......@@ -56,57 +132,18 @@ def _atexit_print_fn():
else:
destination_file = open(config.profiling.destination, 'w')
# Reverse sort in the order of compile+exec time
for ps in sorted(_atexit_print_list,
key=lambda a:a.compile_time + a.fct_call_time)[::-1]:
if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if not isinstance(ps, ScanProfileStats):
to_sum.append(ps)
else:
# TODO print the name if there is one!
print('Skipping empty Profile')
if len(to_sum) > 1:
# Make a global profile
cum = copy.copy(to_sum[0])
msg = ("Sum of all(%d) printed profiles at exit excluding Scan op"
" profile." % len(to_sum))
cum.message = msg
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictonary
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e:
print("Got an exception while merging profile")
print(e)
cum.optimizer_profile = None
else:
cum.optimizer_profile = None
cum.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
print('='*50, file=destination_file)
print('Global stats: ',
'Time elasped since Theano import = %6.3fs, '
'Time spent in Theano functions = %6.3fs, '
'Time spent compiling Theano functions: '
' optimzation = %6.3fs, linker = %6.3fs ' %
(time.time() - theano_imported_time,
total_fct_exec_time,
total_graph_opt_time,
total_time_linker),
file=destination_file)
print('='*50, file=destination_file)
class ProfileStats(object):
......@@ -440,7 +477,7 @@ class ProfileStats(object):
hs += ['<#apply>']
es += [' %4d ']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs)
upto_length = np.sum([len(x) for x in hs]) + len(hs)
maxlen = max(self.line_width - upto_length, 0)
hs += ['<Class name>']
es += ['%s']
......@@ -522,7 +559,7 @@ class ProfileStats(object):
hs += ['<#apply>']
es += [' %4d ']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs)
upto_length = np.sum([len(x) for x in hs]) + len(hs)
maxlen = max(self.line_width - upto_length, 0)
hs += ['<Op name>']
es += ['%s']
......@@ -590,7 +627,7 @@ class ProfileStats(object):
if self.variable_shape:
hs += ['<Mflops>', '<Gflops/s>']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs)
upto_length = np.sum([len(x) for x in hs]) + len(hs)
maxlen = max(self.line_width - upto_length, 0)
hs += ['<Apply name>']
es += ['%s']
......@@ -892,7 +929,7 @@ class ProfileStats(object):
node_list = list(node_list)
mem_count = 0
max_mem_count = 0
mem_bound = numpy.inf
mem_bound = np.inf
# This take only the inputs/outputs dependencies.
dependencies = fgraph.profile.dependencies
done_set = set([])
......
......@@ -9,7 +9,7 @@ import copy
import logging
# Third-party imports
import numpy
import numpy as np
# Theano imports
from theano.gof import Container, Variable, generic, utils
......@@ -120,6 +120,31 @@ class SharedVariable(Variable):
Changes to this value will be visible to all functions using
this SharedVariable.
Notes
-----
Set_value will work in-place on the GPU, if
the following conditions are met:
* The destination on the GPU must be c_contiguous.
* The source is on the CPU.
* The old value must have the same dtype as the new value
(which is a given for now, since only float32 is
supported).
* The old and new value must have the same shape.
* The old value is being completely replaced by the new
value (not partially modified, e.g. by replacing some
subtensor of it).
* You change the value of the shared variable via
set_value, not via the .value accessors. You should not
use the .value accessors anyway, since they will soon be
deprecated and removed.
It is also worth mentioning that, for efficient transfer to the GPU,
Theano will make the new data ``c_contiguous``. This can require an
extra copy of the data on the host.
The in-place update of GPU memory works when borrow is either True or False.
"""
if borrow:
self.container.value = new_value
......@@ -162,7 +187,7 @@ class SharedVariable(Variable):
# implemented at all, but with a more explicit error message to help
# Theano users figure out the root of the problem more easily.
value = self.get_value(borrow=True)
if isinstance(value, numpy.ndarray):
if isinstance(value, np.ndarray):
# Array probably had an unknown dtype.
msg = ("a Numpy array with dtype: '%s'. This data type is not "
"currently recognized by Theano tensors: please cast "
......
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
from theano import config, shared
......@@ -23,14 +23,14 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5
xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = np.ones((2, 2), dtype=config.floatX) * 5
# print function, function.__module__
# print fn.maker.fgraph.toposort()
fn(xv, yv, zv)
assert numpy.all(8.0 == fn(xv, yv, zv))
assert numpy.all(8.0 == fn(xv, yv, zv))
assert np.all(8.0 == fn(xv, yv, zv))
assert np.all(8.0 == fn(xv, yv, zv))
def test_size_changes(self):
x, y, z = T.matrices('xyz')
......@@ -38,15 +38,15 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
op = OpFromGraph([x, y], [e])
f = op(x, op(y, z))
fn = function([x, y, z], f)
xv = numpy.ones((2, 3), dtype=config.floatX)
yv = numpy.ones((3, 4), dtype=config.floatX) * 3
zv = numpy.ones((4, 5), dtype=config.floatX) * 5
xv = np.ones((2, 3), dtype=config.floatX)
yv = np.ones((3, 4), dtype=config.floatX) * 3
zv = np.ones((4, 5), dtype=config.floatX) * 5
res = fn(xv, yv, zv)
assert res.shape == (2, 5)
assert numpy.all(180.0 == res)
assert np.all(180.0 == res)
res = fn(xv, yv, zv)
assert res.shape == (2, 5)
assert numpy.all(180.0 == res)
assert np.all(180.0 == res)
def test_grad(self):
x, y, z = T.matrices('xyz')
......@@ -55,10 +55,10 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
f = op(x, y, z)
f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5
assert numpy.all(11.0 == fn(xv, yv, zv))
xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = np.ones((2, 2), dtype=config.floatX) * 5
assert np.all(11.0 == fn(xv, yv, zv))
def test_grad_grad(self):
x, y, z = T.matrices('xyz')
......@@ -68,47 +68,47 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
f = f - T.grad(T.sum(f), y)
f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5
assert numpy.allclose(6.0, fn(xv, yv, zv))
xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = np.ones((2, 2), dtype=config.floatX) * 5
assert np.allclose(6.0, fn(xv, yv, zv))
def test_shared(self):
x, y, z = T.matrices('xyz')
s = shared(numpy.random.rand(2, 2).astype(config.floatX))
s = shared(np.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# (1+3*5=array of 16) - (3+1*5=array of 8)
f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5
xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = np.ones((2, 2), dtype=config.floatX) * 5
# print function, function.__module__
# print fn.maker.fgraph.toposort()
assert numpy.allclose(8.0, fn(xv, yv, zv))
assert numpy.allclose(8.0, fn(xv, yv, zv))
assert np.allclose(8.0, fn(xv, yv, zv))
assert np.allclose(8.0, fn(xv, yv, zv))
def test_shared_grad(self):
x, y, z = T.matrices('xyz')
s = shared(numpy.random.rand(2, 2).astype(config.floatX))
s = shared(np.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
f = op(x, y, z)
f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX) * 3
zv = numpy.ones((2, 2), dtype=config.floatX) * 5
assert numpy.allclose(11.0 + s.get_value(), fn(xv, yv, zv))
xv = np.ones((2, 2), dtype=config.floatX)
yv = np.ones((2, 2), dtype=config.floatX) * 3
zv = np.ones((2, 2), dtype=config.floatX) * 5
assert np.allclose(11.0 + s.get_value(), fn(xv, yv, zv))
# grad again the shared variable
f = op(x, y, z)
f = f - T.grad(T.sum(f), s)
fn = function([x, y, z], f)
assert numpy.allclose(15.0 + s.get_value(),
fn(xv, yv, zv))
assert np.allclose(15.0 + s.get_value(),
fn(xv, yv, zv))
def test_connection_pattern(self):
# Basic case
......@@ -163,6 +163,6 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
p = T.matrix('p')
self._compile_and_check([q, p],
op_graph(q, p),
[numpy.ones([3, 4], dtype=config.floatX),
numpy.ones([3, 4], dtype=config.floatX)],
[np.ones([3, 4], dtype=config.floatX),
np.ones([3, 4], dtype=config.floatX)],
OpFromGraph)
......@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest
import unittest
import numpy
import numpy as np
from theano import config
from theano import gof
......@@ -316,7 +316,7 @@ def test_just_c_code():
x = theano.tensor.dvector()
f = theano.function([x], wb2(x),
mode=debugmode.DebugMode(check_py_code=False))
assert numpy.all(f([1, 2]) == [2, 4])
assert np.all(f([1, 2]) == [2, 4])
def test_baddestroymap():
......@@ -349,7 +349,7 @@ def test_baddestroymap_c():
f = theano.function([x], wb2i(x),
mode=debugmode.DebugMode(check_py_code=False))
try:
assert numpy.all(f([1, 2]) == [2, 4])
assert np.all(f([1, 2]) == [2, 4])
assert False # failed to raise error
except debugmode.BadDestroyMap:
pass
......@@ -445,8 +445,8 @@ class Test_ViewMap(unittest.TestCase):
r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [1, 2, 3, 4])
assert numpy.all(r1 == [2, 3, 4])
assert np.all(r0 == [1, 2, 3, 4])
assert np.all(r1 == [2, 3, 4])
def test_aliased_outputs_ok_output(self):
# here aliased outputs is ok because they are both outputs of the
......@@ -470,8 +470,8 @@ class Test_ViewMap(unittest.TestCase):
r0, r1 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [2, 4, 6, 8])
assert numpy.all(r1 == [4, 6, 8])
assert np.all(r0 == [2, 4, 6, 8])
assert np.all(r1 == [4, 6, 8])
def test_aliased_outputs_ok_shadow(self):
# here the alias between outputs is ok because one of them is not used
......@@ -496,7 +496,7 @@ class Test_ViewMap(unittest.TestCase):
r0 = f([1, 2, 3, 4], [5, 6, 7, 8])
assert numpy.all(r0 == [2, 4, 6, 8])
assert np.all(r0 == [2, 4, 6, 8])
def test_aliased_outputs_bad(self):
# here the alias between outputs is not ok because destroying one
......@@ -555,31 +555,31 @@ class Test_check_isfinite(unittest.TestCase):
g = theano.function([x], theano.tensor.log(x), mode='DEBUG_MODE')
# this should work
f(numpy.log([3, 4, 5]).astype(config.floatX))
f(np.log([3, 4, 5]).astype(config.floatX))
# if TensorType.filter_checks_isfinite were true, these would raise
# ValueError
# if not, DebugMode will check internally, and raise InvalidValueError
# passing an invalid value as an input should trigger ValueError
self.assertRaises(debugmode.InvalidValueError, f,
numpy.log([3, -4, 5]).astype(config.floatX))
np.log([3, -4, 5]).astype(config.floatX))
self.assertRaises(debugmode.InvalidValueError, f,
(numpy.asarray([0, 1.0, 0]) / 0).astype(config.floatX))
(np.asarray([0, 1.0, 0]) / 0).astype(config.floatX))
self.assertRaises(debugmode.InvalidValueError, f,
(numpy.asarray([1.0, 1.0, 1.0]) / 0).astype(config.floatX))
(np.asarray([1.0, 1.0, 1.0]) / 0).astype(config.floatX))
# generating an invalid value internally should trigger
# InvalidValueError
self.assertRaises(debugmode.InvalidValueError, g,
numpy.asarray([3, -4, 5], dtype=config.floatX))
np.asarray([3, -4, 5], dtype=config.floatX))
# this should disable the exception
theano.tensor.TensorType.filter_checks_isfinite = False
theano.compile.mode.predefined_modes[
'DEBUG_MODE'].check_isfinite = False
# insert several Inf
f(numpy.asarray(numpy.asarray([1.0, 1.0, 1.0]) / 0,
dtype=config.floatX))
f(np.asarray(np.asarray([1.0, 1.0, 1.0]) / 0,
dtype=config.floatX))
def test_check_isfinite_disabled(self):
x = theano.tensor.dvector()
......@@ -587,10 +587,10 @@ class Test_check_isfinite(unittest.TestCase):
mode=debugmode.DebugMode(check_isfinite=False))
# nan should go through
f(numpy.log([3, -4, 5]))
f(np.log([3, -4, 5]))
# inf should go through
infs = numpy.asarray([1.0, 1., 1.]) / 0
infs = np.asarray([1.0, 1., 1.]) / 0
# print infs
f(infs)
return
......@@ -721,14 +721,14 @@ class VecAsRowAndCol(gof.Op):
class Test_preallocated_output(unittest.TestCase):
def setUp(self):
self.rng = numpy.random.RandomState(seed=utt.fetch_seed())
self.rng = np.random.RandomState(seed=utt.fetch_seed())
def test_f_contiguous(self):
a = theano.tensor.fmatrix('a')
b = theano.tensor.fmatrix('b')
z = BrokenCImplementationAdd()(a, b)
# In this test, we do not want z to be an output of the graph.
out = theano.tensor.dot(z, numpy.eye(7))
out = theano.tensor.dot(z, np.eye(7))
a_val = self.rng.randn(7, 7).astype('float32')
b_val = self.rng.randn(7, 7).astype('float32')
......
......@@ -5,7 +5,7 @@ import shutil
import tempfile
import unittest
import numpy
import numpy as np
import theano
from theano.compile.io import In
......@@ -27,7 +27,7 @@ def test_function_dump():
fct2 = theano.function(**l)
x = [1, 2, 3]
assert numpy.allclose(fct1(x), fct2(x))
assert np.allclose(fct1(x), fct2(x))
class TestFunctionIn(unittest.TestCase):
......@@ -40,14 +40,14 @@ class TestFunctionIn(unittest.TestCase):
f = theano.function([In(a, strict=False)], out)
# works, rand generates float64 by default
f(numpy.random.rand(8))
f(np.random.rand(8))
# works, casting is allowed
f(numpy.array([1, 2, 3, 4], dtype='int32'))
f(np.array([1, 2, 3, 4], dtype='int32'))
f = theano.function([In(a, strict=True)], out)
try:
# fails, f expects float64
f(numpy.array([1, 2, 3, 4], dtype='int32'))
f(np.array([1, 2, 3, 4], dtype='int32'))
except TypeError:
pass
......@@ -70,17 +70,17 @@ class TestFunctionIn(unittest.TestCase):
# using mutable=True will let f change the value in aval
f = theano.function([In(a, mutable=True)], a_out, mode='FAST_RUN')
aval = numpy.random.rand(10)
aval = np.random.rand(10)
aval2 = aval.copy()
assert numpy.all(f(aval) == (aval2 * 2))
assert not numpy.all(aval == aval2)
assert np.all(f(aval) == (aval2 * 2))
assert not np.all(aval == aval2)
# using mutable=False should leave the input untouched
f = theano.function([In(a, mutable=False)], a_out, mode='FAST_RUN')
aval = numpy.random.rand(10)
aval = np.random.rand(10)
aval2 = aval.copy()
assert numpy.all(f(aval) == (aval2 * 2))
assert numpy.all(aval == aval2)
assert np.all(f(aval) == (aval2 * 2))
assert np.all(aval == aval2)
def test_in_update(self):
a = theano.tensor.dscalar('a')
......@@ -115,7 +115,7 @@ class TestFunctionIn(unittest.TestCase):
# changes occur at the same time and one doesn't overwrite the other.
for i in range(5):
f()
assert numpy.allclose(shared_var.get_value(), i % 2)
assert np.allclose(shared_var.get_value(), i % 2)
def test_in_allow_downcast_int(self):
a = theano.tensor.wvector('a') # int16
......@@ -128,16 +128,16 @@ class TestFunctionIn(unittest.TestCase):
# Both values are in range. Since they're not ndarrays (but lists),
# they will be converted, and their value checked.
assert numpy.all(f([3], [6], 1) == 10)
assert np.all(f([3], [6], 1) == 10)
# Values are in range, but a dtype too large has explicitly been given
# For performance reasons, no check of the data is explicitly performed
# (It might be OK to change this in the future.)
self.assertRaises(TypeError, f, [3], numpy.array([6], dtype='int16'),
self.assertRaises(TypeError, f, [3], np.array([6], dtype='int16'),
1)
# Value too big for a, silently ignored
assert numpy.all(f([2 ** 20], numpy.ones(1, dtype='int8'), 1) == 2)
assert np.all(f([2 ** 20], np.ones(1, dtype='int8'), 1) == 2)
# Value too big for b, raises TypeError
self.assertRaises(TypeError, f, [3], [312], 1)
......@@ -156,17 +156,17 @@ class TestFunctionIn(unittest.TestCase):
(a + b + c))
# If the values can be accurately represented, everything is OK
assert numpy.all(f(0, 0, 0) == 0)
assert np.all(f(0, 0, 0) == 0)
# If allow_downcast is True, idem
assert numpy.allclose(f(0.1, 0, 0), 0.1)
assert np.allclose(f(0.1, 0, 0), 0.1)
# If allow_downcast is False, nope
self.assertRaises(TypeError, f, 0, 0.1, 0)
# If allow_downcast is None, it should work iff floatX=float32
if theano.config.floatX == 'float32':
assert numpy.allclose(f(0, 0, 0.1), 0.1)
assert np.allclose(f(0, 0, 0.1), 0.1)
else:
self.assertRaises(TypeError, f, 0, 0, 0.1)
......@@ -182,10 +182,10 @@ class TestFunctionIn(unittest.TestCase):
# If the values can be accurately represented, everything is OK
z = [0]
assert numpy.all(f(z, z, z) == 0)
assert np.all(f(z, z, z) == 0)
# If allow_downcast is True, idem
assert numpy.allclose(f([0.1], z, z), 0.1)
assert np.allclose(f([0.1], z, z), 0.1)
# If allow_downcast is False, nope
self.assertRaises(TypeError, f, z, [0.1], z)
......
from __future__ import absolute_import, print_function, division
import copy
import six.moves.cPickle as pickle
import numpy
import numpy as np
import unittest
......@@ -18,8 +18,6 @@ from theano import tensor
from theano import tensor as T
import theano
import numpy as N
def PatternOptimizer(p1, p2, ign=True):
    """Return an OpKeyOptimizer that rewrites pattern ``p1`` into ``p2``.

    ``ign`` is forwarded as ``ignore_newtrees`` so nodes created by the
    substitution are not themselves re-matched.
    """
    substitution = gof.PatternSub(p1, p2)
    return gof.OpKeyOptimizer(substitution, ignore_newtrees=ign)
......@@ -281,7 +279,7 @@ class T_function(unittest.TestCase):
def test_swap_SharedVariable(self):
i = T.iscalar()
x_list = theano.shared(value=numpy.random.rand(10).astype(config.floatX))
x_list = theano.shared(value=np.random.rand(10).astype(config.floatX))
x = T.scalar('x')
# SharedVariable for tests, one of them has update
......@@ -343,11 +341,11 @@ class T_function(unittest.TestCase):
A special testcase for logistic_sgd.py in Deep Learning Tutorial
This test assert that SharedVariable in different function have same storage
"""
train_x = theano.shared(value=numpy.random.rand(10, 10).astype(config.floatX))
test_x = theano.shared(value=numpy.random.rand(10, 10).astype(config.floatX))
train_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))
test_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))
train_y = theano.shared(value=numpy.random.rand(10, 1).astype(config.floatX))
test_y = theano.shared(value=numpy.random.rand(10, 1).astype(config.floatX))
train_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))
test_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))
i = T.iscalar('index')
x = T.vector('x')
......@@ -500,42 +498,42 @@ class T_function(unittest.TestCase):
when borrow=True is implemented.
"""
a = T.dmatrix()
aval = numpy.random.rand(3, 3)
aval = np.random.rand(3, 3)
# when borrow=False, test that a destroy map cannot alias output to input
f = theano.function([In(a, borrow=False)], Out(a + 1, borrow=True))
assert numpy.all(f(aval) == aval + 1)
assert not numpy.may_share_memory(aval, f(aval))
assert np.all(f(aval) == aval + 1)
assert not np.may_share_memory(aval, f(aval))
# when borrow=False, test that a viewmap cannot alias output to input
f = theano.function([In(a, borrow=False)], Out(a[0, :], borrow=True))
assert numpy.all(f(aval) == aval[0, :])
assert not numpy.may_share_memory(aval, f(aval))
assert np.all(f(aval) == aval[0, :])
assert not np.may_share_memory(aval, f(aval))
def test_borrow_output(self):
a = T.dmatrix()
f = function([a], Out(a, borrow=False))
o = N.ones((3, 3))
o = np.ones((3, 3))
assert o is not f(o) # function no longer permits aliasing outputs to inputs
f = function([a], Out(a * 4, borrow=False))
o = N.ones((3, 3))
o = np.ones((3, 3))
four = f(o)
assert numpy.all(four == 4)
assert np.all(four == 4)
f(o + .1) # should not clobber the memory used to store four
assert numpy.all(four == 4)
assert np.all(four == 4)
f = function([a], Out(a * 4, borrow=True), mode=theano.Mode('c|py_nogc', 'fast_run'))
o = N.ones((3, 3))
o = np.ones((3, 3))
four = f(o)
assert numpy.all(four == 4)
assert np.all(four == 4)
f(o + .1) # should clobber the memory used to store four
if theano.config.cxx:
assert not numpy.all(four == 4)
assert not np.all(four == 4)
else:
# The Elemwise.perform method don't reuse memory
# as some numpy version don't support that correctly.
assert numpy.all(four == 4)
assert np.all(four == 4)
def test_disconnected_input(self):
a = T.scalar('a')
......@@ -579,6 +577,20 @@ class T_function(unittest.TestCase):
if not isinstance(key, theano.gof.Constant):
assert (val[0] is None)
def test_default_values(self):
"""
Check that default values are restored
when an exception occurs in interactive mode.
"""
a, b = T.dscalars('a', 'b')
c = a + b
func = theano.function([theano.In(a, name='first'), theano.In(b, value=1, name='second')], c)
x = func(first=1)
try:
func(second=2)
except TypeError:
assert(func(first=1) == x)
class T_picklefunction(unittest.TestCase):
......@@ -753,7 +765,7 @@ class T_picklefunction(unittest.TestCase):
assert f2.container[s].storage is f1.container[s].storage
# now put in a function with non-scalar
v_value = numpy.asarray([2, 3, 4.], dtype=config.floatX)
v_value = np.asarray([2, 3, 4.], dtype=config.floatX)
f3 = function([x, In(v, value=v_value)], x + v)
list_of_things.append(f3)
......@@ -800,13 +812,13 @@ class T_picklefunction(unittest.TestCase):
assert nl[5](3) == ol[5](3)
assert nl[4].value[nl[0]] == 6
assert numpy.all(nl[6][nl[2]] == numpy.asarray([2, 3., 4]))
assert np.all(nl[6][nl[2]] == np.asarray([2, 3., 4]))
def test_broken_pickle_with_shared(self):
saves = []
def pers_save(obj):
if isinstance(obj, numpy.ndarray):
if isinstance(obj, np.ndarray):
saves.append(obj)
return len(saves) - 1
else:
......@@ -815,7 +827,7 @@ class T_picklefunction(unittest.TestCase):
        def pers_load(id):
            # Persistent-load counterpart of pers_save: look up the
            # object stored under this index in the closed-over
            # ``saves`` list.  ``id`` shadows the builtin, but the
            # pickle persistent_load protocol calls it positionally,
            # so the name is cosmetic only.
            return saves[id]
b = numpy.random.rand(5, 4)
b = np.random.rand(5, 4)
x = theano.tensor.matrix()
y = theano.shared(b)
......
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import unittest
from theano.compile.pfunc import pfunc
......@@ -20,8 +20,8 @@ class NNet(object):
self.input = input
self.target = target
self.lr = shared(lr, 'learning_rate')
self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
self.w1 = shared(np.zeros((n_hidden, n_input)), 'w1')
self.w2 = shared(np.zeros((n_output, n_hidden)), 'w2')
# print self.lr.type
self.hidden = sigmoid(tensor.dot(self.w1, self.input))
......@@ -45,7 +45,7 @@ class NNet(object):
class TestNnet(unittest.TestCase):
def test_nnet(self):
rng = numpy.random.RandomState(1827)
rng = np.random.RandomState(1827)
data = rng.rand(10, 4)
nnet = NNet(n_input=3, n_hidden=10)
for epoch in range(3):
......@@ -60,4 +60,4 @@ class TestNnet(unittest.TestCase):
self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6)
# Just call functions to make sure they do not crash.
nnet.compute_output(input)
nnet.output_from_hidden(numpy.ones(10))
nnet.output_from_hidden(np.ones(10))
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano
......@@ -12,7 +12,7 @@ def test_detect_nan():
def detect_nan(i, node, fn):
for output in fn.outputs:
if numpy.isnan(output[0]).any():
if np.isnan(output[0]).any():
print('*** NaN detected ***')
theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs])
......@@ -36,7 +36,7 @@ def test_optimizer():
def detect_nan(i, node, fn):
for output in fn.outputs:
if numpy.isnan(output[0]).any():
if np.isnan(output[0]).any():
print('*** NaN detected ***')
theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs])
......@@ -65,7 +65,7 @@ def test_not_inplace():
def detect_nan(i, node, fn):
for output in fn.outputs:
if numpy.isnan(output[0]).any():
if np.isnan(output[0]).any():
print('*** NaN detected ***')
theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs])
......
......@@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, division
import logging
from nose.tools import assert_raises
import numpy
import numpy as np
from theano.compile.nanguardmode import NanGuardMode
import theano
......@@ -18,20 +18,20 @@ def test_NanGuardMode():
# intentionally. A working implementation should be able to capture all
# the abnormalties.
x = T.matrix()
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
w = theano.shared(np.random.randn(5, 7).astype(theano.config.floatX))
y = T.dot(x, w)
fun = theano.function(
[x], y,
mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
)
a = numpy.random.randn(3, 5).astype(theano.config.floatX)
infa = numpy.tile(
(numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
nana = numpy.tile(
numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 5))
biga = numpy.tile(
numpy.asarray(1e20).astype(theano.config.floatX), (3, 5))
a = np.random.randn(3, 5).astype(theano.config.floatX)
infa = np.tile(
(np.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
nana = np.tile(
np.asarray(np.nan).astype(theano.config.floatX), (3, 5))
biga = np.tile(
np.asarray(1e20).astype(theano.config.floatX), (3, 5))
fun(a) # normal values
......@@ -46,14 +46,14 @@ def test_NanGuardMode():
_logger.propagate = True
# slices
a = numpy.random.randn(3, 4, 5).astype(theano.config.floatX)
infa = numpy.tile(
(numpy.asarray(100.) ** 1000000).astype(theano.config.floatX),
a = np.random.randn(3, 4, 5).astype(theano.config.floatX)
infa = np.tile(
(np.asarray(100.) ** 1000000).astype(theano.config.floatX),
(3, 4, 5))
nana = numpy.tile(
numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 4, 5))
biga = numpy.tile(
numpy.asarray(1e20).astype(theano.config.floatX), (3, 4, 5))
nana = np.tile(
np.asarray(np.nan).astype(theano.config.floatX), (3, 4, 5))
biga = np.tile(
np.asarray(1e20).astype(theano.config.floatX), (3, 4, 5))
x = T.tensor3()
y = x[:, T.arange(2), T.arange(2)]
......
......@@ -9,7 +9,6 @@ from theano.tests import unittest_tools as utt
from theano import function
import theano
from theano.tensor import dmatrix, dvector
from numpy import allclose
from theano.compile import as_op
import pickle
......@@ -34,7 +33,7 @@ class OpDecoratorTests(utt.InferShapeTester):
r = fn([[1.5, 5], [2, 2]])
r0 = np.array([1.5, 7.5, 15., 30.])
assert allclose(r, r0), (r, r0)
assert np.allclose(r, r0), (r, r0)
def test_2arg(self):
x = dmatrix('x')
......@@ -50,7 +49,7 @@ class OpDecoratorTests(utt.InferShapeTester):
r = fn([[1.5, 5], [2, 2]], [1, 100, 2, 200])
r0 = np.array([2.5, 107.5, 17., 230.])
assert allclose(r, r0), (r, r0)
assert np.allclose(r, r0), (r, r0)
def test_infer_shape(self):
x = dmatrix('x')
......
......@@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, division
import unittest
import numpy
import numpy as np
import theano
from six.moves import StringIO
......@@ -45,7 +45,7 @@ class Test_profiling(unittest.TestCase):
f = theano.function(x, z, profile=p, name="test_profiling",
mode=m)
inp = [numpy.arange(1024, dtype='float32') + 1 for i in range(len(x))]
inp = [np.arange(1024, dtype='float32') + 1 for i in range(len(x))]
f(*inp)
buf = StringIO()
......
......@@ -126,6 +126,12 @@ AddConfigVar(
BoolParam(False, allow_override=False),
in_c_key=False)
AddConfigVar(
'print_global_stats',
"Print some global statistics (time spent) at the end",
BoolParam(False),
in_c_key=False)
class ContextsParam(ConfigParam):
def __init__(self):
......@@ -1111,7 +1117,7 @@ AddConfigVar('optdb.position_cutoff',
AddConfigVar('optdb.max_use_ratio',
'A ratio that prevent infinite loop in EquilibriumOptimizer.',
FloatParam(5),
FloatParam(8),
in_c_key=False)
AddConfigVar('gcc.cxxflags',
......
......@@ -2510,10 +2510,14 @@ class EquilibriumOptimizer(NavigatorOptimizer):
end_nb_nodes = len(fgraph.apply_nodes)
if max_use_abort:
_logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name +
". You can safely raise the current threshold of " +
"%f with the theano flag 'optdb.max_use_ratio'." %
config.optdb.max_use_ratio)
msg = ("EquilibriumOptimizer max'ed out by '%s'" % opt_name +
". You can safely raise the current threshold of " +
"%f with the theano flag 'optdb.max_use_ratio'." %
config.optdb.max_use_ratio)
if theano.config.on_opt_error == 'raise':
raise AssertionError(msg)
else:
_logger.error(msg)
fgraph.remove_feature(change_tracker)
assert len(loop_process_count) == len(loop_timing)
assert len(loop_process_count) == len(global_opt_timing)
......
......@@ -571,6 +571,7 @@ class TestEquilibrium(object):
opt.optimize(g)
assert str(g) == '[Op2(x, y)]'
@theano.configparser.change_flags(on_opt_error='ignore')
def test_low_use_ratio(self):
x, y, z = map(MyVariable, 'xyz')
e = op3(op4(x, y))
......
......@@ -503,6 +503,8 @@ def hist(coll):
return counts
@deprecated("theano.gof.utils",
msg="Use a_theano_variable.auto_name instead")
def give_variables_names(variables):
"""
Gives unique names to an iterable of variables. Modifies input.
......
......@@ -482,7 +482,7 @@ class Stack(VM):
try:
_, dt = self.run_thunk_of_node(current_apply)
del _
if config.profile:
if config.profile or config.print_global_stats:
current_idx = self.node_idx[current_apply]
self.call_counts[current_idx] += 1
self.call_times[current_idx] += dt
......@@ -596,7 +596,7 @@ class Stack(VM):
if current_apply.inputs[r].owner:
apply_stack.append(current_apply.inputs[r].owner)
else:
if config.profile:
if config.profile or config.print_global_stats:
for (idx, o) in enumerate(thunks[
self.node_idx[current_apply]].outputs):
var = self.nodes[
......@@ -757,7 +757,7 @@ class VM_Linker(link.LocalLinker):
associated to self, else, a new VM_Linker associated to fgraph.
"""
if (config.profile and
if ((config.profile or config.print_global_stats) and
((hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled) or
......@@ -856,7 +856,7 @@ class VM_Linker(link.LocalLinker):
pre_call_clear = [storage_map[v] for v in self.no_recycling]
if (self.callback is not None or self.callback_input is not None or
(config.profile and config.profile_memory) or
((config.profile or config.print_global_stats) and config.profile_memory) or
(self.allow_partial_eval and not self.use_cloop)):
if self.use_cloop and (self.callback is not None or
......@@ -1086,7 +1086,7 @@ class VM_Linker(link.LocalLinker):
lazy = config.vm.lazy
if lazy is None:
lazy = not all([(not th.lazy) for th in thunks])
if not (lazy or (config.profile and config.profile_memory) or
if not (lazy or ((config.profile or config.print_global_stats) and config.profile_memory) or
self.use_cloop or self.callback or self.callback_input):
for pair in itervalues(reallocated_info):
storage_map[pair[1]] = storage_map[pair[0]]
......
The diff is collapsed.
......@@ -2,8 +2,19 @@
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, npy_float64 epsilon,
PyGpuArrayObject **outp, PyGpuArrayObject **x_mean,
PyGpuArrayObject **x_invstd, cudnnHandle_t _handle) {
npy_float64 running_average_factor,
#ifdef RUNNING_AVERAGES
PyGpuArrayObject *in_running_mean,
PyGpuArrayObject *in_running_var,
#endif
PyGpuArrayObject **outp,
PyGpuArrayObject **x_mean,
PyGpuArrayObject **x_invstd,
#ifdef RUNNING_AVERAGES
PyGpuArrayObject **out_running_mean,
PyGpuArrayObject **out_running_var,
#endif
cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context;
if (c_set_tensorNd(inp, bn_input) != 0)
......@@ -11,11 +22,19 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(scale, bn_params) != 0)
return 1;
if (epsilon < 1e-5)
if (epsilon < 1e-5) {
PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
return 1;
}
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1;
#endif
if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
return 1;
if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
......@@ -24,6 +43,31 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(*outp, bn_output) != 0)
return 1;
#ifdef RUNNING_AVERAGES
#ifdef INPLACE_RUNNING_MEAN
Py_XDECREF(out_running_mean);
PyGpuArrayObject *running_mean = in_running_mean;
Py_INCREF(running_mean);
#else
PyGpuArrayObject *running_mean = *out_running_mean;
running_mean = theano_try_copy(running_mean, in_running_mean);
if (running_mean == NULL) {
return 1;
}
#endif
#ifdef INPLACE_RUNNING_VAR
Py_XDECREF(out_running_var);
PyGpuArrayObject *running_var = in_running_var;
Py_INCREF(running_var);
#else
PyGpuArrayObject *running_var = *out_running_var;
running_var = theano_try_copy(running_var, in_running_var);
if (running_var == NULL) {
return 1;
}
#endif
#endif
{
const float falpha = 1.;
const float fbeta = 0.;
......@@ -50,9 +94,15 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
bn_params,
PyGpuArray_DEV_DATA(scale),
PyGpuArray_DEV_DATA(bias),
#ifdef RUNNING_AVERAGES
running_average_factor,
PyGpuArray_DEV_DATA(running_mean),
PyGpuArray_DEV_DATA(running_var),
#else
0,
NULL, // running mean, deliberately unused
NULL, // running var, deliberately unused
#endif
epsilon,
PyGpuArray_DEV_DATA(*x_mean),
PyGpuArray_DEV_DATA(*x_invstd)
......@@ -62,6 +112,10 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
cudnnGetErrorString(err));
return 1;
}
#ifdef RUNNING_AVERAGES
*out_running_mean = running_mean;
*out_running_var = running_var;
#endif
}
return 0;
}
......@@ -34,8 +34,10 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
if (c_set_tensorNd(scale, bn_params) != 0)
return 1;
if (epsilon < 1e-5)
if (epsilon < 1e-5) {
PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
return 1;
}
if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1;
......
......@@ -11,11 +11,19 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
if (c_set_tensorNd(scale, bn_params) != 0)
return 1;
if (epsilon < 1e-5)
if (epsilon < 1e-5) {
PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
return 1;
}
#ifdef INPLACE_OUTPUT
Py_XDECREF(*outp);
*outp = inp;
Py_INCREF(*outp);
#else
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1;
#endif
if (c_set_tensorNd(*outp, bn_output) != 0)
return 1;
......
......@@ -252,3 +252,7 @@ class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
self.constant_tensor = gpuarray.array(
np.zeros((3, 5, 7, 11), dtype='float32'),
context=get_context(test_ctx_name))
class TestConv2dTranspose(test_abstract_conv.TestConv2dTranspose):
    # Re-run the abstract conv2d_transpose test suite, but compiled
    # with the GPU mode so the GPU implementation is exercised.
    mode = mode_with_gpu
......@@ -13,7 +13,7 @@ import time
from optparse import OptionParser
import subprocess
import numpy
import numpy as np
import theano
import theano.tensor as T
......@@ -47,10 +47,10 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
print()
print('Numpy config: (used when the Theano flag'
' "blas.ldflags" is empty)')
numpy.show_config()
print('Numpy dot module:', numpy.dot.__module__)
print('Numpy location:', numpy.__file__)
print('Numpy version:', numpy.__version__)
np.show_config()
print('Numpy dot module:', np.dot.__module__)
print('Numpy location:', np.__file__)
print('Numpy version:', np.__version__)
if (theano.config.device.startswith("gpu") or
theano.config.init_gpu_device.startswith("gpu")):
print('nvcc version:')
......@@ -58,12 +58,12 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
"--version"))
print()
a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX,
order=order))
b = theano.shared(numpy.ones((N, K), dtype=theano.config.floatX,
order=order))
c = theano.shared(numpy.ones((M, K), dtype=theano.config.floatX,
order=order))
a = theano.shared(np.ones((M, N), dtype=theano.config.floatX,
order=order))
b = theano.shared(np.ones((N, K), dtype=theano.config.floatX,
order=order))
c = theano.shared(np.ones((M, K), dtype=theano.config.floatX,
order=order))
f = theano.function([], updates=[(c, 0.4 * c + .8 * T.dot(a, b))])
if any([x.op.__class__.__name__ == 'Gemm' for x in
......
......@@ -9,7 +9,7 @@ from __future__ import absolute_import, print_function, division
import threading
import time
import numpy
import numpy as np
import theano
from theano.gpuarray import init_dev
......@@ -21,7 +21,7 @@ def main(dev1, dev2):
init_dev(dev2, 'ctx2')
size = 1024 * 16
data = numpy.random.randn(size, size).astype('float32')
data = np.random.randn(size, size).astype('float32')
val1a = theano.shared(data, target='ctx1')
val1b = theano.shared(data, target='ctx1')
val1c = theano.shared(data, target='ctx1')
......
......@@ -2,18 +2,18 @@ from __future__ import absolute_import, print_function, division
import time
import numpy
import numpy as np
import theano
y = theano.tensor.fvector()
x = theano.shared(numpy.zeros(1, dtype='float32'))
x = theano.shared(np.zeros(1, dtype='float32'))
f1 = theano.function([y], updates={x: y})
f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x))
print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort())
for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
o = numpy.zeros(i, dtype='float32')
o = np.zeros(i, dtype='float32')
t0 = time.time()
f1(o)
t1 = time.time()
......
......@@ -4,7 +4,7 @@ numpy version support only ndarray.
"""
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
from theano.tensor.basic import TensorType
try:
......@@ -42,8 +42,8 @@ else:
def may_share_memory(a, b, raise_other_type=True):
a_ndarray = isinstance(a, numpy.ndarray)
b_ndarray = isinstance(b, numpy.ndarray)
a_ndarray = isinstance(a, np.ndarray)
b_ndarray = isinstance(b, np.ndarray)
if a_ndarray and b_ndarray:
return TensorType.may_share_memory(a, b)
a_cuda = _is_cuda(a)
......
......@@ -5,7 +5,7 @@ These pickled graphs can be used, for instance, as cases for
unit tests or regression tests.
"""
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import os
import pickle
import sys
......@@ -188,10 +188,10 @@ class PersistentNdarrayID(object):
return name
def __call__(self, obj):
if type(obj) is numpy.ndarray:
if type(obj) is np.ndarray:
if id(obj) not in self.seen:
def write_array(f):
numpy.lib.format.write_array(f, obj)
np.lib.format.write_array(f, obj)
name = self._resolve_name(obj)
zipadd(write_array, self.zip_file, name)
self.seen[id(obj)] = 'ndarray.{0}'.format(name)
......@@ -204,7 +204,7 @@ class PersistentCudaNdarrayID(PersistentNdarrayID):
type(obj) is cuda_ndarray.cuda_ndarray.CudaNdarray):
if id(obj) not in self.seen:
def write_array(f):
numpy.lib.format.write_array(f, numpy.asarray(obj))
np.lib.format.write_array(f, np.asarray(obj))
name = self._resolve_name(obj)
zipadd(write_array, self.zip_file, name)
self.seen[id(obj)] = 'cuda_ndarray.{0}'.format(name)
......@@ -283,7 +283,7 @@ class PersistentNdarrayLoad(object):
if name in self.cache:
return self.cache[name]
ret = None
array = numpy.lib.format.read_array(self.zip_file.open(name))
array = np.lib.format.read_array(self.zip_file.open(name))
if array_type == 'cuda_ndarray':
if config.experimental.unpickle_gpu_on_cpu:
# directly return numpy array
......@@ -335,10 +335,10 @@ def dump(obj, file_handler, protocol=DEFAULT_PROTOCOL,
>>> foo_1 = theano.shared(0, name='foo')
>>> foo_2 = theano.shared(1, name='foo')
>>> with open('model.zip', 'wb') as f:
... dump((foo_1, foo_2, numpy.array(2)), f)
>>> numpy.load('model.zip').keys()
... dump((foo_1, foo_2, np.array(2)), f)
>>> np.load('model.zip').keys()
['foo', 'foo_2', 'array_0', 'pkl']
>>> numpy.load('model.zip')['foo']
>>> np.load('model.zip')['foo']
array(0)
>>> with open('model.zip', 'rb') as f:
... foo_1, foo_2, array = load(f)
......
......@@ -22,7 +22,7 @@ TheanoElementwiseKernel.
from __future__ import absolute_import, print_function, division
from itertools import chain
import numpy
import numpy as np
import theano
from six.moves import xrange
......@@ -257,13 +257,13 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
" inputs don't have the same shape!")
if inputs[0].size > 512:
grid = (int(numpy.ceil(inputs[0].size / 512.)), 1)
grid = (int(np.ceil(inputs[0].size / 512.)), 1)
block = (512, 1, 1)
else:
grid = (1, 1)
block = (inputs[0].shape[0], inputs[0].shape[1], 1)
self.pycuda_fct(inputs[0], inputs[1], z[0],
numpy.intc(inputs[1].size), block=block, grid=grid)
np.intc(inputs[1].size), block=block, grid=grid)
class PycudaElemwiseSourceModuleMakeThunkOp(Op):
......@@ -349,13 +349,13 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
" inputs don't have the same shape!")
if inputs[0][0].size > 512:
grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
grid = (int(np.ceil(inputs[0][0].size / 512.)), 1)
block = (512, 1, 1)
else:
grid = (1, 1)
block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1)
pycuda_fct(inputs[0][0], inputs[1][0], z[0],
numpy.intc(inputs[1][0].size), block=block,
np.intc(inputs[1][0].size), block=block,
grid=grid)
thunk.inputs = inputs
thunk.outputs = outputs
......
......@@ -3,7 +3,7 @@ Helper function to safely convert an array to a new data type.
"""
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano
......@@ -30,8 +30,8 @@ def _asarray(a, dtype, order=None):
"""
if str(dtype) == 'floatX':
dtype = theano.config.floatX
dtype = numpy.dtype(dtype) # Convert into dtype object.
rval = numpy.asarray(a, dtype=dtype, order=order)
dtype = np.dtype(dtype) # Convert into dtype object.
rval = np.asarray(a, dtype=dtype, order=order)
# Note that dtype comparison must be done by comparing their `num`
# attribute. One cannot assume that two identical data types are pointers
# towards the same object (e.g. under Windows this appears not to be the
......
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano
from theano.misc.cudamat_utils import cudamat_available
......@@ -20,7 +20,7 @@ def test(shape=(3, 4)):
U = gpu(theano.tensor.fmatrix('U'))
ii = theano.function([U], gpu(U + 1))
A_cpu = numpy.asarray(numpy.random.rand(*shape), dtype="float32")
A_cpu = np.asarray(np.random.rand(*shape), dtype="float32")
A_cnd = theano.sandbox.cuda.CudaNdarray(A_cpu)
A_cmat = cudandarray_to_cudamat(A_cnd)
......@@ -28,9 +28,9 @@ def test(shape=(3, 4)):
B_cnd = ii(A_cnd)
u = A_cnd.copy()
u += theano.sandbox.cuda.CudaNdarray(numpy.asarray([[1]], dtype='float32'))
u = numpy.asarray(u)
v = numpy.asarray(B_cnd)
u += theano.sandbox.cuda.CudaNdarray(np.asarray([[1]], dtype='float32'))
u = np.asarray(u)
v = np.asarray(B_cnd)
w = A_cmat.add(1).asarray()
assert abs(u - v).max() == 0
......
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano
from theano.misc.gnumpy_utils import gnumpy_available
......@@ -31,11 +31,10 @@ def test(shape=(3, 4, 5)):
B_cnd = ii(A_cnd)
B = cudandarray_to_garray(B_cnd)
assert A_cnd.shape == A.shape
from numpy import array
u = (A + 1).asarray()
v = B.asarray()
w = array(B_cnd)
w = np.array(B_cnd)
assert (u == v).all()
assert (u == w).all()
......@@ -49,7 +48,7 @@ def test2(shape=(3, 4, 5)):
U = gpu(theano.tensor.ftensor3('U'))
theano.function([U], gpu(U + 1))
A = numpy.random.rand(*shape).astype('float32')
A = np.random.rand(*shape).astype('float32')
A_cnd = theano.sandbox.cuda.CudaNdarray(A)
A_gar = cudandarray_to_garray(A_cnd)
assert A_cnd.shape == A_gar.shape
......@@ -62,7 +61,7 @@ def test2(shape=(3, 4, 5)):
# dtype always float32
assert A_cnd._strides == B._strides
assert A_cnd.gpudata == B.gpudata
v = numpy.asarray(B)
v = np.asarray(B)
assert (v == A).all()
......
......@@ -3,7 +3,7 @@ test the tensor and sparse type. The CudaNdarray type is tested in
sandbox/cuda/tests/test_tensor_op.py.test_may_share_memory_cuda
"""
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano
try:
......@@ -16,8 +16,8 @@ from theano.misc.may_share_memory import may_share_memory
def test_may_share_memory():
a = numpy.random.rand(5, 4)
b = numpy.random.rand(5, 4)
a = np.random.rand(5, 4)
b = np.random.rand(5, 4)
va = a.view()
vb = b.view()
ra = a.reshape((4, 5))
......
......@@ -4,8 +4,7 @@ import shutil
import unittest
from tempfile import mkdtemp
import numpy
from numpy.testing import assert_allclose
import numpy as np
from nose.plugins.skip import SkipTest
import theano
......@@ -44,7 +43,7 @@ class T_dump_load(unittest.TestCase):
x = load(f)
assert x.name == 'x'
assert_allclose(x.get_value(), [[1]])
np.testing.assert_allclose(x.get_value(), [[1]])
def test_dump_load_mrg(self):
rng = MRG_RandomStreams(use_cuda=cuda_ndarray.cuda_enabled)
......@@ -62,14 +61,14 @@ class T_dump_load(unittest.TestCase):
foo_2 = theano.shared(1, name='foo')
foo_3 = theano.shared(2, name='foo')
with open('model.zip', 'wb') as f:
dump((foo_1, foo_2, foo_3, numpy.array(3)), f)
keys = list(numpy.load('model.zip').keys())
dump((foo_1, foo_2, foo_3, np.array(3)), f)
keys = list(np.load('model.zip').keys())
assert keys == ['foo', 'foo_2', 'foo_3', 'array_0', 'pkl']
foo_3 = numpy.load('model.zip')['foo_3']
assert foo_3 == numpy.array(2)
foo_3 = np.load('model.zip')['foo_3']
assert foo_3 == np.array(2)
with open('model.zip', 'rb') as f:
foo_1, foo_2, foo_3, array = load(f)
assert array == numpy.array(3)
assert array == np.array(3)
class TestStripPickler(unittest.TestCase):
......
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano
import theano.misc.pycuda_init
......@@ -58,11 +58,11 @@ def test_pycuda_elemwise_source_module():
PycudaElemwiseSourceModuleMakeThunkOp)
for node in f4.maker.fgraph.toposort()])
val1 = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
assert numpy.allclose(f(val1, val2), f2(val1, val2))
assert numpy.allclose(f(val1, val2), f3(val1, val2))
assert numpy.allclose(f(val1, val2), f4(val1, val2))
val1 = np.asarray(np.random.rand(*shape), dtype='float32')
val2 = np.asarray(np.random.rand(*shape), dtype='float32')
assert np.allclose(f(val1, val2), f2(val1, val2))
assert np.allclose(f(val1, val2), f3(val1, val2))
assert np.allclose(f(val1, val2), f4(val1, val2))
# print f(val1,val2)
# print f2(val1,val2)
......@@ -82,10 +82,10 @@ def test_pycuda_elemwise_kernel():
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f2.maker.fgraph.toposort()])
val1 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
#val1 = numpy.ones((5,5))
#val2 = numpy.arange(25).reshape(5,5)
val1 = np.asarray(np.random.rand(5, 5), dtype='float32')
val2 = np.asarray(np.random.rand(5, 5), dtype='float32')
#val1 = np.ones((5,5))
#val2 = np.arange(25).reshape(5,5)
assert (f(val1, val2) == f2(val1, val2)).all()
print(f(val1, val2))
print(f2(val1, val2))
......@@ -99,8 +99,8 @@ def test_pycuda_elemwise_kernel():
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f4.maker.fgraph.toposort()])
val1 = numpy.random.rand(2, 2, 2)
val1 = np.random.rand(2, 2, 2)
print(val1)
print(f4(val1, val1, val1))
assert numpy.allclose(f4(val1, val1, val1), val1 * val1 + val1)
assert np.allclose(f4(val1, val1, val1), val1 * val1 + val1)
"""
......@@ -8,7 +8,7 @@ from __future__ import absolute_import, print_function, division
import sys
import numpy
import numpy as np
import theano
import theano.sandbox.cuda as cuda_ndarray
......@@ -42,9 +42,9 @@ __global__ void multiply_them(float *dest, float *a, float *b)
multiply_them = mod.get_function("multiply_them")
# Test with pycuda in/out of numpy.ndarray
a = numpy.random.randn(100).astype(numpy.float32)
b = numpy.random.randn(100).astype(numpy.float32)
dest = numpy.zeros_like(a)
a = np.random.randn(100).astype(np.float32)
b = np.random.randn(100).astype(np.float32)
dest = np.zeros_like(a)
multiply_them(
drv.Out(dest), drv.In(a), drv.In(b),
block=(400, 1, 1), grid=(1, 1))
......@@ -64,8 +64,8 @@ __global__ void multiply_them(float *dest, float *a, float *b)
multiply_them = mod.get_function("multiply_them")
a = numpy.random.randn(100).astype(numpy.float32)
b = numpy.random.randn(100).astype(numpy.float32)
a = np.random.randn(100).astype(np.float32)
b = np.random.randn(100).astype(np.float32)
# Test with Theano object
ga = cuda_ndarray.CudaNdarray(a)
......@@ -73,7 +73,7 @@ __global__ void multiply_them(float *dest, float *a, float *b)
dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
multiply_them(dest, ga, gb,
block=(400, 1, 1), grid=(1, 1))
assert (numpy.asarray(dest) == a * b).all()
assert (np.asarray(dest) == a * b).all()
def test_pycuda_memory_to_theano():
......@@ -87,7 +87,7 @@ def test_pycuda_memory_to_theano():
print("gpuarray ref count before creating a CudaNdarray", end=' ')
print(sys.getrefcount(y))
assert sys.getrefcount(y) == initial_refcount
rand = numpy.random.randn(*y.shape).astype(numpy.float32)
rand = np.random.randn(*y.shape).astype(np.float32)
cuda_rand = cuda_ndarray.CudaNdarray(rand)
strides = [1]
......@@ -102,7 +102,7 @@ def test_pycuda_memory_to_theano():
z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
print("gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y))
assert sys.getrefcount(y) == initial_refcount + 1
assert (numpy.asarray(z) == 0).all()
assert (np.asarray(z) == 0).all()
assert z.base is y
# Test that we can take a view from this cuda view on pycuda memory
......@@ -112,17 +112,17 @@ def test_pycuda_memory_to_theano():
del zz
assert sys.getrefcount(y) == initial_refcount + 1
cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],
dtype='float32'))
cuda_ones = cuda_ndarray.CudaNdarray(np.asarray([[[1]]],
dtype='float32'))
z += cuda_ones
assert (numpy.asarray(z) == numpy.ones(y.shape)).all()
assert (numpy.asarray(z) == 1).all()
assert (np.asarray(z) == np.ones(y.shape)).all()
assert (np.asarray(z) == 1).all()
assert cuda_rand.shape == z.shape
assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
assert (numpy.asarray(cuda_rand) == rand).all()
assert (np.asarray(cuda_rand) == rand).all()
z += cuda_rand
assert (numpy.asarray(z) == (rand + 1)).all()
assert (np.asarray(z) == (rand + 1)).all()
# Check that the ref count to the gpuarray is right.
del z
......
from __future__ import absolute_import, print_function, division
import numpy
import numpy as np
import theano.sandbox.cuda as cuda
import theano.misc.pycuda_init
......@@ -22,30 +22,30 @@ def test_to_gpuarray():
px = to_gpuarray(cx)
assert isinstance(px, pycuda.gpuarray.GPUArray)
cx[0, 0] = numpy.asarray(1, dtype="float32")
cx[0, 0] = np.asarray(1, dtype="float32")
# Check that they share the same memory space
assert px.gpudata == cx.gpudata
assert numpy.asarray(cx[0, 0]) == 1
assert np.asarray(cx[0, 0]) == 1
assert numpy.allclose(numpy.asarray(cx), px.get())
assert np.allclose(np.asarray(cx), px.get())
assert px.dtype == cx.dtype
assert px.shape == cx.shape
assert all(numpy.asarray(cx._strides) * 4 == px.strides)
assert all(np.asarray(cx._strides) * 4 == px.strides)
# Test when the CudaNdarray is strided
cx = cx[::2, ::]
px = to_gpuarray(cx, copyif=True)
assert isinstance(px, pycuda.gpuarray.GPUArray)
cx[0, 0] = numpy.asarray(2, dtype="float32")
cx[0, 0] = np.asarray(2, dtype="float32")
# Check that they do not share the same memory space
assert px.gpudata != cx.gpudata
assert numpy.asarray(cx[0, 0]) == 2
assert not numpy.allclose(numpy.asarray(cx), px.get())
assert np.asarray(cx[0, 0]) == 2
assert not np.allclose(np.asarray(cx), px.get())
assert px.dtype == cx.dtype
assert px.shape == cx.shape
assert not all(numpy.asarray(cx._strides) * 4 == px.strides)
assert not all(np.asarray(cx._strides) * 4 == px.strides)
# Test that we return an error
try:
......@@ -59,11 +59,11 @@ def test_to_cudandarray():
px = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
cx = to_cudandarray(px)
assert isinstance(cx, cuda.CudaNdarray)
assert numpy.allclose(px.get(),
numpy.asarray(cx))
assert np.allclose(px.get(),
np.asarray(cx))
assert px.dtype == cx.dtype
assert px.shape == cx.shape
assert all(numpy.asarray(cx._strides) * 4 == px.strides)
assert all(np.asarray(cx._strides) * 4 == px.strides)
try:
px = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
......@@ -73,7 +73,7 @@ def test_to_cudandarray():
pass
try:
to_cudandarray(numpy.zeros(4))
to_cudandarray(np.zeros(4))
assert False
except ValueError:
pass
......@@ -12,7 +12,7 @@ import warnings
import theano
from theano.compat import get_unbound_function
from theano.compile import optdb
from theano.gof import EquilibriumDB, SequenceDB
from theano.gof import EquilibriumDB, SequenceDB, TopoOptimizer
from theano.gof.cmodule import get_lib_extension
from theano.gof.compilelock import get_lock, release_lock
from theano import config
......@@ -40,6 +40,17 @@ def register_opt(*tags, **kwargs):
return f
def register_inplace(*tags, **kwargs):
def f(local_opt):
name = (kwargs and kwargs.pop('name')) or local_opt.__name__
optdb.register(
name, TopoOptimizer(
local_opt, failure_callback=TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace', 'gpu', *tags)
return local_opt
return f
_logger_name = 'theano.sandbox.cuda'
_logger = logging.getLogger(_logger_name)
......
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论