Commit 2d2e548c, authored by Olivier Delalleau

Merged

...@@ -511,16 +511,24 @@ class CLinker(link.Linker): ...@@ -511,16 +511,24 @@ class CLinker(link.Linker):
op = node.op op = node.op
# type-specific support code # type-specific support code
try: c_support_code_apply.append(op.c_support_code_apply(node, name)) try:
except utils.MethodNotDefined: pass c_support_code_apply.append(op.c_support_code_apply(node, name))
except utils.MethodNotDefined:
pass
else:
# The following will be executed if the "try" block succeeds
assert isinstance(c_support_code_apply[-1], str), (
str(node.op)+" didn't returned a string for c_support_code_apply")
# emit c_code # emit c_code
try: behavior = op.c_code(node, name, isyms, osyms, sub) try:
behavior = op.c_code(node, name, isyms, osyms, sub)
except utils.MethodNotDefined: except utils.MethodNotDefined:
raise NotImplementedError("%s cannot produce C code" % op) raise NotImplementedError("%s cannot produce C code" % op)
assert isinstance(behavior,str), str(node.op)+" didn't returned a string for c_code" assert isinstance(behavior,str), str(node.op)+" didn't returned a string for c_code"
try: cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub) try:
cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)
except utils.MethodNotDefined: except utils.MethodNotDefined:
cleanup = "" cleanup = ""
...@@ -1305,4 +1313,3 @@ class DualLinker(link.Linker): ...@@ -1305,4 +1313,3 @@ class DualLinker(link.Linker):
link.raise_with_op(node1) link.raise_with_op(node1)
return f, i1, o1 return f, i1, o1
...@@ -214,8 +214,9 @@ class PureType(object): ...@@ -214,8 +214,9 @@ class PureType(object):
data passed as an argument. If it is False, and allow_downcast data passed as an argument. If it is False, and allow_downcast
is True, filter may cast it to an appropriate type. If is True, filter may cast it to an appropriate type. If
allow_downcast is False, filter may only upcast it, not lose allow_downcast is False, filter may only upcast it, not lose
precision. If allow_downcast is None, only Python float can be precision. If allow_downcast is None, the behaviour can be
downcasted, and only to a floatX scalar. type-dependent, but for now only Python float can be downcasted,
and only to a floatX scalar.
:Exceptions: :Exceptions:
- `MethodNotDefined`: subclass doesn't implement this function. - `MethodNotDefined`: subclass doesn't implement this function.
...@@ -390,4 +391,3 @@ class Generic(SingletonType): ...@@ -390,4 +391,3 @@ class Generic(SingletonType):
""" % locals() """ % locals()
generic = Generic() generic = Generic()
...@@ -189,7 +189,7 @@ def local_gpu_dot_to_dot22(node): ...@@ -189,7 +189,7 @@ def local_gpu_dot_to_dot22(node):
# case two: matrix X vector # case two: matrix X vector
elif _is_real_matrix(x) and _is_real_vector(y): elif _is_real_matrix(x) and _is_real_vector(y):
new_op = GpuDimShuffle((False,), [0,'x']) new_op = GpuDimShuffle((False,), [0,'x'])
shape_out = x.shape[1].dimshuffle(['x']) shape_out = x.shape[0].dimshuffle(['x'])
gpu_x = gpu_from_host(x) gpu_x = gpu_from_host(x)
gpu_y = new_op(gpu_from_host(y)) gpu_y = new_op(gpu_from_host(y))
else: else:
...@@ -207,7 +207,7 @@ def local_gpu_dot_to_dot22(node): ...@@ -207,7 +207,7 @@ def local_gpu_dot_to_dot22(node):
elif _is_real_matrix(x) and _is_real_vector(y): elif _is_real_matrix(x) and _is_real_vector(y):
new_op = GpuDimShuffle((False,), [0,'x']) new_op = GpuDimShuffle((False,), [0,'x'])
shape_out = x.shape[1].dimshuffle(['x']) shape_out = x.shape[0].dimshuffle(['x'])
gpu_x = gpu_from_host(x) gpu_x = gpu_from_host(x)
gpu_y = new_op(gpu_from_host(y)) gpu_y = new_op(gpu_from_host(y))
else: else:
......
...@@ -118,7 +118,7 @@ class CudaNdarraySharedVariable(SharedVariable, _operators): ...@@ -118,7 +118,7 @@ class CudaNdarraySharedVariable(SharedVariable, _operators):
It is also worth mentioning that, for efficient transfer to the GPU, Theano will make the new data It is also worth mentioning that, for efficient transfer to the GPU, Theano will make the new data
``c_contiguous``. This can require an extra copy of the data on the host. ``c_contiguous``. This can require an extra copy of the data on the host.
This work what when borrow=True and when borrow=False The in-place update of GPU memory works when borrow is either True or False.
""" """
if not borrow: if not borrow:
#TODO: check for cuda_ndarray type #TODO: check for cuda_ndarray type
......
...@@ -340,7 +340,7 @@ class Conv3D(theano.Op): ...@@ -340,7 +340,7 @@ class Conv3D(theano.Op):
codeSource += """ codeSource += """
if (inputChannels > 20 && outputChannels > 20 && ws4 == sizeof(ELEM_AT(%(W)s,0))) if (inputChannels > 20 && outputChannels > 20 && ws4 == sizeof(ELEM_AT(%(W)s,0)))
{ {
std::cout << "lots of channels special case code" << std::endl; //std::cout << "lots of channels special case code" << std::endl;
#define blas_type dtype_ ## %(V)s #define blas_type dtype_ ## %(V)s
const blas_type constant_one = 1.0; const blas_type constant_one = 1.0;
char N = 'T'; char N = 'T';
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论