提交 1d2b3245 作者: Vivek Kulkarni

Correcting alignment

上级 36c30634
......@@ -2516,11 +2516,11 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
unsigned int num_threads_per_block = std::min(numcolsX, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
unsigned int num_blocks = std::min(size ,(unsigned int)NUM_VECTOR_OP_BLOCKS);
dim3 n_blocks(num_blocks);
dim3 n_blocks(num_blocks);
dim3 n_threads(num_threads_per_block);
long *d_indices_arr = NULL;
PyArrayObject *cpu_indices_arr = PyArray_GETCONTIGUOUS(indices_arr);
PyArrayObject *cpu_indices_arr = PyArray_GETCONTIGUOUS(indices_arr);
d_indices_arr = (long *)device_malloc(PyArray_NBYTES(cpu_indices_arr));
assert(d_indices_arr);
......@@ -2530,7 +2530,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
PyArray_NBYTES(cpu_indices_arr),
cudaMemcpyHostToDevice);
assert(err == cudaSuccess);
assert(err == cudaSuccess);
k_vector_add_fast<<<n_blocks, n_threads>>>(shapeX[0],
shapeX[1],
......
......@@ -1776,7 +1776,6 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
return 0;
}
/*
* We need this inplace Add to support IncSubTensor
* It returns py_self on success with an additional reference. Else NULL.
......
......@@ -490,7 +490,6 @@ DllExport PyObject * CudaNdarray_View(const CudaNdarray * self);
DllExport PyObject * CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other);
DllExport PyObject * CudaNdarray_Subscript(PyObject * py_self, PyObject * key);
DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t fct_nb);
DllExport void CudaNdarray_vector_add_fast(CudaNdarray* py_self, CudaNdarray *py_other, PyArrayObject *indices_arr);
// Ensures that *arr is a pointer to a contiguous ndarray of the specified
// dimensions.
......
......@@ -1353,6 +1353,7 @@ def local_gpualloc_memset_0(node):
new_out = GpuAlloc(memset_0=True)(*node.inputs)
return [new_out]
def safe_to_gpu(x):
if (isinstance(x.type, tensor.TensorType) and
x.type.dtype == 'float32'):
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论