在下面的代码中,如何在不使用 atomicAdd 的情况下计算 sum_array 的值?换句话说,如何在不使用 CUDA 原子操作的情况下完成求和计算?
内核方法
/**
 * For every pixel of a width x height index map, adds the pixel's column to
 * sum_array[idx].x and its row to sum_array[idx].y, where idx is the value
 * stored for that pixel in pntrs.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel
 * (x -> column, y -> row). Threads that fall outside the width x height
 * domain return immediately, so the grid may overshoot the image.
 *
 * @param width     number of columns in the index map
 * @param height    number of rows in the index map
 * @param pntrs     row-major array of width * height entries; each entry is
 *                  the sum_array slot its pixel contributes to
 * @param sum_array per-slot accumulators (.x = sum of columns, .y = sum of rows)
 *
 * NOTE(review): the kernel only adds to sum_array, so the caller presumably
 * zero-initialises it before the launch; values in pntrs are used as indices
 * without any range check -- confirm both against the host code.
 *
 * Why atomics: many pixels can carry the same idx, so a plain
 * `sum_array[idx].x += col` is an unsynchronised read-modify-write and loses
 * updates whenever two threads hit the same slot. With one thread per pixel
 * the update has to be atomic. Doing this without atomics means restructuring
 * the computation outside this kernel, e.g. sorting the pixels by idx and
 * running a segmented reduction (thrust::sort_by_key + thrust::reduce_by_key).
 */
__global__ void calculate_sum(int width,
                              int height,
                              int *pntrs,
                              int2 *sum_array)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= height || col >= width) return;

    // Widen before multiplying so row * width cannot overflow int on large maps.
    int idx = pntrs[static_cast<size_t>(row) * width + col];

    // Race-free accumulation: each component is updated atomically.
    atomicAdd(&sum_array[idx].x, col);
    atomicAdd(&sum_array[idx].y, row);
}
启动内核
// Launch geometry: 16 x 16 threads per block. The grid size in each dimension
// is the ceiling of extent / block extent, so every (col, row) of the
// width x height domain is covered even when the extents are not multiples
// of the block size.
dim3 dimBlock(16, 16);
dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
             (height + dimBlock.y - 1) / dimBlock.y);