我对使用OpenCL映射缓冲区的代码是否正确有点困惑。OpenCL缓冲区分配和映射的最佳实践
我有两个例子,一个使用CL_MEM_USE_HOST_PTR,一个使用CL_MEM_ALLOC_HOST_PTR。两者都能在我的本地机器和OpenCL设备上正常运行,但我想知道这是否是正确的映射方式,以及它是否适用于所有OpenCL设备。我尤其不确定USE_HOST_PTR的例子。
我只对缓冲区/映射(map)的具体操作感兴趣。我知道我应该做错误检查等等。
CL_MEM_ALLOC_HOST_PTR:
// pointer to hold the result
int * host_ptr = malloc(size * sizeof(int));
d_mem = clCreateBuffer(context,CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR,
size*sizeof(cl_int), NULL, &ret);
int * map_ptr = clEnqueueMapBuffer(command_queue,d_mem,CL_TRUE,CL_MAP_WRITE,
0,size*sizeof(int),0,NULL,NULL,&ret);
// initialize data
for (i=0; i<size;i++) {
map_ptr[i] = i;
}
ret = clEnqueueUnmapMemObject(command_queue,d_mem,map_ptr,0,NULL,NULL);
//Set OpenCL Kernel Parameters
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_mem);
size_t global_work[1] = { size };
//Execute OpenCL Kernel
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
global_work, NULL, 0, 0, NULL);
map_ptr = clEnqueueMapBuffer(command_queue,d_mem,CL_TRUE,CL_MAP_READ,
0,size*sizeof(int),0,NULL,NULL,&ret);
// copy the data to result array
for (i=0; i<size;i++){
host_ptr[i] = map_ptr[i];
}
ret = clEnqueueUnmapMemObject(command_queue,d_mem,map_ptr,0,NULL,NULL);
// cl finish etc
CL_MEM_USE_HOST_PTR:
// pointer to hold the result
int * host_ptr = malloc(size * sizeof(int));
int i;
for(i=0; i<size;i++) {
host_ptr[i] = i;
}
d_mem = clCreateBuffer(context,CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR,
size*sizeof(cl_int), host_ptr, &ret);
// No need to map or unmap here, as we use the HOST_PTR the original data
// is already initialized into the buffer?
//Set OpenCL Kernel Parameters
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_mem);
size_t global_work[1] = { size };
//Execute OpenCL Kernel
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
global_work, NULL, 0, 0, NULL);
// this returns the host_ptr so need need to save it (I assume it always will?)
// although we do need to call the map function
// to ensure the data is copied back.
// There's no need to manually copy it back into host_ptr
// as it uses this by default
clEnqueueMapBuffer(command_queue,d_mem,CL_TRUE,CL_MAP_READ,
0,size*sizeof(int),0,NULL,NULL,&ret);
ret = clEnqueueUnmapMemObject(command_queue,d_mem,map_ptr,0,NULL,NULL);
// cl finish, cleanup etc
是的,看来你的理解是正确的。 – 2014-10-09 11:39:26