1
如果我像下面这样声明一个配合 tex1Dfetch 使用的 bindless 纹理对象,而其中存放的是 2D 纹理数据:
// Describe the texture's backing storage: a linear device buffer.
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeLinear;
resDesc.res.linear.devPtr = device_global_memory_ptr;
// Channel format: unsigned, 8 bits in each of x/y/z/w (matches the uchar4 fetch type).
resDesc.res.linear.desc.f = cudaChannelFormatKindUnsigned;
resDesc.res.linear.desc.x = 8 /* 8 bit */ ;
resDesc.res.linear.desc.y = resDesc.res.linear.desc.x;
resDesc.res.linear.desc.z = resDesc.res.linear.desc.x;
resDesc.res.linear.desc.w = resDesc.res.linear.desc.x;
resDesc.res.linear.sizeInBytes = buffer_bytes_size;
// Sampling configuration: return raw element values, point filtering, border addressing.
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.readMode = cudaReadModeElementType;
texDesc.filterMode = cudaFilterModePoint;
// NOTE(review): the CUDA runtime docs appear to say filterMode/addressMode are
// ignored when resType is cudaResourceTypeLinear — confirm before relying on them.
texDesc.addressMode[0] = cudaAddressModeBorder;
texDesc.addressMode[1] = cudaAddressModeBorder;
texDesc.addressMode[2] = cudaAddressModeBorder;
// Create the bindless texture object from the two descriptors (no resource view).
// NOTE(review): the cudaError_t returned by cudaCreateTextureObject is not checked here.
cudaTextureObject_t tex1;
cudaCreateTextureObject(&tex1, &resDesc, &texDesc, NULL);
然后在 CUDA 内核中这样使用它:
// Read the uchar4 element at `index` through texture object tex1 via tex1Dfetch.
uchar4 pixel = tex1Dfetch<uchar4>(tex1, index);
我还能获得 2D 纹理缓存带来的好处吗?还是说缓存行为取决于 tex1Dfetch
指令本身?遗憾的是,我无法让上面的代码配合 tex2D
正常工作。
我很确定答案是否定的。“2D”缓存要求内存以正确的方式分配,并且纹理控制器知道其宽度/行间距(pitch)。在你这种情况下,我认为做不到这一点。 – talonmies
@talonmies 所以我应该先用 `cudaMallocArray` 分配一个缓冲区,然后用 `cudaMemcpy`(device-to-device)把数据拷贝进去?我去试一试,然后告诉你结果。 – Dean
@talonmies 成功了。请把它写成答案,我会接受它。 – Dean