-2
我正在做一个使用 CUDA 的图像处理小项目,想用高斯模糊对图像进行模糊处理。一切都运行正常,但我不明白为什么内核启动语句会显示这个奇怪的错误:CUDA 中的内核启动语句错误
以下是我的完整代码,希望能有所帮助:
#include<time.h>
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<math.h>
#include<cuda_runtime.h>
#include<device_launch_parameters.h>
#include <helper_cuda.h>
#include <helper_cuda_gl.h>
#include<helper_image.h>
#include< helper_cuda_gl.h>
#include<helper_cuda_drvapi.h>
// Image dimensions in pixels; main() fills these in when it loads the image.
unsigned int width;
unsigned int height;

// 3x3 blur weights used by the host-side getPixel(). The nine weights add
// up to 15, which is the divisor getPixel() applies to the weighted sum.
int mask[3][3] = {
    { 1, 2, 1 },
    { 2, 3, 2 },
    { 1, 2, 1 },
};
// Returns the blurred value of the pixel at (col, row) of the row-major,
// one-byte-per-pixel image `arr`, whose row length is the file-level `width`.
//
// The value is the sum of the 3x3 neighbourhood centred on (col, row), each
// sample weighted by the file-level `mask`, divided by 15 (integer division).
// No bounds checking is done: the caller must keep (col, row) at least one
// pixel away from every image edge.
int getPixel(unsigned char *arr, int col, int row)
{
    int acc = 0;
    // Walk the nine taps in row-major order: tap 0 is (-1,-1), tap 8 is (+1,+1).
    for (int tap = 0; tap < 9; ++tap)
    {
        const int dy = tap / 3 - 1;
        const int dx = tap % 3 - 1;
        const int sample = arr[(row + dy)*width + (col + dx)];
        acc += sample*mask[dx + 1][dy + 1];
    }
    return acc/15;
}
// CPU reference blur: writes getPixel() of every interior pixel of `arr`
// into the same position of `result`.
//
// arr    - source image, row-major, one byte per pixel, file-level
//          `width` x `height` in size.
// result - destination buffer of the same size.
//
// Only rows 2 .. height-4 and columns 2 .. width-4 are written; the remaining
// border of `result` is left untouched, so the caller must initialise it if
// the whole buffer is going to be used.
//
// Does nothing when either pointer is NULL or when the image is too small to
// have any interior pixel.
void h_blur(unsigned char * arr, unsigned char * result){
    if (arr == NULL || result == NULL)
        return;

    // `width` and `height` are unsigned, so `height - 3` would wrap around to
    // a huge value for images smaller than 3 pixels and send the loops far out
    // of bounds. Signed bounds simply yield an empty range instead.
    const int rowEnd = (int)height - 3;
    const int colEnd = (int)width - 3;

    for (int row = 2; row < rowEnd; row++)
    {
        unsigned char *outRow = result + (size_t)row * width;
        for (int col = 2; col < colEnd; col++)
        {
            outRow[col] = (unsigned char)getPixel(arr, col, row);
        }
    }
}
// GPU blur kernel: each thread computes one output pixel.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) handles column x and
// row y, so the grid must cover at least width x height threads. Threads that
// fall outside the interior region return without writing. No shared memory
// is used.
//
// arr    - source image in device memory, row-major, one byte per pixel.
// result - destination image in device memory, same layout and size.
// width  - image width in pixels (also the row stride).
// height - image height in pixels.
//
// Only rows 2 .. height-4 and columns 2 .. width-4 of `result` are written;
// the border is left untouched, so the caller must initialise it.
__global__ void d_blur(unsigned char *arr, unsigned char * result, int width, int height)
{
    const int col = blockIdx.x*blockDim.x + threadIdx.x;
    const int row = blockIdx.y*blockDim.y + threadIdx.y;

    // Skip the border and any threads of partially filled edge blocks.
    if (row < 2 || col < 2 || row >= height - 3 || col >= width - 3)
        return;

    // Same 3x3 weights as the host-side mask; they sum to 15.
    const int mask[3][3] = { { 1, 2, 1 }, { 2, 3, 2 }, { 1, 2, 1 } };

    int sum = 0;
    for (int j = -1; j <= 1; j++)
    {
        // The column loop was missing, which left `i` undeclared and the
        // kernel uncompilable; it mirrors the inner loop of getPixel().
        for (int i = -1; i <= 1; i++)
        {
            const int color = arr[(row + j)*width + (col + i)];
            sum += color*mask[i + 1][j + 1];
        }
    }
    result[row*width + col] = sum/15;
}
// Loads an image, blurs it on the GPU with d_blur, and writes the result back
// to disk. Returns 0 on success and EXIT_FAILURE if the image cannot be loaded
// or the host buffer cannot be allocated. CUDA failures are reported and abort
// the program through checkCudaErrors() from helper_cuda.h.
int main(int argc, char ** argv)
{
    unsigned char *h_pixels = NULL;
    unsigned char *h_resultPixels = NULL;
    unsigned char *d_pixels = NULL;
    unsigned char *d_resultPixels = NULL;

    // Backslashes must be doubled inside a C string literal; the single
    // backslashes used before were parsed as escape sequences (for example
    // "\v" is a vertical tab and "\3" an octal escape), producing a wrong path.
    const char *srcPath = "C:\\ProgramData\\NVIDIA Corporation\\CUDA Samples\\v6.5\\3_Imaging\\dxtc\\data\\lena_std.ppm";
    // NOTE(review): the output path is the same file as the input, so the
    // source image is overwritten by the blurred one - confirm this is intended.
    // NOTE(review): the file has a .ppm extension but is read with the PGM
    // (single channel) loader - confirm the image really is greyscale.
    const char *d_ResultPath = "C:\\ProgramData\\NVIDIA Corporation\\CUDA Samples\\v6.5\\3_Imaging\\dxtc\\data\\lena_std.ppm";

    if (!sdkLoadPGM(srcPath, &h_pixels, &width, &height) || h_pixels == NULL || width == 0 || height == 0)
    {
        fprintf(stderr, "Failed to load image: %s\n", srcPath);
        free(h_pixels);
        return EXIT_FAILURE;
    }

    // size_t avoids overflowing an int for large images.
    const size_t imageSize = sizeof(unsigned char) * (size_t)width * (size_t)height;

    h_resultPixels = (unsigned char *)malloc(imageSize);
    if (h_resultPixels == NULL)
    {
        fprintf(stderr, "Failed to allocate %lu bytes for the result image\n", (unsigned long)imageSize);
        free(h_pixels);
        return EXIT_FAILURE;
    }

    checkCudaErrors(cudaMalloc((void**)&d_pixels, imageSize));
    checkCudaErrors(cudaMalloc((void**)&d_resultPixels, imageSize));
    checkCudaErrors(cudaMemcpy(d_pixels, h_pixels, imageSize, cudaMemcpyHostToDevice));
    // d_blur never writes the image border, so start from a copy of the source
    // instead of saving whatever happened to be in freshly allocated memory.
    checkCudaErrors(cudaMemcpy(d_resultPixels, d_pixels, imageSize, cudaMemcpyDeviceToDevice));

    dim3 block(16, 16);
    // Round up so images whose size is not a multiple of the block size are
    // fully covered; d_blur discards the surplus threads itself.
    dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);
    d_blur<<<grid, block>>>(d_pixels, d_resultPixels, (int)width, (int)height);
    checkCudaErrors(cudaGetLastError());      // launch-configuration errors
    checkCudaErrors(cudaDeviceSynchronize()); // errors raised while the kernel ran

    checkCudaErrors(cudaMemcpy(h_resultPixels, d_resultPixels, imageSize, cudaMemcpyDeviceToHost));
    if (!sdkSavePGM(d_ResultPath, h_resultPixels, width, height))
    {
        fprintf(stderr, "Failed to save image: %s\n", d_ResultPath);
    }

    checkCudaErrors(cudaFree(d_pixels));
    checkCudaErrors(cudaFree(d_resultPixels));
    free(h_resultPixels);
    // The CUDA samples release sdkLoadPGM buffers with free().
    free(h_pixels);

    printf("Press enter to exit ...\n");
    getchar();
    return 0;
}
您是否更新了智能感知? http://www.ademiller.com/blogs/tech/2010/10/visual-studio-2010-adding-intellisense-support-for-cuda-c/ – Mehno
我想我已经做到了。 –
intellisense 无法识别 CUDA 的语法构造。`<<<...>>>` 是 CUDA 特有的构造。如果您在 CUDA 标签下搜索 intellisense(或搜索“CUDA 红色下划线”),会发现有很多问题都在讨论这一点。这并不是真正的错误。如果这是您代码中唯一的问题,它应该可以正常编译和运行。 –