CUDA中的内核启动语句错误

-2

我正在做一个使用 CUDA 的图像处理小项目，想用高斯模糊对图像进行模糊处理。其他部分都没有问题，但我不明白为什么内核启动语句会显示这个奇怪的错误：CUDA中的内核启动语句错误

下面是我的完整代码，希望能有所帮助：

#include<time.h> 
#include<stdlib.h> 
#include<stdio.h> 
#include<string.h> 
#include<math.h> 
#include<cuda_runtime.h> 
#include<device_launch_parameters.h> 
#include <helper_cuda.h> 
#include <helper_cuda_gl.h> 
#include<helper_image.h> 
#include< helper_cuda_gl.h> 
#include<helper_cuda_drvapi.h> 

// Image dimensions in pixels. Shared by the host-side blur routines below;
// main() fills them in through sdkLoadPGM.
unsigned int width;
unsigned int height;

// 3x3 blur weights used by the host blur (getPixel). The nine weights add up
// to 15, which is the divisor getPixel applies to normalise the result.
int mask[3][3] = {
    { 1, 2, 1 },
    { 2, 3, 2 },
    { 1, 2, 1 },
};

// Returns the blurred value of the pixel at (col, row): the weighted sum of
// its 3x3 neighbourhood in `arr` (row-major, `width` pixels per row),
// using the global `mask` weights, divided by 15 (the sum of the weights).
//
// No bounds checking is done here: the caller must make sure that
// (col-1 .. col+1, row-1 .. row+1) lies inside the image.
int getPixel(unsigned char *arr, int col, int row)
{
    int weighted = 0;

    // ky / kx run over the mask cells; subtracting 1 turns them into the
    // -1..+1 offsets around the centre pixel.
    for (int ky = 0; ky < 3; ++ky)
    {
        const int dy = ky - 1;
        for (int kx = 0; kx < 3; ++kx)
        {
            const int dx = kx - 1;
            const int sample = arr[(row + dy)*width + (col + dx)];
            // Indexed [kx][ky] exactly as the original did; the mask is
            // symmetric, so the transposition has no visible effect.
            weighted += sample*mask[kx][ky];
        }
    }

    return weighted/15;
}

// Host (CPU) blur: writes the 3x3 weighted average of every interior pixel of
// `arr` into `result`. Both buffers are row-major with the global `width`
// pixels per row and `height` rows.
//
// Only rows in [2, height-3) and columns in [2, width-3) are written; every
// other element of `result` is left untouched, so the caller is responsible
// for initialising the border if it matters.
void h_blur(unsigned char * arr, unsigned char * result){
    // width/height are unsigned: for images smaller than 3 pixels in either
    // dimension "dim - 3" would wrap around to a huge value and the loops
    // would run far out of bounds. Images smaller than 6 have no pixel that
    // satisfies 2 <= p < dim - 3, so there is nothing to do for them.
    if (width < 6 || height < 6)
        return;

    // Signed copies of the loop limits so the comparisons below do not mix
    // signed and unsigned operands.
    const int lastRow = (int)height - 3;
    const int lastCol = (int)width - 3;

    // offset is the index of the first pixel of the current row.
    int offset = 2 * width;
    for (int row = 2; row < lastRow; row++)
    {
        for (int col = 2; col < lastCol; col++)
        {
            result[offset + col] = getPixel(arr, col, row);
        }
        offset += width;
    }
}

// Device blur kernel: each thread computes one output pixel as the weighted
// average of its 3x3 neighbourhood.
//
// Launch layout: 2D grid of 2D blocks; x indexes columns, y indexes rows.
// The grid may overshoot the image - threads outside the processed region
// return immediately.
//
// arr    : input image, row-major, `width` pixels per row
// result : output image, same layout; only rows in [2, height-3) and columns
//          in [2, width-3) are written, the rest is left untouched
__global__ void d_blur(unsigned char *arr, unsigned char * result, int width, int height) 
{
    const int col = blockIdx.x*blockDim.x + threadIdx.x;
    const int row = blockIdx.y*blockDim.y + threadIdx.y;

    // Skip the border (and any thread that falls outside the image) so the
    // neighbourhood reads below always stay inside `arr`.
    if (row < 2 || col < 2 || row >= height - 3 || col >= width - 3)
        return;

    // Same weights as the host-side mask; they sum to 15.
    const int mask[3][3] = { { 1, 2, 1 }, { 2, 3, 2 }, { 1, 2, 1 } };

    int sum = 0;

    for (int j = -1; j <= 1; j++)
    {
        // The original code was missing this inner loop, leaving `i`
        // undeclared (compile error) and the neighbourhood incomplete.
        for (int i = -1; i <= 1; i++)
        {
            int color = arr[(row + j)*width + (col + i)];
            sum += color*mask[i + 1][j + 1];
        }
    }

    result[row*width + col] = sum/15;
}

// Loads a greyscale image, blurs it on the GPU with d_blur and writes the
// result back to disk. Returns EXIT_FAILURE if the image cannot be loaded,
// host memory cannot be allocated, or the result cannot be saved; CUDA errors
// are reported by checkCudaErrors (from helper_cuda.h).
int main(int argc, char ** argv) 
{
    (void)argc;
    (void)argv;

    unsigned char *d_resultPixels = NULL;
    unsigned char *h_resultPixels = NULL;
    unsigned char *h_pixels = NULL;
    unsigned char *d_pixels = NULL;

    // Backslashes must be doubled inside a C string literal: sequences such
    // as "\v" (vertical tab) or "\3" (octal escape) silently corrupt the path.
    // NOTE(review): the file has a .ppm extension but is read with
    // sdkLoadPGM - confirm the loader accepts it.
    // NOTE(review): the result path is identical to the source path, so the
    // input image is overwritten - confirm this is intended.
    const char *srcPath = "C:\\ProgramData\\NVIDIA Corporation\\CUDA Samples\\v6.5\\3_Imaging\\dxtc\\data\\lena_std.ppm";
    const char *d_ResultPath = "C:\\ProgramData\\NVIDIA Corporation\\CUDA Samples\\v6.5\\3_Imaging\\dxtc\\data\\lena_std.ppm";

    if (!sdkLoadPGM(srcPath, &h_pixels, &width, &height) || h_pixels == NULL
        || width == 0 || height == 0)
    {
        fprintf(stderr, "Failed to load image: %s\n", srcPath);
        return EXIT_FAILURE;
    }

    const size_t ImageSize = sizeof(unsigned char) * (size_t)width * (size_t)height;

    h_resultPixels = (unsigned char *)malloc(ImageSize);
    if (h_resultPixels == NULL)
    {
        fprintf(stderr, "Out of host memory (%lu bytes)\n", (unsigned long)ImageSize);
        free(h_pixels);
        return EXIT_FAILURE;
    }

    checkCudaErrors(cudaMalloc((void**)&d_pixels, ImageSize));
    checkCudaErrors(cudaMalloc((void**)&d_resultPixels, ImageSize));
    checkCudaErrors(cudaMemcpy(d_pixels, h_pixels, ImageSize, cudaMemcpyHostToDevice));

    // d_blur does not write the image border; zero the output first so the
    // saved file does not contain uninitialised device memory.
    checkCudaErrors(cudaMemset(d_resultPixels, 0, ImageSize));

    // Round the grid up so images whose size is not a multiple of the block
    // size are still fully covered; d_blur bounds-checks the surplus threads.
    dim3 block(16, 16);
    dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);

    d_blur<<<grid, block>>>(d_pixels, d_resultPixels, (int)width, (int)height);

    // A launch does not return an error itself: bad configurations show up
    // in cudaGetLastError, execution faults at the next synchronisation.
    checkCudaErrors(cudaGetLastError());
    // cudaThreadSynchronize is deprecated; cudaDeviceSynchronize replaces it.
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaMemcpy(h_resultPixels, d_resultPixels, ImageSize, cudaMemcpyDeviceToHost));

    int status = EXIT_SUCCESS;
    if (!sdkSavePGM(d_ResultPath, h_resultPixels, width, height))
    {
        fprintf(stderr, "Failed to save image: %s\n", d_ResultPath);
        status = EXIT_FAILURE;
    }

    checkCudaErrors(cudaFree(d_resultPixels));
    checkCudaErrors(cudaFree(d_pixels));
    free(h_resultPixels);
    // presumably sdkLoadPGM allocated this buffer with malloc - TODO confirm
    free(h_pixels);

    printf("Press enter to exit ...\n"); 
    getchar(); 
    return status;
}

来源

2016-03-21 Prashant Pandey

您是否更新了智能感知？ http://www.ademiller.com/blogs/tech/2010/10/visual-studio-2010-adding-intellisense-support-for-cuda-c/ – Mehno

我想我已经做到了。 –

IntelliSense 无法识别 CUDA 的语法结构。`<<<...>>>` 是 CUDA 特有的语法。如果您在 CUDA 标签下搜索 “intellisense”（或 “CUDA 红色下划线”），会找到很多讨论这个问题的帖子。这并不是真正的错误。如果这是您代码中唯一的问题，它应该可以正常编译并运行。 –