cudaGetLastError返回“未知错误”

我新的CUDA C.我写一个简单的数组添加和减少，当它运行从设备复制错误校验回主机我得到一个“未知错误”。我不知道如果错误检查器出现故障，不能返回正确的cudaError，但我不能工作了什么是错的.......cudaGetLastError返回“未知错误”

using namespace std; 


#include <iostream> 

void CudaAddReduce(int *input, int *output, size_t size); 

__global__ void Fill(int *fillItem); 

__global__ void Add(int *input1, int *result); 

__global__ void Reduce(int *intputArray, int *outputArray); 



main(int argc, char *argv[]) 
{ 
const int N = 100; 
int inp[N]; 
int outp[N]; 
size_t size = (N * sizeof(int)); 

CudaAddReduce(inp,outp,size); 

cout << outp[N] << endl; 

} 

void CudaAddReduce(int *input, int *output, size_t size) 
{ 

// allocate buffers to device 

//input 
int *d_input; 
if (cudaMalloc(&d_input,size) != cudaSuccess){ 
    cerr << cudaGetErrorString(cudaGetLastError()) << endl; 
    cout << "Input allocation to device" << endl; 
    exit(1); 
} 
//////////////////////////// 
//output 
int *d_output; 
if (cudaMalloc(&d_output,size) != cudaSuccess){ 
    cerr << cudaGetErrorString(cudaGetLastError()) <<endl; 
    cout << "output allocation to device" << endl; 
    exit(1); 
} 

////////////////////////////////// 
//copy buffers to device from host 
////////////////////////////////// 
//input 
if (cudaMemcpy(d_input, input, size, cudaMemcpyHostToDevice) != cudaSuccess){ 
    cerr << cudaGetErrorString(cudaGetLastError()) << endl; 
    cout << "Input Copy from host to device" << endl; 
    exit(1); 
} 

///////////////////////////////// 
//execute device kernals 
///////////////////////////////// 
int numThreads = 256; 
int numBlocks = 1; 

//Fill Kernal 
Fill<<<numBlocks,numThreads>>>(d_input); 



// Add Kernal 
Add<<<numBlocks,numThreads>>>(d_input,d_output); 


//execute Reduce Kernal 
Reduce<<<numBlocks,numThreads>>>(d_output,d_input); 

cudaThreadSynchronize(); 

///////////////////////////////// 
//copy result from device to host 
///////////////////////////////// 
//output 


if (cudaMemcpy(output,d_output,size,cudaMemcpyDeviceToHost)!= cudaSuccess){ 
    cerr << cudaGetErrorString(cudaGetLastError()) << endl; 
    cout << "Output Copy from device to host" << endl; 
    exit(1); 
} 

//clear device buffers 
cudaFree(d_input); 
cudaFree(d_output); 
} 

__global__ void Fill(int *fillItem) 
{ 
int id = blockIdx.x * blockDim.x + threadIdx.x; 
fillItem[id] = 1; 
} 
__global__ void Add (int *input1, int* result) 
{ 
int id = blockIdx.x * blockDim.x + threadIdx.x; 
result[id] = input1[id] + input1[id]; 
} 
__global__ void Reduce(int *inputArray, int *outputArray) 
{ 
extern __shared__ int sdata[]; 

// each thread loads one element from global to shared mem 
unsigned int tid = threadIdx.x; 
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; 
sdata[tid] = inputArray[i]; 
__syncthreads(); 

// do reduction in shared mem 
for(unsigned int s=1; s < blockDim.x; s *= 2) 
{ 
    if(tid % (2*s) == 0) 
    { 
     sdata[tid] += sdata[tid + s]; 
    } 
__syncthreads(); 
} 


// write result for this block to global mem 
if(tid == 0) outputArray[blockIdx.x] = sdata[0]; 
}

感谢

来源

2012-04-22 Igoren

从哪里开始...每个内核包含越界内存操作，减少了内核缺乏一个共享内存大小的内核参数，而'cout'在'main'包含越界内存访问。 – talonmies 2012-04-22 20:40:07

我认为一个问题不应该因为提问者编写错误代码而被低估。这个问题是合法的（它甚至吸引了一个合理的答案，它可能不值得赞成票，但是不会投票赞成 – harrism 2012-04-22 23:04:12

我同意哈里斯的观点，虽然生活可能因错误重新命名为YouveDoneSomethingStupidWithMemoryError而变得更容易，因为这是我唯一一次看到它 – 3Pi 2012-04-23 00:30:08

编辑：在您的kernals ，你试图访问数组元素的方式出界！您的数组大小为100，但您使用的是256的线程维度，其中每个线程都试图写入该数据！你需要使用一致的尺寸。

在这一点，你得到的错误？两个malloc函数看起来应该正常工作，并且cudaGetErrorString不太可能出错。我对未知错误的典型体验是，您正在尝试从您应该复制的地方复制，或者以错误的大小复制。

你为什么要复制一个未分配的数组的内存？你从来没有填写主要的数组。

此外，您不需要声明内核函数< < < >>>。这些仅在您使用该功能时才需要。

来源

2012-04-22 20:27:01 3Pi

谢谢，解决了这个问题。使用您的推荐，并使用<<< numBlocks，numThreads，SizeOfSharedMemory >>>启动共享内存核心 – Igoren 2012-04-23 15:15:47

我遇到了这个错误，它竟然是我的操作系统版本。与其中一个相关库不匹配。一旦我升级了操作系统，然后重新安装了驱动程序，这个错误就消失了。

--John

来源

2017-10-06 20:07:21 hokiegeek2

cudaGetLastError返回“未知错误”

回答

相关问题