2014-08-28 229 views
-1

我有一个包含 300,000 个点的数组,我想对其中每 600 个点做一次 FFT。我尝试用 cufftPlanMany 来实现,但在创建 plan 时得到了一个未知错误(CUFFT error: <unknown>):

// Plan-creation call that produces the error message quoted right after it.
cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500)); 

retrevialfft.cu(82) : cufftSafeCall() CUFFT error: <unknown> 

下面是该调用所在的上下文代码:

// Question listing: fills a host buffer with random real samples, creates a
// batched 1D complex-to-complex plan with cufftPlanMany, executes it in place
// on d_signal and copies the device buffer back to the host.
// NOTE(review): SIGNAL_SIZE is defined outside this excerpt; its value is not visible here.
// NOTE(review): none of the cuda* return codes in this listing are checked.
cudaSetDevice(0); 

    // Allocate host memory for the signal 
    cufftComplex* h_signal=(cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE); 


    // Initialize the host signal: random real part in [0, 1], zero imaginary part 
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { 
    h_signal[i].x = rand()/(float)RAND_MAX; 
    h_signal[i].y = 0; 

    // printf("Orignal: %f %f \n", h_signal[i].x, h_signal[i].y); 
    } 




    // Size in bytes of the whole signal buffer (used for the device allocation and the copy back)
    int mem_size = sizeof(cufftComplex) * SIGNAL_SIZE; 

    // Allocate device memory for signal 
    // NOTE(review): no host-to-device copy of h_signal into d_signal appears in
    // this listing, so the transform below operates on the fresh allocation's contents.
    cufftComplex* d_signal; 
    cudaMalloc((void**)&d_signal, mem_size); 

    int rank = 1; // 1D plan
    // NOTE(review): n[] is the length of ONE transform. With numCols = 300000 and
    // a batch count of 500 this plan describes 500 transforms of 300000 points each,
    // not the "FFT of every 600 points of a 300,000-point array" described in the
    // question text (which would be a length of 600 with 500 batches).
    int numCols = 300000; 
    int n[] = {numCols}; 

    // NOTE(review): batch and ostride are declared but never used; the plan call
    // below passes the literals 500 and 1 instead.
    int batch = 500; 
    int istride = 1; 
    int ostride = 1; 
    // Distance (in elements) between the first elements of consecutive input signals
    int idist = numCols; 

    // CUFFT plan
    // NOTE(review): the 8th/9th arguments are ostride/odist; odist is passed as the
    // literal 1 while idist is numCols, so input and output layouts disagree.
    // Presumably odist was meant to equal idist for an in-place transform - confirm.
    cufftHandle plan; 
    cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500)); 

    // Transform signal (forward FFT, in place: input and output are both d_signal)
    printf("Transforming signal cufftExecC2C\n"); 
    cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD)); 



    // Copy device memory to host
    cufftComplex* h_transformed = (cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);; 
    cudaMemcpy(h_transformed, d_signal, mem_size, 
          cudaMemcpyDeviceToHost); 



// Destroy CUFFT context
    cufftDestroy(plan); 

    // Clean up host and device memory, then reset the device
    free(h_signal); 

    free(h_transformed); 
    cudaFree(d_signal); 
    cudaDeviceReset(); 

有人知道这段代码的错误究竟出在哪里吗?

+1

您设置了多少“SIGNAL_SIZE”?请发布完整的代码,让其他人可以复制,粘贴,编译和运行,而无需添加任何其他代码行。 – JackOLantern 2014-08-28 21:08:05

+0

对不起,我下次肯定会这样做。谢谢你的帮助! – 2014-08-31 02:31:24

回答

1

您没有提供关于该问题的更多细节。下面我给出一段使用 cufftPlanMany() 执行批量 1D FFT 的完整可运行代码,希望对您有所帮助。

#include <stdio.h> 
#include <stdlib.h> 
#include <cufft.h> 
#include <assert.h> 

/********************/ 
/* CUDA ERROR CHECK */ 
/********************/ 
// gpuErrchk(ans): evaluates a CUDA runtime call once and passes its status to
// gpuAssert together with the file and line of the call site.
// Wrapped in do { } while (0) so that `gpuErrchk(x);` behaves as one statement,
// including inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Reports a failed CUDA runtime call on stderr and optionally terminates.
 *
 * @param code   status returned by the CUDA runtime call
 * @param file   source file of the call site (normally __FILE__); const because
 *               string literals must not bind to a non-const char pointer
 * @param line   source line of the call site (normally __LINE__)
 * @param abort  when true (default), wait for a key press and exit with `code`
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess) return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
    {
        // Block on stdin before exiting so the message can be read first.
        getchar();
        exit(code);
    }
}

/*********************/ 
/* CUFFT ERROR CHECK */ 
/*********************/ 
// Translates a cufftResult status into the spelling of its enumerator.
// Any status not listed in the switch yields "<unknown>".
static const char *_cudaGetErrorEnum(cufftResult error)
{
// Local helper: one switch arm returning the enumerator's own name as text.
#define CUFFT_STATUS_NAME(status) case status: return #status

    switch (error)
    {
        CUFFT_STATUS_NAME(CUFFT_SUCCESS);
        CUFFT_STATUS_NAME(CUFFT_INVALID_PLAN);
        CUFFT_STATUS_NAME(CUFFT_ALLOC_FAILED);
        CUFFT_STATUS_NAME(CUFFT_INVALID_TYPE);
        CUFFT_STATUS_NAME(CUFFT_INVALID_VALUE);
        CUFFT_STATUS_NAME(CUFFT_INTERNAL_ERROR);
        CUFFT_STATUS_NAME(CUFFT_EXEC_FAILED);
        CUFFT_STATUS_NAME(CUFFT_SETUP_FAILED);
        CUFFT_STATUS_NAME(CUFFT_INVALID_SIZE);
        CUFFT_STATUS_NAME(CUFFT_UNALIGNED_DATA);
    }

#undef CUFFT_STATUS_NAME

    // Reached only for status values without an arm above.
    return "<unknown>";
}

// cufftSafeCall(err): checks a cuFFT status and, on failure, reports the file
// and line of the call site before terminating.
#define cufftSafeCall(err)  __cufftSafeCall(err, __FILE__, __LINE__)

/**
 * Terminates the program with a diagnostic if a cuFFT call did not succeed.
 *
 * @param err   status returned by the cuFFT call
 * @param file  source file of the call site (supplied by cufftSafeCall)
 * @param line  source line of the call site (supplied by cufftSafeCall)
 *
 * On failure: prints the call site, the numeric status and its name to stderr,
 * resets the device and exits with EXIT_FAILURE. exit() is used rather than
 * assert(0) so the program also stops when built with NDEBUG.
 */
inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
{
    if (CUFFT_SUCCESS != err) {
        // Every conversion below has exactly one matching argument, and the
        // location printed is the caller's (file/line), not this helper's.
        fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
                file, line, (int)err, _cudaGetErrorEnum(err));
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
}

/********/ 
/* MAIN */ 
/********/ 
// Demonstrates a batched 1D complex-to-complex FFT with cufftPlanMany:
// `batch` signals of `numCols` points each are stored back to back in one
// buffer, transformed in place on the device and printed.
// Every input sample is 1 + 0i, so each transformed signal should show
// numCols in its first bin and (approximately) zero in all others.
// Returns 0 on success, EXIT_FAILURE if the host allocation fails; CUDA/cuFFT
// failures terminate inside gpuErrchk / cufftSafeCall.
int main() {

    const int batch = 3;                        // --- How many transforms to be performed
    const int numCols = 16;                     // --- Size of each transform

    const int SIGNAL_SIZE = batch * numCols;    // --- Overall size for all the signals
    const size_t mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;   // --- Same, in bytes

    // --- Allocate host memory for all the signals
    cufftComplex* h_signal = (cufftComplex*)malloc(mem_size);
    if (h_signal == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)mem_size);
        return EXIT_FAILURE;
    }

    // --- Initialize host memory for all the signals
    for (int i = 0; i < SIGNAL_SIZE; ++i) {
        h_signal[i].x = 1.f;
        h_signal[i].y = 0.f;
    }

    // --- Allocate device memory for all the signals
    cufftComplex* d_signal;
    gpuErrchk(cudaMalloc((void**)&d_signal, mem_size));

    // --- Host to Device memcopy
    gpuErrchk(cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice));

    int rank = 1;               // --- 1d plan
    int n[] = {numCols};        // --- Length of a single transform

    int istride = 1;            // --- Consecutive samples of a signal are adjacent
    int ostride = 1;
    int idist = numCols;        // --- Offset between the starts of consecutive signals
    int odist = numCols;

    // --- CUFFT plan
    // The batch count must match the number of signals actually stored in
    // d_signal; a larger count would make the transform run past the allocation.
    cufftHandle plan;
    cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, ostride, odist, CUFFT_C2C, batch));

    // --- Signals transformations (forward, in place)
    cufftSafeCall(cufftExecC2C(plan, (cufftComplex*)d_signal, (cufftComplex*)d_signal, CUFFT_FORWARD));

    // --- Device to Host memcopy
    gpuErrchk(cudaMemcpy(h_signal, d_signal, mem_size, cudaMemcpyDeviceToHost));

    for (int i = 0; i < SIGNAL_SIZE; ++i) printf("Real part = %f; Imaginar part = %f\n", h_signal[i].x, h_signal[i].y);

    // --- Destroy CUFFT context
    cufftSafeCall(cufftDestroy(plan));

    // --- Memory cleanup
    free(h_signal);
    gpuErrchk(cudaFree(d_signal));

    gpuErrchk(cudaDeviceReset());

    return 0;
}