2013-03-15 51 views
-3

我需要在 CUDA 中比较两个浮点数组(a、b),使得:
if a > b then a = a/a; else a = 0
(原标题:Comparison in CUDA,即"CUDA 中的比较")

请告诉我实现并调用它的正确方法和语法。

+0

a = a/a 是什么意思?那不就是说 a = 1 吗? – 2013-03-15 04:43:47

+0

是的,a = a/a 表示 1。基本上,如果 a[i] > b[i],我想返回值 1,否则返回 0;我想对所有的索引值(比方说 20 个)重复这一操作。 – Ani 2013-03-15 06:36:27

+0

先生,基本上我正在使用 cuFFT 库。我有两个输出,必须比较 4096 个索引值,所以请指导我。 – Ani 2013-03-15 06:55:20

回答

1

类似这样的代码应该可行。为了简洁起见,我简化了通常的 CUDA 错误检查。

#include <stdio.h>
#include <stdlib.h>
// Number of float elements in each of the two arrays being compared.
#define DSIZE 10000 
// Threads per block used for the kernel launch.
#define nTPB 512 

// Element-wise comparison, written back in place into `a`:
//   a[i] = 1.0f when a[i] > b[i], otherwise 0.0f.
//
// Each thread handles at most one element, selected by its flat index along
// the x dimension; threads whose index is not below `size` do nothing, so the
// grid may be larger than the data.
//
//   a    - array read by this kernel and overwritten with the 0/1 result
//   b    - array compared against; only read
//   size - number of elements to process
__global__ void cmp(float *a, float *b, int size){
    const int gid = blockIdx.x*blockDim.x + threadIdx.x;
    if (gid >= size)
        return;                 // tail threads past the end of the data

    // The literal 1.0f is stored directly instead of computing a/a.
    if (a[gid] > b[gid])
        a[gid] = 1.0f;
    else
        a[gid] = 0.0f;
}

// Frees the host and device buffers used by main. free() and cudaFree() both
// treat a null pointer as a no-op, so this is safe to call with any subset of
// the buffers actually allocated.
static void release_buffers(float *h_a, float *h_b, float *d_a, float *d_b) {
    free(h_a);
    free(h_b);
    cudaFree(d_a);
    cudaFree(d_b);
}

// Prints `msg`, releases every buffer, and returns the failure exit status (1)
// so callers can write `return fail_and_release(...)`.
static int fail_and_release(const char *msg,
                            float *h_a, float *h_b, float *d_a, float *d_b) {
    printf("%s", msg);
    release_buffers(h_a, h_b, d_a, d_b);
    return 1;
}

// Demo driver: fills a[] with 10.0f and b[] with 0, 1, 2, ..., runs the cmp
// kernel on the device, and prints the first entries of the result.
// Returns 0 on success and 1 on any allocation, copy, or launch failure.
int main() {
    const size_t bytes = DSIZE*sizeof(float);
    // Start from null so the cleanup helper can be called at any point.
    float *h_a = NULL, *h_b = NULL, *d_a = NULL, *d_b = NULL;
    cudaError_t err;

    h_a = (float *)malloc(bytes);
    if (h_a == NULL) return fail_and_release("malloc fail\n", h_a, h_b, d_a, d_b);
    h_b = (float *)malloc(bytes);
    if (h_b == NULL) return fail_and_release("malloc fail\n", h_a, h_b, d_a, d_b);

    for (int i = 0; i < DSIZE; i++) {
        h_a[i] = 10.0f;
        h_b[i] = (float)i;
    }

    err = cudaMalloc((void **)&d_a, bytes);
    if (err != cudaSuccess) return fail_and_release("cuda fail\n", h_a, h_b, d_a, d_b);
    err = cudaMalloc((void **)&d_b, bytes);
    if (err != cudaSuccess) return fail_and_release("cuda fail\n", h_a, h_b, d_a, d_b);
    err = cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) return fail_and_release("cuda fail\n", h_a, h_b, d_a, d_b);
    err = cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) return fail_and_release("cuda fail\n", h_a, h_b, d_a, d_b);

    // Ceil-divide so the grid covers DSIZE even when it is not a multiple of nTPB.
    cmp<<<(DSIZE+nTPB-1)/nTPB, nTPB>>>(d_a, d_b, DSIZE);
    // A kernel launch returns no status itself; launch-configuration errors
    // have to be fetched explicitly.
    err = cudaGetLastError();
    if (err != cudaSuccess) return fail_and_release("cuda fail\n", h_a, h_b, d_a, d_b);

    // This blocking copy also waits for the kernel, so execution errors from
    // the kernel are reported through its return value.
    err = cudaMemcpy(h_a, d_a, bytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) return fail_and_release("cuda fail\n", h_a, h_b, d_a, d_b);

    // Show at most the first 20 results, never reading past the array end.
    const int shown = (DSIZE < 20) ? DSIZE : 20;
    for (int i = 0; i < shown; i++)
        printf("h_a[%d] = %f\n", i, h_a[i]);

    release_buffers(h_a, h_b, d_a, d_b);
    return 0;
}