2009-11-18 70 views
1

我写了一个简单的基于SDK的OpenCL程序,它可以编译并运行,但输出结果不正确。我是不是哪里做错了?(标题:简单的OpenCL程序可以编译并运行,但输出不正确)

任何有关学习调试C和OpenCL的建议都非常感谢。我对这个平台很陌生。

代码如下。

数组c中的输出全为零。

谢谢。

test_opencl.h

// Declarations for the OpenCL array-addition test program.
// The guard macro deliberately avoids a leading underscore followed by an
// uppercase letter: such identifiers are reserved for the implementation.
#ifndef TEST_OPENCL_H
#define TEST_OPENCL_H

// Program entry point: sets up logging and runs runTest().
int main(int argc, const char** argv);

// Runs the OpenCL test using the given command line.
// Returns 0 when the test ran to completion, or EXIT_FAILURE when the
// kernel program could not be built.
int runTest(int argc, const char** argv);

#endif // TEST_OPENCL_H

test_opencl.cl

// Element-wise addition: c[i] = a[i] + b[i], one work-item per element.
//
// The host program creates, fills and verifies these buffers as int arrays,
// so the parameters are declared int here. Declaring them float made the
// device reinterpret the integer bit patterns as floating-point values
// (small ints map to denormals, which devices may flush to zero), so the
// sums read back on the host were wrong.
//
// a, b : input buffers (read only)
// c    : output buffer
// There is no bounds check: the global work size must not exceed the number
// of elements in the buffers.
__kernel void add_array(__global const int *a, __global const int *b, __global int *c)
{
    size_t gid = get_global_id(0);
    c[gid] = a[gid] + b[gid];
}

test_opencl.cpp

// standard utility and system includes 
#include <stdlib.h>

#include <oclUtils.h> 
#include "test_opencl.h" 

// OpenCL error catcher: file-scope status code that runTest() overwrites with
// the result of each OpenCL call before passing it to shrCheckError
cl_int err = 0; 

// Program entry point
// *********************************************************************
// Selects the log file, logs a start banner containing argv[0], runs
// runTest() and checks its return code against 0, then hands control to
// shrEXIT.
int main(int argc, const char** argv)
{
    // direct logging to test_opencl.txt and announce the program
    shrSetLogFileName ("test_opencl.txt");
    shrLog(LOGBOTH, 0, "%s Starting...\n\n", argv[0]);

    // execute the test and compare its status with the expected value 0
    const int status = runTest(argc, argv);
    shrCheckError(status, 0);

    // SDK exit helper
    shrEXIT(argc, argv);
}

//! Run a simple test for OPENCL 
// ********************************************************************* 
int runTest(int argc, const char** argv) 
{ 
    cl_context gpu_context; 
    cl_command_queue cmd_queue; 
    cl_program program; 
    cl_kernel test_kernel; 

    const size_t szGlobalWorkSize = 10; 
    const size_t szLocalWorkSize = 10; 

    // size of memory required to store the array 
    const unsigned int mem_size = sizeof(int) * 10; 

    // create the OpenCL context on a GPU device 
    gpu_context = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); 
    shrCheckError(err, CL_SUCCESS); 

    // get devices 
    cl_device_id device; 
    if(shrCheckCmdLineFlag(argc, argv, "device")) { 
     int device_nr = 0; 
     shrGetCmdLineArgumenti(argc, argv, "device", &device_nr); 
     device = oclGetDev(gpu_context, device_nr); 
    } else { 
     device = oclGetMaxFlopsDev(gpu_context); 
    } 

    // create a command-queue 
    cmd_queue = clCreateCommandQueue(gpu_context, device, 0, &err); 
    shrCheckError(err, CL_SUCCESS); 

    // allocate and initalize host memory 
    int a[10], b[10], c[10]; 
    for (int i = 0; i < 10; i++) { 
     a[i] = i; 
     b[i] = i * i; 
    } 

    // create buffers on device 
    cl_mem vol_a = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, a, &err); 
    shrCheckError(err, CL_SUCCESS); 

    cl_mem vol_b = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, b, &err); 
    shrCheckError(err, CL_SUCCESS); 

    cl_mem vol_c = clCreateBuffer(gpu_context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, c, &err); 
    shrCheckError(err, CL_SUCCESS); 

    // copy data from host to device 
    err = clEnqueueWriteBuffer(cmd_queue, vol_a, CL_TRUE, 0, mem_size, a, 0, NULL, NULL); 
    err |= clEnqueueWriteBuffer(cmd_queue, vol_b, CL_TRUE, 0, mem_size, b, 0, NULL, NULL); 
    shrCheckError(err, CL_SUCCESS); 

    // Program Setup 
    size_t program_length; 
    char* source_path = shrFindFilePath("test_opencl.cl", argv[0]); 
    shrCheckError(source_path != NULL, shrTRUE); 
    char *source = oclLoadProgSource(source_path, "", &program_length); 
    shrCheckError(source != NULL, shrTRUE); 

    // create the program 
    program = clCreateProgramWithSource(gpu_context, 1, (const char **)&source, &program_length, &err); 
    shrCheckError(err, CL_SUCCESS); 

    // build the program 
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
    if (err != CL_SUCCESS) 
    { 
     // write out standard error, Build Log and PTX, then return error 
     shrLog(LOGBOTH | ERRORMSG, err, STDERROR); 
     return(EXIT_FAILURE); 
    } 

    clFinish(cmd_queue); 

    shrLog(LOGBOTH, 0, "%s Starting kernel operation...\n\n", argv[0]); 

    // create the test kernel 
    test_kernel = clCreateKernel(program, "add_array", &err); 
    shrCheckError(err, CL_SUCCESS); 

    // set the args values for the kernel 
    err = clSetKernelArg(test_kernel, 0, sizeof(cl_mem), (void *) &vol_a); 
    err |= clSetKernelArg(test_kernel, 1, sizeof(cl_mem), (void *) &vol_b); 
    err |= clSetKernelArg(test_kernel, 2, sizeof(cl_mem), (void *) &vol_c); 
    shrCheckError(err, CL_SUCCESS); 

    err = clEnqueueNDRangeKernel(cmd_queue, test_kernel, 1, NULL, &szGlobalWorkSize, NULL, 0, NULL, NULL); 
    shrCheckError(err, CL_SUCCESS); 

    clFinish(cmd_queue); 

    // copy result from device to host 
    err = clEnqueueReadBuffer(cmd_queue, vol_c, CL_TRUE, 0, mem_size, c, 0, NULL, NULL); 
    shrCheckError(err, CL_SUCCESS); 

    int d[10]; 
    err = clEnqueueReadBuffer(cmd_queue, vol_a, CL_TRUE, 0, mem_size, d, 0, NULL, NULL); 
    shrCheckError(err, CL_SUCCESS); 

    clFinish(cmd_queue); 

    shrLog(LOGBOTH, 0, "%s Finished kernel operation...\n\n", argv[0]); 

    bool passed = true; 

    for (int i = 0; i < 10; i++) { 
     if (c[i] != i + i * i) 
      passed = false; 
      shrLog(LOGBOTH, 0, "c = %d d = %d\n", c[i], d[i]); 
    } 

    if (passed) 
     shrLog(LOGBOTH, 0, "%s Test Passed\n\n", argv[0]); 
    else 
     shrLog(LOGBOTH, 0, "%s Test Failed\n\n", argv[0]); 

    // cleanup OpenCL 
    clReleaseMemObject(vol_a); 
    clReleaseMemObject(vol_b); 
    clReleaseMemObject(vol_c); 

    clReleaseKernel(test_kernel); 
    clReleaseProgram(program); 
    clReleaseCommandQueue(cmd_queue); 
    clReleaseContext(gpu_context); 

    return 0; 
} 

回答

1

代码中存在的问题及其解决方案可以在这里(here)找到。

+1

你应该把自己的回答标记为正确答案,这样这个问题就不会再出现在未回答列表中了吧?(感谢你发布答案,即使这个答案是别人发现的 :o) – 2009-11-25 22:00:07