我想了解指针如何在CUDA中运行。无法在CUDA的内核中打印设备变量值
下面给出的是一个简单的程序,它为设备中分配的变量赋予一定的值。 (我使用CUDA工具包8.0与NVIDIA Quadro K2000显卡)
当我使用printf()在内核中打印值时,它显示错误的值。
然而,当我从主机功能做了cudaMemcopy,然后从主机功能打印,它会显示正确的值...
//CODE...
#include <stdio.h>
#include <stdlib.h>
#include<cuda.h>
#include<cuda_runtime.h>
#define N 3
__global__ void tempker(int *jk,int value)
{
(*jk) = value*2;
printf("displayed from inside the kernel :\nvalue of jk = %d\nvalue of *jk = %d\n",jk,*jk);
}
int tempfunc(int *kp)
{
int *jk = NULL,*lm=NULL;
lm = (int *)(malloc(sizeof(int)));
*lm = 150;
cudaError_t err = cudaSuccess;
cudaMalloc((void**)&jk, sizeof(int));
printf("jk pointer after cudaMalloc: displayed from host = %d\n",jk);
tempker<<<1,1>>>(jk,150);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
err = cudaMemcpy(lm, jk, (sizeof(int)), cudaMemcpyDeviceToHost);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy jk from device to host (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("Displayed in host function after memcopy: value of *lm = *jk = %d\n",*lm);
cudaFree(jk);
err = cudaMalloc((void**)&kp, sizeof(int));
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to allocate device kp (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
tempker<<<1,1>>>(kp,(N*N*N));
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
err = cudaMemcpy(lm, kp, (sizeof(int)), cudaMemcpyDeviceToHost);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy kp from device to host (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("Displayed in host function after memcopy: value of *lm = *kp = %d\n",*lm);
cudaFree(kp);
free(lm);
return 100;
}
int main(){
int *kp = NULL;
printf("tempfunc(): return value = %d\n",tempfunc(kp));
return 0;
}
输出:
jk pointer after cudaMalloc: displayed from host = 13238272
displayed from inside the kernel :
value of jk = 13238272
value of *jk = 9
Displayed in host function after memcopy: value of *lm = *jk = 300
displayed from inside the kernel :
value of jk = 13238272
value of *jk = 9
Displayed in host function after memcopy: value of *lm = *kp = 54
tempfunc(): return value = 100
的问题是: 是否可以在内核中打印设备中分配的变量的值?
我不明白你的问题。你的代码*是打印出内核中设备变量的值。是什么让你觉得它不是? – talonmies
然而,它打印错误的值,例如,当内核启动时,* jk应该= 150 * 2 = 300.但在内核中,它打印'9'... –