我们有以下结构定义CUDA:主机存储器指针不会被复制到设备存储器
typedef struct PurchaseOrder
{
char* Value1;
double Value2;
double* Value3;
int Value3Length;
__device__ int GetValue3Length() { return Value3Length; }
__device__ double GetValue3(int i) { return Value3[i]; }
__device__ void SetValue3(int i, double value) { Value3[i] = value; }
};
的PurchaseOrder的数据(结构的数组)从C#应用程序被整理成下面的C DLL函数
int RunMonteCarlo(PurchaseOrder *hostPurchaseOrders, int length) {
PurchaseOrder *devPurchaseOrders;
// display the results
for (int i = 0; i < length; i++)
{
//printf("\n\nAddress: %u",hostPurchaseOrders+i);
printf("\n\nIndex: %d", i);
printf("\nValue1: %s",(hostPurchaseOrders+i)->Value1);
printf("\nValue2: %f",(hostPurchaseOrders+i)->Value2);
for(int j = 0; j < (hostPurchaseOrders+i)->Value3Length; j++)
{
printf("\nValue3[%d]: %fl", j, (hostPurchaseOrders+i)->Value3[j]);
}
}
// allocate the memory on the GPU
HANDLE_ERROR(cudaMalloc((void**)&devPurchaseOrders, length * sizeof(PurchaseOrder)));
// copy the array 'PurchaseOrder' to the GPU
HANDLE_ERROR(cudaMemcpy(devPurchaseOrders, hostPurchaseOrders, length * sizeof(PurchaseOrder), cudaMemcpyHostToDevice));
// Run the kernel code
MonteCarloKernel<<<60,32>>>(devPurchaseOrders, length);
// copy the array 'PurchaseOrders' back from the GPU to the CPU
HANDLE_ERROR(cudaMemcpy(hostPurchaseOrders, devPurchaseOrders, length * sizeof(PurchaseOrder), cudaMemcpyDeviceToHost));
// free the memory allocated on the GPU
HANDLE_ERROR(cudaFree(devPurchaseOrders));
return 0;
}
__global__ void MonteCarloKernel(PurchaseOrder *purchaseorders, long length) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
while (i < length)
{
purchaseorders[i].SetAAUS(1.11);
for (int j=0; j < purchaseorders[i].GetValue3Length(); j++)
{
//purchaseorders[i].SetValue3(j,1.0);
}
i += stride;
}
}
数据经过正确的编组,正如printf代码在开始时验证的那样。
然而,值3(双阵列)似乎不会被复制到内核设备存储器作为行个PurchaseOrders [I] .SetValue3(J,1.0)的应用程序崩溃。
我应该怎么做才能解决这个问题呢?
当应用程序崩溃时,控制台窗口刚刚关闭。我可以使用什么调试技术来获得一些有意义的消息?