2016-05-30 67 views
2

我试图在两个共享库中运行可重定位设备代码(RDC),并且两个库都使用了 CUDA 的 Thrust。如果我停止在 kernel.cu 中使用 Thrust,一切运行良好,但这不是一个选项。启用 RDC 并使用多个共享对象时,CUDA 会在 registerEntryFunction 处触发 SIGSEGV。

编辑:如果rdc被禁用,程序也可以工作。对我来说也不是一种选择。

它编译正常,但在运行时停止并出现段错误。 gdb告诉我这个:

Program received signal SIGSEGV, Segmentation fault. 
0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)() 
(cuda-gdb) bt 
#0 0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)() 
#1 0x000000000040876c in __cudaRegisterFunction() 
#2 0x0000000000402b58 in __nv_cudaEntityRegisterCallback(void**)() 
#3 0x00007ffff75051a3 in __cudaRegisterLinkedBinary(__fatBinC_Wrapper_t const*, void (*)(void**), void*)() 
from /home/mindoms/rdctestmcsimple/libkernel.so 
#4 0x00007ffff75050b1 in __cudaRegisterLinkedBinary_66_tmpxft_00007a5f_00000000_16_cuda_device_runtime_ compute_52_cpp1_ii_8b1a5d37() from /home/user/rdctestmcsimple/libkernel.so 
#5 0x000000000045285d in __libc_csu_init() 
#6 0x00007ffff65ea50f in __libc_start_main() from /lib64/libc.so.6 

这里是我剥去的示例(使用cmake),显示错误。

main.cpp中:

#include "kernel.cuh" 
#include "kernel2.cuh" 

int main(){ 
    Kernel k; 
    k.callKernel(); 

    Kernel2 k2; 
    k2.callKernel2(); 
} 

kernel.cuh:

// kernel.cuh: host-side interface for the device code in kernel.cu.
// Deliberately free of CUDA/Thrust types so it can be included from
// plain C++ translation units such as main.cpp.
#ifndef __KERNEL_CUH__ 
#define __KERNEL_CUH__ 
    // Thin wrapper whose only job is to launch the test kernel
    // defined in kernel.cu (which uses Thrust internally).
    class Kernel{ 
    public: 
    // Allocates a thrust::device_vector, launches `thekernel`,
    // checks for CUDA errors and prints the results (see kernel.cu).
    void callKernel(); 
    }; 
#endif 

kernel.cu:

#include "kernel.cuh" 
#include <stdio.h> 
#include <iostream> 
#include <thrust/device_vector.h> 

// Toy kernel: thread 0 prints a greeting; every thread writes twice
// its index into `data`. The caller must launch no more threads than
// `data` has elements -- there is no bounds check here.
__global__
void thekernel(int *data){
    const int tid = threadIdx.x;

    if (tid == 0)
        printf("the kernel says hello\n");

    data[tid] = tid * 2;
}

// Launches `thekernel` on a Thrust-managed device vector, checks for
// errors and prints the results. Device memory is released when D2
// goes out of scope (RAII via thrust::device_vector).
void Kernel::callKernel(){

    thrust::device_vector<int> D2;
    D2.resize(11);
    // Raw device pointer for the kernel launch; data() avoids forming
    // a device_reference to element 0 first.
    int * raw_ptr = thrust::raw_pointer_cast(D2.data());

    printf("Kernel::callKernel called\n");

    // Launch one thread per element. The original hard-coded 10
    // threads for an 11-element vector, so D2[10] was printed
    // uninitialized below.
    thekernel <<< 1, (int)D2.size() >>> (raw_ptr);

    // Check launch-configuration errors first, then synchronize to
    // surface asynchronous execution errors. cudaThreadSynchronize()
    // is deprecated in favor of cudaDeviceSynchronize().
    cudaError_t code = cudaGetLastError();
    if (code == cudaSuccess) {
        code = cudaDeviceSynchronize();
    }
    if (code != cudaSuccess) {
        std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel!" << std::endl;
    }

    // Each D2[i] read performs a device->host copy -- fine for a demo;
    // use thrust::copy for bulk transfers in real code.
    for (size_t i = 0; i < D2.size(); i++)
        std::cout << "Kernel D[" << i << "]=" << D2[i] << std::endl;
}

kernel2.cuh:

// kernel2.cuh: host-side interface for the device code in kernel2.cu.
// Mirrors kernel.cuh; no CUDA/Thrust types leak into the interface.
#ifndef __KERNEL2_CUH__ 
#define __KERNEL2_CUH__ 
    // Thin wrapper whose only job is to launch the test kernel
    // defined in kernel2.cu (which uses Thrust internally).
    class Kernel2{ 
    public: 
    // Allocates a thrust::device_vector, launches `thekernel2`,
    // checks for CUDA errors and prints the results (see kernel2.cu).
    void callKernel2(); 
    }; 
#endif 

kernel2.cu

#include "kernel2.cuh" 
#include <stdio.h> 
#include <iostream> 
#include <thrust/device_vector.h> 

// Toy kernel (twin of `thekernel`): thread 0 prints a greeting; every
// thread writes twice its index into `data2`. No bounds check -- the
// caller must not launch more threads than `data2` has elements.
__global__
void thekernel2(int *data2){
    const int tid = threadIdx.x;

    if (tid == 0)
        printf("the kernel2 says hello\n");

    data2[tid] = tid * 2;
}

// Launches `thekernel2` on a Thrust-managed device vector, checks for
// errors and prints the results. Mirrors Kernel::callKernel.
void Kernel2::callKernel2(){
    thrust::device_vector<int> D;
    D.resize(11);
    // Raw device pointer for the kernel launch.
    int * raw_ptr = thrust::raw_pointer_cast(D.data());

    printf("Kernel2::callKernel2 called\n");

    // Launch one thread per element; the original launched only 10
    // threads for 11 elements, leaving D[10] uninitialized.
    thekernel2 <<< 1, (int)D.size() >>> (raw_ptr);

    // Check launch-configuration errors first, then synchronize to
    // surface asynchronous execution errors. cudaThreadSynchronize()
    // is deprecated in favor of cudaDeviceSynchronize().
    cudaError_t code = cudaGetLastError();
    if (code == cudaSuccess) {
        code = cudaDeviceSynchronize();
    }
    if (code != cudaSuccess) {
        std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel2!" << std::endl;
    }

    // Per-element reads each trigger a device->host copy (demo only).
    for (size_t i = 0; i < D.size(); i++)
        std::cout << "Kernel2 D[" << i << "]=" << D[i] << std::endl;
}

最初使用的是下面的 CMake 文件,但当我“手动”编译时也遇到了同样的问题:

nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel2.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel2.o -o libkernel2.so 
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel.o -o libkernel.so 
g++ -o main main.cpp libkernel.so libkernel2.so -L/opt/cuda/current/lib64 

按照某处的建议,在每个 nvcc 调用中添加 -cudart shared 之后,会得到一个不同的错误:

warning: Cuda API error detected: cudaFuncGetAttributes returned (0x8) 

terminate called after throwing an instance of 'thrust::system::system_error' 
    what(): function_attributes(): after cudaFuncGetAttributes: invalid device function 

Program received signal SIGABRT, Aborted. 
0x000000313c432625 in raise() from /lib64/libc.so.6 
(cuda-gdb) bt 
#0 0x000000313c432625 in raise() from /lib64/libc.so.6 
#1 0x000000313c433e05 in abort() from /lib64/libc.so.6 
#2 0x00000031430bea7d in __gnu_cxx::__verbose_terminate_handler()() from /usr/lib64/libstdc++.so.6 
#3 0x00000031430bcbd6 in std::set_unexpected(void (*)())() from /usr/lib64/libstdc++.so.6 
#4 0x00000031430bcc03 in std::terminate()() from /usr/lib64/libstdc++.so.6 
#5 0x00000031430bcc86 in __cxa_rethrow() from /usr/lib64/libstdc++.so.6 
#6 0x00007ffff7d600eb in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::append(unsigned long)() from ./libkernel.so 
#7 0x00007ffff7d5f740 in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::resize(unsigned long)() from ./libkernel.so 
#8 0x00007ffff7d5b19a in Kernel::callKernel()() from ./libkernel.so 
#9 0x00000000004006f8 in main() 

的CMakeLists.txt:请调整你的环境

# Legacy FindCUDA-based build (pre-3.8 CMake, no native CUDA language
# support). Quoted from the question for reproduction purposes.
cmake_minimum_required(VERSION 2.6.2) 

project(Cuda-project) 

set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake/cuda" ${CMAKE_MODULE_PATH}) 

# NOTE(review): adjust to the local toolkit installation.
SET(CUDA_TOOLKIT_ROOT_DIR "/opt/cuda/current") 

# NOTE(review): this targets compute_52/sm_52 while the manual nvcc
# commands above use -arch=sm_35 -- confirm which architecture the
# target GPU actually needs; an arch mismatch typically surfaces as
# "invalid device function" at runtime.
SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52) 

find_package(CUDA REQUIRED) 
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64) 

# Enables relocatable device code (-dc) in the CUDA_ADD_LIBRARY calls
# below; set before those calls so the wrappers pick it up.
set(CUDA_SEPARABLE_COMPILATION ON) 

# Makes CUDA_ADD_LIBRARY produce shared libraries (.so).
set(BUILD_SHARED_LIBS ON) 

# Shared libraries require position-independent code.
list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC) 

CUDA_ADD_LIBRARY(kernel 
    kernel.cu 
) 

CUDA_ADD_LIBRARY(kernel2 
    kernel2.cu 
) 

cuda_add_executable(rdctest main.cpp) 
# cudadevrt is required whenever relocatable device code is linked.
TARGET_LINK_LIBRARIES(rdctest kernel kernel2 cudadevrt) 

关于我的系统:

Fedora 23 
kernel: 4.4.2-301.fc23.x86_64 
Nvidia Driver: 361.28 
Nvidia Toolkit: 7.5.18 
g++: g++ (GCC) 5.3.1 20151207 (Red Hat 5.3.1-2) 

转载于:

CentOS release 6.7 (Final) 
Kernel: 2.6.32-573.8.1.el6.x86_64 
Nvidia Driver: 352.55 
Nvidia Toolkit: 7.5.18 
g++ (GCC) 4.4.7 20120313 (Red Hat 4.4.7-16) 
glibc 2.12 
cmake to 3.5 
+1

Fedora 23 和 g++ 5.3.1 不是 [CUDA 7.5 官方支持的环境](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) –

+0

@RobertCrovella感谢您的输入。我在一个支持的系统上重现了这个问题并更新了这个问题。 – estefan

+0

相关讨论[这里](https://groups.google.com/forum/#!topic/thrust-users/LJ8vPiY6-78)。 –

回答

0

显然,这与链接哪种 CUDA 运行时(共享的还是静态的)有关。

我稍微修改了你的例子:不是构建两个共享库并把它们分别链接到可执行文件,而是先构建两个静态库,把它们链接进一个共享库,再把这个共享库链接到可执行文件。

而且,这里是使用新的(> = 3.8)原生CUDA语言支持更新的CMake的文件。

# Build using native CUDA language support (CMake >= 3.8): CUDA is a
# first-class language, so no FindCUDA / CUDA_ADD_LIBRARY wrappers.
cmake_minimum_required(VERSION 3.8) 
project (CudaSharedThrust CXX CUDA) 

# NOTE(review): targets compute_61 only -- adjust to your GPU.
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_61,code=compute_61") 

# Shared libraries require position-independent code everywhere,
# including the static libraries folded into them below.
if(BUILD_SHARED_LIBS) 
    set(CMAKE_POSITION_INDEPENDENT_CODE ON) 
endif() 

# The two kernel translation units are built as STATIC libraries with
# separable (relocatable device) compilation enabled.
add_library(kernel STATIC kernel.cu) 
set_target_properties(kernel PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 

add_library(kernel2 STATIC kernel2.cu) 
set_target_properties(kernel2 PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 

# Single combined library: both static kernel libraries are linked
# into one target so the device link step sees all device code at once.
add_library(allkernels empty.cu) # empty.cu is an empty file 
set_target_properties(allkernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 
target_link_libraries(allkernels kernel kernel2) 


add_executable(rdctest main.cpp) 
set_target_properties(rdctest PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 
target_link_libraries(rdctest allkernels) 

没有任何CMake标志(静态构建),构建成功,程序工作。

使用 -DBUILD_SHARED_LIBS=ON 时,程序可以编译,但运行时会崩溃,出现与你相同的错误。

楼内有

cmake .. -DBUILD_SHARED_LIBS=ON -DCMAKE_CUDA_FLAGS:STRING="--cudart shared" 

则可以编译,而且实际上能正常运行!因此,出于某种原因,这种场景必须使用共享的 CUDA 运行时。

另请注意,把结构从“2 个共享库”改成“2 个静态库合入 1 个共享库”是必要的,否则程序会崩溃并抛出 thrust::system::system_error。

不过这是预期行为,因为 nvcc 在设备链接期间实际上会忽略共享对象文件:http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#libraries