在 std::set<boost::uuids::uuid> 的析构函数中，我在 _int_free 中遇到了段错误（core dump）。为什么？

我使用 boost::shared_ptr 来管理我的类对象指针。在我的类中，有一个 std::set<boost::uuids::uuid> 类型的成员。我只在初始化代码中给这个成员赋值一次。我已经反复检查过，没有缓冲区溢出。我也运行过 valgrind 进行检查，没有报告任何内存错误。在 std::set<boost::uuids::uuid> 的析构函数中，我在 _int_free 中遇到了段错误（core dump）。为什么？

但是，在我的类的析构函数中，程序在 _int_free 处发生了核心转储（core dump）。调用栈如下：

(gdb) bt 
#0 0x0098c02e in _int_free() from /lib/libc.so.6 
#1 0x048ec552 in operator delete(void*)() from /usr/lib/libstdc++.so.6 
#2 0x08056372 in std::_Rb_tree<boost::uuids::uuid, boost::uuids::uuid, std::_Identity<boost::uuids::uuid>, std::less<boost::uuids::uuid>, std::allocator<boost::uuids::uuid> >::_M_erase(std::_Rb_tree_node<boost::uuids::uuid>*)() 
#3 0x08056367 in std::_Rb_tree<boost::uuids::uuid, boost::uuids::uuid, std::_Identity<boost::uuids::uuid>, std::less<boost::uuids::uuid>, std::allocator<boost::uuids::uuid> >::_M_erase(std::_Rb_tree_node<boost::uuids::uuid>*)() 
#4 0x08056367 in std::_Rb_tree<boost::uuids::uuid, boost::uuids::uuid, std::_Identity<boost::uuids::uuid>, std::less<boost::uuids::uuid>, std::allocator<boost::uuids::uuid> >::_M_erase(std::_Rb_tree_node<boost::uuids::uuid>*)() 
#5 0x08056367 in std::_Rb_tree<boost::uuids::uuid, boost::uuids::uuid, std::_Identity<boost::uuids::uuid>, std::less<boost::uuids::uuid>, std::allocator<boost::uuids::uuid> >::_M_erase(std::_Rb_tree_node<boost::uuids::uuid>*)() 
#6 0x001ceb8c in vsdk::radius::CRadiusAttribute::~CRadiusAttribute()() from ./libRadiusHandler.so 
#7 0x001d5a33 in vsdk::radius::CRadiusMsg::~CRadiusMsg()() from ./libRadiusHandler.so 
#8 0x001d3509 in boost::detail::sp_counted_impl_pd<vsdk::radius::CRadiusClientReq*, vsdk::radius::CRadiusClientReq::Deleter>::dispose()() from ./libRadiusHandler.so 
#9 0x08055d48 in boost::detail::shared_count::~shared_count()() 
#10 0x001d1eff in vsdk::radius::CRadiusClientHandler::handleRecv(ACE_INET_Addr const&, ACE_INET_Addr const&, ACE_Message_Block&, bool&)() from ./libRadiusHandler.so 
#11 0x001e27ba in vsdk::radius::CUdpMsg::run()() from ./libRadiusHandler.so 
#12 0x001c8a9c in vsdk::radius::CHandlerMgr::svc()() from ./libRadiusHandler.so 
#13 0x00d55172 in ACE_Task_Base::svc_run (args=0x96f24d8) at Task.cpp:271 
#14 0x00d56798 in ACE_Thread_Adapter::invoke_i (this=0x9737168) at Thread_Adapter.cpp:161 
#15 0x00d56835 in ACE_Thread_Adapter::invoke (this=0x9737168) at Thread_Adapter.cpp:96 
#16 0x00cefa31 in ace_thread_adapter (args=0x9737168) at Base_Thread_Adapter.cpp:122 
#17 0x00aeba49 in start_thread() from /lib/libpthread.so.0 
#18 0x009fbaee in clone() from /lib/libc.so.6 
(gdb)

崩溃似乎发生在 std::set<boost::uuids::uuid> 的析构函数中。为什么？

这里是我的代码，一些不重要的代码被省略：

/// Request object that owns one CRadiusMsg by value, so destroying the
/// request also destroys the message and everything the message contains.
/// Only the data member is shown in this excerpt; other members are omitted.
class CRadiusClientReq 
{ 
private: 
    CRadiusMsg  m_radiusMsg;   ///< radius msg 
}; 
/// Shared-ownership pointer type used to pass CRadiusClientReq objects around.
typedef boost::shared_ptr<CRadiusClientReq>  CSpCRadiusClientReq; 

/// Message object holding a fixed-size array of attributes by value.
/// Because the array is a direct member, destroying a CRadiusMsg runs the
/// destructor of all MAX_ATTR_NUM attributes.
class CRadiusMsg 
{ 
private: 
    static const uint32_t MAX_ATTR_NUM = 23;   ///< number of attribute slots in every message 
    CRadiusAttribute        m_attributes[MAX_ATTR_NUM];   ///< attribute slots, stored inline 
}; 

/// One attribute slot of a CRadiusMsg.
/// NOTE(review): presumably m_type selects which of the value members below
/// is meaningful for a given attribute -- confirm against the omitted code.
class CRadiusAttribute 
{ 
private: 
    EAttributeType     m_type; 
    uint32_t      m_uint32; 
    uint8_t       m_array[CHAP_PASSWORD_LEN]; 
    std::string      m_string; 
    // The reported crash happens while the nodes of this set are being freed
    // (the _Rb_tree::_M_erase frames in the backtrace).
    std::set<boost::uuids::uuid> m_resSet; 
}; 

class CRadiusRequestRspWaitMgr 
{ 
public: 
    bool queryRequest(const uint8_t id, CSpCRadiusClientReq & spRadiusClientReq) 
    { 
     // lock 
     boost::mutex::scoped_lock l(m_mutex); 

     RadiusRequestRspWaitMap::iterator itr = m_requestRspWaitMap.find(id); 
     if (m_requestRspWaitMap.end() == itr) 
     { 
      // not found 
      return false; 
     } 

     spRadiusClientReq = itr->second; 
     if (!spRadiusClientReq) 
     { 
      return false; 
     } 

     return true; 
    } 

    bool delRequest(const uint8_t id) 
    { 
     boost::mutex::scoped_lock l(m_mutex); 

     RadiusRequestRspWaitMap::iterator itr = m_requestRspWaitMap.find(id); 
     if (m_requestRspWaitMap.end() == itr) 
     { 
      // not found 
      return false; 
     } 

     // erase it 
     m_requestRspWaitMap.erase(itr); 

     return true; 
    } 

private: 
    typedef std::map< uint8_t, CSpCRadiusClientReq> RadiusRequestRspWaitMap; 
    RadiusRequestRspWaitMap   m_requestRspWaitMap; // requests which is waiting response. 
    boost::mutex     m_mutex;    // 
}; 

/// Handler that keeps two independent wait managers, one per response kind.
/// Only these two members are shown in this excerpt; the rest is omitted.
class CRadiusClientHandler 
{ 
private: 
    CRadiusRequestRspWaitMgr m_authReqRspWaitMgr; ///< auth rsp wait manager 
    CRadiusRequestRspWaitMgr m_acctReqRspWaitMgr; ///< acct rsp wait manager 
}; 

/// Handles one received datagram that is expected to be a response.
///
/// Selects the auth or acct wait manager from the packet code and the local
/// port, looks up the pending request by the packet id, processes it (that
/// part is omitted from this excerpt) and finally removes the request from
/// the selected wait manager. Every early return drops the datagram silently.
///
/// @param localAddr  local address the datagram arrived on; only its port is read here
/// @param peer       sender address (not read in the visible code)
/// @param msg        message block holding the datagram bytes
/// @param bReuse     out: set to true unconditionally by this function
void CRadiusClientHandler::handleRecv(const ACE_INET_Addr& localAddr, const ACE_INET_Addr& peer, ACE_Message_Block& msg, bool &bReuse) 
{ 
    // Report the message block as reusable; the caller shown in this file
    // (CUdpMsg::run) deletes the block when this flag is true.
    bReuse = true; 

    uint8_t  *buf = (uint8_t*)msg.rd_ptr(); 
    uint32_t len = msg.length(); 
    // NOTE(review): 20 is presumably the fixed RADIUS header size -- confirm.
    if (len < 20) 
    { 
     return; 
    } 

    // The first byte is the packet code, the second the request id.
    uint8_t  code = buf[0]; 
    uint8_t  id = buf[1]; 

    // Choose the wait manager that matches the response kind.
    CRadiusRequestRspWaitMgr *rspWaitMgr = 0; 
    if (D_PACKET_ACCESS_ACCEPT == code 
     || D_PACKET_ACCESS_REJECT == code) 
    { 
     // auth response 

     if (localAddr.get_port_number() != m_srcAuthPort) 
     { 
      // an auth response must arrive on the auth source port 
      return; 
     } 

     // use the auth wait manager 
     rspWaitMgr = &m_authReqRspWaitMgr; 
    } 
    else if (D_PACKET_ACCOUNTING_RESPONSE == code) 
    { 
     // acct response 

     if (localAddr.get_port_number() != m_srcAcctPort) 
     { 
      // an acct response must arrive on the acct source port 
      return; 
     } 

     // use the acct wait manager 
     rspWaitMgr = &m_acctReqRspWaitMgr; 
    } 
    else 
    { 
     // any other packet code is not handled here 
     return; 
    } 

    // Find the pending request with the same id. queryRequest() already
    // returns false for a null pointer, so the second test is only defensive.
    CSpCRadiusClientReq  spClientReq; 
    if (!rspWaitMgr->queryRequest(id, spClientReq) 
     || !spClientReq) 
    { 
     return; 
    } 

    // some handle process code here, omitted. 
    ... 

    // Remove the request from the wait manager. After this, spClientReq may be
    // the last reference, in which case the request is destroyed when this
    // function returns (frames #8-#10 of the backtrace).
    rspWaitMgr->delRequest(id); 
} 

/// Dispatches the stored datagram to the registered notify target.
///
/// @return 0 once the datagram has been handed to the target; -1 when there
///         is no notify target or no message block to dispatch.
int32_t CUdpMsg::run() 
{ 
    // m_aceMb is dereferenced below, and this function itself deletes and
    // nulls it, so a repeated call (or an object without a block) must stop here.
    if (!m_udpMsgNotify || !m_aceMb) 
    { 
     return -1; 
    } 

    bool bReuse = true; 

    m_udpMsgNotify->handleRecv(m_localAddr, m_srcAddr, *m_aceMb, bReuse); 

    // NOTE(review): presumably a true flag means the handler did not keep the
    // block, so it is released here -- confirm the intended meaning of bReuse.
    if (bReuse) 
    { 
     delete m_aceMb; 
     m_aceMb = 0; 
    } 

    return 0; 
} 

// send request 
/// Builds a request, registers it in the auth wait manager and sends it.
/// The id generation and the actual sending are omitted from this excerpt.
///
/// @param userName       not read in the visible code
/// @param chapId         not read in the visible code
/// @param chapChallenge  not read in the visible code
/// @param pwdCaculated   not read in the visible code
/// @param uid            not read in the visible code
/// @param authority      not read in the visible code
/// @param resIds         resource ids handed to the message via setResList()
/// @param authId         out: receives the request id, written only on success
/// @param result         out: ECLIENTHANDLERERROR_SUCCESS on success,
///                       ECLIENTHANDLERERROR_RESNUM_OVERTOP when setResList()
///                       fails, otherwise ECLIENTHANDLERERROR_FAIL
/// @return true on success, false on any failure visible in this excerpt
bool CRadiusClientHandler::sendNonLoginAuthReq(const std::string & userName 
           , const uint8_t chapId 
           , const boost::array<uint8_t,16> & chapChallenge 
           , const boost::array<uint8_t,16> & pwdCaculated 
           , const boost::array<uint8_t,16> & uid 
           , const uint32_t authority 
           , const std::set<boost::uuids::uuid> & resIds 
           , uint8_t & authId 
           , EClientHandlerError & result) 
{ 
    // Start from the generic failure code; it is overwritten further down.
    result = ECLIENTHANDLERERROR_FAIL; 
    CSpCRadiusClientReq spRadiusClientReq(new CRadiusClientReq()); 
    // NOTE(review): a plain new-expression throws on failure instead of
    // returning null, so this branch is presumably never taken -- confirm.
    if (!spRadiusClientReq) 
    { 
     return false; 
    } 

    CRadiusMsg &radiusMsg = spRadiusClientReq->getRadiusMsg(); 

    // Store the resource id set in the message.
    if (!radiusMsg.setResList(resIds)) 
    { 
     result = ECLIENTHANDLERERROR_RESNUM_OVERTOP; 
     return false; 
    } 


    // some other unimportant codes, include generate reqId, omitted. 
    uint8_t  reqId = 0; 
    ... 


    // Register the request so that the matching response can find it.
    // On failure the result stays ECLIENTHANDLERERROR_FAIL.
    if (!m_authReqRspWaitMgr.addRequest(reqId, spRadiusClientReq)) 
    { 
     return false; 
    } 

    // some other unimportant codes, include message sending, omitted. 
    ... 


    // Report the request id and success to the caller.
    authId = reqId; 
    result = ECLIENTHANDLERERROR_SUCCESS; 
    return true; 
}

CRadiusClientHandler::handleRecv 处理接收到的 UDP 数据包。它在 rspWaitMgr（保存所有等待响应的请求的池）中查找匹配的请求 spClientReq。

在 CRadiusClientHandler::handleRecv 执行完毕并退出作用域后，spClientReq 会自动释放该 shared_ptr 所持有的指针。

而且我所有的指针都由 shared_ptr 管理。我认为 boost::shared_ptr 是线程安全的，所以我看不出哪里会发生重复释放（double free）。

在CRadiusClientHandler我有两个成员m_authReqRspWaitMgr和m_acctReqRspWaitMgr。根据收到的消息类型，我选择使用哪一种。

所以当这个核心转储发生时，这两个成员仍然存在。而且我只有一个CRadiusClientHandler的实例，它将在我的程序退出时被删除。

这里是我的分析。

下面是 _int_free 的反汇编代码：

0x0098bff8 <+152>: shr $0x3,%edi 
    0x0098bffb <+155>: mov %ecx,%eax 
    0x0098bffd <+157>: sub $0x2,%edi 
    0x0098c000 <+160>: mov 0x8(%eax,%edi,4),%edx 
    0x0098c004 <+164>: lea 0x8(%ecx,%edi,4),%ecx 
    0x0098c008 <+168>: mov %edi,-0x10(%ebp) 
    0x0098c00b <+171>: cmp %edx,%esi 
    0x0098c00d <+173>: je  0x98c514 <_int_free+1460> 
    0x0098c013 <+179>: mov $0xffffffff,%edi 
    0x0098c018 <+184>: jmp 0x98c02a <_int_free+202> 
    0x0098c01a <+186>: nopw 0x0(%eax,%eax,1) 
    0x0098c020 <+192>: cmp %eax,%esi 
    0x0098c022 <+194>: mov %eax,%edx 
    0x0098c024 <+196>: je  0x98c514 <_int_free+1460> 
    0x0098c02a <+202>: test %edx,%edx 
    0x0098c02c <+204>: je  0x98c037 <_int_free+215> 
=> 0x0098c02e <+206>: mov 0x4(%edx),%edi 
    0x0098c031 <+209>: shr $0x3,%edi 
    0x0098c034 <+212>: sub $0x2,%edi 
    0x0098c037 <+215>: mov %edx,0x8(%esi) 
    0x0098c03a <+218>: mov %edx,%eax 
    0x0098c03c <+220>: cmpl $0x0,%gs:0xc 
    0x0098c044 <+228>: je  0x98c047 <_int_free+231> 
    0x0098c046 <+230>: lock cmpxchg %esi,(%ecx) 
    0x0098c04a <+234>: cmp %eax,%edx 
    0x0098c04c <+236>: jne 0x98c020 <_int_free+192> 
    0x0098c04e <+238>: test %edx,%edx 
    0x0098c050 <+240>: je  0x98c05b <_int_free+251> 
    0x0098c052 <+242>: cmp -0x10(%ebp),%edi 
    0x0098c055 <+245>: jne 0x98c5a9 <_int_free+1609>

我尝试把它与 glibc-2.12.1 的 malloc.c 源代码对应起来，崩溃位置可能在这里：

set_fastchunks(av); 
    unsigned int idx = fastbin_index(size); 
    fb = &fastbin (av, idx); 

#ifdef ATOMIC_FASTBINS 
    mchunkptr fd; 
    mchunkptr old = *fb; 
    unsigned int old_idx = ~0u; 
    do 
     { 
     /* Another simple check: make sure the top of the bin is not the 
      record we are going to add (i.e., double free). */ 
     if (__builtin_expect (old == p, 0)) 
      { 
      errstr = "double free or corruption (fasttop)"; 
      goto errout; 
      } 
     if (old != NULL) 
      old_idx = fastbin_index(chunksize(old)); /* <-- maybe core dump here */ 
     p->fd = fd = old; 
     } 
    while ((old = catomic_compare_and_exchange_val_rel (fb, p, fd)) != fd); 

    if (fd != NULL && __builtin_expect (old_idx != idx, 0)) 
     { 
     errstr = "invalid fastbin entry (free)"; 
     goto errout; 
     } 
#else 
    /* Another simple check: make sure the top of the bin is not the 
     record we are going to add (i.e., double free). */ 
    if (__builtin_expect (*fb == p, 0)) 
     { 
     errstr = "double free or corruption (fasttop)"; 
     goto errout; 
     } 
    if (*fb != NULL 
     && __builtin_expect (fastbin_index(chunksize(*fb)) != idx, 0)) 
     { 
     errstr = "invalid fastbin entry (free)"; 
     goto errout; 
     } 

    p->fd = *fb; 
    *fb = p; 
#endif

似乎变量old指向一个无效的地址。

寄存器信息是：

(gdb) info register 
eax   0xb5d00010  -1244659696 
ecx   0xb5d00024  -1244659676 
edx   0xb4304ce8  -1271903000 
ebx   0xaabff4 11190260 
esp   0xb6179a84  0xb6179a84 
ebp   0xb6179ad8  0xb6179ad8 
esi   0xb5d235e0  -1244514848 
edi   0xffffffff  -1 
eip   0x98c02e 0x98c02e <_int_free+206> 
eflags   0x10286 [ PF SF IF RF ] 
cs    0x73  115 
ss    0x7b  123 
ds    0x7b  123 
es    0x7b  123 
fs    0x0  0 
gs    0x33  51 
(gdb) x /8xw 0xb4304ce8 
0xb4304ce8:  Cannot access memory at address 0xb4304ce8 
(gdb)

根据源代码和反汇编代码，我得出以下结论：

- %eax 是 av。
- %ecx 是 fb。
- %edi 是 old_idx。
- %esi 是待释放的内存块（p）。

%edi 仍然是初始值 0xFFFFFFFF，所以我认为这是第一次进入循环。根据 mchunkptr old = *fb，%edx 应该等于 %ecx 所指地址处的内容。但现在，%edx（old）是 0xb4304ce8，而 %ecx 所指地址处的内容（*fb）是 0xb5d5a958。它们并不相同。

(gdb) x /8xw 0xb5d00024 
0xb5d00024:  0xb5d5a958  0xb5dc3088  0x00000000  0x00000000 
0xb5d00034:  0x00000000  0x00000000  0x00000000  0xb45d9208 
(gdb)

而且我注意到 av 的 flags 是 0x00000002，这是否意味着没有 fastbin 块？

(gdb) x /8xw 0xb5d00010 
0xb5d00010:  0x00000000  0x00000002  0x00000000  0xb5ddc848 
0xb5d00020:  0xb5d5e4f8  0xb5d5a958  0xb5dc3088  0x00000000 
(gdb)

而且我也注意到这些评论：

/* 
    FASTCHUNKS_BIT held in max_fast indicates that there are probably 
    some fastbin chunks. It is set true on entering a chunk into any 
    fastbin, and cleared only in malloc_consolidate. 

    The truth value is inverted so that have_fastchunks will be true 
    upon startup (since statics are zero-filled), simplifying 
    initialization checks. 
*/

所以我认为，在当前线程执行完 mchunkptr old = *fb; 之后，其他线程触发了 malloc_consolidate，清空并合并了 fastbin 中的块。

这样一来，%edx 所指向的内存可能已经被 trim（归还给系统）或释放了。但这有可能吗？

我对 glibc 的内存管理一知半解，希望有人能解答我的疑惑。

这里是操作系统的信息：

[[email protected] log]# cat /etc/redhat-release 
Red Hat Enterprise Linux Server release 6.4 (Santiago) 
[[email protected] log]# uname -a 
Linux mdssdk 2.6.32-358.el6.i686 #1 SMP Tue Jan 29 11:48:01 EST 2013 i686 i686 i386 GNU/Linux 
[[email protected] log]#

最后，我下载了 glibc 的最新版本 2.24，并找到了真正的原因。这是 glibc 的一个 bug，已在 glibc 2.19 中修复。

修复后的代码如下：

set_fastchunks(av); 
unsigned int idx = fastbin_index(size); 
fb = &fastbin (av, idx); 

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */ 
mchunkptr old = *fb, old2; 
unsigned int old_idx = ~0u; 
do 
    { 
    /* Check that the top of the bin is not the record we are going to add 
     (i.e., double free). */ 
    if (__builtin_expect (old == p, 0)) 
     { 
     errstr = "double free or corruption (fasttop)"; 
     goto errout; 
     } 
    /* Check that size of fastbin chunk at the top is the same as 
     size of the chunk that we are adding. We can dereference OLD 
     only if we have the lock, otherwise it might have already been 
     deallocated. See use of OLD_IDX below for the actual check. */ 
    if (have_lock && old != NULL) 
     old_idx = fastbin_index(chunksize(old)); 
    p->fd = old2 = old; 
    } 
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2); 

if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0)) 
    { 
    errstr = "invalid fastbin entry (free)"; 
    goto errout; 
    }

来源

2016-11-07 Jun Huang

问题应该出在你的代码中，而不是 `malloc`、`boost` 或 `std::set`；应该是在 `#10 0x001d1eff in vsdk::radius::CRadiusClientHandler::handleRecv` 和 `#11 0x001e27ba in vsdk::radius::CUdpMsg::run` 之间的某处发生了重复释放（double free）。另外，这是 C++，而不是 C。 – Danh

请正确格式化您的代码 –

对不起，之前有错误，代码格式也很糟糕。我是从 malloc.c 复制的代码，其中制表符和空格混在一起，所以看起来很乱。我已经重新格式化了，并补充了一些实现代码。我找不到任何重复释放（double free）。是我哪里弄错了吗？ –

感谢大家对我的问题的回答。

我找到了真正的原因。正如我所怀疑的，_int_free 中存在一个 bug。它已在 glibc 2.19 中修复。

这是错误报告。

https://sourceware.org/bugzilla/show_bug.cgi?id=15073

来源

2016-11-09 06:45:47

在 std::set<boost::uuids::uuid> 的析构函数中，我在 _int_free 中遇到了段错误（core dump）。为什么？

回答

相关问题