根据公开组基本规格和longjump docs:运行时功能测试,那么setjmp,longjmp的,和信号掩蔽
它是未指定的longjmp()是否恢复信号掩模,叶信号掩码不变,或还原它在调用setjmp()时的值。
我认为我遇到了ARMv8 Mustang board的警告。我们通过捕获SIGILL
来执行运行时功能检测。在测试CRC32扩展时,第一个SIGILL
陷阱按预期执行。第二个SIGILL
陷阱确实不是在测试AES扩展时按预期执行。以下是它在调试器下的样子。
我不要相信代码会落入由docs调用的未定义行为。例如,不使用嵌套的信号处理程序,同一线程执行setjmp
和longjmp
魔术等。
我的问题是,如何安全地多次执行运行时功能测试?
gdb ./test.exe
...
(gdb) b TryCRC32()
Breakpoint 1 at 0x401034: file test.cc, line 92.
(gdb) b TryAES()
Breakpoint 2 at 0x401120: file test.cc, line 120.
...
(gdb) r
Starting program: /home/cryptopp/test.exe v
Breakpoint 1, TryCRC32() at test.cc:92
92 volatile bool result = true;
(gdb) n
94 SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
(gdb)
95 if (oldHandler == SIG_ERR)
(gdb)
98 if (setjmp(s_jmpNoCRC32))
(gdb)
102 word32 w=0, x=0; word16 y=0; byte z=0;
(gdb)
103 w = __crc32cw(w,x);
(gdb)
Program received signal SIGILL, Illegal instruction.
0x00000000004010b4 in __crc32cw (__b=0, __a=0)
at /usr/lib/gcc/aarch64-linux-gnu/4.9/include/arm_acle.h:57
57 return __builtin_aarch64_crc32cw (__a, __b);
(gdb) c
Continuing.
Breakpoint 2, TryAES() at test.cc:120
120 volatile bool result = true;
(gdb) n
122 SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
(gdb)
123 if (oldHandler == SIG_ERR)
(gdb)
126 if (setjmp(s_jmpNoAES))
(gdb)
130 uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
(gdb)
131 uint8x16_t r1 = vaeseq_u8(data, key);
(gdb)
Program received signal SIGILL, Illegal instruction.
0x0000000000400a64 in vaeseq_u8 (data=..., key=...)
at /usr/lib/gcc/aarch64-linux-gnu/4.9/include/arm_neon.h:13731
13731 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
(gdb) c
Continuing.
Program terminated with signal SIGILL, Illegal instruction.
The program no longer exists.
这里的测试程序。它被编译以:
$ export CXXFLAGS="-g3 -O0 -march=armv8-a+crc+crypto"
$ g++ $CXXFLAGS test.cc -o test.exe
-march=armv8-a+crc+crypto
的装置等__ARM_NEON
和__ARM_FEATURE_CRYPTO
预处理器符号定义。
声明像static volatile bool TryNEON()
是野马揭示的另一个问题(即海湾合作委员会正在优化掉支票)。它导致程序与SIGILL
一起死亡。不要因此而分心,因为它现在只是一个止损点。
#include <signal.h>
#include <setjmp.h>
#include <stdint.h>
#include <arm_neon.h>
#include <arm_acle.h>
#include <iostream>
#define UNUSED(x) ((void)(x))
typedef uint8_t byte;
typedef uint16_t word16;
typedef uint32_t word32;
typedef uint64_t word64;
typedef void (*SigHandler)(int);
extern "C" {
static jmp_buf s_jmpNoNEON;
static void SigIllHandlerNEON(int)
{
longjmp(s_jmpNoNEON, 1);
}
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
static jmp_buf s_jmpNoAES;
static void SigIllHandlerAES(int)
{
longjmp(s_jmpNoAES, 1);
}
static jmp_buf s_jmpNoSHA1;
static void SigIllHandlerSHA1(int)
{
longjmp(s_jmpNoSHA1, 1);
}
static jmp_buf s_jmpNoSHA2;
static void SigIllHandlerSHA2(int)
{
longjmp(s_jmpNoSHA2, 1);
}
};
static volatile bool TryNEON()
{
#if defined(__ARM_NEON)
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoNEON))
result = false;
else
{
uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
}
signal(SIGILL, oldHandler);
return result;
#else
return false;
#endif // __ARM_NEON
}
static volatile bool TryCRC32()
{
#if defined(__ARM_FEATURE_CRC32)
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=0; word16 y=0; byte z=0;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
}
signal(SIGILL, oldHandler);
return result;
#else
return false;
#endif // __ARM_FEATURE_CRC32
}
static volatile bool TryAES()
{
#if defined(__ARM_FEATURE_CRYPTO)
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoAES))
result = false;
else
{
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
UNUSED(r1), UNUSED(r2);
}
signal(SIGILL, oldHandler);
return result;
#else
return false;
#endif // __ARM_FEATURE_CRYPTO
}
static volatile bool TrySHA1()
{
#if defined(__ARM_FEATURE_CRYPTO)
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoSHA1))
result = false;
else
{
uint32x4_t data = vdupq_n_u32(0);
uint32_t hash = 0x0;
uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
UNUSED(r1), UNUSED(r2), UNUSED(r3);
}
signal(SIGILL, oldHandler);
return result;
#else
return false;
#endif // __ARM_FEATURE_CRYPTO
}
static volatile bool TrySHA2()
{
#if defined(__ARM_FEATURE_CRYPTO)
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoSHA2))
result = false;
else
{
uint32x4_t data = vdupq_n_u32(0);
uint32x4_t hash = vdupq_n_u32(0);
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
uint32x4_t r3 = vsha256su0q_u32 (data, data);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
UNUSED(r1), UNUSED(r2), UNUSED(r3), UNUSED(r4);
}
signal(SIGILL, oldHandler);
return result;
#else
return false;
#endif // __ARM_FEATURE_CRYPTO
}
bool hasNEON = TryNEON();
bool hasCRC32 = TryCRC32();
bool hasAES = TryAES();
bool hasSHA1 = TrySHA1();
bool hasSHA2 = TrySHA2();
int main(int argc, char* argv[])
{
std::cout << "Has NEON: " << hasNEON << std::endl;
std::cout << "Has CRC32: " << hasCRC32 << std::endl;
std::cout << "Has AES: " << hasAES << std::endl;
std::cout << "Has SHA1: " << hasSHA1 << std::endl;
std::cout << "Has SHA2: " << hasSHA2 << std::endl;
return 0;
}
切圆,这将是一个简单得多,以实现使用'getauxval()'最后的结果,因为[有所有这些功能hwcaps(https://开头的git .kernel.org/CGT文件/ LINUX /内核/的git /托沃兹/ linux.git /树/弓/ arm64 /包括/ uapi/ASM/hwcap.h)。 – Notlikethat
这就是说,为什么“longjmp”舞蹈是必要的?难道你不能只是初始化全局变量为'true',让每个信号处理器简单地'hasX = false;'并且从'main'的顶部开始测试? – Notlikethat
@Notlikethat - 我厌倦了使用'getauxval',但我们遇到了两个问题。首先,它不像我们想要的那样便携,所以我们几乎每个平台都要特殊情况。可读性*真的受到了影响,代码非常混乱。其次,缺少在Aarch64下的Aarch32执行环境中的Crypto扩展的hwcap定义。这听起来可能不好,但它在现场很重要。例如,HTC的Desire是第一批采用ARMv8处理器的手机之一,但他们为Aarch32执行环境作弊和配置。 – jww