你引用的所有文件中都带有汇编代码片段(内联汇编,inline assembler),这些片段是一些 C/C++ 代码在自身实现中使用的(正如 JVM 专家 apangin 所指出的,主要是 GC 代码)。这里确实存在差别:HotSpot 的 Linux、Solaris 和 BSD x86_64 版本在 HotSpot 自身代码中带有预取,而 Windows 版本中预取被禁用/未实现。这一点部分令人奇怪,部分无法解释其原因,还可能使 JVM 在 Windows 上稍慢一些(几个百分点;在没有硬件预取的平台上影响更大),不过这仍然无助于 Sun/Oracle 卖出更多 Solaris 付费支持合同。Ross 还猜测 MS C++ 编译器可能不支持这种内联汇编语法,但 _mm_prefetch
应该是可以用的(谁来开一个 JDK bug,把它加到该文件里?)。JVM HotSpot 带有 JIT,JIT 编译的代码由 JIT 以字节形式发出(生成)(JIT 虽然也可以把自身函数中的代码复制到生成的代码里,或者发出对支持函数的调用,但在 HotSpot 中预取指令是以字节形式直接发出的)。怎样才能查到它是如何发出的?一个简单的在线办法是找一份可在线搜索的 jdk8u 副本(最好是带交叉引用的站点,例如 metager),比如 GitHub 上的 https://github.com/JetBrains/jdk8u_hotspot ,然后搜索 prefetch、prefetch emit、prefetchr 或 lir_prefetchr。可以找到一些相关结果:
JVM 的 C1 编译器/LIR 实际发出的字节见 jdk8u_hotspot/src/cpu/x86/vm/assembler_x86.cpp:
// Emits the leading bytes shared by all prefetch encodings in this file:
// first whatever prefix(src) produces for the address operand (presumably any
// required REX prefix -- confirm against prefix()), then the byte 0x0F, which
// is the x86 two-byte-opcode escape. Callers append their own opcode byte and
// operand encoding afterwards.
void Assembler::prefetch_prefix(Address src) {
prefix(src);
emit_int8(0x0F);
}
// Emits a PREFETCHNTA instruction for the memory operand `src`.
// Byte sequence: prefetch_prefix(src), opcode byte 0x18, then the operand
// encoding with rax in the register slot.
void Assembler::prefetchnta(Address src) {
// SSE support is asserted only where NOT_LP64 expands its argument
// (presumably non-64-bit builds -- confirm against the macro definition).
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
// Scoped marker object covering the bytes emitted below.
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
// rax is not a real register operand here; it stands for the value 0 in the
// register slot, which selects the NTA hint within opcode 0x18.
emit_operand(rax, src); // 0, src
}
// Emits the 3DNow!-style read prefetch (PREFETCH) for the memory operand `src`.
// Byte sequence: prefetch_prefix(src), opcode byte 0x0D, then the operand
// encoding with rax in the register slot.
void Assembler::prefetchr(Address src) {
// Unlike the 0x18-based prefetches, this assert is unconditional: the CPU
// must report 3DNow! prefetch support.
assert(VM_Version::supports_3dnow_prefetch(), "must support");
// Scoped marker object covering the bytes emitted below.
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x0D);
// rax stands for the value 0 in the register slot (read variant of 0x0D).
emit_operand(rax, src); // 0, src
}
// Emits a PREFETCHT0 instruction for the memory operand `src`.
// Byte sequence: prefetch_prefix(src), opcode byte 0x18, then the operand
// encoding with rcx in the register slot.
void Assembler::prefetcht0(Address src) {
// SSE support is asserted only where NOT_LP64 expands its argument
// (presumably non-64-bit builds -- confirm against the macro definition).
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
// Scoped marker object covering the bytes emitted below.
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
// rcx stands for the value 1 in the register slot, selecting the T0 hint.
emit_operand(rcx, src); // 1, src
}
// Emits a PREFETCHT1 instruction for the memory operand `src`.
// Byte sequence: prefetch_prefix(src), opcode byte 0x18, then the operand
// encoding with rdx in the register slot.
void Assembler::prefetcht1(Address src) {
// SSE support is asserted only where NOT_LP64 expands its argument
// (presumably non-64-bit builds -- confirm against the macro definition).
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
// Scoped marker object covering the bytes emitted below.
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
// rdx stands for the value 2 in the register slot, selecting the T1 hint.
emit_operand(rdx, src); // 2, src
}
// Emits a PREFETCHT2 instruction for the memory operand `src`.
// Byte sequence: prefetch_prefix(src), opcode byte 0x18, then the operand
// encoding with rbx in the register slot.
void Assembler::prefetcht2(Address src) {
// SSE support is asserted only where NOT_LP64 expands its argument
// (presumably non-64-bit builds -- confirm against the macro definition).
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
// Scoped marker object covering the bytes emitted below.
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
// rbx stands for the value 3 in the register slot, selecting the T2 hint.
emit_operand(rbx, src); // 3, src
}
// Emits a PREFETCHW (prefetch with intent to write) instruction for the
// memory operand `src`.
// Byte sequence: prefetch_prefix(src), opcode byte 0x0D, then the operand
// encoding with rcx in the register slot.
void Assembler::prefetchw(Address src) {
// Unconditional assert: the CPU must report 3DNow! prefetch support.
assert(VM_Version::supports_3dnow_prefetch(), "must support");
// Scoped marker object covering the bytes emitted below.
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x0D);
// rcx stands for the value 1 in the register slot; with opcode 0x0D this
// distinguishes the write variant from prefetchr (which uses 0).
emit_operand(rcx, src); // 1, src
}
它们在 C1 LIR 中的使用位置:src/share/vm/c1/c1_LIRAssembler.cpp
void LIR_Assembler::emit_op1(LIR_Op1* op) {
switch (op->code()) {
...
case lir_prefetchr:
prefetchr(op->in_opr());
break;
case lir_prefetchw:
prefetchw(op->in_opr());
break;
现在我们知道了操作码 lir_prefetchr,可以用它(或 lir_prefetchw)继续搜索,结果只在 src/share/vm/c1/c1_LIR.cpp 中找到一处使用:
// Appends one single-operand prefetch LIR operation for `addr` to this list.
//   addr     - address to prefetch; wrapped via LIR_OprFact::address().
//   is_store - selects the opcode: lir_prefetchw when true, lir_prefetchr
//              when false.
void LIR_List::prefetch(LIR_Address* addr, bool is_store) {
append(new LIR_Op1(
is_store ? lir_prefetchw : lir_prefetchr,
LIR_OprFact::address(addr)));
}
预取指令还在另一处有定义(用于 C2,正如 apangin 所指出的),即 src/cpu/x86/vm/x86_64.ad:
// Prefetch instructions. ...
// Read-prefetch rule: applies to a PrefetchRead of memory operand `mem` only
// when ReadPrefetchInstr==3, and encodes it by calling prefetchr() on the
// operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchr(memory mem) %{
predicate(ReadPrefetchInstr==3);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
ins_encode %{
__ prefetchr($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Read-prefetch rule: applies to a PrefetchRead of memory operand `mem` only
// when ReadPrefetchInstr==0, and encodes it by calling prefetchnta() on the
// operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchrNTA(memory mem) %{
predicate(ReadPrefetchInstr==0);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Read-prefetch rule: applies to a PrefetchRead of memory operand `mem` only
// when ReadPrefetchInstr==1, and encodes it by calling prefetcht0() on the
// operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchrT0(memory mem) %{
predicate(ReadPrefetchInstr==1);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Read-prefetch rule: applies to a PrefetchRead of memory operand `mem` only
// when ReadPrefetchInstr==2, and encodes it by calling prefetcht2() on the
// operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchrT2(memory mem) %{
predicate(ReadPrefetchInstr==2);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Write-prefetch rule: applies to a PrefetchWrite of memory operand `mem`.
// It has no predicate(), so unlike the read and allocation rules it is not
// gated on a flag within this block. Although it matches a write prefetch,
// it encodes via prefetchnta(), not prefetchw().
// Cost 125, pipeline class ialu_mem.
instruct prefetchwNTA(memory mem) %{
match(PrefetchWrite mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Prefetch instructions for allocation.
// Allocation-prefetch rule: applies to a PrefetchAllocation of memory operand
// `mem` only when AllocatePrefetchInstr==3, and encodes it by calling
// prefetchw() on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchAlloc(memory mem) %{
predicate(AllocatePrefetchInstr==3);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
ins_encode %{
__ prefetchw($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Allocation-prefetch rule: applies to a PrefetchAllocation of memory operand
// `mem` only when AllocatePrefetchInstr==0, and encodes it by calling
// prefetchnta() on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchAllocNTA(memory mem) %{
predicate(AllocatePrefetchInstr==0);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Allocation-prefetch rule: applies to a PrefetchAllocation of memory operand
// `mem` only when AllocatePrefetchInstr==1, and encodes it by calling
// prefetcht0() on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchAllocT0(memory mem) %{
predicate(AllocatePrefetchInstr==1);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Allocation-prefetch rule: applies to a PrefetchAllocation of memory operand
// `mem` only when AllocatePrefetchInstr==2, and encodes it by calling
// prefetcht2() on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchAllocT2(memory mem) %{
predicate(AllocatePrefetchInstr==2);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
另外,Solaris x86_64 也有预取:vm/solaris_x86_64.il(https://github.com/openjdk-mirror/jdk7u-hotspot/blob/50bdefc3afe944ca74c3093e7448d6b889cd20d1/src/os_cpu/solaris_x86/vm/solaris_x86_64.il#L122);但所有列出的这些预取并不是用来在生成的代码中发出预取指令的,它们是 JVM HotSpot 自身机器代码所使用的预取。在生成的(JIT 编译的)代码中发出预取的逻辑位于所有操作系统共用的 x86 代码中:https://github.com/openjdk-mirror/jdk7u-hotspot/blob/50bdefc3afe944ca74c3093e7448d6b889cd20d1/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp#L1335 ,即 'LIR_Assembler::prefetchr' / 'LIR_Assembler::prefetchw' – osgx
谢谢,那至少解释了一些事情。也许添加这个作为评论,我会接受它。我仍然在寻找JVM决定插入预取指令的部分。 – naze