Skip to content
This repository was archived by the owner on Aug 27, 2022. It is now read-only.
/ lanai Public archive

Commit ccdde49

Browse files
committed Feb 24, 2020
8234160: Enable optimized mitigation for Intel jcc erratum in C2
Reviewed-by: thartmann, vlivanov, pliden
1 parent 0f21211 commit ccdde49

File tree

7 files changed

+344
-16
lines changed

7 files changed

+344
-16
lines changed
 
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,149 @@
1+
/*
2+
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#include "precompiled.hpp"
26+
#include "asm/macroAssembler.hpp"
27+
#include "c2_intelJccErratum_x86.hpp"
28+
#include "opto/cfgnode.hpp"
29+
#include "opto/compile.hpp"
30+
#include "opto/machnode.hpp"
31+
#include "opto/node.hpp"
32+
#include "opto/regalloc.hpp"
33+
#include "utilities/align.hpp"
34+
#include "utilities/debug.hpp"
35+
36+
// Compute which 32 byte boundary an address corresponds to
37+
uintptr_t IntelJccErratum::boundary(uintptr_t addr) {
38+
return addr >> 5;
39+
}
40+
41+
bool IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc) {
42+
int jcc_size = int(end_pc - start_pc);
43+
assert(jcc_size <= largest_jcc_size(), "invalid jcc size: %d", jcc_size);
44+
return boundary(start_pc) != boundary(end_pc);
45+
}
46+
47+
bool IntelJccErratum::is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index) {
48+
if (node->is_MachCall() && !node->is_MachCallJava()) {
49+
return true;
50+
}
51+
return node_index == (block->number_of_nodes() - 1);
52+
}
53+
54+
// Tags the node with Flag_intel_jcc_erratum and reports the node's size as
// computed by node->size(regalloc).
int IntelJccErratum::jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc) {
  node->add_flag(Node::Flag_intel_jcc_erratum);
  const int tainted_size = node->size(regalloc);
  return tainted_size;
}
58+
59+
// Walks all mach nodes of all blocks in the CFG and tags every node for which
// is_jcc_erratum_branch() holds with Flag_intel_jcc_erratum. If such a branch
// is neither a return nor a call, and the mach node seen immediately before
// it is one of its inputs, that preceding node is tagged as well, since it
// might get macro fused with the branch.
//
// C is currently unused. The return value is the sum of the sizes of all
// tagged nodes, which serves as a conservative estimate of the nops needed.
int IntelJccErratum::tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc) {
  ResourceMark rm;
  int nop_size = 0;
  // The most recently visited mach node that was not itself a root branch;
  // NULL right after a root branch (and before the first mach node).
  MachNode* last_m = NULL;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }
      MachNode* const m = node->as_Mach();
      if (!is_jcc_erratum_branch(block, m, j)) {
        last_m = m;
        continue;
      }

      // Found a root jcc erratum branch, flag it as problematic
      nop_size += jcc_erratum_taint_node(m, regalloc);

      // We might fuse a problematic jcc erratum branch with a preceding
      // ALU instruction - we must catch such problematic macro fusions
      // and flag the ALU instruction as problematic too.
      // The NULL check keeps a NULL input edge from matching an unset last_m,
      // which would otherwise hand NULL to jcc_erratum_taint_node().
      if (last_m != NULL && !m->is_MachReturn() && !m->is_MachCall()) {
        for (uint k = 1; k < m->req(); ++k) {
          if (m->in(k) == last_m) {
            // Flag fused conditions too
            nop_size += jcc_erratum_taint_node(last_m, regalloc);
          }
        }
      }
      last_m = NULL;
    }
  }
  return nop_size;
}
96+
97+
int IntelJccErratum::compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc) {
98+
int jcc_size = mach->size(regalloc);
99+
if (index_in_block < block->number_of_nodes() - 1) {
100+
Node* next = block->get_node(index_in_block + 1);
101+
if (next->is_Mach() && (next->as_Mach()->flags() & Node::Flag_intel_jcc_erratum)) {
102+
jcc_size += mach->size(regalloc);
103+
}
104+
}
105+
if (jcc_size > largest_jcc_size()) {
106+
// Let's not try fixing this for nodes that seem unreasonably large
107+
return false;
108+
}
109+
if (is_crossing_or_ending_at_32_byte_boundary(current_offset, current_offset + jcc_size)) {
110+
return int(align_up(current_offset, 32) - current_offset);
111+
} else {
112+
return 0;
113+
}
114+
}
115+
116+
#define __ _masm.
117+
118+
uintptr_t IntelJccErratumAlignment::pc() {
119+
return (uintptr_t)__ pc();
120+
}
121+
122+
// Records the current pc and, on CPUs reported as having the Intel jcc
// erratum, emits nops in front of the code that follows:
//  - while measuring with scratch emission, jcc_size nops are emitted as a
//    conservative guess for the size of the alignment;
//  - otherwise, if jcc_size bytes starting at the current pc would cross or
//    end at a 32 byte boundary, nops are emitted up to the next 32 byte
//    boundary and the recorded start pc is refreshed.
IntelJccErratumAlignment::IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size) :
    _masm(masm),
    _start_pc(pc()) {
  if (!VM_Version::has_intel_jcc_erratum()) {
    return;
  }

  if (Compile::current()->in_scratch_emit_size()) {
    // When we measure the size of this 32 byte alignment, we apply a conservative guess.
    __ nop(jcc_size);
    return;
  }

  if (!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, _start_pc + jcc_size)) {
    return;
  }

  // The affected branch might get slowed down by micro code mitigations
  // as it could be susceptible to the erratum. Place nops until the next
  // 32 byte boundary to make sure the branch will be cached.
  const uintptr_t next_boundary = align_up(_start_pc, 32);
  const int alignment_nops = (int)(next_boundary - _start_pc);
  __ nop(alignment_nops);
  _start_pc = pc();
}
141+
142+
// On CPUs reported as having the erratum, and outside of scratch size
// measurement, asserts that the code emitted since the recorded start pc
// neither crosses nor ends at a 32 byte boundary, i.e. that the jcc_size
// passed to the constructor was a valid estimate.
IntelJccErratumAlignment::~IntelJccErratumAlignment() {
  if (VM_Version::has_intel_jcc_erratum() &&
      !Compile::current()->in_scratch_emit_size()) {
    assert(!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, pc()), "Invalid jcc_size estimate");
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef CPU_X86_INTELJCCERRATUM_X86_HPP
26+
#define CPU_X86_INTELJCCERRATUM_X86_HPP
27+
28+
#include "memory/allocation.hpp"
29+
#include "utilities/globalDefinitions.hpp"
30+
31+
class Block;
32+
class Compile;
33+
class MachNode;
34+
class MacroAssembler;
35+
class PhaseCFG;
36+
class PhaseRegAlloc;
37+
38+
class IntelJccErratum : public AllStatic {
39+
private:
40+
// Compute which 32 byte boundary an address corresponds to
41+
static uintptr_t boundary(uintptr_t addr);
42+
static int jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc);
43+
44+
public:
45+
static bool is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc);
46+
static bool is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index);
47+
// Analyze JCC erratum branches. Affected nodes get tagged with Flag_intel_jcc_erratum.
48+
// The function returns a conservative estimate of all required nops on all mach nodes.
49+
static int tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc);
50+
// Computes the exact padding for a mach node
51+
static int compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc);
52+
static int largest_jcc_size() { return 20; }
53+
};
54+
55+
class IntelJccErratumAlignment {
56+
private:
57+
MacroAssembler& _masm;
58+
uintptr_t _start_pc;
59+
60+
uintptr_t pc();
61+
62+
public:
63+
IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size);
64+
~IntelJccErratumAlignment();
65+
};
66+
67+
#endif // CPU_X86_INTELJCCERRATUM_X86_HPP
68+

‎src/hotspot/cpu/x86/gc/z/z_x86_64.ad

+20-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
33
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
//
55
// This code is free software; you can redistribute it and/or modify it
@@ -30,16 +30,29 @@ source_hpp %{
3030

3131
source %{
3232

33+
#include "c2_intelJccErratum_x86.hpp"
34+
3335
static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
3436
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
35-
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
36-
__ jcc(Assembler::notZero, *stub->entry());
37+
{
38+
IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
39+
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
40+
__ jcc(Assembler::notZero, *stub->entry());
41+
}
3742
__ bind(*stub->continuation());
3843
}
3944

40-
static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
45+
static void z_load_barrier_cmpxchg(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, Label& good) {
4146
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
42-
__ jmp(*stub->entry());
47+
{
48+
IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
49+
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
50+
__ jcc(Assembler::zero, good);
51+
}
52+
{
53+
IntelJccErratumAlignment intel_alignment(_masm, 5 /* jcc_size */);
54+
__ jmp(*stub->entry());
55+
}
4356
__ bind(*stub->continuation());
4457
}
4558

@@ -101,9 +114,7 @@ instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP t
101114
__ cmpxchgptr($newval$$Register, $mem$$Address);
102115
if (barrier_data() != ZLoadBarrierElided) {
103116
Label good;
104-
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
105-
__ jcc(Assembler::zero, good);
106-
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
117+
z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
107118
__ movptr($oldval$$Register, $tmp$$Register);
108119
__ lock();
109120
__ cmpxchgptr($newval$$Register, $mem$$Address);
@@ -133,9 +144,7 @@ instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlags
133144
__ cmpxchgptr($newval$$Register, $mem$$Address);
134145
if (barrier_data() != ZLoadBarrierElided) {
135146
Label good;
136-
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
137-
__ jcc(Assembler::zero, good);
138-
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
147+
z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
139148
__ movptr($oldval$$Register, $tmp$$Register);
140149
__ lock();
141150
__ cmpxchgptr($newval$$Register, $mem$$Address);

‎src/hotspot/cpu/x86/vm_version_x86.cpp

+67
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
int VM_Version::_cpu;
4242
int VM_Version::_model;
4343
int VM_Version::_stepping;
44+
bool VM_Version::_has_intel_jcc_erratum;
4445
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
4546

4647
// Address of instruction which causes SEGV
@@ -720,6 +721,8 @@ void VM_Version::get_processor_features() {
720721
}
721722
}
722723

724+
_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
725+
723726
char buf[256];
724727
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
725728
cores_per_cpu(), threads_per_core(),
@@ -1698,6 +1701,70 @@ bool VM_Version::use_biased_locking() {
16981701
return UseBiasedLocking;
16991702
}
17001703

1704+
bool VM_Version::compute_has_intel_jcc_erratum() {
1705+
if (!is_intel_family_core()) {
1706+
// Only Intel CPUs are affected.
1707+
return false;
1708+
}
1709+
// The following table of affected CPUs is based on the following document released by Intel:
1710+
// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1711+
switch (_model) {
1712+
case 0x8E:
1713+
// 06_8EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
1714+
// 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U
1715+
// 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U 23e
1716+
// 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Y
1717+
// 06_8EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake U43e
1718+
// 06_8EH | B | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
1719+
// 06_8EH | C | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
1720+
// 06_8EH | C | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U42
1721+
// 06_8EH | C | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
1722+
return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1723+
case 0x4E:
1724+
// 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake U
1725+
// 06_4E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake U23e
1726+
// 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake Y
1727+
return _stepping == 0x3;
1728+
case 0x55:
1729+
// 06_55H | 4 | Intel® Xeon® Processor D Family based on microarchitecture code name Skylake D, Bakerville
1730+
// 06_55H | 4 | Intel® Xeon® Scalable Processors based on microarchitecture code name Skylake Server
1731+
// 06_55H | 4 | Intel® Xeon® Processor W Family based on microarchitecture code name Skylake W
1732+
// 06_55H | 4 | Intel® Core™ X-series Processors based on microarchitecture code name Skylake X
1733+
// 06_55H | 4 | Intel® Xeon® Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1734+
// 06_55 | 7 | 2nd Generation Intel® Xeon® Scalable Processors based on microarchitecture code name Cascade Lake (server)
1735+
return _stepping == 0x4 || _stepping == 0x7;
1736+
case 0x5E:
1737+
// 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake H
1738+
// 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake S
1739+
return _stepping == 0x3;
1740+
case 0x9E:
1741+
// 06_9EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake G
1742+
// 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake H
1743+
// 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake S
1744+
// 06_9EH | 9 | Intel® Core™ X-series Processors based on microarchitecture code name Kaby Lake X
1745+
// 06_9EH | 9 | Intel® Xeon® Processor E3 v6 Family Kaby Lake Xeon E3
1746+
// 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H
1747+
// 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S
1748+
// 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1749+
// 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1750+
// 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1751+
// 06_9EH | B | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1752+
// 06_9EH | B | Intel® Celeron® Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1753+
// 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1754+
// 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1755+
return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1756+
case 0xA6:
1757+
// 06_A6H | 0 | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U62
1758+
return _stepping == 0x0;
1759+
case 0xAE:
1760+
// 06_AEH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1761+
return _stepping == 0xA;
1762+
default:
1763+
// If we are running on another intel machine not recognized in the table, we are okay.
1764+
return false;
1765+
}
1766+
}
1767+
17011768
// On Xen, the cpuid instruction returns
17021769
// eax / registers[0]: Version of Xen
17031770
// ebx / registers[1]: chars 'XenV'

0 commit comments

Comments
 (0)
This repository has been archived.