Skip to content

Commit f353fcf

Browse files
robcaslozTobiHartmann
authored andcommittedJan 27, 2021
8258894: C2: Forbid GCM to move stores into loops
Prevent GCM from placing memory-writing nodes (such as stores) into loops deeper than their home loop (determined by their control input). Such placements are invalid, as they cause memory definitions to interfere, and risk causing miscompilations. This change complements JDK-8255763, which only addresses invalid placements in irreducible CFGs. Add control input to stores in generated stubs to ensure that all memory-writing nodes have control inputs from which their home block can be derived. Add a battery of simplified fuzzer test cases where, before this change, GCM moves stores into deeper loops. Reviewed-by: thartmann, kvn
1 parent ac276bb commit f353fcf

File tree

7 files changed

+233
-46
lines changed

7 files changed

+233
-46
lines changed
 

‎src/hotspot/share/opto/block.cpp

+21-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -1207,9 +1207,26 @@ void PhaseCFG::dump_headers() {
12071207
}
12081208
}
12091209
}
1210+
#endif // !PRODUCT
12101211

1211-
void PhaseCFG::verify() const {
12121212
#ifdef ASSERT
1213+
void PhaseCFG::verify_memory_writer_placement(const Block* b, const Node* n) const {
1214+
if (!n->is_memory_writer()) {
1215+
return;
1216+
}
1217+
CFGLoop* home_or_ancestor = find_block_for_node(n->in(0))->_loop;
1218+
bool found = false;
1219+
do {
1220+
if (b->_loop == home_or_ancestor) {
1221+
found = true;
1222+
break;
1223+
}
1224+
home_or_ancestor = home_or_ancestor->parent();
1225+
} while (home_or_ancestor != NULL);
1226+
assert(found, "block b is not in n's home loop or an ancestor of it");
1227+
}
1228+
1229+
void PhaseCFG::verify() const {
12131230
// Verify sane CFG
12141231
for (uint i = 0; i < number_of_blocks(); i++) {
12151232
Block* block = get_block(i);
@@ -1221,6 +1238,7 @@ void PhaseCFG::verify() const {
12211238
if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
12221239
assert(j == 1 || block->get_node(j-1)->is_Phi(), "CreateEx must be first instruction in block");
12231240
}
1241+
verify_memory_writer_placement(block, n);
12241242
if (n->needs_anti_dependence_check()) {
12251243
verify_anti_dependences(block, n);
12261244
}
@@ -1263,9 +1281,8 @@ void PhaseCFG::verify() const {
12631281
assert(block->_num_succs == 2, "Conditional branch must have two targets");
12641282
}
12651283
}
1266-
#endif
12671284
}
1268-
#endif
1285+
#endif // ASSERT
12691286

12701287
UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
12711288
Copy::zero_to_bytes( _indices, sizeof(uint)*max );

‎src/hotspot/share/opto/block.hpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -620,11 +620,15 @@ class PhaseCFG : public Phase {
620620
// Debugging print of CFG
621621
void dump( ) const; // CFG only
622622
void _dump_cfg( const Node *end, VectorSet &visited ) const;
623-
void verify() const;
624623
void dump_headers();
625624
#else
626625
bool trace_opto_pipelining() const { return false; }
627626
#endif
627+
628+
// Check that block b is in the home loop (or an ancestor) of n, if n is a
629+
// memory writer.
630+
void verify_memory_writer_placement(const Block* b, const Node* n) const NOT_DEBUG_RETURN;
631+
void verify() const NOT_DEBUG_RETURN;
628632
};
629633

630634

@@ -719,6 +723,7 @@ class CFGLoop : public CFGElement {
719723
double trip_count() const { return 1.0 / _exit_prob; }
720724
virtual bool is_loop() { return true; }
721725
int id() { return _id; }
726+
int depth() { return _depth; }
722727

723728
#ifndef PRODUCT
724729
void dump( ) const;

‎src/hotspot/share/opto/compile.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2748,7 +2748,7 @@ void Compile::Code_Gen() {
27482748

27492749
print_method(PHASE_GLOBAL_CODE_MOTION, 2);
27502750
NOT_PRODUCT( verify_graph_edges(); )
2751-
debug_only( cfg.verify(); )
2751+
cfg.verify();
27522752
}
27532753

27542754
PhaseChaitin regalloc(unique(), cfg, matcher, false);

‎src/hotspot/share/opto/gcm.cpp

+43-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -1163,6 +1163,14 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
11631163
if (mach && LCA == root_block)
11641164
break;
11651165

1166+
if (self->is_memory_writer() &&
1167+
(LCA->_loop->depth() > early->_loop->depth())) {
1168+
// LCA is an invalid placement for a memory writer: choosing it would
1169+
// cause memory interference, as illustrated in schedule_late().
1170+
continue;
1171+
}
1172+
verify_memory_writer_placement(LCA, self);
1173+
11661174
uint start_lat = get_latency_for_node(LCA->head());
11671175
uint end_idx = LCA->end_idx();
11681176
uint end_lat = get_latency_for_node(LCA->get_node(end_idx));
@@ -1250,6 +1258,17 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
12501258
if( self->pinned() ) // Pinned in block?
12511259
continue;
12521260

1261+
#ifdef ASSERT
1262+
// Assert that memory writers (e.g. stores) have a "home" block (the block
1263+
// given by their control input), and that this block corresponds to their
1264+
// earliest possible placement. This guarantees that
1265+
// hoist_to_cheaper_block() will always have at least one valid choice.
1266+
if (self->is_memory_writer()) {
1267+
assert(find_block_for_node(self->in(0)) == early,
1268+
"The home of a memory writer must also be its earliest placement");
1269+
}
1270+
#endif
1271+
12531272
MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
12541273
if (mach) {
12551274
switch (mach->ideal_Opcode()) {
@@ -1274,13 +1293,12 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
12741293
default:
12751294
break;
12761295
}
1277-
if (C->has_irreducible_loop() && self->bottom_type()->has_memory()) {
1278-
// If the CFG is irreducible, keep memory-writing nodes as close as
1279-
// possible to their original block (given by the control input). This
1280-
// prevents PhaseCFG::hoist_to_cheaper_block() from placing such nodes
1281-
// into descendants of their original loop, as in the following example:
1296+
if (C->has_irreducible_loop() && self->is_memory_writer()) {
1297+
// If the CFG is irreducible, place memory writers in their home block.
1298+
// This prevents hoist_to_cheaper_block() from accidentally placing such
1299+
// nodes into deeper loops, as in the following example:
12821300
//
1283-
// Original placement of store in B1 (loop L1):
1301+
// Home placement of store in B1 (loop L1):
12841302
//
12851303
// B1 (L1):
12861304
// m1 <- ..
@@ -1301,12 +1319,16 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
13011319
// B3 (L1):
13021320
// .. <- .. m2, ..
13031321
//
1304-
// This "hoist inversion" can happen due to CFGLoop::compute_freq()'s
1305-
// inaccurate estimation of frequencies for irreducible CFGs, which can
1306-
// lead to for example assigning B1 and B3 a higher frequency than B2.
1322+
// This "hoist inversion" can happen due to different factors such as
1323+
// inaccurate estimation of frequencies for irreducible CFGs, and loops
1324+
// with always-taken exits in reducible CFGs. In the reducible case,
1325+
// hoist inversion is prevented by discarding invalid blocks (those in
1326+
// deeper loops than the home block). In the irreducible case, the
1327+
// invalid blocks cannot be identified due to incomplete loop nesting
1328+
// information, hence a conservative solution is taken.
13071329
#ifndef PRODUCT
13081330
if (trace_opto_pipelining()) {
1309-
tty->print_cr("# Irreducible loops: schedule in earliest block B%d:",
1331+
tty->print_cr("# Irreducible loops: schedule in home block B%d:",
13101332
early->_pre_order);
13111333
self->dump();
13121334
}
@@ -1359,6 +1381,16 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
13591381
return;
13601382
}
13611383

1384+
if (self->is_memory_writer()) {
1385+
// If the LCA of a memory writer is a descendant of its home loop, hoist
1386+
// it into a valid placement.
1387+
while (LCA->_loop->depth() > early->_loop->depth()) {
1388+
LCA = LCA->_idom;
1389+
}
1390+
assert(LCA != NULL, "a valid LCA must exist");
1391+
verify_memory_writer_placement(LCA, self);
1392+
}
1393+
13621394
// If there is no opportunity to hoist, then we're done.
13631395
// In stress mode, try to hoist even the single operations.
13641396
bool try_to_hoist = StressGCM || (LCA != early);

‎src/hotspot/share/opto/generateOptoStub.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ void GraphKit::gen_stub(address C_function,
101101
//
102102
Node *adr_sp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_sp_offset()));
103103
Node *last_sp = frameptr();
104-
store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias, MemNode::unordered);
104+
store_to_memory(control(), adr_sp, last_sp, T_ADDRESS, NoAlias, MemNode::unordered);
105105

106106
// Set _thread_in_native
107107
// The order of stores into TLS is critical! Setting _thread_in_native MUST
@@ -221,12 +221,12 @@ void GraphKit::gen_stub(address C_function,
221221
//-----------------------------
222222

223223
// Clear last_Java_sp
224-
store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
224+
store_to_memory(control(), adr_sp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
225225
// Clear last_Java_pc
226-
store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias, MemNode::unordered);
226+
store_to_memory(control(), adr_last_Java_pc, null(), T_ADDRESS, NoAlias, MemNode::unordered);
227227
#if (defined(IA64) && !defined(AIX))
228228
Node* adr_last_Java_fp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_fp_offset()));
229-
store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
229+
store_to_memory(control(), adr_last_Java_fp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
230230
#endif
231231

232232
// For is-fancy-jump, the C-return value is also the branch target
@@ -237,7 +237,7 @@ void GraphKit::gen_stub(address C_function,
237237
Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, MemNode::unordered);
238238
map()->set_req(TypeFunc::Parms, vm_result); // vm_result passed as result
239239
// clear thread-local-storage(tls)
240-
store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias, MemNode::unordered);
240+
store_to_memory(control(), adr, null(), T_ADDRESS, NoAlias, MemNode::unordered);
241241
}
242242

243243
//-----------------------------

‎src/hotspot/share/opto/node.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,9 @@ class Node {
11481148
virtual int cisc_operand() const { return AdlcVMDeps::Not_cisc_spillable; }
11491149
bool is_cisc_alternate() const { return (_flags & Flag_is_cisc_alternate) != 0; }
11501150

1151+
// Whether this is a memory-writing machine node.
1152+
bool is_memory_writer() const { return is_Mach() && bottom_type()->has_memory(); }
1153+
11511154
//----------------- Printing, etc
11521155
#ifndef PRODUCT
11531156
private:

0 commit comments

Comments
 (0)
Please sign in to comment.