1
1
/*
2
- * Copyright (c) 1997, 2018 , Oracle and/or its affiliates. All rights reserved.
2
+ * Copyright (c) 1997, 2021 , Oracle and/or its affiliates. All rights reserved.
3
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
4
*
5
5
* This code is free software; you can redistribute it and/or modify it
@@ -1163,6 +1163,14 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
1163
1163
if (mach && LCA == root_block)
1164
1164
break ;
1165
1165
1166
+ if (self->is_memory_writer () &&
1167
+ (LCA->_loop ->depth () > early->_loop ->depth ())) {
1168
+ // LCA is an invalid placement for a memory writer: choosing it would
1169
+ // cause memory interference, as illustrated in schedule_late().
1170
+ continue ;
1171
+ }
1172
+ verify_memory_writer_placement (LCA, self);
1173
+
1166
1174
uint start_lat = get_latency_for_node (LCA->head ());
1167
1175
uint end_idx = LCA->end_idx ();
1168
1176
uint end_lat = get_latency_for_node (LCA->get_node (end_idx));
@@ -1250,6 +1258,17 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
1250
1258
if ( self->pinned () ) // Pinned in block?
1251
1259
continue ;
1252
1260
1261
+ #ifdef ASSERT
1262
+ // Assert that memory writers (e.g. stores) have a "home" block (the block
1263
+ // given by their control input), and that this block corresponds to their
1264
+ // earliest possible placement. This guarantees that
1265
+ // hoist_to_cheaper_block() will always have at least one valid choice.
1266
+ if (self->is_memory_writer ()) {
1267
+ assert (find_block_for_node (self->in (0 )) == early,
1268
+ " The home of a memory writer must also be its earliest placement" );
1269
+ }
1270
+ #endif
1271
+
1253
1272
MachNode* mach = self->is_Mach () ? self->as_Mach () : NULL ;
1254
1273
if (mach) {
1255
1274
switch (mach->ideal_Opcode ()) {
@@ -1274,13 +1293,12 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
1274
1293
default :
1275
1294
break ;
1276
1295
}
1277
- if (C->has_irreducible_loop () && self->bottom_type ()->has_memory ()) {
1278
- // If the CFG is irreducible, keep memory-writing nodes as close as
1279
- // possible to their original block (given by the control input). This
1280
- // prevents PhaseCFG::hoist_to_cheaper_block() from placing such nodes
1281
- // into descendants of their original loop, as in the following example:
1296
+ if (C->has_irreducible_loop () && self->is_memory_writer ()) {
1297
+ // If the CFG is irreducible, place memory writers in their home block.
1298
+ // This prevents hoist_to_cheaper_block() from accidentally placing such
1299
+ // nodes into deeper loops, as in the following example:
1282
1300
//
1283
- // Original placement of store in B1 (loop L1):
1301
+ // Home placement of store in B1 (loop L1):
1284
1302
//
1285
1303
// B1 (L1):
1286
1304
// m1 <- ..
@@ -1301,12 +1319,16 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
1301
1319
// B3 (L1):
1302
1320
// .. <- .. m2, ..
1303
1321
//
1304
- // This "hoist inversion" can happen due to CFGLoop::compute_freq()'s
1305
- // inaccurate estimation of frequencies for irreducible CFGs, which can
1306
- // lead to for example assigning B1 and B3 a higher frequency than B2.
1322
+ // This "hoist inversion" can happen due to different factors such as
1323
+ // inaccurate estimation of frequencies for irreducible CFGs, and loops
1324
+ // with always-taken exits in reducible CFGs. In the reducible case,
1325
+ // hoist inversion is prevented by discarding invalid blocks (those in
1326
+ // deeper loops than the home block). In the irreducible case, the
1327
+ // invalid blocks cannot be identified due to incomplete loop nesting
1328
+ // information, hence a conservative solution is taken.
1307
1329
#ifndef PRODUCT
1308
1330
if (trace_opto_pipelining ()) {
1309
- tty->print_cr (" # Irreducible loops: schedule in earliest block B%d:" ,
1331
+ tty->print_cr (" # Irreducible loops: schedule in home block B%d:" ,
1310
1332
early->_pre_order );
1311
1333
self->dump ();
1312
1334
}
@@ -1359,6 +1381,16 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
1359
1381
return ;
1360
1382
}
1361
1383
1384
+ if (self->is_memory_writer ()) {
1385
+ // If the LCA of a memory writer is a descendant of its home loop, hoist
1386
+ // it into a valid placement.
1387
+ while (LCA->_loop ->depth () > early->_loop ->depth ()) {
1388
+ LCA = LCA->_idom ;
1389
+ }
1390
+ assert (LCA != NULL , " a valid LCA must exist" );
1391
+ verify_memory_writer_placement (LCA, self);
1392
+ }
1393
+
1362
1394
// If there is no opportunity to hoist, then we're done.
1363
1395
// In stress mode, try to hoist even the single operations.
1364
1396
bool try_to_hoist = StressGCM || (LCA != early);
0 commit comments