Skip to content

Commit 3e9a17c

Browse files
committed Jan 7, 2020
8235984: C2: assert(out->in(PhiNode::Region) == head || out->in(PhiNode::Region) == slow_head) failed: phi must be either part of the slow or the fast loop
Bailout from loop unswitching if loop predicates have a control dependency to partially peeled statements. Reviewed-by: neliasso, thartmann
1 parent cc99075 commit 3e9a17c

File tree

2 files changed

+188
-127
lines changed

2 files changed

+188
-127
lines changed
 

‎src/hotspot/share/opto/loopUnswitch.cpp

+14 -120
Original file line number | Diff line number | Diff line change
@@ -118,9 +118,20 @@ IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) co
118118
// execute.
119119
void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
120120

121-
// Find first invariant test that doesn't exit the loop
122121
LoopNode *head = loop->_head->as_Loop();
123-
122+
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
123+
if (find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check) != NULL
124+
|| (UseProfiledLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate) != NULL)
125+
|| (UseLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_predicate) != NULL)) {
126+
assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
127+
if (entry->outcnt() > 1) {
128+
// Bailout if there are loop predicates from which there are additional control dependencies (i.e. from
129+
// loop entry 'entry') to previously partially peeled statements since this case is not handled and can lead
130+
// to wrong execution. Remove this bailout, once this is fixed.
131+
return;
132+
}
133+
}
134+
// Find first invariant test that doesn't exit the loop
124135
IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
125136
assert(unswitch_iff != NULL, "should be at least one");
126137

@@ -140,7 +151,7 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
140151

141152
#ifdef ASSERT
142153
Node* uniqc = proj_true->unique_ctrl_out();
143-
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
154+
entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
144155
Node* predicate = find_predicate(entry);
145156
if (predicate != NULL) {
146157
entry = skip_loop_predicates(entry);
@@ -281,123 +292,6 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
281292
_igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
282293
set_idom(slow_l, ifslow_pred, dom_depth(l));
283294

284-
if (iffast != iffast_pred && entry->outcnt() > 1) {
285-
// This situation occurs when only non-CFG nodes (i.e. no control dependencies between them) with a control
286-
// input from the loop header were partially peeled before (now control dependent on loop entry control).
287-
// If additional CFG nodes were peeled, then the insertion point of the loop predicates from the parsing stage
288-
// would not be found anymore and the predicates not cloned at all (i.e. iffast == iffast_pred) as it happens
289-
// for normal peeling. Those partially peeled statements have a control input from the old loop entry control
290-
// and need to be executed after the predicates. These control dependencies need to be removed from the old
291-
// entry control and added to the new entry control nodes 'iffast_pred' and 'ifslow_pred'. Since each node can
292-
// only have one control input, we need to create clones for all statements (2) that can be reached over a path
293-
// from the old entry control 'entry' (1) to a loop phi (8, 9). The old nodes (2) will be moved to the fast loop and the
294-
// new cloned nodes (10) to the slow loop.
295-
//
296-
// The result of the following algorithm is visualized below. The cloned loop predicates for the fast loop
297-
// are between the loop selection node (3) and the entry control for the fast loop (4) and for the slow loop
298-
// between the loop selection node (3) and the entry control for the slow loop (5), respectively.
299-
//
300-
// 1 entry 1 entry
301-
// / \ |
302-
// 2 stmt 3 iff 3 iff
303-
// | / \ / \
304-
// | .. .. .. ..
305-
// | / \ / \
306-
// | 4 iffast_p 5 ifslow_p 4 iffast_p 5 ifslow_p
307-
// | | | / \ / \
308-
// | 6 head 7 slow_head ==> 6 head 2 stmt 7 slow_head 10 cloned_stmt
309-
// | | | \ / \ /
310-
// +--\ | +--\ | 8 phi 9 phi
311-
// | 8 phi | 9 phi
312-
// | |
313-
// +----------+
314-
//
315-
assert(ifslow != ifslow_pred, "sanity - must also be different");
316-
317-
ResourceMark rm;
318-
Unique_Node_List worklist;
319-
Unique_Node_List phis;
320-
Node_List old_clone;
321-
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
322-
323-
// 1) Do a BFS starting from the outputs of the original entry control node 'entry' to all (loop) phis
324-
// and add the non-phi nodes to the worklist.
325-
// First get all outputs of 'entry' which are not the new "loop selection check" 'iff'.
326-
for (DUIterator_Fast imax, i = entry->fast_outs(imax); i < imax; i++) {
327-
Node* stmt = entry->fast_out(i);
328-
if (stmt != iff) {
329-
assert(!stmt->is_CFG(), "cannot be a CFG node");
330-
worklist.push(stmt);
331-
}
332-
}
333-
334-
// Then do a BFS from all collected nodes so far and stop if a phi node is hit.
335-
// Keep track of them on a separate 'phis' list to adjust their inputs later.
336-
for (uint i = 0; i < worklist.size(); i++) {
337-
Node* stmt = worklist.at(i);
338-
for (DUIterator_Fast jmax, j = stmt->fast_outs(jmax); j < jmax; j++) {
339-
Node* out = stmt->fast_out(j);
340-
assert(!out->is_CFG(), "cannot be a CFG node");
341-
if (out->is_Phi()) {
342-
assert(out->in(PhiNode::Region) == head || out->in(PhiNode::Region) == slow_head,
343-
"phi must be either part of the slow or the fast loop");
344-
phis.push(out);
345-
} else {
346-
worklist.push(out);
347-
}
348-
}
349-
}
350-
351-
// 2) All nodes of interest are in 'worklist' and are now cloned. This could not be done simultaneously
352-
// in step 1 in an easy way because we could have cloned a node which has an input that is added to the
353-
// worklist later. As a result, the BFS would hit a clone which does not need to be cloned again.
354-
// While cloning a node, the control inputs to 'entry' are updated such that the old node points to
355-
// 'iffast_pred' and the clone to 'ifslow_pred', respectively.
356-
for (uint i = 0; i < worklist.size(); i++) {
357-
Node* stmt = worklist.at(i);
358-
assert(!stmt->is_CFG(), "cannot be a CFG node");
359-
Node* cloned_stmt = stmt->clone();
360-
old_clone.map(stmt->_idx, cloned_stmt);
361-
_igvn.register_new_node_with_optimizer(cloned_stmt);
362-
363-
if (stmt->in(0) == entry) {
364-
_igvn.replace_input_of(stmt, 0, iffast_pred);
365-
set_ctrl(stmt, iffast_pred);
366-
_igvn.replace_input_of(cloned_stmt, 0, ifslow_pred);
367-
set_ctrl(cloned_stmt, ifslow_pred);
368-
}
369-
}
370-
371-
// 3) Update the entry control of all collected phi nodes of the slow loop to use the cloned nodes
372-
// instead of the old ones from the worklist
373-
for (uint i = 0; i < phis.size(); i++) {
374-
assert(phis.at(i)->is_Phi(), "must be a phi");
375-
PhiNode* phi = phis.at(i)->as_Phi();
376-
if (phi->in(PhiNode::Region) == slow_head) {
377-
// Slow loop: Update phi entry control to use the cloned version instead of the old one from the worklist
378-
Node* entry_control = phi->in(LoopNode::EntryControl);
379-
_igvn.replace_input_of(phi, LoopNode::EntryControl, old_clone[phi->in(LoopNode::EntryControl)->_idx]);
380-
}
381-
382-
}
383-
384-
// 4) Replace all input edges of cloned nodes from old nodes on the worklist by an input edge from their
385-
// corresponding cloned version.
386-
for (uint i = 0; i < worklist.size(); i++) {
387-
Node* stmt = worklist.at(i);
388-
for (uint j = 0; j < stmt->req(); j++) {
389-
Node* in = stmt->in(j);
390-
if (in == NULL) {
391-
continue;
392-
}
393-
394-
if (worklist.contains(in)) {
395-
// Replace the edge old1->clone_of_old_2 with an edge clone_of_old1->clone_of_old2
396-
old_clone[stmt->_idx]->set_req(j, old_clone[in->_idx]);
397-
}
398-
}
399-
}
400-
}
401295
recompute_dom_depth();
402296

403297
return iffast;

‎test/hotspot/jtreg/compiler/loopopts/PartialPeelingUnswitch.java

+174 -7
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,12 @@
2323

2424
/*
2525
* @test
26-
* @bug 8233033
27-
* @summary Tests if partially peeled statements are not executed before the loop predicates of the unswitched fast loop.
26+
* @bug 8233033 8235984
27+
* @summary Tests if partially peeled statements are not executed before the loop predicates by bailing out of loop unswitching.
2828
*
29-
* @run main/othervm -Xbatch -XX:-TieredCompilation
29+
* @run main/othervm -Xbatch
3030
* -XX:CompileCommand=compileonly,compiler.loopopts.PartialPeelingUnswitch::test*
31+
* -XX:CompileCommand=dontinline,compiler.loopopts.PartialPeelingUnswitch::dontInline
3132
* compiler.loopopts.PartialPeelingUnswitch
3233
*/
3334

@@ -38,6 +39,7 @@ public class PartialPeelingUnswitch {
3839
public static int iFld;
3940
public static int x = 42;
4041
public static int y = 31;
42+
public static int z = 22;
4143
public static int[] iArr = new int[10];
4244

4345
public int test() {
@@ -46,8 +48,9 @@ public int test() {
4648
* of the cloned loop predicates for the fast loop (set up at unswitching stage). The only partially peeled
4749
* statement "iFld += 7" was wrongly executed before the predicates (and before the loop itself).
4850
* When hitting the uncommon trap, "iFld >>= 1" was not yet executed. As a result, the interpreter directly
49-
* reexecuted "iFld += 7" again. This resulted in a wrong result for "iFld". The fix makes peeled statements
50-
* control dependant on the cloned loop predicates such that they are executed after them.
51+
* reexecuted "iFld += 7" again. This resulted in a wrong result for "iFld". The fix in 8233033 makes peeled
52+
* statements control dependent on the cloned loop predicates such that they are executed after them. However,
53+
* some cases are not handled properly. For now, the new fix in 8235984 just bails out of loop unswitching.
5154
*/
5255
iFld = 13;
5356
for (int i = 0; i < 8; i++) {
@@ -103,16 +106,162 @@ public int test2() {
103106
return iFld + k;
104107
}
105108

109+
public int test3() {
110+
iFld = 13;
111+
if (z < 34) {
112+
z = 34;
113+
}
114+
115+
for (int i = 0; i < 8; i++) {
116+
int j = 10;
117+
while (--j > 0) {
118+
iFld += -7;
119+
iArr[5] = 8;
120+
x = iArr[6];
121+
y = x;
122+
for (int k = 50; k < 51; k++) {
123+
x = iArr[7];
124+
}
125+
switch ((i * 5) + 102) {
126+
case 120:
127+
return iFld;
128+
case 103:
129+
break;
130+
case 116:
131+
break;
132+
default:
133+
if (iFld == -7) {
134+
return iFld;
135+
}
136+
z = iArr[5];
137+
iFld >>= 1;
138+
}
139+
}
140+
iArr[5] = 34;
141+
dontInline(iArr[5]);
142+
}
143+
return iFld;
144+
}
145+
146+
public int test4() {
147+
iFld = 13;
148+
if (z < 34) {
149+
z = 34;
150+
}
151+
152+
for (int i = 0; i < 8; i++) {
153+
int j = 10;
154+
while (--j > 0) {
155+
iFld += -7;
156+
iArr[5] = 8;
157+
x = iArr[6];
158+
y = x;
159+
for (int k = 50; k < 51; k++) {
160+
x = iArr[7];
161+
}
162+
switch ((i * 5) + 102) {
163+
case 120:
164+
return iFld;
165+
case 103:
166+
break;
167+
case 116:
168+
break;
169+
default:
170+
if (iFld == -7) {
171+
return iFld;
172+
}
173+
z = iArr[5];
174+
iFld >>= 1;
175+
}
176+
}
177+
iArr[5] = 34;
178+
}
179+
return iFld;
180+
}
181+
182+
public int test5() {
183+
iFld = 13;
184+
for (int i = 0; i < 8; i++) {
185+
int j = 10;
186+
while (--j > 0) {
187+
iFld += -7;
188+
iArr[5] = 8;
189+
x = iArr[6];
190+
y = x;
191+
for (int k = 50; k < 51; k++) {
192+
x = iArr[7];
193+
}
194+
switch ((i * 5) + 102) {
195+
case 120:
196+
return iFld;
197+
case 103:
198+
break;
199+
case 116:
200+
break;
201+
default:
202+
iFld >>= 1;
203+
}
204+
}
205+
}
206+
return iFld;
207+
}
208+
209+
public int test6() {
210+
iFld = 13;
211+
for (int i = 0; i < 8; i++) {
212+
int j = 10;
213+
while (--j > 0) {
214+
iFld += -7;
215+
iArr[5] = 8;
216+
x = iArr[6];
217+
y = x;
218+
switch ((i * 5) + 102) {
219+
case 120:
220+
return iFld;
221+
case 103:
222+
break;
223+
case 116:
224+
break;
225+
default:
226+
iFld >>= 1;
227+
}
228+
}
229+
}
230+
return iFld;
231+
}
232+
233+
public int test7() {
234+
iFld = 13;
235+
for (int i = 0; i < 8; i++) {
236+
int j = 10;
237+
while (--j > 0) {
238+
iFld += -7;
239+
iArr[5] = 8;
240+
switch ((i * 5) + 102) {
241+
case 120:
242+
return iFld;
243+
case 103:
244+
break;
245+
case 116:
246+
break;
247+
default:
248+
iFld >>= 1;
249+
}
250+
}
251+
}
252+
return iFld;
253+
}
254+
106255
public static void main(String[] strArr) {
107256
PartialPeelingUnswitch _instance = new PartialPeelingUnswitch();
108-
for (int i = 0; i < 200; i++) {
257+
for (int i = 0; i < 2000; i++) {
109258
int result = _instance.test();
110259
if (result != -7) {
111260
throw new RuntimeException("Result should always be -7 but was " + result);
112261
}
113262
}
114263

115-
for (int i = 0; i < 200; i++) {
264+
for (int i = 0; i < 2000; i++) {
116265
int result = _instance.test2();
117266
check(-1, result);
118267
check(-7, iFld);
@@ -129,11 +278,29 @@ public static void main(String[] strArr) {
129278
x = 42;
130279
y = 31;
131280
}
281+
282+
for (int i = 0; i < 2000; i++) {
283+
_instance.test3();
284+
_instance.test4();
285+
_instance.test5();
286+
_instance.test6();
287+
_instance.test7();
288+
}
289+
290+
for (int i = 0; i < 2000; i++) {
291+
if (i % 2 == 0) {
292+
z = 23;
293+
}
294+
_instance.test3();
295+
_instance.test4();
296+
}
132297
}
133298

134299
public static void check(int expected, int actual) {
135300
if (expected != actual) {
136301
throw new RuntimeException("Wrong result, expected: " + expected + ", actual: " + actual);
137302
}
138303
}
304+
305+
public void dontInline(int i) { }
139306
}

0 commit comments

Comments
 (0)
Please sign in to comment.