Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8279508: Auto-vectorize Math.round API #7094

Closed
wants to merge 23 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0fe0150
8279508: Auto-vectorize Math.round API
Jan 14, 2022
575d293
8279508: Adding a test for scalar intrinsification.
Jan 19, 2022
d610bd6
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8279508
Feb 12, 2022
2dc364f
8279508: Adding vectorized algorithms to match the semantics of round…
Feb 12, 2022
1c9ff77
8279508: Replacing by efficient instruction sequence based on MXCSR.R…
Feb 15, 2022
2f55569
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8279508
Feb 15, 2022
73674fe
8279508: Adding few descriptive comments.
Feb 16, 2022
f35ed9c
8279508: Fixing for windows failure.
Feb 17, 2022
6c869c7
8279508: Review comments resolved.
Feb 22, 2022
f7dec3d
8279508: Review comments resolved.
jatin-bhateja Feb 24, 2022
54d4ea3
8279508: Adding descriptive comments.
Feb 24, 2022
3b90ae5
8279508: Review comments resolved.
Mar 1, 2022
57b1b13
8279508: Removing +LogCompilation flag.
Mar 1, 2022
bf1532f
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8279508
Mar 8, 2022
547f4e3
8279508: Preventing domain switch-over penalty for Math.round(float) …
Mar 8, 2022
fcb7321
8279508: Review comments resolution.
Mar 10, 2022
2519a58
8279508: Reducing the invocation count and compile thresholds for Rou…
Mar 11, 2022
e4d4e29
8279508: Creating separate test for round double under feature check.
Mar 12, 2022
c881d11
8279508: Styling comments resolved.
Mar 12, 2022
b1323a8
8279508: Windows build failure fix.
Mar 12, 2022
962d751
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8279508
Mar 18, 2022
c17440c
8279508: Using an explicit scratch register since rscratch1 is bound …
Mar 18, 2022
621bd69
8279508: Removing redundant test point.
Apr 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -4142,8 +4142,7 @@ void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister sr
mov64(scratch, 4602678819172646912L);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mov64(scratch, 4602678819172646912L);
mov64(scratch, julong_cast(0.5));

evpbroadcastq(xtmp1, scratch, vec_enc);
vaddpd(xtmp1, src , xtmp1, vec_enc);
vrndscalepd(dst, xtmp1, 0x4, vec_enc);
evcvtpd2qq(dst, dst, vec_enc);
evcvtpd2qq(dst, xtmp1, vec_enc);
vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
ldmxcsr(mxcsr_std);
}
@@ -4158,8 +4157,7 @@ void C2_MacroAssembler::vector_round_float_evex(XMMRegister dst, XMMRegister src
movl(scratch, 1056964608);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is 1056964608?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raw bits (0x3F000000) corresponding to the floating-point value 0.5f.

Copy link
Contributor

@theRealAph theRealAph Mar 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
movl(scratch, 1056964608);
movl(scratch, jint_cast(0.5f));

evpbroadcastd(xtmp1, scratch, vec_enc);
vaddps(xtmp1, src , xtmp1, vec_enc);
vrndscaleps(dst, xtmp1, 0x4, vec_enc);
vcvtps2dq(dst, dst, vec_enc);
vcvtps2dq(dst, xtmp1, vec_enc);
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
ldmxcsr(mxcsr_std);
}
@@ -4175,8 +4173,7 @@ void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src,
movq(xtmp1, scratch);
vpbroadcastd(xtmp1, xtmp1, vec_enc);
vaddps(xtmp1, src , xtmp1, vec_enc);
vroundps(dst, xtmp1, 0x4, vec_enc);
vcvtps2dq(dst, dst, vec_enc);
vcvtps2dq(dst, xtmp1, vec_enc);
vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc);
ldmxcsr(mxcsr_std);
}
2 changes: 1 addition & 1 deletion src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -8954,7 +8954,7 @@ void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
void MacroAssembler::round_float(Register dst, XMMRegister src, Register rtmp, Register rcx) {
// Following code is exactly mimicking the functionality of java.lang.Math.round(float) method.
Label L_special_case, L_block1, L_exit;
movl(rtmp, 0x7F800000);
movl(rtmp, 0x7f800000);
movdl(dst, src);
andl(dst, rtmp);
sarl(dst, 0x17);