Skip to content

Commit 6247be1

Browse files
feilongjiang, guotaiping1
authored and committed Dec 21, 2021
8278644: riscv: Intrinsify mulAdd
Co-authored-by: Taiping Guo <guotaiping1@huawei.com> Reviewed-by: fyang
1 parent 9086f8c commit 6247be1

File tree

4 files changed

+106
-0
lines changed

4 files changed

+106
-0
lines changed
 

‎src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

+66
Original file line numberDiff line numberDiff line change
@@ -3025,6 +3025,72 @@ void MacroAssembler::compute_match_mask(Register src, Register pattern, Register
30253025
andr(match_mask, match_mask, src);
30263026
}
30273027

3028+
#ifdef COMPILER2
3029+
// Code for BigInteger::mulAdd intrinsic
3030+
// out = x10
3031+
// in = x11
3032+
// offset = x12 (already out.length-offset)
3033+
// len = x13
3034+
// k = x14
3035+
// tmp = x28
3036+
//
3037+
// pseudo code from java implementation:
3038+
// carry = 0;
3039+
// offset = out.length-offset - 1;
3040+
// for (int j = len - 1; j >= 0; j--) {
3041+
// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
3042+
// out[offset--] = (int)product;
3043+
// carry = product >>> 32;
3044+
// }
3045+
// return (int)carry;
3046+
// Emits the BigInteger::mulAdd loop. Both arrays are walked from their high
// end downwards: each step computes in[j] * k + out[offset] + carry, stores the
// low 32 bits back to out[] and keeps the high 32 bits as the next carry.
// On exit 'out' holds the final carry. in, offset, len, k, tmp, t0 and t1 may
// be overwritten.
// The length guards use blez so that a non-positive len emits no iterations,
// matching the Java loop (j = len - 1; j >= 0). A test for exactly zero would
// let a negative len fall into the tail loop and count down without ever
// reaching the exit condition.
void MacroAssembler::mul_add(Register out, Register in, Register offset,
3047+
Register len, Register k, Register tmp) {
3048+
Label L_tail_loop, L_unroll, L_end;
3049+
mv(tmp, out); // keep the base address of out[]; 'out' is reused as the carry
3050+
mv(out, zr); // carry = 0, which is also the result when there is nothing to do
3051+
blez(len, L_end); // nothing to do for len <= 0
3052+
zero_ext(k, k, 32); // kLong in the pseudo code: k & LONG_MASK
3053+
slli(t0, offset, LogBytesPerInt); // element index -> byte offset
3054+
add(offset, tmp, t0); // offset = &out[offset], one element past the first one updated
3055+
slli(t0, len, LogBytesPerInt);
3056+
add(in, in, t0); // in = &in[len], one element past the first one read
3057+
3058+
const int unroll = 8;
3059+
li(tmp, unroll); // tmp is free again; it now holds the unroll factor
3060+
blt(len, tmp, L_tail_loop); // fewer than 'unroll' elements: tail loop only
3061+
bind(L_unroll);
3062+
for (int i = 0; i < unroll; i++) { // C++ loop: emits 'unroll' copies of the step
3063+
sub(in, in, BytesPerInt);
3064+
lwu(t0, Address(in, 0)); // t0 = in[j], loaded with lwu (unsigned word load)
3065+
mul(t1, t0, k);
3066+
add(t0, t1, out); // + carry
3067+
sub(offset, offset, BytesPerInt);
3068+
lwu(t1, Address(offset, 0)); // t1 = out[offset]
3069+
add(t0, t0, t1); // t0 = product
3070+
sw(t0, Address(offset, 0)); // out[offset] = (int)product
3071+
srli(out, t0, 32); // carry = product >>> 32
3072+
}
3073+
sub(len, len, tmp); // 'unroll' elements consumed
3074+
bge(len, tmp, L_unroll); // repeat while at least 'unroll' elements remain
3075+
3076+
bind(L_tail_loop);
3077+
blez(len, L_end); // done once no elements remain
3078+
sub(in, in, BytesPerInt);
3079+
lwu(t0, Address(in, 0));
3080+
mul(t1, t0, k);
3081+
add(t0, t1, out);
3082+
sub(offset, offset, BytesPerInt);
3083+
lwu(t1, Address(offset, 0));
3084+
add(t0, t0, t1);
3085+
sw(t0, Address(offset, 0));
3086+
srli(out, t0, 32);
3087+
sub(len, len, 1);
3088+
j(L_tail_loop);
3089+
3090+
bind(L_end);
3091+
}
3092+
#endif
3093+
30283094
// Count bits of trailing zero chars from lsb to msb until first non-zero element.
30293095
// For LL case, one byte for one element, so shift 8 bits once, and for other case,
30303096
// shift 16 bits once.

‎src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

+5
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,11 @@ class MacroAssembler: public Assembler {
656656
void compute_match_mask(Register src, Register pattern, Register match_mask,
657657
Register mask1, Register mask2);
658658

659+
#ifdef COMPILER2
660+
// Emits the code for the BigInteger::mulAdd intrinsic; defined in
// macroAssembler_riscv.cpp. On exit 'out' holds the resulting carry.
void mul_add(Register out, Register in, Register offset,
661+
Register len, Register k, Register tmp);
662+
#endif
663+
659664
void inflate_lo32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);
660665
void inflate_hi32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);
661666

‎src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -2770,6 +2770,31 @@ class StubGenerator: public StubCodeGenerator {
27702770
StubRoutines::riscv64::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);
27712771
}
27722772

2773+
#ifdef COMPILER2
2774+
// Generates the "mulAdd" stub: the code emitted by MacroAssembler::mul_add,
// bracketed by enter()/leave() and followed by ret().
// Returns the address at which the stub code starts.
address generate_mulAdd()
2775+
{
2776+
__ align(CodeEntryAlignment);
2777+
StubCodeMark mark(this, "StubRoutines", "mulAdd");
2778+
2779+
address entry = __ pc(); // stub entry point, taken after alignment
2780+
2781+
// Same register assignment as documented for MacroAssembler::mul_add.
const Register out = x10;
2782+
const Register in = x11;
2783+
const Register offset = x12;
2784+
const Register len = x13;
2785+
const Register k = x14;
2786+
const Register tmp = x28;
2787+
2788+
BLOCK_COMMENT("Entry:");
2789+
__ enter();
2790+
__ mul_add(out, in, offset, len, k, tmp); // leaves the carry in 'out' (x10)
2791+
__ leave();
2792+
__ ret();
2793+
2794+
return entry;
2795+
}
2796+
#endif
2797+
27732798
// Continuation point for throwing of implicit exceptions that are
27742799
// not handled in the current activation. Fabricates an exception
27752800
// oop and initiates normal exception dispatching in this
@@ -2940,6 +2965,12 @@ class StubGenerator: public StubCodeGenerator {
29402965
// arraycopy stubs used by compilers
29412966
generate_arraycopy_stubs();
29422967

2968+
#ifdef COMPILER2
2969+
if (UseMulAddIntrinsic) {
2970+
StubRoutines::_mulAdd = generate_mulAdd();
2971+
}
2972+
#endif
2973+
29432974
generate_compare_long_strings();
29442975

29452976
generate_string_indexof_stubs();

‎src/hotspot/cpu/riscv/vm_version_riscv.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,10 @@ void VM_Version::initialize_c2() {
174174
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
175175
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
176176
}
177+
178+
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
179+
FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
180+
}
177181
}
178182
#endif // COMPILER2
179183

0 commit comments

Comments
 (0)
Please sign in to comment.