Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8263644: Add Extract float nodes implementation for Arm SVE #51

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions src/hotspot/cpu/aarch64/aarch64_sve.ad
Original file line number Diff line number Diff line change
@@ -222,8 +222,6 @@ source %{
case Op_MulReductionVL:
// Others
case Op_ExtractC:
case Op_ExtractD:
case Op_ExtractF:
case Op_ExtractUB:
// Vector API specific
case Op_LoadVectorGather:
@@ -3028,15 +3026,15 @@ instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%}


instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractI src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, S, zr, rscratch1\n\t"
"sve_lastb $dst, S, $pTmp, $src\n\t" %}
"sve_lastb $dst, S, $pTmp, $src\t# extract from vector(I)" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ S, zr, rscratch1);
@@ -3045,19 +3043,53 @@ instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr
ins_pipe(pipe_slow);
%}

instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractL src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, D, zr, rscratch1\n\t"
"sve_lastb $dst, D, $pTmp, $src\n\t" %}
"sve_lastb $dst, D, $pTmp, $src\t# extract from vector(L)" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
__ sve_lastb(as_Register($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// Extract a single float element from an SVE vector register into a scalar
// FP register. Selected for (ExtractF src idx) when UseSVE > 0; idx is an
// immediate int operand (immI), so the lane index is a compile-time constant.
// pTmp is reserved as a temporary governing predicate and the flags register
// is declared killed. The encoding also writes rscratch1, which is not listed
// among the operands.
// NOTE(review): presumably whilele activates lanes 0..idx so that lastb
// returns lane idx -- confirm against the SVE WHILELE/LASTB definitions.
instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractF src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, S, zr, rscratch1\n\t"
"sve_lastb $dst, S, $pTmp, $src\t# extract from vector(F)" %}
ins_encode %{
// Materialize the constant lane index in the scratch register.
__ movzw(rscratch1, (int)($idx$$constant));
// Build the governing predicate in pTmp from zr and the index (S element size).
__ sve_whilele(as_PRegister($pTmp$$reg), __ S, zr, rscratch1);
// dst is passed as a FloatRegister, so the result lands in an FP/SIMD scalar register.
__ sve_lastb(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// Extract a single double element from an SVE vector register into a scalar
// FP register. Selected for (ExtractD src idx) when UseSVE > 0; idx is an
// immediate int operand (immI), so the lane index is a compile-time constant.
// pTmp is reserved as a temporary governing predicate and the flags register
// is declared killed. The encoding also writes rscratch1, which is not listed
// among the operands.
// NOTE(review): presumably whilele activates lanes 0..idx so that lastb
// returns lane idx -- confirm against the SVE WHILELE/LASTB definitions.
instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractD src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, D, zr, rscratch1\n\t"
"sve_lastb $dst, D, $pTmp, $src\t# extract from vector(D)" %}
ins_encode %{
// Materialize the constant lane index in the scratch register.
__ movzw(rscratch1, (int)($idx$$constant));
// Build the governing predicate in pTmp from zr and the index (D element size).
__ sve_whilele(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
// dst is passed as a FloatRegister, so the result lands in an FP/SIMD scalar register.
__ sve_lastb(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
8 changes: 4 additions & 4 deletions src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Original file line number Diff line number Diff line change
@@ -218,8 +218,6 @@ source %{
case Op_MulReductionVL:
// Others
case Op_ExtractC:
case Op_ExtractD:
case Op_ExtractF:
case Op_ExtractUB:
// Vector API specific
case Op_LoadVectorGather:
@@ -1810,15 +1808,15 @@ VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U)

dnl
define(`VECTOR_EXTRACT', `
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (Extract$1 src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, $3, zr, rscratch1\n\t"
"sve_lastb $dst, $3, $pTmp, $src\n\t" %}
"sve_lastb $dst, $3, $pTmp, $src\t# extract from vector($1)" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ $3, zr, rscratch1);
@@ -1829,3 +1827,5 @@ instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
dnl $1 $2 $3 $4
VECTOR_EXTRACT(I, iRegINoSp, S, Register)
VECTOR_EXTRACT(L, iRegLNoSp, D, Register)
VECTOR_EXTRACT(F, vRegF, S, FloatRegister)
VECTOR_EXTRACT(D, vRegD, D, FloatRegister)
12 changes: 12 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -3515,6 +3515,18 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_lastb, 0b1);
#undef INSN

// SVE lasta/lastb overloads whose destination Vd is a FloatRegister.
// Encoded fields, from the top bit down:
//   [31:24] 0b00000101
//   [23:22] T        (element size variant)
//   [21:17] 0b10001
//   [16]    before   (0 for sve_lasta, 1 for sve_lastb)
//   [15:13] 0b100
//   Pg placed at bit 10 (pgrf), Zn at bit 5, Vd at bit 0.
// No comments are placed inside the macro body because every line ends in a
// backslash continuation.
#define INSN(NAME, before) \
void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \
starti; \
f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17); \
f(before, 16), f(0b100, 15, 13); \
pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0); \
}

INSN(sve_lasta, 0b0);
INSN(sve_lastb, 0b1);
#undef INSN

Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

2 changes: 2 additions & 0 deletions test/hotspot/gtest/aarch64/aarch64-asmtest.py
Original file line number Diff line number Diff line change
@@ -1594,6 +1594,8 @@ def generate(kind, names):
["fcvtzu", "__ sve_fcvtzu(z19, __ D, p2, z18, __ D);", "fcvtzu\tz19.d, p2/m, z18.d"],
["lasta", "__ sve_lasta(r0, __ B, p0, z15);", "lasta\tw0, p0, z15.b"],
["lastb", "__ sve_lastb(r1, __ B, p1, z16);", "lastb\tw1, p1, z16.b"],
["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"],
["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
])

print "\n// FloatImmediateOp"
128 changes: 65 additions & 63 deletions test/hotspot/gtest/aarch64/asmtest.out.h
Original file line number Diff line number Diff line change
@@ -779,6 +779,8 @@
__ sve_fcvtzu(z19, __ D, p2, z18, __ D); // fcvtzu z19.d, p2/m, z18.d
__ sve_lasta(r0, __ B, p0, z15); // lasta w0, p0, z15.b
__ sve_lastb(r1, __ B, p1, z16); // lastb w1, p1, z16.b
__ sve_lasta(v0, __ B, p0, z15); // lasta b0, p0, z15.b
__ sve_lastb(v1, __ B, p1, z16); // lastb b1, p1, z16.b

// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
@@ -984,30 +986,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x14000312, 0x94000000,
0x97ffffd4, 0x9400030f, 0x3400000a, 0x34fffa2a,
0x3400618a, 0x35000008, 0x35fff9c8, 0x35006128,
0xb400000b, 0xb4fff96b, 0xb40060cb, 0xb500001d,
0xb5fff91d, 0xb500607d, 0x10000013, 0x10fff8b3,
0x10006013, 0x90000013, 0x36300016, 0x3637f836,
0x36305f96, 0x3758000c, 0x375ff7cc, 0x37585f2c,
0x14000000, 0x17ffffd7, 0x14000314, 0x94000000,
0x97ffffd4, 0x94000311, 0x3400000a, 0x34fffa2a,
0x340061ca, 0x35000008, 0x35fff9c8, 0x35006168,
0xb400000b, 0xb4fff96b, 0xb400610b, 0xb500001d,
0xb5fff91d, 0xb50060bd, 0x10000013, 0x10fff8b3,
0x10006053, 0x90000013, 0x36300016, 0x3637f836,
0x36305fd6, 0x3758000c, 0x375ff7cc, 0x37585f6c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005d00, 0x54000001, 0x54fff541, 0x54005ca1,
0x54000002, 0x54fff4e2, 0x54005c42, 0x54000002,
0x54fff482, 0x54005be2, 0x54000003, 0x54fff423,
0x54005b83, 0x54000003, 0x54fff3c3, 0x54005b23,
0x54000004, 0x54fff364, 0x54005ac4, 0x54000005,
0x54fff305, 0x54005a65, 0x54000006, 0x54fff2a6,
0x54005a06, 0x54000007, 0x54fff247, 0x540059a7,
0x54000008, 0x54fff1e8, 0x54005948, 0x54000009,
0x54fff189, 0x540058e9, 0x5400000a, 0x54fff12a,
0x5400588a, 0x5400000b, 0x54fff0cb, 0x5400582b,
0x5400000c, 0x54fff06c, 0x540057cc, 0x5400000d,
0x54fff00d, 0x5400576d, 0x5400000e, 0x54ffefae,
0x5400570e, 0x5400000f, 0x54ffef4f, 0x540056af,
0x54005d40, 0x54000001, 0x54fff541, 0x54005ce1,
0x54000002, 0x54fff4e2, 0x54005c82, 0x54000002,
0x54fff482, 0x54005c22, 0x54000003, 0x54fff423,
0x54005bc3, 0x54000003, 0x54fff3c3, 0x54005b63,
0x54000004, 0x54fff364, 0x54005b04, 0x54000005,
0x54fff305, 0x54005aa5, 0x54000006, 0x54fff2a6,
0x54005a46, 0x54000007, 0x54fff247, 0x540059e7,
0x54000008, 0x54fff1e8, 0x54005988, 0x54000009,
0x54fff189, 0x54005929, 0x5400000a, 0x54fff12a,
0x540058ca, 0x5400000b, 0x54fff0cb, 0x5400586b,
0x5400000c, 0x54fff06c, 0x5400580c, 0x5400000d,
0x54fff00d, 0x540057ad, 0x5400000e, 0x54ffefae,
0x5400574e, 0x5400000f, 0x54ffef4f, 0x540056ef,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@@ -1039,7 +1041,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x580046fb, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x5800473b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@@ -1140,47 +1142,47 @@
0x25221420, 0x25640461, 0x25a614b2, 0x25eb0553,
0x25221c24, 0x25640c60, 0x25a61cb1, 0x25eb0d52,
0x65d0a001, 0x65d1a443, 0x65cbac85, 0x65deaa53,
0x65dfaa53, 0x0520a1e0, 0x0521a601, 0x1e601000,
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8238358,
0xf83702af, 0xf8231118, 0xf8392214, 0xf8313022,
0xf8205098, 0xf82343ec, 0xf83c734a, 0xf82261ec,
0xf8bf81a1, 0xf8bd0260, 0xf8ac12d1, 0xf8ad23dc,
0xf8bf3341, 0xf8bc53c4, 0xf8a443c6, 0xf8ba7130,
0xf8a8600c, 0xf8f48301, 0xf8e20120, 0xf8f8121a,
0xf8fe2143, 0xf8f7308a, 0xf8f05162, 0xf8e841ea,
0xf8f17142, 0xf8ec61ec, 0xf86d80e2, 0xf874021a,
0xf8641082, 0xf86c22b0, 0xf8703170, 0xf8755197,
0xf87a4397, 0xf86e730b, 0xf86163ec, 0xb82a80f0,
0xb82201a3, 0xb8331211, 0xb8232161, 0xb83e3105,
0xb82f53dd, 0xb82040f4, 0xb8347397, 0xb835633b,
0xb8a582e1, 0xb8b000bf, 0xb8ac1389, 0xb8af22dd,
0xb8bf33f3, 0xb8a551ee, 0xb8bf4370, 0xb8b47190,
0xb8ab60c9, 0xb8fe8371, 0xb8fc00fe, 0xb8ea1154,
0xb8e42238, 0xb8f13076, 0xb8fd52cf, 0xb8f342d3,
0xb8e270cf, 0xb8ec6170, 0xb86d8037, 0xb87e00b3,
0xb8711202, 0xb876214d, 0xb875337d, 0xb86c507b,
0xb861431f, 0xb8737131, 0xb87c61fb, 0xce367a86,
0xce1e6858, 0xce768d51, 0xce910451, 0xce768338,
0xce6c8622, 0xcec08363, 0xce708b9d, 0x04e900da,
0x042404f1, 0x6596012f, 0x65d40b62, 0x65c00745,
0x0456a72e, 0x04c0175b, 0x04109418, 0x041ab006,
0x0413812f, 0x04118b65, 0x04101694, 0x04d7aa0a,
0x045eb046, 0x04c81c5d, 0x044a1dd6, 0x040112fb,
0x04dcad42, 0x65809aca, 0x658d9603, 0x65c69201,
0x65878d8c, 0x65c28290, 0x04dda4e5, 0x65c2be0c,
0x6580a386, 0x65c1a624, 0x658dae6d, 0x65819638,
0x65f318ca, 0x65a030cd, 0x65a8532e, 0x65bb76d6,
0x04144e23, 0x04407ce4, 0x04363270, 0x04b6312f,
0x047e30b9, 0x052b6acd, 0x05b46d0d, 0x041a2c99,
0x04d828d1, 0x04d93e04, 0x040829da, 0x040a3c6b,
0x65c73aa1, 0x65c62a2e, 0x65d82678, 0x04c13611,

0x65dfaa53, 0x0520a1e0, 0x0521a601, 0x052281e0,
0x05238601, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8238358, 0xf83702af, 0xf8231118,
0xf8392214, 0xf8313022, 0xf8205098, 0xf82343ec,
0xf83c734a, 0xf82261ec, 0xf8bf81a1, 0xf8bd0260,
0xf8ac12d1, 0xf8ad23dc, 0xf8bf3341, 0xf8bc53c4,
0xf8a443c6, 0xf8ba7130, 0xf8a8600c, 0xf8f48301,
0xf8e20120, 0xf8f8121a, 0xf8fe2143, 0xf8f7308a,
0xf8f05162, 0xf8e841ea, 0xf8f17142, 0xf8ec61ec,
0xf86d80e2, 0xf874021a, 0xf8641082, 0xf86c22b0,
0xf8703170, 0xf8755197, 0xf87a4397, 0xf86e730b,
0xf86163ec, 0xb82a80f0, 0xb82201a3, 0xb8331211,
0xb8232161, 0xb83e3105, 0xb82f53dd, 0xb82040f4,
0xb8347397, 0xb835633b, 0xb8a582e1, 0xb8b000bf,
0xb8ac1389, 0xb8af22dd, 0xb8bf33f3, 0xb8a551ee,
0xb8bf4370, 0xb8b47190, 0xb8ab60c9, 0xb8fe8371,
0xb8fc00fe, 0xb8ea1154, 0xb8e42238, 0xb8f13076,
0xb8fd52cf, 0xb8f342d3, 0xb8e270cf, 0xb8ec6170,
0xb86d8037, 0xb87e00b3, 0xb8711202, 0xb876214d,
0xb875337d, 0xb86c507b, 0xb861431f, 0xb8737131,
0xb87c61fb, 0xce367a86, 0xce1e6858, 0xce768d51,
0xce910451, 0xce768338, 0xce6c8622, 0xcec08363,
0xce708b9d, 0x04e900da, 0x042404f1, 0x6596012f,
0x65d40b62, 0x65c00745, 0x0456a72e, 0x04c0175b,
0x04109418, 0x041ab006, 0x0413812f, 0x04118b65,
0x04101694, 0x04d7aa0a, 0x045eb046, 0x04c81c5d,
0x044a1dd6, 0x040112fb, 0x04dcad42, 0x65809aca,
0x658d9603, 0x65c69201, 0x65878d8c, 0x65c28290,
0x04dda4e5, 0x65c2be0c, 0x6580a386, 0x65c1a624,
0x658dae6d, 0x65819638, 0x65f318ca, 0x65a030cd,
0x65a8532e, 0x65bb76d6, 0x04144e23, 0x04407ce4,
0x04363270, 0x04b6312f, 0x047e30b9, 0x052b6acd,
0x05b46d0d, 0x041a2c99, 0x04d828d1, 0x04d93e04,
0x040829da, 0x040a3c6b, 0x65c73aa1, 0x65c62a2e,
0x65d82678, 0x04c13611,
};
// END Generated code -- do not edit