Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8263001: Add cast nodes from single precision float types to integer types implementation for Arm SVE #46

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64_sve.ad
Original file line number Diff line number Diff line change
@@ -2801,6 +2801,19 @@ instruct vcvtLtoD(vReg dst, vReg src)
ins_pipe(pipe_slow);
%}

// Vector cast F -> I: matches VectorCastF2X when the result vector holds T_INT
// elements and is at least 16 bytes long. Source and destination lanes are both
// S-sized, so a single sve_fcvtzs is emitted.
instruct vcvtFtoI(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastF2X src));
  ins_cost(SVE_COST);
  format %{ "sve_fcvtzs $dst, S, $src, S\t# convert F to I vector" %}
  ins_encode %{
    // Resolve the allocated vector registers once, then emit the conversion.
    // NOTE(review): ptrue is presumably the all-true governing predicate - confirm.
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sve_fcvtzs(dst_reg, __ S, ptrue, src_reg, __ S);
  %}
  ins_pipe(pipe_slow);
%}


instruct vcvtItoD(vReg dst, vReg src)
%{
@@ -2846,3 +2859,61 @@ instruct vcvtFtoD(vReg dst, vReg src)
%}
ins_pipe(pipe_slow);
%}


// Vector cast F -> S: matches VectorCastF2X when the result vector holds T_SHORT
// elements and is at least 16 bytes long. Emits three instructions: convert the
// S-sized float lanes to integers in place, zero-fill tmp, then narrow to H lanes
// with sve_uzp1 against tmp.
// NOTE(review): presumably uzp1 keeps the low half of every S lane of dst and
// fills the remainder of the result from the zeroed tmp - confirm against the
// Arm SVE UZP1 description.
instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastF2X src));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  // The last operand must be spelled $tmp so the disassembly shows the
  // allocated register rather than the literal text "tmp".
  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
            "sve_dup $tmp, H, 0\n\t"
            "sve_uzp1 $dst, H, $dst, $tmp\t# convert F to S vector" %}
  ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}



// Vector cast F -> B: matches VectorCastF2X when the result vector holds T_BYTE
// elements and is at least 16 bytes long. Emits four instructions: convert the
// S-sized float lanes to integers in place, zero-fill tmp, then narrow twice
// with sve_uzp1 against tmp (first at H size, then at B size).
// NOTE(review): presumably each uzp1 keeps the low half of every lane of dst and
// fills the remainder of the result from the zeroed tmp - confirm against the
// Arm SVE UZP1 description.
instruct vcvtFtoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(4 * SVE_COST);
  // Operands are spelled $tmp so the disassembly shows the allocated register,
  // and the trailing comment stays on the line of the last instruction.
  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
            "sve_dup $tmp, H, 0\n\t"
            "sve_uzp1 $dst, H, $dst, $tmp\n\t"
            "sve_uzp1 $dst, B, $dst, $tmp\t# convert F to B vector" %}
  ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}



// Vector cast F -> L: matches VectorCastF2X when the result vector holds T_LONG
// elements and is at least 16 bytes long.
//
// The float bit patterns are first unpacked into D-sized lanes, and only then
// converted with the single-precision-to-64-bit form of sve_fcvtzs. Converting
// to a 32-bit integer first and sign-extending afterwards would clamp inputs
// outside the int range to the int limits, whereas a float-to-long cast has to
// saturate at the long limits.
instruct vcvtFtoL(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastF2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_sunpklo $dst, D, $src\n\t"
            "sve_fcvtzs $dst, D, $dst, S\t# convert F to L vector" %}
  ins_encode %{
    // Step 1: place each source S lane (raw float bits) in its own D lane.
    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
    // Step 2: convert in place; the S source size makes the instruction read
    // the float from the low half of every D lane and write a 64-bit integer.
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
  %}
  ins_pipe(pipe_slow);
%}
76 changes: 72 additions & 4 deletions src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Original file line number Diff line number Diff line change
@@ -1654,7 +1654,7 @@ VECTOR_CAST_X2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1)
VECTOR_CAST_X2F_NARROW1(D, F, fcvt, S, D, dup, S, uzp1)

dnl
define(`VECTOR_CAST_I2F', `
define(`VECTOR_CAST_X2X', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
@@ -1667,9 +1667,10 @@ instruct vcvt$1to$2`'(vReg dst, vReg src)
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4
VECTOR_CAST_I2F(I, F, scvtf, S)
VECTOR_CAST_I2F(L, D, scvtf, D)
dnl $1 $2 $3 $4
VECTOR_CAST_X2X(I, F, scvtf, S)
VECTOR_CAST_X2X(L, D, scvtf, D)
VECTOR_CAST_X2X(F, I, fcvtzs, S)

dnl
define(`VECTOR_CAST_X2F_EXTEND1', `
@@ -1691,3 +1692,70 @@ dnl $1 $2 $3 $4 $5 $6
VECTOR_CAST_X2F_EXTEND1(I, D, sunpklo, D, scvtf, D)
VECTOR_CAST_X2F_EXTEND1(S, F, sunpklo, S, scvtf, S)
VECTOR_CAST_X2F_EXTEND1(F, D, sunpklo, D, fcvt, S)

dnl
dnl VECTOR_CAST_F2X_NARROW1 generates one instruct rule named vcvt$1to$2 that
dnl matches VectorCast$1 2X for destination element type $2.
dnl   $1 source type letter        $2 destination type letter
dnl   $3 conversion instruction    $4 lane size used by the conversion
dnl   $5 instruction that fills tmp with zero
dnl   $6 lane size used for the fill and for the narrowing step
dnl   $7 narrowing instruction applied to dst and tmp
dnl The format string names the temporary as $tmp so that the printed
dnl disassembly shows the allocated register.
define(`VECTOR_CAST_F2X_NARROW1', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
            "sve_$5 $tmp, $6, 0\n\t"
            "sve_$7 $dst, $6, $dst, $tmp\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                     $1 $2 $3      $4 $5   $6 $7
VECTOR_CAST_F2X_NARROW1(F, S, fcvtzs, S, dup, H, uzp1)


dnl
dnl VECTOR_CAST_F2X_NARROW2 generates one instruct rule named vcvt$1to$2 that
dnl matches VectorCast$1 2X for destination element type $2 and narrows twice.
dnl   $1 source type letter        $2 destination type letter
dnl   $3 conversion instruction    $4 lane size used by the conversion
dnl   $5 instruction that fills tmp with zero
dnl   $6 lane size used for the fill and for the first narrowing step
dnl   $7 narrowing instruction applied to dst and tmp
dnl   $8 lane size used for the second narrowing step
dnl The format string names the temporary as $tmp so that the printed
dnl disassembly shows the allocated register, and keeps the trailing comment
dnl on the line of the last instruction.
define(`VECTOR_CAST_F2X_NARROW2', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(4 * SVE_COST);
  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
            "sve_$5 $tmp, $6, 0\n\t"
            "sve_$7 $dst, $6, $dst, $tmp\n\t"
            "sve_$7 $dst, $8, $dst, $tmp\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                     $1 $2 $3      $4 $5   $6 $7    $8
VECTOR_CAST_F2X_NARROW2(F, B, fcvtzs, S, dup, H, uzp1, B)


dnl
dnl VECTOR_CAST_F2X_EXTEND1 generates one instruct rule named vcvt$1to$2 that
dnl matches VectorCast$1 2X for a destination element type $2 wider than $1.
dnl   $1 source type letter        $2 destination type letter
dnl   $3 conversion instruction    $4 lane size of the floating-point source
dnl   $5 unpack instruction        $6 lane size of the integer destination
dnl The source lanes are unpacked to the destination lane size first and the
dnl conversion runs afterwards from size $4 to size $6. Converting at the
dnl narrow size first and widening the integer result afterwards would clamp
dnl out-of-range inputs to the limits of the narrow integer type instead of
dnl the limits of the destination type.
define(`VECTOR_CAST_F2X_EXTEND1', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_$5 $dst, $6, $src\n\t"
            "sve_$3 $dst, $6, $dst, $4\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($src$$reg));
    __ sve_$3(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $4);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                     $1 $2 $3      $4 $5       $6
VECTOR_CAST_F2X_EXTEND1(F, L, fcvtzs, S, sunpklo, D)
50 changes: 50 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -3452,6 +3452,56 @@ void mvnw(Register Rd, Register Rm,
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
}

private:

void encode_fcvtz_T (SIMD_RegVariant T_dst, SIMD_RegVariant T_src,
unsigned& opc, unsigned& opc2) {
assert(T_src != B && T_dst != B &&
T_src != Q && T_dst != Q, "invalid register variant");
if (T_src != D) {
assert(T_src <= T_dst, "invalid register variant");
} else {
assert(T_dst != H, "invalid register variant");
}
// In most cases we can treat T_dst,T_src as opc2,opc
// except following four cases. These cases should be converted
// according to Arm's architecture reference manual:
// +-----+------+---+-------------------------------------+
// | opc | opc2 | U | Instruction Details |
// +-----+------+---+-------------------------------------+
// | 11 | 10 | 0 | FCVTZS — Single-precision to 64-bit |
// | 11 | 10 | 1 | FCVTZU — Single-precision to 64-bit |
// | 11 | 00 | 0 | FCVTZS — Double-precision to 32-bit |
// | 11 | 00 | 1 | FCVTZU — Double-precision to 32-bit |
// +-----+------+---+-------------------------------------+
if (T_dst == D && T_src == S) { // Single-precision to 64-bit
T_dst = S;
T_src = D;
} else if (T_dst == S && T_src == D) { // Double-precision to 32-bit
T_dst = B;
T_src = D;
}
opc = T_src;
opc2 = T_dst;
}
public:

// SVE floating-point convert to integer (predicated)
//
// Each INSN expansion defines NAME(Zd, T_dst, Pg, Zn, T_src). The opc/opc2
// fields come from encode_fcvtz_T(T_dst, T_src, ...); `sign` is written to
// bit 16 (0b0 for sve_fcvtzs, 0b1 for sve_fcvtzu).
#define INSN(NAME, sign)                                                     \
  void NAME(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,           \
            FloatRegister Zn, SIMD_RegVariant T_src) {                       \
    starti;                                                                  \
    unsigned opc_field, opc2_field;                                          \
    encode_fcvtz_T(T_dst, T_src, opc_field, opc2_field);                     \
    f(0b01100101, 31, 24);                                                   \
    f(opc_field, 23, 22);                                                    \
    f(0b011, 21, 19);                                                        \
    f(opc2_field, 18, 17);                                                   \
    f(sign, 16);                                                             \
    f(0b101, 15, 13);                                                        \
    pgrf(Pg, 10);                                                            \
    rf(Zn, 5);                                                               \
    rf(Zd, 0);                                                               \
  }

  INSN(sve_fcvtzs, 0b0);
  INSN(sve_fcvtzu, 0b1);
#undef INSN

Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

2 changes: 2 additions & 0 deletions test/hotspot/gtest/aarch64/aarch64-asmtest.py
Original file line number Diff line number Diff line change
@@ -1590,6 +1590,8 @@ def generate(kind, names):
["scvtf", "__ sve_scvtf(z1, __ D, p0, z0, __ S);", "scvtf\tz1.d, p0/m, z0.s"],
["ucvtf", "__ sve_ucvtf(z3, __ D, p1, z2, __ S);", "ucvtf\tz3.d, p1/m, z2.s"],
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z18, __ D);", "fcvtzs\tz19.d, p2/m, z18.d"],
["fcvtzu", "__ sve_fcvtzu(z19, __ D, p2, z18, __ D);", "fcvtzu\tz19.d, p2/m, z18.d"],
])

print "\n// FloatImmediateOp"
Loading