Skip to content

Commit 4924513

Browse files
Hao SunPengfei Li
Hao Sun
authored and
Pengfei Li
committedMar 9, 2022
8265263: AArch64: Combine vneg with right shift count
Reviewed-by: adinn, dlong
1 parent ea19114 commit 4924513

File tree

4 files changed

+809
-374
lines changed

4 files changed

+809
-374
lines changed
 

‎src/hotspot/cpu/aarch64/aarch64.ad

+9
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,9 @@ public:
13111311
// predicate controlling translation of CompareAndSwapX
13121312
bool needs_acquiring_load_exclusive(const Node *load);
13131313

1314+
// Assert that the given node is not a variable shift.
1315+
bool assert_not_var_shift(const Node* n);
1316+
13141317
// predicate controlling addressing modes
13151318
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
13161319
%}
@@ -1725,6 +1728,12 @@ bool needs_acquiring_load_exclusive(const Node *n)
17251728
return true;
17261729
}
17271730

1731+
// Assert that the given node is not a variable shift.
// Checks n->as_ShiftV()->is_var_shift() via assert() and then unconditionally
// returns true, so the call can be placed in an instruct predicate as a pure
// sanity check without influencing which rule matches.
1732+
bool assert_not_var_shift(const Node* n) {
1733+
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
1734+
return true;
1735+
}
1736+
17281737
#define __ _masm.
17291738

17301739
// advance declarations for helper functions to convert register

‎src/hotspot/cpu/aarch64/aarch64_neon.ad

+417-194
Large diffs are not rendered by default.

‎src/hotspot/cpu/aarch64/aarch64_neon_ad.m4

+254-180
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
2-
// Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
1+
// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
33
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
//
55
// This code is free software; you can redistribute it and/or modify it
@@ -1972,223 +1972,277 @@ VLOGICAL(xor, eor, xor, Xor, 16, B, X)
19721972

19731973
// ------------------------------ Shift ---------------------------------------
19741974
dnl
1975-
define(`VSHIFTCNT', `
1976-
instruct vshiftcnt$3$4`'(vec$5 dst, iRegIorL2I cnt) %{
1977-
predicate(UseSVE == 0 && (ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||`
1978-
')n->as_Vector()->length_in_bytes() == $3));
1975+
define(`VSLCNT', `
1976+
instruct vslcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
1977+
predicate(UseSVE == 0 && ifelse($1, 8,
1978+
(n->as_Vector()->length_in_bytes() == 4 ||`
1979+
'n->as_Vector()->length_in_bytes() == $1),
1980+
n->as_Vector()->length_in_bytes() == $1));
19791981
match(Set dst (LShiftCntV cnt));
1980-
match(Set dst (RShiftCntV cnt));
1981-
format %{ "$1 $dst, $cnt\t# shift count vector ($3$4)" %}
1982+
ins_cost(INSN_COST);
1983+
format %{ "dup $dst, $cnt\t# shift count vector ($1$2)" %}
19821984
ins_encode %{
1983-
__ $2(as_FloatRegister($dst$$reg), __ T$3$4, as_Register($cnt$$reg));
1985+
__ dup(as_FloatRegister($dst$$reg), __ T$1$2, as_Register($cnt$$reg));
19841986
%}
1985-
ins_pipe(vdup_reg_reg`'ifelse($5, D, 64, 128));
1987+
ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
19861988
%}')dnl
1987-
dnl $1 $2 $3 $4 $5
1988-
VSHIFTCNT(dup, dup, 8, B, D)
1989-
VSHIFTCNT(dup, dup, 16, B, X)
1989+
dnl
1990+
define(`VSRCNT', `
1991+
instruct vsrcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
1992+
predicate(UseSVE == 0 && ifelse($1, 8,
1993+
(n->as_Vector()->length_in_bytes() == 4 ||`
1994+
'n->as_Vector()->length_in_bytes() == $1),
1995+
n->as_Vector()->length_in_bytes() == $1));
1996+
match(Set dst (RShiftCntV cnt));
1997+
ins_cost(INSN_COST * 2);
1998+
format %{ "negw rscratch1, $cnt\t"
1999+
"dup $dst, rscratch1\t# shift count vector ($1$2)" %}
2000+
ins_encode %{
2001+
__ negw(rscratch1, as_Register($cnt$$reg));
2002+
__ dup(as_FloatRegister($dst$$reg), __ T$1$2, rscratch1);
2003+
%}
2004+
ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
2005+
%}')dnl
2006+
dnl
2007+
2008+
// Vector shift count
2009+
// Note-1: Low 8 bits of each element are used, so it doesn't matter if we
2010+
// treat it as ints or bytes here.
2011+
// Note-2: Shift value is negated for RShiftCntV additionally. See the comments
2012+
// on vsra8B rule for more details.
2013+
dnl $1 $2 $3
2014+
VSLCNT(8, B, D)
2015+
VSLCNT(16, B, X)
2016+
VSRCNT(8, B, D)
2017+
VSRCNT(16, B, X)
2018+
dnl
2019+
define(`PREDICATE',
2020+
`ifelse($1, 8B,
2021+
ifelse($3, `', `predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);',
2022+
`predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&`
2023+
'$3);'),
2024+
$1, 4S,
2025+
ifelse($3, `', `predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);',
2026+
`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&`
2027+
'$3);'),
2028+
ifelse($3, `', `predicate(n->as_Vector()->length() == $2);',
2029+
`predicate(n->as_Vector()->length() == $2 && $3);'))')dnl
19902030
dnl
19912031
define(`VSLL', `
1992-
instruct vsll$3$4`'(vec$6 dst, vec$6 src, vec$6 shift) %{
1993-
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
1994-
',
1995-
$3$4, 4S, n->as_Vector()->length() == 2 ||`
1996-
')n->as_Vector()->length() == $3);
1997-
match(Set dst (LShiftV$4 src shift));
2032+
instruct vsll$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2033+
PREDICATE(`$1$2', $1, )
2034+
match(Set dst (LShiftV$2 src shift));
19982035
ins_cost(INSN_COST);
1999-
format %{ "$1 $dst,$src,$shift\t# vector ($3$5)" %}
2036+
format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
20002037
ins_encode %{
2001-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2038+
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
20022039
as_FloatRegister($src$$reg),
20032040
as_FloatRegister($shift$$reg));
20042041
%}
2005-
ins_pipe(vshift`'ifelse($6, D, 64, 128));
2042+
ins_pipe(vshift`'ifelse($4, D, 64, 128));
20062043
%}')dnl
20072044
dnl
20082045
define(`VSRA', `
2009-
instruct vsra$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{
2010-
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
2011-
',
2012-
$3$4, 4S, n->as_Vector()->length() == 2 ||`
2013-
')n->as_Vector()->length() == $3);
2014-
match(Set dst (RShiftV$4 src shift));
2046+
instruct vsra$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2047+
PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
2048+
match(Set dst (RShiftV$2 src shift));
20152049
ins_cost(INSN_COST);
2016-
effect(TEMP tmp);
2017-
format %{ "$1 $tmp,$shift\t"
2018-
"$2 $dst,$src,$tmp\t# vector ($3$5)" %}
2050+
format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
20192051
ins_encode %{
2020-
__ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B),
2052+
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
2053+
as_FloatRegister($src$$reg),
20212054
as_FloatRegister($shift$$reg));
2022-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2055+
%}
2056+
ins_pipe(vshift`'ifelse($4, D, 64, 128));
2057+
%}')dnl
2058+
dnl
2059+
define(`VSRA_VAR', `
2060+
instruct vsra$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2061+
PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
2062+
match(Set dst (RShiftV$2 src shift));
2063+
ins_cost(INSN_COST * 2);
2064+
effect(TEMP_DEF dst);
2065+
format %{ "negr $dst,$shift\t"
2066+
"sshl $dst,$src,$dst\t# vector ($1$3)" %}
2067+
ins_encode %{
2068+
__ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
2069+
as_FloatRegister($shift$$reg));
2070+
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
20232071
as_FloatRegister($src$$reg),
2024-
as_FloatRegister($tmp$$reg));
2072+
as_FloatRegister($dst$$reg));
20252073
%}
2026-
ins_pipe(vshift`'ifelse($6, D, 64, 128));
2074+
ins_pipe(vshift`'ifelse($4, D, 64, 128));
20272075
%}')dnl
20282076
dnl
20292077
define(`VSRL', `
2030-
instruct vsrl$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{
2031-
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
2032-
',
2033-
$3$4, 4S, n->as_Vector()->length() == 2 ||`
2034-
')n->as_Vector()->length() == $3);
2035-
match(Set dst (URShiftV$4 src shift));
2078+
instruct vsrl$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2079+
PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
2080+
match(Set dst (URShiftV$2 src shift));
20362081
ins_cost(INSN_COST);
2037-
effect(TEMP tmp);
2038-
format %{ "$1 $tmp,$shift\t"
2039-
"$2 $dst,$src,$tmp\t# vector ($3$5)" %}
2082+
format %{ "ushl $dst,$src,$shift\t# vector ($1$3)" %}
2083+
ins_encode %{
2084+
__ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
2085+
as_FloatRegister($src$$reg),
2086+
as_FloatRegister($shift$$reg));
2087+
%}
2088+
ins_pipe(vshift`'ifelse($4, D, 64, 128));
2089+
%}')dnl
2090+
dnl
2091+
define(`VSRL_VAR', `
2092+
instruct vsrl$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
2093+
PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
2094+
match(Set dst (URShiftV$2 src shift));
2095+
ins_cost(INSN_COST * 2);
2096+
effect(TEMP_DEF dst);
2097+
format %{ "negr $dst,$shift\t"
2098+
"ushl $dst,$src,$dst\t# vector ($1$3)" %}
20402099
ins_encode %{
2041-
__ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B),
2100+
__ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
20422101
as_FloatRegister($shift$$reg));
2043-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2102+
__ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
20442103
as_FloatRegister($src$$reg),
2045-
as_FloatRegister($tmp$$reg));
2104+
as_FloatRegister($dst$$reg));
20462105
%}
2047-
ins_pipe(vshift`'ifelse($6, D, 64, 128));
2106+
ins_pipe(vshift`'ifelse($4, D, 64, 128));
20482107
%}')dnl
20492108
dnl
20502109
define(`VSLL_IMM', `
2051-
instruct vsll$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
2052-
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
2053-
',
2054-
$3$4, 4S, n->as_Vector()->length() == 2 ||`
2055-
')n->as_Vector()->length() == $3);
2056-
match(Set dst (LShiftV$4 src (LShiftCntV shift)));
2057-
ins_cost(INSN_COST);
2058-
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
2059-
ins_encode %{ifelse($4, B,`
2110+
instruct vsll$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
2111+
PREDICATE(`$1$2', $1, assert_not_var_shift(n))
2112+
match(Set dst (LShiftV$2 src (LShiftCntV shift)));
2113+
ins_cost(INSN_COST);
2114+
format %{ "shl $dst, $src, $shift\t# vector ($1$3)" %}
2115+
ins_encode %{ifelse($2, B,`
20602116
int sh = (int)$shift$$constant;
20612117
if (sh >= 8) {
2062-
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
2118+
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
20632119
as_FloatRegister($src$$reg),
20642120
as_FloatRegister($src$$reg));
20652121
} else {
2066-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2122+
__ shl(as_FloatRegister($dst$$reg), __ T$1$3,
20672123
as_FloatRegister($src$$reg), sh);
2068-
}', $4, S,`
2124+
}', $2, S,`
20692125
int sh = (int)$shift$$constant;
20702126
if (sh >= 16) {
2071-
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
2127+
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
20722128
as_FloatRegister($src$$reg),
20732129
as_FloatRegister($src$$reg));
20742130
} else {
2075-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2131+
__ shl(as_FloatRegister($dst$$reg), __ T$1$3,
20762132
as_FloatRegister($src$$reg), sh);
20772133
}', `
2078-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2134+
__ shl(as_FloatRegister($dst$$reg), __ T$1$3,
20792135
as_FloatRegister($src$$reg),
20802136
(int)$shift$$constant);')
20812137
%}
2082-
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
2138+
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
20832139
%}')dnl
2140+
dnl
20842141
define(`VSRA_IMM', `
2085-
instruct vsra$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
2086-
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
2087-
',
2088-
$3$4, 4S, n->as_Vector()->length() == 2 ||`
2089-
')n->as_Vector()->length() == $3);
2090-
match(Set dst (RShiftV$4 src (RShiftCntV shift)));
2091-
ins_cost(INSN_COST);
2092-
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
2093-
ins_encode %{ifelse($4, B,`
2142+
instruct vsra$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
2143+
PREDICATE(`$1$2', $1, assert_not_var_shift(n))
2144+
match(Set dst (RShiftV$2 src (RShiftCntV shift)));
2145+
ins_cost(INSN_COST);
2146+
format %{ "sshr $dst, $src, $shift\t# vector ($1$3)" %}
2147+
ins_encode %{ifelse($2, B,`
20942148
int sh = (int)$shift$$constant;
20952149
if (sh >= 8) sh = 7;
2096-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2097-
as_FloatRegister($src$$reg), sh);', $4, S,`
2150+
__ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
2151+
as_FloatRegister($src$$reg), sh);', $2, S,`
20982152
int sh = (int)$shift$$constant;
20992153
if (sh >= 16) sh = 15;
2100-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2101-
as_FloatRegister($src$$reg), sh);', `
2102-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2154+
__ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
2155+
as_FloatRegister($src$$reg), sh);', `
2156+
__ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
21032157
as_FloatRegister($src$$reg),
21042158
(int)$shift$$constant);')
21052159
%}
2106-
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
2160+
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
21072161
%}')dnl
21082162
dnl
21092163
define(`VSRL_IMM', `
2110-
instruct vsrl$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
2111-
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
2112-
',
2113-
$3$4, 4S, n->as_Vector()->length() == 2 ||`
2114-
')n->as_Vector()->length() == $3);
2115-
match(Set dst (URShiftV$4 src (RShiftCntV shift)));
2116-
ins_cost(INSN_COST);
2117-
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
2118-
ins_encode %{ifelse($4, B,`
2164+
instruct vsrl$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
2165+
PREDICATE(`$1$2', $1, assert_not_var_shift(n))
2166+
match(Set dst (URShiftV$2 src (RShiftCntV shift)));
2167+
ins_cost(INSN_COST);
2168+
format %{ "ushr $dst, $src, $shift\t# vector ($1$3)" %}
2169+
ins_encode %{ifelse($2, B,`
21192170
int sh = (int)$shift$$constant;
21202171
if (sh >= 8) {
2121-
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
2172+
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
21222173
as_FloatRegister($src$$reg),
21232174
as_FloatRegister($src$$reg));
21242175
} else {
2125-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2126-
as_FloatRegister($src$$reg), sh);
2127-
}', $4, S,`
2176+
__ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
2177+
as_FloatRegister($src$$reg), sh);
2178+
}', $2, S,`
21282179
int sh = (int)$shift$$constant;
21292180
if (sh >= 16) {
2130-
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
2181+
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
21312182
as_FloatRegister($src$$reg),
21322183
as_FloatRegister($src$$reg));
21332184
} else {
2134-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2135-
as_FloatRegister($src$$reg), sh);
2185+
__ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
2186+
as_FloatRegister($src$$reg), sh);
21362187
}', `
2137-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2188+
__ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
21382189
as_FloatRegister($src$$reg),
21392190
(int)$shift$$constant);')
21402191
%}
2141-
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
2192+
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
21422193
%}')dnl
21432194
dnl
21442195
define(`VSRLA_IMM', `
2145-
instruct vsrla$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
2146-
predicate(n->as_Vector()->length() == $3);
2147-
match(Set dst (AddV$4 dst (URShiftV$4 src (RShiftCntV shift))));
2196+
instruct vsrla$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
2197+
predicate(n->as_Vector()->length() == $1);
2198+
match(Set dst (AddV$2 dst (URShiftV$2 src (RShiftCntV shift))));
21482199
ins_cost(INSN_COST);
2149-
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
2150-
ins_encode %{ifelse($4, B,`
2200+
format %{ "usra $dst, $src, $shift\t# vector ($1$3)" %}
2201+
ins_encode %{ifelse($2, B,`
21512202
int sh = (int)$shift$$constant;
21522203
if (sh < 8) {
2153-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2154-
as_FloatRegister($src$$reg), sh);
2155-
}', $4, S,`
2204+
__ usra(as_FloatRegister($dst$$reg), __ T$1$3,
2205+
as_FloatRegister($src$$reg), sh);
2206+
}', $2, S,`
21562207
int sh = (int)$shift$$constant;
21572208
if (sh < 16) {
2158-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2159-
as_FloatRegister($src$$reg), sh);
2209+
__ usra(as_FloatRegister($dst$$reg), __ T$1$3,
2210+
as_FloatRegister($src$$reg), sh);
21602211
}', `
2161-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2212+
__ usra(as_FloatRegister($dst$$reg), __ T$1$3,
21622213
as_FloatRegister($src$$reg),
21632214
(int)$shift$$constant);')
21642215
%}
2165-
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
2216+
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
21662217
%}')dnl
21672218
dnl
21682219
define(`VSRAA_IMM', `
2169-
instruct vsraa$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
2170-
predicate(n->as_Vector()->length() == $3);
2171-
match(Set dst (AddV$4 dst (RShiftV$4 src (RShiftCntV shift))));
2220+
instruct vsraa$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
2221+
predicate(n->as_Vector()->length() == $1);
2222+
match(Set dst (AddV$2 dst (RShiftV$2 src (RShiftCntV shift))));
21722223
ins_cost(INSN_COST);
2173-
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
2174-
ins_encode %{ifelse($4, B,`
2224+
format %{ "ssra $dst, $src, $shift\t# vector ($1$3)" %}
2225+
ins_encode %{ifelse($2, B,`
21752226
int sh = (int)$shift$$constant;
21762227
if (sh >= 8) sh = 7;
2177-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2178-
as_FloatRegister($src$$reg), sh);', $4, S,`
2228+
__ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
2229+
as_FloatRegister($src$$reg), sh);', $2, S,`
21792230
int sh = (int)$shift$$constant;
21802231
if (sh >= 16) sh = 15;
2181-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2182-
as_FloatRegister($src$$reg), sh);', `
2183-
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
2232+
__ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
2233+
as_FloatRegister($src$$reg), sh);', `
2234+
__ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
21842235
as_FloatRegister($src$$reg),
21852236
(int)$shift$$constant);')
21862237
%}
2187-
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
2238+
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
21882239
%}')dnl
2189-
dnl $1 $2 $3 $4 $5 $6
2190-
VSLL(sshl, sshl, 8, B, B, D)
2191-
VSLL(sshl, sshl, 16, B, B, X)
2240+
dnl
2241+
undefine(PREDICATE)dnl
2242+
dnl
2243+
dnl $1 $2 $3 $4
2244+
VSLL(8, B, B, D)
2245+
VSLL(16, B, B, X)
21922246

21932247
// Right shifts with vector shift count on aarch64 SIMD are implemented
21942248
// as left shift by negative shift count.
@@ -2199,8 +2253,6 @@ VSLL(sshl, sshl, 16, B, B, X)
21992253
// LoadVector RShiftCntV
22002254
// | /
22012255
// RShiftVI
2202-
// Note: In inner loop, multiple neg instructions are used, which can be
2203-
// moved to outer loop and merge into one neg instruction.
22042256
//
22052257
// Case 2: The vector shift count is from loading.
22062258
// This case isn't supported by middle-end now. But it's supported by
@@ -2210,61 +2262,83 @@ VSLL(sshl, sshl, 16, B, B, X)
22102262
// | /
22112263
// RShiftVI
22122264
//
2213-
dnl $1 $2 $3 $4 $5 $6
2214-
VSRA(negr, sshl, 8, B, B, D)
2215-
VSRA(negr, sshl, 16, B, B, X)
2216-
VSRL(negr, ushl, 8, B, B, D)
2217-
VSRL(negr, ushl, 16, B, B, X)
2218-
VSLL_IMM(shl, shl, 8, B, B, D)
2219-
VSLL_IMM(shl, shl, 16, B, B, X)
2220-
VSRA_IMM(sshr, sshr, 8, B, B, D)
2221-
VSRA_IMM(sshr, sshr, 16, B, B, X)
2222-
VSRL_IMM(ushr, ushr, 8, B, B, D)
2223-
VSRL_IMM(ushr, ushr, 16, B, B, X)
2224-
VSLL(sshl, sshl, 4, S, H, D)
2225-
VSLL(sshl, sshl, 8, S, H, X)
2226-
VSRA(negr, sshl, 4, S, H, D)
2227-
VSRA(negr, sshl, 8, S, H, X)
2228-
VSRL(negr, ushl, 4, S, H, D)
2229-
VSRL(negr, ushl, 8, S, H, X)
2230-
VSLL_IMM(shl, shl, 4, S, H, D)
2231-
VSLL_IMM(shl, shl, 8, S, H, X)
2232-
VSRA_IMM(sshr, sshr, 4, S, H, D)
2233-
VSRA_IMM(sshr, sshr, 8, S, H, X)
2234-
VSRL_IMM(ushr, ushr, 4, S, H, D)
2235-
VSRL_IMM(ushr, ushr, 8, S, H, X)
2236-
VSLL(sshl, sshl, 2, I, S, D)
2237-
VSLL(sshl, sshl, 4, I, S, X)
2238-
VSRA(negr, sshl, 2, I, S, D)
2239-
VSRA(negr, sshl, 4, I, S, X)
2240-
VSRL(negr, ushl, 2, I, S, D)
2241-
VSRL(negr, ushl, 4, I, S, X)
2242-
VSLL_IMM(shl, shl, 2, I, S, D)
2243-
VSLL_IMM(shl, shl, 4, I, S, X)
2244-
VSRA_IMM(sshr, sshr, 2, I, S, D)
2245-
VSRA_IMM(sshr, sshr, 4, I, S, X)
2246-
VSRL_IMM(ushr, ushr, 2, I, S, D)
2247-
VSRL_IMM(ushr, ushr, 4, I, S, X)
2248-
VSLL(sshl, sshl, 2, L, D, X)
2249-
VSRA(negr, sshl, 2, L, D, X)
2250-
VSRL(negr, ushl, 2, L, D, X)
2251-
VSLL_IMM(shl, shl, 2, L, D, X)
2252-
VSRA_IMM(sshr, sshr, 2, L, D, X)
2253-
VSRL_IMM(ushr, ushr, 2, L, D, X)
2254-
VSRAA_IMM(ssra, ssra, 8, B, B, D)
2255-
VSRAA_IMM(ssra, ssra, 16, B, B, X)
2256-
VSRAA_IMM(ssra, ssra, 4, S, H, D)
2257-
VSRAA_IMM(ssra, ssra, 8, S, H, X)
2258-
VSRAA_IMM(ssra, ssra, 2, I, S, D)
2259-
VSRAA_IMM(ssra, ssra, 4, I, S, X)
2260-
VSRAA_IMM(ssra, ssra, 2, L, D, X)
2261-
VSRLA_IMM(usra, usra, 8, B, B, D)
2262-
VSRLA_IMM(usra, usra, 16, B, B, X)
2263-
VSRLA_IMM(usra, usra, 4, S, H, D)
2264-
VSRLA_IMM(usra, usra, 8, S, H, X)
2265-
VSRLA_IMM(usra, usra, 2, I, S, D)
2266-
VSRLA_IMM(usra, usra, 4, I, S, X)
2267-
VSRLA_IMM(usra, usra, 2, L, D, X)
2265+
// The negate is conducted in RShiftCntV rule for case 1, whereas it's done in
2266+
// RShiftV* rules for case 2, because there exists an optimization opportunity
2267+
// for case 1, that is, multiple neg instructions in inner loop can be hoisted
2268+
// to outer loop and merged into one neg instruction.
2269+
//
2270+
// Note that ShiftVNode::is_var_shift() indicates whether the vector shift
2271+
// count is a variable vector (case 2) or not (a vector generated by RShiftCntV,
2272+
// i.e. case 1).
2273+
dnl $1 $2 $3 $4
2274+
VSRA(8, B, B, D)
2275+
VSRA_VAR(8, B, B, D)
2276+
VSRA(16, B, B, X)
2277+
VSRA_VAR(16, B, B, X)
2278+
VSRL(8, B, B, D)
2279+
VSRL_VAR(8, B, B, D)
2280+
VSRL(16, B, B, X)
2281+
VSRL_VAR(16, B, B, X)
2282+
VSLL_IMM(8, B, B, D)
2283+
VSLL_IMM(16, B, B, X)
2284+
VSRA_IMM(8, B, B, D)
2285+
VSRA_IMM(16, B, B, X)
2286+
VSRL_IMM(8, B, B, D)
2287+
VSRL_IMM(16, B, B, X)
2288+
VSLL(4, S, H, D)
2289+
VSLL(8, S, H, X)
2290+
VSRA(4, S, H, D)
2291+
VSRA_VAR(4, S, H, D)
2292+
VSRA(8, S, H, X)
2293+
VSRA_VAR(8, S, H, X)
2294+
VSRL(4, S, H, D)
2295+
VSRL_VAR(4, S, H, D)
2296+
VSRL(8, S, H, X)
2297+
VSRL_VAR(8, S, H, X)
2298+
VSLL_IMM(4, S, H, D)
2299+
VSLL_IMM(8, S, H, X)
2300+
VSRA_IMM(4, S, H, D)
2301+
VSRA_IMM(8, S, H, X)
2302+
VSRL_IMM(4, S, H, D)
2303+
VSRL_IMM(8, S, H, X)
2304+
VSLL(2, I, S, D)
2305+
VSLL(4, I, S, X)
2306+
VSRA(2, I, S, D)
2307+
VSRA_VAR(2, I, S, D)
2308+
VSRA(4, I, S, X)
2309+
VSRA_VAR(4, I, S, X)
2310+
VSRL(2, I, S, D)
2311+
VSRL_VAR(2, I, S, D)
2312+
VSRL(4, I, S, X)
2313+
VSRL_VAR(4, I, S, X)
2314+
VSLL_IMM(2, I, S, D)
2315+
VSLL_IMM(4, I, S, X)
2316+
VSRA_IMM(2, I, S, D)
2317+
VSRA_IMM(4, I, S, X)
2318+
VSRL_IMM(2, I, S, D)
2319+
VSRL_IMM(4, I, S, X)
2320+
VSLL(2, L, D, X)
2321+
VSRA(2, L, D, X)
2322+
VSRA_VAR(2, L, D, X)
2323+
VSRL(2, L, D, X)
2324+
VSRL_VAR(2, L, D, X)
2325+
VSLL_IMM(2, L, D, X)
2326+
VSRA_IMM(2, L, D, X)
2327+
VSRL_IMM(2, L, D, X)
2328+
VSRAA_IMM(8, B, B, D)
2329+
VSRAA_IMM(16, B, B, X)
2330+
VSRAA_IMM(4, S, H, D)
2331+
VSRAA_IMM(8, S, H, X)
2332+
VSRAA_IMM(2, I, S, D)
2333+
VSRAA_IMM(4, I, S, X)
2334+
VSRAA_IMM(2, L, D, X)
2335+
VSRLA_IMM(8, B, B, D)
2336+
VSRLA_IMM(16, B, B, X)
2337+
VSRLA_IMM(4, S, H, D)
2338+
VSRLA_IMM(8, S, H, X)
2339+
VSRLA_IMM(2, I, S, D)
2340+
VSRLA_IMM(4, I, S, X)
2341+
VSRLA_IMM(2, L, D, X)
22682342
dnl
22692343
define(`VMINMAX', `
22702344
instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright (c) 2022, Arm Limited. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package org.openjdk.bench.vm.compiler;
24+
25+
import org.openjdk.jmh.annotations.*;
26+
import org.openjdk.jmh.infra.*;
27+
28+
import java.util.concurrent.TimeUnit;
29+
import java.util.Random;
30+
31+
@BenchmarkMode(Mode.AverageTime)
32+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
33+
@State(Scope.Thread)
34+
public class VectorShiftRight {
35+
@Param({"1024"})
36+
public int SIZE;
37+
38+
private byte[] bytesA, bytesB;
39+
private short[] shortsA, shortsB;
40+
private char[] charsA, charsB;
41+
private int[] intsA, intsB;
42+
private long[] longsA, longsB;
43+
44+
@Param("0")
45+
private int seed;
46+
private Random r = new Random(seed);
47+
48+
@Param("3")
49+
private int shiftCount;
50+
51+
@Setup
52+
public void init() {
53+
bytesA = new byte[SIZE];
54+
shortsA = new short[SIZE];
55+
charsA = new char[SIZE];
56+
intsA = new int[SIZE];
57+
longsA = new long[SIZE];
58+
59+
bytesB = new byte[SIZE];
60+
shortsB = new short[SIZE];
61+
charsB = new char[SIZE];
62+
intsB = new int[SIZE];
63+
longsB = new long[SIZE];
64+
65+
for (int i = 0; i < SIZE; i++) {
66+
bytesA[i] = (byte) r.nextInt();
67+
shortsA[i] = (short) r.nextInt();
68+
charsA[i] = (char) r.nextInt();
69+
intsA[i] = r.nextInt();
70+
longsA[i] = r.nextLong();
71+
}
72+
}
73+
74+
@Benchmark
75+
public void rShiftByte() {
76+
for (int i = 0; i < SIZE; i++) {
77+
bytesB[i] = (byte) (bytesA[i] >> shiftCount);
78+
}
79+
}
80+
81+
@Benchmark
82+
public void urShiftByte() {
83+
for (int i = 0; i < SIZE; i++) {
84+
bytesB[i] = (byte) (bytesA[i] >>> shiftCount);
85+
}
86+
}
87+
88+
@Benchmark
89+
public void rShiftShort() {
90+
for (int i = 0; i < SIZE; i++) {
91+
shortsB[i] = (short) (shortsA[i] >> shiftCount);
92+
}
93+
}
94+
95+
@Benchmark
96+
public void urShiftChar() {
97+
for (int i = 0; i < SIZE; i++) {
98+
charsB[i] = (char) (charsA[i] >>> shiftCount);
99+
}
100+
}
101+
102+
@Benchmark
103+
public void rShiftInt() {
104+
for (int i = 0; i < SIZE; i++) {
105+
intsB[i] = intsA[i] >> shiftCount;
106+
}
107+
}
108+
109+
@Benchmark
110+
public void urShiftInt() {
111+
for (int i = 0; i < SIZE; i++) {
112+
intsB[i] = intsA[i] >>> shiftCount;
113+
}
114+
}
115+
116+
@Benchmark
117+
public void rShiftLong() {
118+
for (int i = 0; i < SIZE; i++) {
119+
longsB[i] = longsA[i] >> shiftCount;
120+
}
121+
}
122+
123+
@Benchmark
124+
public void urShiftLong() {
125+
for (int i = 0; i < SIZE; i++) {
126+
longsB[i] = longsA[i] >>> shiftCount;
127+
}
128+
}
129+
}

0 commit comments

Comments
 (0)
Please sign in to comment.