@@ -946,6 +946,48 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
946
946
}
947
947
}
948
948
949
+ // Compress the least significant bit of each byte to the rightmost and clear
950
+ // the higher garbage bits.
951
+ void C2_MacroAssembler::bytemask_compress (Register dst) {
952
+ // Example input, dst = 0x01 00 00 00 01 01 00 01
953
+ // The "??" bytes are garbage.
954
+ orr (dst, dst, dst, Assembler::LSR, 7 ); // dst = 0x?? 02 ?? 00 ?? 03 ?? 01
955
+ orr (dst, dst, dst, Assembler::LSR, 14 ); // dst = 0x????????08 ??????0D
956
+ orr (dst, dst, dst, Assembler::LSR, 28 ); // dst = 0x????????????????8D
957
+ andr (dst, dst, 0xff ); // dst = 0x8D
958
+ }
959
+
960
+ // Pack the lowest-numbered bit of each mask element in src into a long value
961
+ // in dst, at most the first 64 lane elements.
962
+ // Clobbers: rscratch1
963
+ void C2_MacroAssembler::sve_vmask_tolong (Register dst, PRegister src, BasicType bt, int lane_cnt,
964
+ FloatRegister vtmp1, FloatRegister vtmp2, PRegister pgtmp) {
965
+ assert (pgtmp->is_governing (), " This register has to be a governing predicate register." );
966
+ assert (lane_cnt <= 64 && is_power_of_2 (lane_cnt), " Unsupported lane count" );
967
+ assert_different_registers (dst, rscratch1);
968
+
969
+ Assembler::SIMD_RegVariant size = elemType_to_regVariant (bt);
970
+
971
+ // Pack the mask into vector with sequential bytes.
972
+ sve_cpy (vtmp1, size, src, 1 , false );
973
+ if (bt != T_BYTE) {
974
+ sve_vector_narrow (vtmp1, B, vtmp1, size, vtmp2);
975
+ }
976
+
977
+ // Compress the lowest 8 bytes.
978
+ fmovd (dst, vtmp1);
979
+ bytemask_compress (dst);
980
+ if (lane_cnt <= 8 ) return ;
981
+
982
+ // Repeat on higher bytes and join the results.
983
+ // Compress 8 bytes in each iteration.
984
+ for (int idx = 1 ; idx < (lane_cnt / 8 ); idx++) {
985
+ idx == 1 ? fmovhid (rscratch1, vtmp1) : sve_extract (rscratch1, D, pgtmp, vtmp1, idx);
986
+ bytemask_compress (rscratch1);
987
+ orr (dst, dst, rscratch1, Assembler::LSL, idx << 3 );
988
+ }
989
+ }
990
+
949
991
void C2_MacroAssembler::sve_compare (PRegister pd, BasicType bt, PRegister pg,
950
992
FloatRegister zn, FloatRegister zm, int cond) {
951
993
assert (pg->is_governing (), " This register has to be a governing predicate register" );
@@ -1021,6 +1063,7 @@ void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst
1021
1063
FloatRegister src, SIMD_RegVariant src_size,
1022
1064
FloatRegister tmp) {
1023
1065
assert (dst_size < src_size && dst_size <= S && src_size <= D, " invalid element size" );
1066
+ assert_different_registers (src, tmp);
1024
1067
sve_dup (tmp, src_size, 0 );
1025
1068
if (src_size == D) {
1026
1069
switch (dst_size) {
0 commit comments