diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index f1a7831eb64..b862b871bc0 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2461,6 +2461,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType case Op_LoadVectorGather: case Op_StoreVectorScatter: case Op_CompressV: + case Op_CompressM: return false; default: break; diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad index e22602ea5e7..c31a3251492 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve.ad +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad @@ -149,8 +149,6 @@ source %{ case Op_LoadVector: case Op_StoreVector: return Matcher::vector_size_supported(bt, vlen); - case Op_CompressV: - return (bt == T_INT || bt == T_LONG); default: break; } @@ -5789,11 +5787,31 @@ instruct vmask_tolong(iRegLNoSp dst, pReg src, vReg vtmp1, vReg vtmp2, pRegGov p // ---------------------------- Compress/Expand Operations --------------------------- -instruct vcompress(vReg dst, vReg src, pRegGov pg) %{ +instruct mcompress(pReg dst, pReg mask, rFlagsReg cr) %{ predicate(UseSVE > 0); + match(Set dst (CompressM mask)); + effect(KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_cntp rscratch1, $mask\n\t" + "sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_cntp(rscratch1, size, ptrue, as_PRegister($mask$$reg)); + __ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1); + %} + ins_pipe(pipe_slow); +%} + +instruct vcompress(vReg dst, vReg src, pRegGov pg) %{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT || + n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (CompressV src pg)); ins_cost(SVE_COST); - format %{ "sve_compact $dst, $src, $pg\t# vector compress (sve)" %} + format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); @@ -5802,6 +5820,35 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{ ins_pipe(pipe_slow); %} +instruct vcompressB(vReg dst, vReg src, pReg mask, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4, + pReg ptmp, pRegGov pgtmp) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp); + match(Set dst (CompressV src mask)); + ins_cost(13 * SVE_COST); + format %{ "sve_compact $dst, $src, $mask\t# vector compress (B)" %} + ins_encode %{ + __ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($mask$$reg), + as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), + as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg), + as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcompressS(vReg dst, vReg src, pReg mask, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp); + match(Set dst (CompressV src mask)); + ins_cost(38 * SVE_COST); + 
format %{ "sve_compact $dst, $src, $mask\t# vector compress (H)" %} + ins_encode %{ + __ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($mask$$reg), + as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + // ---------------------------- Vector mask generation --------------------------- instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{ predicate(UseSVE > 0); diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 index 4b0e941f957..3d681660fb5 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -144,8 +144,6 @@ source %{ case Op_LoadVector: case Op_StoreVector: return Matcher::vector_size_supported(bt, vlen); - case Op_CompressV: - return (bt == T_INT || bt == T_LONG); default: break; } @@ -3207,11 +3205,31 @@ instruct vmask_tolong(iRegLNoSp dst, pReg src, vReg vtmp1, vReg vtmp2, pRegGov p // ---------------------------- Compress/Expand Operations --------------------------- -instruct vcompress(vReg dst, vReg src, pRegGov pg) %{ +instruct mcompress(pReg dst, pReg mask, rFlagsReg cr) %{ predicate(UseSVE > 0); + match(Set dst (CompressM mask)); + effect(KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_cntp rscratch1, $mask\n\t" + "sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_cntp(rscratch1, size, ptrue, as_PRegister($mask$$reg)); + __ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1); + %} + ins_pipe(pipe_slow); +%} + +instruct vcompress(vReg dst, vReg src, pRegGov pg) %{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT || + n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (CompressV src pg)); ins_cost(SVE_COST); - format %{ "sve_compact $dst, $src, $pg\t# vector compress (sve)" %} + format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); @@ -3220,6 +3238,35 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{ ins_pipe(pipe_slow); %} +instruct vcompressB(vReg dst, vReg src, pReg mask, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4, + pReg ptmp, pRegGov pgtmp) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp); + match(Set dst (CompressV src mask)); + ins_cost(13 * SVE_COST); + format %{ "sve_compact $dst, $src, $mask\t# vector compress (B)" %} + ins_encode %{ + __ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($mask$$reg), + as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), + as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg), + as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcompressS(vReg dst, vReg src, pReg mask, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp); + 
  match(Set dst (CompressV src mask));
+  ins_cost(38 * SVE_COST);
+  format %{ "sve_compact $dst, $src, $mask\t# vector compress (H)" %}
+  ins_encode %{
+    __ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($mask$$reg),
+                          as_FloatRegister($vtmp1$$reg), as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ---------------------------- Vector mask generation ---------------------------
 instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
   predicate(UseSVE > 0);
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index c7e5cda0881..90614c5018b 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -3659,14 +3659,26 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
   INSN(sve_lastb, 0b1);
 #undef INSN
 
-  // SVE create index starting from and incremented by immediate
+  // SVE Index Generation:
+  // Create index starting from and incremented by immediate
   void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
     starti;
+    assert(T != Q, "invalid size");
     f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
     sf(imm2, 20, 16), f(0b010000, 15, 10);
     sf(imm1, 9, 5), rf(Zd, 0);
   }
 
+  // SVE Index Generation:
+  // Create index starting from a general-purpose register and incremented by immediate
+  void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
+    sf(imm, 20, 16), f(0b010001, 15, 10);
+    zrf(Rn, 5), rf(Zd, 0);
+  }
+
   // SVE programmable table lookup/permute using vector of element indices
   void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
     starti;
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index ad170ddea2a..ffe8e9e2dc9 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -1268,3 +1268,116 @@ void C2_MacroAssembler::sve_ptrue_lanecnt(PRegister dst, SIMD_RegVariant size, i
     ShouldNotReachHere();
   }
 }
+
+// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
+// Any remaining elements of dst will be filled with zero.
+// Clobbers: rscratch1
+// Preserves: src, mask
+void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
+                                           FloatRegister vtmp1, FloatRegister vtmp2,
+                                           PRegister pgtmp) {
+  assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
+  assert_different_registers(dst, src, vtmp1, vtmp2);
+  assert_different_registers(mask, pgtmp);
+
+  // Example input:   src  = 8888 7777 6666 5555 4444 3333 2222 1111
+  //                  mask = 0001 0000 0000 0001 0001 0000 0001 0001
+  // Expected result: dst  = 0000 0000 0000 8888 5555 4444 2222 1111
+  sve_dup(vtmp2, H, 0);
+
+  // Extend the low half to type INT.
+  // dst = 00004444 00003333 00002222 00001111
+  sve_uunpklo(dst, S, src);
+  // pgtmp = 00000001 00000000 00000001 00000001
+  sve_punpklo(pgtmp, mask);
+  // Pack the active INT-sized elements to the right,
+  // and fill the remaining elements with zero.
+  // dst = 00000000 00004444 00002222 00001111
+  sve_compact(dst, S, dst, pgtmp);
+  // Narrow the result back to type SHORT.
+  // dst = 0000 0000 0000 0000 0000 4444 2222 1111
+  sve_uzp1(dst, H, dst, vtmp2);
+  // Count the active elements of the low half.
+  // rscratch1 = 3
+  sve_cntp(rscratch1, S, ptrue, pgtmp);
+
+  // Repeat for the high half.
+  // pgtmp = 00000001 00000000 00000000 00000001
+  sve_punpkhi(pgtmp, mask);
+  // vtmp1 = 00008888 00007777 00006666 00005555
+  sve_uunpkhi(vtmp1, S, src);
+  // vtmp1 = 00000000 00000000 00008888 00005555
+  sve_compact(vtmp1, S, vtmp1, pgtmp);
+  // vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
+  sve_uzp1(vtmp1, H, vtmp1, vtmp2);
+
+  // Compressed low:  dst   = 0000 0000 0000 0000 0000 4444 2222 1111
+  // Compressed high: vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
+  // Shift the compressed high part left (across lanes) by TRUE_CNT lanes, where
+  // TRUE_CNT is the number of active elements in the compressed low part.
+  neg(rscratch1, rscratch1);
+  // vtmp2 = {4 3 2 1 0 -1 -2 -3}
+  sve_index(vtmp2, H, rscratch1, 1);
+  // vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
+  sve_tbl(vtmp1, H, vtmp1, vtmp2);
+
+  // Combine the shifted compressed high part with the compressed low part.
+  // dst = 0000 0000 0000 8888 5555 4444 2222 1111
+  sve_orr(dst, dst, vtmp1);
+}
+
+// Clobbers: rscratch1, rscratch2
+// Preserves: src, mask
+void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
+                                          FloatRegister vtmp1, FloatRegister vtmp2,
+                                          FloatRegister vtmp3, FloatRegister vtmp4,
+                                          PRegister ptmp, PRegister pgtmp) {
+  assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
+  assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
+  assert_different_registers(mask, ptmp, pgtmp);
+  // Example input:   src  = 88 77 66 55 44 33 22 11
+  //                  mask = 01 00 00 01 01 00 01 01
+  // Expected result: dst  = 00 00 00 88 55 44 22 11
+
+  sve_dup(vtmp4, B, 0);
+  // Extend the low half to type SHORT.
+  // vtmp1 = 0044 0033 0022 0011
+  sve_uunpklo(vtmp1, H, src);
+  // ptmp = 0001 0000 0001 0001
+  sve_punpklo(ptmp, mask);
+  // Count the active elements of the low half.
+  // rscratch2 = 3
+  sve_cntp(rscratch2, H, ptrue, ptmp);
+  // Pack the active SHORT-sized elements to the right,
+  // and fill the remaining elements with zero.
+  // dst = 0000 0044 0022 0011
+  sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
+  // Narrow the result back to type BYTE.
+  // dst = 00 00 00 00 00 44 22 11
+  sve_uzp1(dst, B, dst, vtmp4);
+
+  // Repeat for the high half.
+  // ptmp = 0001 0000 0000 0001
+  sve_punpkhi(ptmp, mask);
+  // vtmp2 = 0088 0077 0066 0055
+  sve_uunpkhi(vtmp2, H, src);
+  // vtmp1 = 0000 0000 0088 0055
+  sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);
+
+  sve_dup(vtmp4, B, 0);
+  // vtmp1 = 00 00 00 00 00 00 88 55
+  sve_uzp1(vtmp1, B, vtmp1, vtmp4);
+
+  // Compressed low:  dst   = 00 00 00 00 00 44 22 11
+  // Compressed high: vtmp1 = 00 00 00 00 00 00 88 55
+  // Shift the compressed high part left (across lanes) by TRUE_CNT lanes, where
+  // TRUE_CNT is the number of active elements in the compressed low part.
+  neg(rscratch2, rscratch2);
+  // vtmp2 = {4 3 2 1 0 -1 -2 -3}
+  sve_index(vtmp2, B, rscratch2, 1);
+  // vtmp1 = 00 00 00 88 55 00 00 00
+  sve_tbl(vtmp1, B, vtmp1, vtmp2);
+  // Combine the shifted compressed high part with the compressed low part.
+ // dst = 00 00 00 88 55 44 22 11 + sve_orr(dst, dst, vtmp1); +} diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index d7af599dd28..07e1a1d67f5 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -112,4 +112,16 @@ sve_lastb(dst, size, pg, src); } + // Pack active elements of src, under the control of mask, into the + // lowest-numbered elements of dst. Any remaining elements of dst will + // be filled with zero. + void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask, + FloatRegister vtmp1, FloatRegister vtmp2, + FloatRegister vtmp3, FloatRegister vtmp4, + PRegister ptmp, PRegister pgtmp); + + void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask, + FloatRegister vtmp1, FloatRegister vtmp2, + PRegister pgtmp); + #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/test/hotspot/gtest/aarch64/aarch64-asmtest.py b/test/hotspot/gtest/aarch64/aarch64-asmtest.py index 50f8cb8a3c4..58adcc9a896 100644 --- a/test/hotspot/gtest/aarch64/aarch64-asmtest.py +++ b/test/hotspot/gtest/aarch64/aarch64-asmtest.py @@ -1708,6 +1708,10 @@ def generate(kind, names): ["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"], ["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"], ["index", "__ sve_index(z6, __ S, 1, 1);", "index\tz6.s, #1, #1"], + ["index", "__ sve_index(z6, __ B, r5, 2);", "index\tz6.b, w5, #2"], + ["index", "__ sve_index(z6, __ H, r5, 3);", "index\tz6.h, w5, #3"], + ["index", "__ sve_index(z6, __ S, r5, 4);", "index\tz6.s, w5, #4"], + ["index", "__ sve_index(z7, __ D, r5, 5);", "index\tz7.d, x5, #5"], ["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"], ["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"], ["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"], diff --git a/test/hotspot/gtest/aarch64/asmtest.out.h b/test/hotspot/gtest/aarch64/asmtest.out.h index 5d71b566676..de73eb17555 100644 --- a/test/hotspot/gtest/aarch64/asmtest.out.h +++ b/test/hotspot/gtest/aarch64/asmtest.out.h @@ -849,6 +849,10 @@ __ sve_lasta(v0, __ B, p0, z15); // lasta b0, p0, z15.b __ sve_lastb(v1, __ B, p1, z16); // lastb b1, p1, z16.b __ sve_index(z6, __ S, 1, 1); // index z6.s, #1, #1 + __ sve_index(z6, __ B, r5, 2); // index z6.b, w5, #2 + __ sve_index(z6, __ H, r5, 3); // index z6.h, w5, #3 + __ sve_index(z6, __ S, r5, 4); // index z6.s, w5, #4 + __ sve_index(z7, __ D, r5, 5); // index z7.d, x5, #5 __ sve_cpy(z7, __ H, p3, r5); // cpy z7.h, p3/m, w5 __ sve_tbl(z16, __ S, z17, z18); // tbl z16.s, {z17.s}, z18.s __ sve_ld1w_gather(z15, p0, r5, z16); // ld1w {z15.s}, p0/z, [x5, z16.s, uxtw #2] @@ -1145,30 +1149,30 @@ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x140003a7, 0x94000000, - 0x97ffffd4, 0x940003a4, 0x3400000a, 0x34fffa2a, - 0x3400742a, 0x35000008, 0x35fff9c8, 0x350073c8, - 0xb400000b, 0xb4fff96b, 0xb400736b, 0xb500001d, - 0xb5fff91d, 0xb500731d, 0x10000013, 0x10fff8b3, - 0x100072b3, 0x90000013, 0x36300016, 0x3637f836, - 0x36307236, 0x3758000c, 0x375ff7cc, 0x375871cc, + 0x14000000, 0x17ffffd7, 0x140003ab, 0x94000000, + 0x97ffffd4, 0x940003a8, 0x3400000a, 0x34fffa2a, + 0x340074aa, 0x35000008, 0x35fff9c8, 0x35007448, + 0xb400000b, 0xb4fff96b, 0xb40073eb, 0xb500001d, + 
0xb5fff91d, 0xb500739d, 0x10000013, 0x10fff8b3, + 0x10007333, 0x90000013, 0x36300016, 0x3637f836, + 0x363072b6, 0x3758000c, 0x375ff7cc, 0x3758724c, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x54006fa0, 0x54000001, 0x54fff541, 0x54006f41, - 0x54000002, 0x54fff4e2, 0x54006ee2, 0x54000002, - 0x54fff482, 0x54006e82, 0x54000003, 0x54fff423, - 0x54006e23, 0x54000003, 0x54fff3c3, 0x54006dc3, - 0x54000004, 0x54fff364, 0x54006d64, 0x54000005, - 0x54fff305, 0x54006d05, 0x54000006, 0x54fff2a6, - 0x54006ca6, 0x54000007, 0x54fff247, 0x54006c47, - 0x54000008, 0x54fff1e8, 0x54006be8, 0x54000009, - 0x54fff189, 0x54006b89, 0x5400000a, 0x54fff12a, - 0x54006b2a, 0x5400000b, 0x54fff0cb, 0x54006acb, - 0x5400000c, 0x54fff06c, 0x54006a6c, 0x5400000d, - 0x54fff00d, 0x54006a0d, 0x5400000e, 0x54ffefae, - 0x540069ae, 0x5400000f, 0x54ffef4f, 0x5400694f, + 0x54007020, 0x54000001, 0x54fff541, 0x54006fc1, + 0x54000002, 0x54fff4e2, 0x54006f62, 0x54000002, + 0x54fff482, 0x54006f02, 0x54000003, 0x54fff423, + 0x54006ea3, 0x54000003, 0x54fff3c3, 0x54006e43, + 0x54000004, 0x54fff364, 0x54006de4, 0x54000005, + 0x54fff305, 0x54006d85, 0x54000006, 0x54fff2a6, + 0x54006d26, 0x54000007, 0x54fff247, 0x54006cc7, + 0x54000008, 0x54fff1e8, 0x54006c68, 0x54000009, + 0x54fff189, 0x54006c09, 0x5400000a, 0x54fff12a, + 0x54006baa, 0x5400000b, 0x54fff0cb, 0x54006b4b, + 0x5400000c, 0x54fff06c, 0x54006aec, 0x5400000d, + 0x54fff00d, 0x54006a8d, 0x5400000e, 0x54ffefae, + 0x54006a2e, 0x5400000f, 0x54ffef4f, 0x540069cf, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, @@ -1200,7 +1204,7 @@ 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, - 0xbd1b1869, 0x5800599b, 0x1800000b, 0xf8945060, + 0xbd1b1869, 0x58005a1b, 0x1800000b, 0xf8945060, 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11, @@ -1319,7 +1323,8 @@ 0x6552ac26, 0x65cbac85, 0x65caac01, 0x65dea833, 0x659ca509, 0x65d8a801, 0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601, 0x052281e0, 0x05238601, - 0x04a14026, 0x0568aca7, 0x05b23230, 0x853040af, + 0x04a14026, 0x042244a6, 0x046344a6, 0x04a444a6, + 0x04e544a7, 0x0568aca7, 0x05b23230, 0x853040af, 0xc5b040af, 0xe57080af, 0xe5b080af, 0x25034440, 0x254054c4, 0x25034640, 0x25415a05, 0x25834440, 0x25c54489, 0x250b5d3a, 0x2550dc20, 0x2518e3e1,
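
Note (not part of the patch): the vcompressB/vcompressS rules fall back to the sve_compress_byte/sve_compress_short sequences because the sve_compact instruction used by the plain vcompress rule only handles word (S) and doubleword (D) elements. As a sanity reference for what every CompressV rule is expected to compute, a scalar sketch of the semantics could look like the following; compress_ref is a made-up illustrative name, not something in the patch.

// Scalar reference model of CompressV: copy the elements of src whose mask
// lane is active into the lowest-numbered lanes of dst, in order, and fill
// the remaining lanes with zero.
#include <cstddef>

template <typename T>
void compress_ref(T* dst, const T* src, const bool* mask, size_t lanes) {
  size_t out = 0;
  for (size_t i = 0; i < lanes; i++) {
    if (mask[i]) {
      dst[out++] = src[i];   // active element moves to the next free low lane
    }
  }
  for (; out < lanes; out++) {
    dst[out] = 0;            // zero-fill the tail
  }
}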
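Similarly, the mcompress rule relies on the observation that compressing a mask with itself always yields a prefix mask whose length equals the number of active lanes, which is why sve_cntp followed by sve_whilelo is enough (and why the rule clobbers the flags). A rough scalar sketch of that identity; compress_mask_ref is a hypothetical name used only here.

// Scalar model of CompressM: the result is a prefix mask of length
// popcount(mask), matching the sve_cntp + sve_whilelo lowering.
#include <cstddef>

void compress_mask_ref(bool* dst, const bool* mask, size_t lanes) {
  size_t active = 0;
  for (size_t i = 0; i < lanes; i++) {
    if (mask[i]) {
      active++;              // sve_cntp: count the active lanes
    }
  }
  for (size_t i = 0; i < lanes; i++) {
    dst[i] = (i < active);   // sve_whilelo zr, active: activate the first 'active' lanes
  }
}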
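Finally, the new register-start form of sve_index, paired with sve_tbl, is what performs the cross-lane left shift in sve_compress_short/byte: INDEX produces Zd[i] = Rn + i * imm, and TBL reads those values as unsigned element indices, so the "negative" lanes produced by the negated count wrap out of range and select zero. A rough scalar model of that pairing; index_tbl_shift_ref is hypothetical, and the signed bounds check stands in for TBL's unsigned out-of-range behaviour.

// Scalar model of sve_index(start, step) feeding sve_tbl: destination lane i
// reads src[start + i * step], or zero when that index is out of range.
#include <cstdint>
#include <cstddef>

void index_tbl_shift_ref(uint16_t* dst, const uint16_t* src,
                         int64_t start, int64_t step, size_t lanes) {
  for (size_t i = 0; i < lanes; i++) {
    int64_t idx = start + (int64_t)i * step;                    // sve_index
    dst[i] = (idx >= 0 && (size_t)idx < lanes) ? src[idx] : 0;  // sve_tbl
  }
}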