Skip to content

Commit 088b244

Browse files
Patric Hedlin, luhenry
Patric Hedlin
and committed Dec 2, 2021
8251216: Implement MD5 intrinsics on AArch64
Co-authored-by: Ludovic Henry <luhenry@openjdk.org> Reviewed-by: aph, neliasso
1 parent a093cdd commit 088b244

File tree

4 files changed

+198
-6
lines changed

4 files changed

+198
-6
lines changed
 

‎src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

+192
Original file line numberDiff line numberDiff line change
@@ -3210,6 +3210,194 @@ class StubGenerator: public StubCodeGenerator {
32103210
return start;
32113211
}
32123212

3213+
// Arguments:
3214+
//
3215+
// Inputs:
3216+
// c_rarg0 - byte[] source+offset
3217+
// c_rarg1 - int[] MD5.state
3218+
// c_rarg2 - int offset
3219+
// c_rarg3 - int limit
3220+
//
// Generator parameters:
// multi_block - when true, the emitted code advances buf and ofs by 64 after
//               each block, repeats while ofs <= limit, and leaves the final
//               ofs in c_rarg0; when false, exactly one block is processed
// name        - stub name handed to StubCodeMark
//
// Returns the pc captured at the start of the emitted stub.
3221+
address generate_md5_implCompress(bool multi_block, const char *name) {
3222+
__ align(CodeEntryAlignment);
3223+
StubCodeMark mark(this, "StubRoutines", name);
3224+
address start = __ pc();
3225+
3226+
// Incoming arguments, in the order listed in the header comment.
Register buf = c_rarg0;
3227+
Register state = c_rarg1;
3228+
Register ofs = c_rarg2;
3229+
Register limit = c_rarg3;
3230+
// Working registers for the four 32-bit hash words during the rounds.
Register a = r4;
3231+
Register b = r5;
3232+
Register c = r6;
3233+
Register d = r7;
3234+
// Two additional scratch registers declared locally; rscratch1 and rscratch2
// used below are not declared in this function.
Register rscratch3 = r10;
3235+
Register rscratch4 = r11;
3236+
3237+
// NOTE(review): `keys` is never bound or referenced in this function.
Label keys;
3238+
Label md5_loop;
3239+
3240+
// Start of the per-block code; only the multi_block tail branches back here.
__ BIND(md5_loop);
3241+
3242+
// Load the current hash state into a..d. The previous values are not kept in
// registers; they are re-read from `state` and added back after the rounds.
3243+
__ ldrw(a, Address(state, 0));
3244+
__ ldrw(b, Address(state, 4));
3245+
__ ldrw(c, Address(state, 8));
3246+
__ ldrw(d, Address(state, 12));
3247+
3248+
// In the four step macros below, r1..r4 are the macros' positional parameters
// (bound to a/b/c/d in rotating order), not the machine registers of the same
// names. k selects the 32-bit word at byte offset k*4 in buf, t is the additive
// constant and s the left-rotate amount (emitted as rotate-right by 32 - s).
//
// FF step: r1 = r2 + rotl(r1 + (((r3 ^ r4) & r2) ^ r4) + buf[k] + t, s)
#define FF(r1, r2, r3, r4, k, s, t) \
3249+
__ eorw(rscratch3, r3, r4); \
3250+
__ movw(rscratch2, t); \
3251+
__ andw(rscratch3, rscratch3, r2); \
3252+
__ addw(rscratch4, r1, rscratch2); \
3253+
__ ldrw(rscratch1, Address(buf, k*4)); \
3254+
__ eorw(rscratch3, rscratch3, r4); \
3255+
__ addw(rscratch3, rscratch3, rscratch1); \
3256+
__ addw(rscratch3, rscratch3, rscratch4); \
3257+
__ rorw(rscratch2, rscratch3, 32 - s); \
3258+
__ addw(r1, rscratch2, r2);
3259+
3260+
// GG step: r1 = r2 + rotl(r1 + (((r2 ^ r3) & r4) ^ r3) + buf[k] + t, s)
#define GG(r1, r2, r3, r4, k, s, t) \
3261+
__ eorw(rscratch2, r2, r3); \
3262+
__ ldrw(rscratch1, Address(buf, k*4)); \
3263+
__ andw(rscratch3, rscratch2, r4); \
3264+
__ movw(rscratch2, t); \
3265+
__ eorw(rscratch3, rscratch3, r3); \
3266+
__ addw(rscratch4, r1, rscratch2); \
3267+
__ addw(rscratch3, rscratch3, rscratch1); \
3268+
__ addw(rscratch3, rscratch3, rscratch4); \
3269+
__ rorw(rscratch2, rscratch3, 32 - s); \
3270+
__ addw(r1, rscratch2, r2);
3271+
3272+
// HH step: r1 = r2 + rotl(r1 + (r2 ^ r3 ^ r4) + buf[k] + t, s)
#define HH(r1, r2, r3, r4, k, s, t) \
3273+
__ eorw(rscratch3, r3, r4); \
3274+
__ movw(rscratch2, t); \
3275+
__ addw(rscratch4, r1, rscratch2); \
3276+
__ ldrw(rscratch1, Address(buf, k*4)); \
3277+
__ eorw(rscratch3, rscratch3, r2); \
3278+
__ addw(rscratch3, rscratch3, rscratch1); \
3279+
__ addw(rscratch3, rscratch3, rscratch4); \
3280+
__ rorw(rscratch2, rscratch3, 32 - s); \
3281+
__ addw(r1, rscratch2, r2);
3282+
3283+
// II step: r1 = r2 + rotl(r1 + (r3 ^ (r2 | ~r4)) + buf[k] + t, s)
#define II(r1, r2, r3, r4, k, s, t) \
3284+
__ movw(rscratch3, t); \
3285+
__ ornw(rscratch2, r2, r4); \
3286+
__ addw(rscratch4, r1, rscratch3); \
3287+
__ ldrw(rscratch1, Address(buf, k*4)); \
3288+
__ eorw(rscratch3, rscratch2, r3); \
3289+
__ addw(rscratch3, rscratch3, rscratch1); \
3290+
__ addw(rscratch3, rscratch3, rscratch4); \
3291+
__ rorw(rscratch2, rscratch3, 32 - s); \
3292+
__ addw(r1, rscratch2, r2);
3293+
3294+
// Round 1 (message words taken in order 0..15)
3295+
FF(a, b, c, d, 0, 7, 0xd76aa478)
3296+
FF(d, a, b, c, 1, 12, 0xe8c7b756)
3297+
FF(c, d, a, b, 2, 17, 0x242070db)
3298+
FF(b, c, d, a, 3, 22, 0xc1bdceee)
3299+
FF(a, b, c, d, 4, 7, 0xf57c0faf)
3300+
FF(d, a, b, c, 5, 12, 0x4787c62a)
3301+
FF(c, d, a, b, 6, 17, 0xa8304613)
3302+
FF(b, c, d, a, 7, 22, 0xfd469501)
3303+
FF(a, b, c, d, 8, 7, 0x698098d8)
3304+
FF(d, a, b, c, 9, 12, 0x8b44f7af)
3305+
FF(c, d, a, b, 10, 17, 0xffff5bb1)
3306+
FF(b, c, d, a, 11, 22, 0x895cd7be)
3307+
FF(a, b, c, d, 12, 7, 0x6b901122)
3308+
FF(d, a, b, c, 13, 12, 0xfd987193)
3309+
FF(c, d, a, b, 14, 17, 0xa679438e)
3310+
FF(b, c, d, a, 15, 22, 0x49b40821)
3311+
3312+
// Round 2 (word index starts at 1 and advances by 5 modulo 16)
3313+
GG(a, b, c, d, 1, 5, 0xf61e2562)
3314+
GG(d, a, b, c, 6, 9, 0xc040b340)
3315+
GG(c, d, a, b, 11, 14, 0x265e5a51)
3316+
GG(b, c, d, a, 0, 20, 0xe9b6c7aa)
3317+
GG(a, b, c, d, 5, 5, 0xd62f105d)
3318+
GG(d, a, b, c, 10, 9, 0x02441453)
3319+
GG(c, d, a, b, 15, 14, 0xd8a1e681)
3320+
GG(b, c, d, a, 4, 20, 0xe7d3fbc8)
3321+
GG(a, b, c, d, 9, 5, 0x21e1cde6)
3322+
GG(d, a, b, c, 14, 9, 0xc33707d6)
3323+
GG(c, d, a, b, 3, 14, 0xf4d50d87)
3324+
GG(b, c, d, a, 8, 20, 0x455a14ed)
3325+
GG(a, b, c, d, 13, 5, 0xa9e3e905)
3326+
GG(d, a, b, c, 2, 9, 0xfcefa3f8)
3327+
GG(c, d, a, b, 7, 14, 0x676f02d9)
3328+
GG(b, c, d, a, 12, 20, 0x8d2a4c8a)
3329+
3330+
// Round 3 (word index starts at 5 and advances by 3 modulo 16)
3331+
HH(a, b, c, d, 5, 4, 0xfffa3942)
3332+
HH(d, a, b, c, 8, 11, 0x8771f681)
3333+
HH(c, d, a, b, 11, 16, 0x6d9d6122)
3334+
HH(b, c, d, a, 14, 23, 0xfde5380c)
3335+
HH(a, b, c, d, 1, 4, 0xa4beea44)
3336+
HH(d, a, b, c, 4, 11, 0x4bdecfa9)
3337+
HH(c, d, a, b, 7, 16, 0xf6bb4b60)
3338+
HH(b, c, d, a, 10, 23, 0xbebfbc70)
3339+
HH(a, b, c, d, 13, 4, 0x289b7ec6)
3340+
HH(d, a, b, c, 0, 11, 0xeaa127fa)
3341+
HH(c, d, a, b, 3, 16, 0xd4ef3085)
3342+
HH(b, c, d, a, 6, 23, 0x04881d05)
3343+
HH(a, b, c, d, 9, 4, 0xd9d4d039)
3344+
HH(d, a, b, c, 12, 11, 0xe6db99e5)
3345+
HH(c, d, a, b, 15, 16, 0x1fa27cf8)
3346+
HH(b, c, d, a, 2, 23, 0xc4ac5665)
3347+
3348+
// Round 4 (word index starts at 0 and advances by 7 modulo 16)
3349+
II(a, b, c, d, 0, 6, 0xf4292244)
3350+
II(d, a, b, c, 7, 10, 0x432aff97)
3351+
II(c, d, a, b, 14, 15, 0xab9423a7)
3352+
II(b, c, d, a, 5, 21, 0xfc93a039)
3353+
II(a, b, c, d, 12, 6, 0x655b59c3)
3354+
II(d, a, b, c, 3, 10, 0x8f0ccc92)
3355+
II(c, d, a, b, 10, 15, 0xffeff47d)
3356+
II(b, c, d, a, 1, 21, 0x85845dd1)
3357+
II(a, b, c, d, 8, 6, 0x6fa87e4f)
3358+
II(d, a, b, c, 15, 10, 0xfe2ce6e0)
3359+
II(c, d, a, b, 6, 15, 0xa3014314)
3360+
II(b, c, d, a, 13, 21, 0x4e0811a1)
3361+
II(a, b, c, d, 4, 6, 0xf7537e82)
3362+
II(d, a, b, c, 11, 10, 0xbd3af235)
3363+
II(c, d, a, b, 2, 15, 0x2ad7d2bb)
3364+
II(b, c, d, a, 9, 21, 0xeb86d391)
3365+
3366+
#undef FF
3367+
#undef GG
3368+
#undef HH
3369+
#undef II
3370+
3371+
// Add the round results to the previous state words (re-read from memory)
// and store the sums back to state[0..3].
3372+
__ ldrw(rscratch1, Address(state, 0));
3373+
__ addw(rscratch1, rscratch1, a);
3374+
__ strw(rscratch1, Address(state, 0));
3375+
3376+
__ ldrw(rscratch2, Address(state, 4));
3377+
__ addw(rscratch2, rscratch2, b);
3378+
__ strw(rscratch2, Address(state, 4));
3379+
3380+
__ ldrw(rscratch3, Address(state, 8));
3381+
__ addw(rscratch3, rscratch3, c);
3382+
__ strw(rscratch3, Address(state, 8));
3383+
3384+
__ ldrw(rscratch4, Address(state, 12));
3385+
__ addw(rscratch4, rscratch4, d);
3386+
__ strw(rscratch4, Address(state, 12));
3387+
3388+
// Multi-block variant only: step to the next 64-byte block and loop while
// the advanced ofs is still <= limit.
if (multi_block) {
3389+
__ add(buf, buf, 64);
3390+
__ add(ofs, ofs, 64);
3391+
__ cmp(ofs, limit);
3392+
__ br(Assembler::LE, md5_loop);
3393+
__ mov(c_rarg0, ofs); // return ofs
3394+
}
3395+
3396+
__ ret(lr);
3397+
3398+
return start;
3399+
}
3400+
32133401
// Arguments:
32143402
//
32153403
// Inputs:
@@ -7501,6 +7689,10 @@ class StubGenerator: public StubCodeGenerator {
75017689
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
75027690
}
75037691

7692+
if (UseMD5Intrinsics) {
7693+
StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
7694+
StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
7695+
}
75047696
if (UseSHA1Intrinsics) {
75057697
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
75067698
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");

‎src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ static bool returns_to_call_stub(address return_pc) {
3636

3737
enum platform_dependent_constants {
3838
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
39-
code_size2 = 38000 // simply increase if too small (assembler will crash if too small)
39+
code_size2 = 45000 // simply increase if too small (assembler will crash if too small)
4040
};
4141

4242
class aarch64 {

‎src/hotspot/cpu/aarch64/vm_version_aarch64.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -301,9 +301,8 @@ void VM_Version::initialize() {
301301
FLAG_SET_DEFAULT(UseFMA, true);
302302
}
303303

304-
if (UseMD5Intrinsics) {
305-
warning("MD5 intrinsics are not available on this CPU");
306-
FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
304+
if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
305+
UseMD5Intrinsics = true;
307306
}
308307

309308
if (_features & (CPU_SHA1 | CPU_SHA2 | CPU_SHA3 | CPU_SHA512)) {

‎test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,11 @@ public class IntrinsicPredicates {
6060
};
6161

6262
public static final BooleanSupplier MD5_INSTRUCTION_AVAILABLE
63-
= // x86 variants
63+
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", null, null),
64+
// x86 variants
6465
new OrPredicate(new CPUSpecificPredicate("amd64.*", null, null),
6566
new OrPredicate(new CPUSpecificPredicate("i386.*", null, null),
66-
new CPUSpecificPredicate("x86.*", null, null)));
67+
new CPUSpecificPredicate("x86.*", null, null))));
6768

6869
public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE
6970
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null),

0 commit comments

Comments
 (0)
Failed to load comments.