Skip to content

Commit 0e7288f

Browse files
Smita Kamath, tkanteck, Anthony Scarpino
committed Aug 24, 2021
8267125: AES Galois CounterMode (GCM) interleaved implementation using AVX512 + VAES instructions
Co-authored-by: Smita Kamath <svkamath@openjdk.org> Co-authored-by: Tomasz Kantecki <tomasz.kantecki@intel.com> Co-authored-by: Anthony Scarpino <ascarpino@openjdk.org> Reviewed-by: kvn, valeriep
1 parent 6ace805 commit 0e7288f

21 files changed

+1318
-246
lines changed
 

‎src/hotspot/cpu/x86/macroAssembler_x86.hpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -945,12 +945,19 @@ class MacroAssembler: public Assembler {
945945
void roundDec(XMMRegister key, int rnum);
946946
void lastroundDec(XMMRegister key, int rnum);
947947
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
948-
948+
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
949+
void generateHtbl_48_block_zmm(Register htbl);
950+
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
951+
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
952+
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
953+
bool final_reduction, int index, XMMRegister counter_inc_mask);
949954
public:
950955
void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
951956
void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
952957
void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
953958
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
959+
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
960+
Register state, Register subkeyHtbl, Register counter);
954961

955962
#endif
956963

‎src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp

+624-1
Large diffs are not rendered by default.

‎src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

+100-2
Original file line numberDiff line numberDiff line change
@@ -4368,6 +4368,95 @@ class StubGenerator: public StubCodeGenerator {
43684368
return start;
43694369
}
43704370

4371+
// Emits the "_ghash_poly512_addr" constant table into the stub code buffer
// and returns the address of its first word.
//
// Table layout, as 64-bit words in emission order:
//   4 x { 0x00000001C2000000, 0xC200000000000000 }   POLY for reduction
//   1 x { 0x0000000000000001, 0xC200000000000000 }   POLY
//   1 x { 0x0000000000000001, 0x0000000100000000 }   TWOONE
address ghash_polynomial512_addr() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
  address table_start = __ pc();

  // POLY for reduction: the same 128-bit pair repeated four times (4 x 128 = 512 bits).
  for (int copy = 0; copy < 4; copy++) {
    __ emit_data64(0x00000001C2000000, relocInfo::none);
    __ emit_data64(0xC200000000000000, relocInfo::none);
  }

  // POLY
  __ emit_data64(0x0000000000000001, relocInfo::none);
  __ emit_data64(0xC200000000000000, relocInfo::none);

  // TWOONE
  __ emit_data64(0x0000000000000001, relocInfo::none);
  __ emit_data64(0x0000000100000000, relocInfo::none);

  return table_start;
}
4389+
4390+
// Vector AES Galois Counter Mode implementation. Parameters:
4391+
// Windows regs | Linux regs
4392+
// in = c_rarg0 (rcx) | c_rarg0 (rsi)
4393+
// len = c_rarg1 (rdx) | c_rarg1 (rdi)
4394+
// ct = c_rarg2 (r8) | c_rarg2 (rdx)
4395+
// out = c_rarg3 (r9) | c_rarg3 (rcx)
4396+
// key = r10 | c_rarg4 (r8)
4397+
// state = r13 | c_rarg5 (r9)
4398+
// subkeyHtbl = r14 | r11
4399+
// counter = rsi | r12
4400+
// return - number of processed bytes
4401+
address generate_galoisCounterMode_AESCrypt() {
4402+
__ align(CodeEntryAlignment);
4403+
StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt");
4404+
address start = __ pc();
4405+
const Register in = c_rarg0;
4406+
const Register len = c_rarg1;
4407+
const Register ct = c_rarg2;
4408+
const Register out = c_rarg3;
4409+
// and updated with the incremented counter in the end
4410+
#ifndef _WIN64
4411+
const Register key = c_rarg4;
4412+
const Register state = c_rarg5;
4413+
const Address subkeyH_mem(rbp, 2 * wordSize);
4414+
const Register subkeyHtbl = r11;
4415+
const Address counter_mem(rbp, 3 * wordSize);
4416+
const Register counter = r12;
4417+
#else
4418+
const Address key_mem(rbp, 6 * wordSize);
4419+
const Register key = r10;
4420+
const Address state_mem(rbp, 7 * wordSize);
4421+
const Register state = r13;
4422+
const Address subkeyH_mem(rbp, 8 * wordSize);
4423+
const Register subkeyHtbl = r14;
4424+
const Address counter_mem(rbp, 9 * wordSize);
4425+
const Register counter = rsi;
4426+
#endif
4427+
__ enter();
4428+
// Save state before entering routine
4429+
__ push(r12);
4430+
__ push(r13);
4431+
__ push(r14);
4432+
__ push(r15);
4433+
__ push(rbx);
4434+
#ifdef _WIN64
4435+
// on win64, fill len_reg from stack position
4436+
__ push(rsi);
4437+
__ movptr(key, key_mem);
4438+
__ movptr(state, state_mem);
4439+
#endif
4440+
__ movptr(subkeyHtbl, subkeyH_mem);
4441+
__ movptr(counter, counter_mem);
4442+
4443+
__ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, counter);
4444+
4445+
// Restore state before leaving routine
4446+
#ifdef _WIN64
4447+
__ pop(rsi);
4448+
#endif
4449+
__ pop(rbx);
4450+
__ pop(r15);
4451+
__ pop(r14);
4452+
__ pop(r13);
4453+
__ pop(r12);
4454+
4455+
__ leave(); // required for proper stackwalking of RuntimeStub frame
4456+
__ ret(0);
4457+
return start;
4458+
}
4459+
43714460
// This mask is used for incrementing counter value(linc0, linc4, etc.)
43724461
address counter_mask_addr() {
43734462
__ align(64);
@@ -7618,13 +7707,20 @@ address generate_avx_ghash_processBlocks() {
76187707
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
76197708
StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
76207709
StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
7710+
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7711+
StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
7712+
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7713+
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
76217714
} else {
76227715
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
76237716
}
76247717
}
7718+
76257719
if (UseAESCTRIntrinsics) {
76267720
if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
7627-
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7721+
if (StubRoutines::x86::_counter_mask_addr == NULL) {
7722+
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7723+
}
76287724
StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
76297725
} else {
76307726
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
@@ -7664,7 +7760,9 @@ address generate_avx_ghash_processBlocks() {
76647760

76657761
// Generate GHASH intrinsics code
76667762
if (UseGHASHIntrinsics) {
7667-
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7763+
if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
7764+
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7765+
}
76687766
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
76697767
if (VM_Version::supports_avx()) {
76707768
StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();

‎src/hotspot/cpu/x86/stubRoutines_x86.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ address StubRoutines::x86::_join_0_1_base64 = NULL;
8080
address StubRoutines::x86::_join_1_2_base64 = NULL;
8181
address StubRoutines::x86::_join_2_3_base64 = NULL;
8282
address StubRoutines::x86::_decoding_table_base64 = NULL;
83+
address StubRoutines::x86::_ghash_poly512_addr = NULL;
8384
#endif
8485
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
8586

‎src/hotspot/cpu/x86/stubRoutines_x86.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
3333

3434
enum platform_dependent_constants {
3535
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
36-
code_size2 = 35300 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small)
36+
code_size2 = 35300 LP64_ONLY(+32000) // simply increase if too small (assembler will crash if too small)
3737
};
3838

3939
class x86 {
@@ -198,6 +198,7 @@ class x86 {
198198
static address _join_1_2_base64;
199199
static address _join_2_3_base64;
200200
static address _decoding_table_base64;
201+
static address _ghash_poly512_addr;
201202
#endif
202203
// byte flip mask for sha256
203204
static address _pshuffle_byte_flip_mask_addr;
@@ -254,6 +255,7 @@ class x86 {
254255
// Returns _crc_by128_masks_avx512 cast to an address.
static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; }
255256
// Returns _shuf_table_crc32_avx512 cast to an address.
static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; }
256257
// Returns _crc_table_avx512 cast to an address.
static address crc_table_avx512_addr() { return (address)_crc_table_avx512; }
258+
// Returns the current value of the static _ghash_poly512_addr field
// (64-bit only: declared inside the _LP64 section).
static address ghash_polynomial512_addr() { return _ghash_poly512_addr; }
257259
#endif // _LP64
258260
// Returns the current value of the static _ghash_long_swap_mask_addr field.
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
259261
// Returns the current value of the static _ghash_byte_swap_mask_addr field.
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }

‎src/hotspot/share/classfile/vmIntrinsics.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) {
182182
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
183183
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
184184
case vmIntrinsics::_counterMode_AESCrypt:
185+
case vmIntrinsics::_galoisCounterMode_AESCrypt:
185186
return 1;
186187
case vmIntrinsics::_digestBase_implCompressMB:
187188
return 5;
@@ -429,6 +430,9 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
429430
case vmIntrinsics::_counterMode_AESCrypt:
430431
if (!UseAESCTRIntrinsics) return true;
431432
break;
433+
case vmIntrinsics::_galoisCounterMode_AESCrypt:
434+
if (!UseAESIntrinsics) return true;
435+
break;
432436
case vmIntrinsics::_md5_implCompress:
433437
if (!UseMD5Intrinsics) return true;
434438
break;

‎src/hotspot/share/classfile/vmIntrinsics.hpp

+5
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,11 @@ class methodHandle;
415415
do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
416416
do_name( crypt_name, "implCrypt") \
417417
\
418+
do_class(com_sun_crypto_provider_galoisCounterMode, "com/sun/crypto/provider/GaloisCounterMode") \
419+
do_intrinsic(_galoisCounterMode_AESCrypt, com_sun_crypto_provider_galoisCounterMode, gcm_crypt_name, aes_gcm_signature, F_S) \
420+
do_name(gcm_crypt_name, "implGCMCrypt") \
421+
do_signature(aes_gcm_signature, "([BII[BI[BILcom/sun/crypto/provider/GCTR;Lcom/sun/crypto/provider/GHASH;)I") \
422+
\
418423
/* support for sun.security.provider.MD5 */ \
419424
do_class(sun_security_provider_md5, "sun/security/provider/MD5") \
420425
do_intrinsic(_md5_implCompress, sun_security_provider_md5, implCompress_name, implCompress_signature, F_R) \

‎src/hotspot/share/jvmci/vmStructs_jvmci.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@
308308
static_field(StubRoutines, _electronicCodeBook_encryptAESCrypt, address) \
309309
static_field(StubRoutines, _electronicCodeBook_decryptAESCrypt, address) \
310310
static_field(StubRoutines, _counterMode_AESCrypt, address) \
311+
static_field(StubRoutines, _galoisCounterMode_AESCrypt, address) \
311312
static_field(StubRoutines, _base64_encodeBlock, address) \
312313
static_field(StubRoutines, _base64_decodeBlock, address) \
313314
static_field(StubRoutines, _ghash_processBlocks, address) \

‎src/hotspot/share/opto/c2compiler.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
640640
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
641641
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
642642
case vmIntrinsics::_counterMode_AESCrypt:
643+
case vmIntrinsics::_galoisCounterMode_AESCrypt:
643644
case vmIntrinsics::_md5_implCompress:
644645
case vmIntrinsics::_sha_implCompress:
645646
case vmIntrinsics::_sha2_implCompress:

‎src/hotspot/share/opto/escape.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
10871087
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_encryptAESCrypt") == 0 ||
10881088
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_decryptAESCrypt") == 0 ||
10891089
strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
1090+
strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
10901091
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
10911092
strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
10921093
strcmp(call->as_CallLeaf()->_name, "decodeBlock") == 0 ||

‎src/hotspot/share/opto/graphKit.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2535,7 +2535,7 @@ Node* GraphKit::make_runtime_call(int flags,
25352535
if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
25362536
if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
25372537
if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
2538-
/* close each nested if ===> */ } } } } } } } }
2538+
/* close each nested if ===> */ } } } } } } } }
25392539
assert(call->in(call->req()-1) != NULL, "must initialize all parms");
25402540

25412541
if (!is_leaf) {

0 commit comments

Comments
 (0)
Please sign in to comment.