Skip to content

Commit 8638cd9

Browse files
Dong Bo, RealFYang
Dong Bo
authored and committed Nov 11, 2020
8255625: AArch64: Implement Base64.encodeBlock accelerator/intrinsic
Reviewed-by: aph
1 parent 5de99da commit 8638cd9

File tree

3 files changed

+226
-0
lines changed

3 files changed

+226
-0
lines changed
 

‎src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

+148
Original file line number | Diff line number | Diff line change
@@ -5403,6 +5403,150 @@ class StubGenerator: public StubCodeGenerator {
54035403
return start;
54045404
}
54055405

5406+
// Emits one SIMD round of Base64 encoding: loads 3 * size source bytes,
// turns every 3-byte group into four 6-bit indices, translates the indices
// through the codec table and stores 4 * size encoded bytes.
//
//   src   - source pointer, post-incremented by 3 * size
//   dst   - destination pointer, post-incremented by 4 * size
//   codec - first vector register of the codec table; tbl is asked to use
//           4 table registers (the visible caller loads 64 bytes into v0-v3)
//   size  - 16 selects the T16B arrangement, any other value selects T8B
//           (the visible caller passes 16 or 8)
//
// Uses v4-v6, v16-v19 and v20-v23 as working registers.
void generate_base64_encode_simdround(Register src, Register dst,
5407+
FloatRegister codec, u8 size) {
5408+
5409+
FloatRegister in0 = v4, in1 = v5, in2 = v6;
5410+
FloatRegister out0 = v16, out1 = v17, out2 = v18, out3 = v19;
5411+
FloatRegister ind0 = v20, ind1 = v21, ind2 = v22, ind3 = v23;
5412+
5413+
Assembler::SIMD_Arrangement arrangement = size == 16 ? __ T16B : __ T8B;
5414+
5415+
// De-interleaving load: in0/in1/in2 receive the 1st/2nd/3rd byte of each
// 3-byte group. All shifts below operate on independent byte lanes.
__ ld3(in0, in1, in2, arrangement, __ post(src, 3 * size));
5416+
5417+
// ind0 = in0 >> 2: the upper 6 bits of byte 0.
__ ushr(ind0, arrangement, in0, 2);
5418+
5419+
// ind1 = ((in1 >> 2) | (in0 << 6)) >> 2: the low 2 bits of byte 0 followed
// by the upper 4 bits of byte 1. in0 is overwritten here, after ind0 has
// already been derived from it.
__ ushr(ind1, arrangement, in1, 2);
5420+
__ shl(in0, arrangement, in0, 6);
5421+
__ orr(ind1, arrangement, ind1, in0);
5422+
__ ushr(ind1, arrangement, ind1, 2);
5423+
5424+
// ind2 = ((in1 << 4) | (in2 >> 4)) >> 2: the low 4 bits of byte 1 followed
// by the upper 2 bits of byte 2. in1 is overwritten here, after ind1 has
// already consumed it.
__ ushr(ind2, arrangement, in2, 4);
5425+
__ shl(in1, arrangement, in1, 4);
5426+
__ orr(ind2, arrangement, in1, ind2);
5427+
__ ushr(ind2, arrangement, ind2, 2);
5428+
5429+
// ind3 = (in2 << 2) >> 2: the low 6 bits of byte 2 (the shift pair drops
// the top two bits of each byte lane).
__ shl(ind3, arrangement, in2, 2);
5430+
__ ushr(ind3, arrangement, ind3, 2);
5431+
5432+
// Translate each 6-bit index into its output character via the table.
__ tbl(out0, arrangement, codec, 4, ind0);
5433+
__ tbl(out1, arrangement, codec, 4, ind1);
5434+
__ tbl(out2, arrangement, codec, 4, ind2);
5435+
__ tbl(out3, arrangement, codec, 4, ind3);
5436+
5437+
// Interleaving store: writes out0/out1/out2/out3 as consecutive 4-byte
// groups and advances dst.
__ st4(out0, out1, out2, out3, arrangement, __ post(dst, 4 * size));
5438+
}
5439+
5440+
/**
* Generates the stub that Base64-encodes the bytes of src in the range
* [src_offset, src_end) into dest starting at dest_offset, using either
* the standard or the URL-safe alphabet. Returns the stub entry address.
*
5441+
* Arguments:
5442+
*
5443+
* Input:
5444+
* c_rarg0 - src_start
5445+
* c_rarg1 - src_offset
5446+
* c_rarg2 - src_end (end offset; length is computed as src_end - src_offset)
5447+
* c_rarg3 - dest_start
5448+
* c_rarg4 - dest_offset
5449+
* c_rarg5 - isURL (zero selects toBase64, non-zero selects toBase64URL)
5450+
*
* Temporaries: c_rarg6, c_rarg7, r10-r15, v0-v3, plus the vector registers
* used by generate_base64_encode_simdround.
*
* NOTE(review): the scalar loop is entered without a zero check when the
* initial length is below 24 and only exits when length reaches exactly 0,
* so the stub appears to rely on the caller passing a positive multiple
* of 3 - confirm against the Java caller.
5451+
*/
5452+
address generate_base64_encodeBlock() {
5453+
5454+
// Standard alphabet: indices 62 and 63 map to '+' and '/'.
static const char toBase64[64] = {
5455+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
5456+
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
5457+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
5458+
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
5459+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
5460+
};
5461+
5462+
// URL-safe alphabet: identical except indices 62 and 63 map to '-' and '_'.
static const char toBase64URL[64] = {
5463+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
5464+
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
5465+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
5466+
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
5467+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
5468+
};
5469+
5470+
__ align(CodeEntryAlignment);
5471+
StubCodeMark mark(this, "StubRoutines", "encodeBlock");
5472+
address start = __ pc();
5473+
5474+
Register src = c_rarg0; // source array
5475+
Register soff = c_rarg1; // source start offset
5476+
Register send = c_rarg2; // source end offset
5477+
Register dst = c_rarg3; // dest array
5478+
Register doff = c_rarg4; // position for writing to dest array
5479+
Register isURL = c_rarg5; // Base64 or URL character set
5480+
5481+
// c_rarg6 and c_rarg7 are free to use as temps
5482+
Register codec = c_rarg6;
5483+
Register length = c_rarg7;
5484+
5485+
Label ProcessData, Process48B, Process24B, Process3B, SIMDExit, Exit;
5486+
5487+
// Turn src/dst into pointers to the first byte to read/write and compute
// the number of source bytes still to encode.
__ add(src, src, soff);
5488+
__ add(dst, dst, doff);
5489+
__ sub(length, send, soff);
5490+
5491+
// load the codec base address
5492+
__ lea(codec, ExternalAddress((address) toBase64));
5493+
__ cbz(isURL, ProcessData);
5494+
__ lea(codec, ExternalAddress((address) toBase64URL));
5495+
5496+
__ BIND(ProcessData);
5497+
5498+
// too short to form a SIMD loop, fall back to the scalar 3-byte loop
5499+
__ cmp(length, (u1)24);
5500+
__ br(Assembler::LT, Process3B);
5501+
5502+
// Load the 64-byte codec table into v0-v3 for the tbl lookups.
__ ld1(v0, v1, v2, v3, __ T16B, Address(codec));
5503+
5504+
// Main SIMD loop: 48 source bytes -> 64 output bytes per iteration.
__ BIND(Process48B);
5505+
__ cmp(length, (u1)48);
5506+
__ br(Assembler::LT, Process24B);
5507+
generate_base64_encode_simdround(src, dst, v0, 16);
5508+
__ sub(length, length, 48);
5509+
__ b(Process48B);
5510+
5511+
// Not a loop: fewer than 48 bytes remain here, so at most one 24-byte
// round (24 source bytes -> 32 output bytes) is needed.
__ BIND(Process24B);
5512+
__ cmp(length, (u1)24);
5513+
__ br(Assembler::LT, SIMDExit);
5514+
generate_base64_encode_simdround(src, dst, v0, 8);
5515+
__ sub(length, length, 24);
5516+
5517+
__ BIND(SIMDExit);
5518+
__ cbz(length, Exit);
5519+
5520+
// Scalar loop: encodes 3 source bytes into 4 output bytes per iteration.
__ BIND(Process3B);
5521+
// 3 src bytes, 24 bits
5522+
__ ldrb(r10, __ post(src, 1));
5523+
__ ldrb(r11, __ post(src, 1));
5524+
__ ldrb(r12, __ post(src, 1));
5525+
// r12 = byte0 << 16 | byte1 << 8 | byte2
__ orrw(r11, r11, r10, Assembler::LSL, 8);
5526+
__ orrw(r12, r12, r11, Assembler::LSL, 8);
5527+
// codec index
5528+
// r15/r14/r13/r12 = bits 23..18 / 17..12 / 11..6 / 5..0 of the 24-bit group
__ ubfmw(r15, r12, 18, 23);
5529+
__ ubfmw(r14, r12, 12, 17);
5530+
__ ubfmw(r13, r12, 6, 11);
5531+
__ andw(r12, r12, 63);
5532+
// get the code based on the codec
5533+
__ ldrb(r15, Address(codec, r15, Address::uxtw(0)));
5534+
__ ldrb(r14, Address(codec, r14, Address::uxtw(0)));
5535+
__ ldrb(r13, Address(codec, r13, Address::uxtw(0)));
5536+
__ ldrb(r12, Address(codec, r12, Address::uxtw(0)));
5537+
// Emit the four characters, most significant index first.
__ strb(r15, __ post(dst, 1));
5538+
__ strb(r14, __ post(dst, 1));
5539+
__ strb(r13, __ post(dst, 1));
5540+
__ strb(r12, __ post(dst, 1));
5541+
__ sub(length, length, 3);
5542+
// Loops until length is exactly zero.
__ cbnz(length, Process3B);
5543+
5544+
__ BIND(Exit);
5545+
__ ret(lr);
5546+
5547+
return start;
5548+
}
5549+
54065550
// Continuation point for throwing of implicit exceptions that are
54075551
// not handled in the current activation. Fabricates an exception
54085552
// oop and initiates normal exception dispatching in this
@@ -6481,6 +6625,10 @@ class StubGenerator: public StubCodeGenerator {
64816625
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
64826626
}
64836627

6628+
if (UseBASE64Intrinsics) {
6629+
StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
6630+
}
6631+
64846632
// data cache line writeback
64856633
StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
64866634
StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();

‎src/hotspot/cpu/aarch64/vm_version_aarch64.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,10 @@ void VM_Version::initialize() {
332332
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
333333
}
334334

335+
if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
336+
UseBASE64Intrinsics = true;
337+
}
338+
335339
if (is_zva_enabled()) {
336340
if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
337341
FLAG_SET_DEFAULT(UseBlockZeroing, true);
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.micro.bench.java.util;
25+
26+
import org.openjdk.jmh.annotations.*;
27+
import org.openjdk.jmh.infra.Blackhole;
28+
29+
import java.util.Base64;
30+
import java.util.Random;
31+
import java.util.ArrayList;
32+
import java.util.concurrent.TimeUnit;
33+
34+
@BenchmarkMode(Mode.AverageTime)
35+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
36+
@State(Scope.Thread)
37+
public class Base64Encode {
38+
39+
private Base64.Encoder encoder;
40+
private ArrayList<byte[]> unencoded;
41+
private byte[] encoded;
42+
43+
private static final int TESTSIZE = 1000;
44+
45+
@Param({"1", "2", "3", "6", "7", "9", "10", "48", "512", "1000", "20000"})
46+
private int maxNumBytes;
47+
48+
@Setup
49+
public void setup() {
50+
Random r = new Random(1123);
51+
52+
int dstLen = ((maxNumBytes + 16) / 3) * 4;
53+
54+
encoder = Base64.getEncoder();
55+
unencoded = new ArrayList<byte[]> ();
56+
encoded = new byte[dstLen];
57+
58+
for (int i = 0; i < TESTSIZE; i++) {
59+
int srcLen = 1 + r.nextInt(maxNumBytes);
60+
byte[] src = new byte[srcLen];
61+
r.nextBytes(src);
62+
unencoded.add(src);
63+
}
64+
}
65+
66+
@Benchmark
67+
@OperationsPerInvocation(TESTSIZE)
68+
public void testBase64Encode(Blackhole bh) {
69+
for (byte[] s : unencoded) {
70+
encoder.encode(s, encoded);
71+
bh.consume(encoded);
72+
}
73+
}
74+
}

0 commit comments

Comments
 (0)