Skip to content

Commit fb17a8e

Browse files
merykitty authored and DamonFool committed Feb 8, 2022
8278947: Support for array constants in constant table
Reviewed-by: kvn, vlivanov
1 parent d658d94 commit fb17a8e

File tree

6 files changed

+200
-172
lines changed

6 files changed

+200
-172
lines changed
 

‎src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -1493,6 +1493,26 @@ void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegi
14931493
}
14941494
}
14951495

1496+
// Loads a vector of vlen_in_bytes bytes from the memory operand src into dst.
// The move instruction is selected purely by the vector size in bytes:
// 4, 8, 16, 32 and 64 are handled; any other size reaches ShouldNotReachHere().
void C2_MacroAssembler::load_vector(XMMRegister dst, Address src, int vlen_in_bytes) {
1497+
switch (vlen_in_bytes) {
1498+
case 4: movdl(dst, src); break;
1499+
case 8: movq(dst, src); break;
1500+
case 16: movdqu(dst, src); break;
1501+
case 32: vmovdqu(dst, src); break;
1502+
// The 64-byte case passes Assembler::AVX_512bit as the explicit vector length.
case 64: evmovdquq(dst, src, Assembler::AVX_512bit); break;
1503+
default: ShouldNotReachHere();
1504+
}
1505+
}
1506+
1507+
// Loads a vector of vlen_in_bytes bytes from the literal address src into dst.
// When reachable(src) holds, the literal is converted with as_Address() and
// loaded directly. Otherwise the address is first materialized into rscratch
// with lea and the load goes through Address(rscratch, 0), so rscratch is
// overwritten only on that path.
void C2_MacroAssembler::load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch) {
1508+
if (reachable(src)) {
1509+
load_vector(dst, as_Address(src), vlen_in_bytes);
1510+
} else {
1511+
lea(rscratch, src);
1512+
load_vector(dst, Address(rscratch, 0), vlen_in_bytes);
1513+
}
1514+
}
1515+
14961516
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
14971517
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
14981518
if (vlen_in_bytes == 4) {

‎src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@
144144
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
145145
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, Register tmp, bool novlbwdq, int vlen_enc);
146146

147+
// Load vlen_in_bytes bytes from the memory operand src into dst.
void load_vector(XMMRegister dst, Address src, int vlen_in_bytes);
148+
// Same load, but from an AddressLiteral; rscratch defaults to rscratch1.
void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = rscratch1);
147149
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
148150

149151
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

‎src/hotspot/cpu/x86/x86.ad

+52-109
Original file line numberDiff line numberDiff line change
@@ -2552,15 +2552,21 @@ void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
25522552
}
25532553
}
25542554

2555-
static inline jlong replicate8_imm(int con, int width) {
2556-
// Load a constant of "width" (in bytes) and replicate it to fill 64bit.
2557-
assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
2558-
int bit_width = width * 8;
2559-
jlong val = con;
2560-
val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
2561-
while(bit_width < 64) {
2562-
val |= (val << bit_width);
2563-
bit_width <<= 1;
2555+
// Builds a GrowableArray<jvalue> containing len copies of the constant con.
// bt selects which jvalue member receives con (b, s, i, j, f or d for
// T_BYTE, T_SHORT, T_INT, T_LONG, T_FLOAT, T_DOUBLE); any other type reaches
// ShouldNotReachHere(). The array is allocated here with new and returned to
// the caller.
template <class T>
2556+
static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) {
2557+
GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len);
2558+
// NOTE(review): ele has no initializer and only the member matching bt is
// assigned below — confirm that no consumer reads or compares the remaining
// bytes of the jvalue.
jvalue ele;
2559+
switch (bt) {
2560+
case T_BYTE: ele.b = con; break;
2561+
case T_SHORT: ele.s = con; break;
2562+
case T_INT: ele.i = con; break;
2563+
case T_LONG: ele.j = con; break;
2564+
case T_FLOAT: ele.f = con; break;
2565+
case T_DOUBLE: ele.d = con; break;
2566+
default: ShouldNotReachHere();
2567+
}
2568+
// Append the same element len times.
for (int i = 0; i < len; i++) {
2569+
val->append(ele);
25642570
}
25652571
return val;
25662572
}
@@ -3824,14 +3830,7 @@ instruct loadV(vec dst, memory mem) %{
38243830
ins_cost(125);
38253831
format %{ "load_vector $dst,$mem" %}
38263832
ins_encode %{
3827-
switch (Matcher::vector_length_in_bytes(this)) {
3828-
case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break;
3829-
case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break;
3830-
case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break;
3831-
case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break;
3832-
case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break;
3833-
default: ShouldNotReachHere();
3834-
}
3833+
__ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
38353834
%}
38363835
ins_pipe( pipe_slow );
38373836
%}
@@ -4009,43 +4008,12 @@ instruct ReplB_imm(vec dst, immI con) %{
40094008
match(Set dst (ReplicateB con));
40104009
format %{ "replicateB $dst,$con" %}
40114010
ins_encode %{
4012-
uint vlen = Matcher::vector_length(this);
4013-
InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
4014-
if (vlen == 4) {
4015-
__ movdl($dst$$XMMRegister, const_addr);
4016-
} else {
4017-
__ movq($dst$$XMMRegister, const_addr);
4018-
if (vlen >= 16) {
4019-
if (VM_Version::supports_avx2()) {
4020-
int vlen_enc = vector_length_encoding(this);
4021-
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4022-
} else {
4023-
assert(vlen == 16, "sanity");
4024-
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4025-
}
4026-
}
4027-
}
4011+
InternalAddress addr = $constantaddress(T_BYTE, vreplicate_imm(T_BYTE, $con$$constant, Matcher::vector_length(this)));
4012+
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
40284013
%}
40294014
ins_pipe( pipe_slow );
40304015
%}
40314016

4032-
// Replicate byte scalar zero to be vector
4033-
instruct ReplB_zero(vec dst, immI_0 zero) %{
4034-
match(Set dst (ReplicateB zero));
4035-
format %{ "replicateB $dst,$zero" %}
4036-
ins_encode %{
4037-
uint vlen = Matcher::vector_length(this);
4038-
if (vlen <= 16) {
4039-
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4040-
} else {
4041-
// Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
4042-
int vlen_enc = vector_length_encoding(this);
4043-
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4044-
}
4045-
%}
4046-
ins_pipe( fpu_reg_reg );
4047-
%}
4048-
40494017
// ====================ReplicateS=======================================
40504018

40514019
instruct ReplS_reg(vec dst, rRegI src) %{
@@ -4091,39 +4059,10 @@ instruct ReplS_imm(vec dst, immI con) %{
40914059
match(Set dst (ReplicateS con));
40924060
format %{ "replicateS $dst,$con" %}
40934061
ins_encode %{
4094-
uint vlen = Matcher::vector_length(this);
4095-
InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
4096-
if (vlen == 2) {
4097-
__ movdl($dst$$XMMRegister, const_addr);
4098-
} else {
4099-
__ movq($dst$$XMMRegister, const_addr);
4100-
if (vlen >= 8) {
4101-
if (VM_Version::supports_avx2()) {
4102-
int vlen_enc = vector_length_encoding(this);
4103-
__ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4104-
} else {
4105-
assert(vlen == 8, "sanity");
4106-
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4107-
}
4108-
}
4109-
}
4110-
%}
4111-
ins_pipe( fpu_reg_reg );
4112-
%}
4113-
4114-
instruct ReplS_zero(vec dst, immI_0 zero) %{
4115-
match(Set dst (ReplicateS zero));
4116-
format %{ "replicateS $dst,$zero" %}
4117-
ins_encode %{
4118-
uint vlen = Matcher::vector_length(this);
4119-
if (vlen <= 8) {
4120-
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4121-
} else {
4122-
int vlen_enc = vector_length_encoding(this);
4123-
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4124-
}
4062+
InternalAddress addr = $constantaddress(T_SHORT, vreplicate_imm(T_SHORT, $con$$constant, Matcher::vector_length(this)));
4063+
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
41254064
%}
4126-
ins_pipe( fpu_reg_reg );
4065+
ins_pipe( pipe_slow );
41274066
%}
41284067

41294068
// ====================ReplicateI=======================================
@@ -4173,30 +4112,21 @@ instruct ReplI_imm(vec dst, immI con) %{
41734112
match(Set dst (ReplicateI con));
41744113
format %{ "replicateI $dst,$con" %}
41754114
ins_encode %{
4176-
uint vlen = Matcher::vector_length(this);
4177-
InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
4178-
if (vlen <= 4) {
4179-
__ movq($dst$$XMMRegister, const_addr);
4180-
if (vlen == 4) {
4181-
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4182-
}
4183-
} else {
4184-
assert(VM_Version::supports_avx2(), "sanity");
4185-
int vlen_enc = vector_length_encoding(this);
4186-
__ movq($dst$$XMMRegister, const_addr);
4187-
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4188-
}
4115+
InternalAddress addr = $constantaddress(T_INT, vreplicate_imm(T_INT, $con$$constant, Matcher::vector_length(this)));
4116+
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
41894117
%}
41904118
ins_pipe( pipe_slow );
41914119
%}
41924120

4193-
// Replicate integer (4 byte) scalar zero to be vector
4121+
// Replicate scalar zero to be vector
41944122
instruct ReplI_zero(vec dst, immI_0 zero) %{
4123+
match(Set dst (ReplicateB zero));
4124+
match(Set dst (ReplicateS zero));
41954125
match(Set dst (ReplicateI zero));
41964126
format %{ "replicateI $dst,$zero" %}
41974127
ins_encode %{
4198-
uint vlen = Matcher::vector_length(this);
4199-
if (vlen <= 4) {
4128+
uint vsize = Matcher::vector_length_in_bytes(this);
4129+
if (vsize <= 16) {
42004130
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
42014131
} else {
42024132
int vlen_enc = vector_length_encoding(this);
@@ -4327,17 +4257,8 @@ instruct ReplL_imm(vec dst, immL con) %{
43274257
match(Set dst (ReplicateL con));
43284258
format %{ "replicateL $dst,$con" %}
43294259
ins_encode %{
4330-
uint vlen = Matcher::vector_length(this);
4331-
InternalAddress const_addr = $constantaddress($con);
4332-
if (vlen == 2) {
4333-
__ movq($dst$$XMMRegister, const_addr);
4334-
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4335-
} else {
4336-
assert(VM_Version::supports_avx2(), "sanity");
4337-
int vlen_enc = vector_length_encoding(this);
4338-
__ movq($dst$$XMMRegister, const_addr);
4339-
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4340-
}
4260+
InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, Matcher::vector_length(this)));
4261+
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
43414262
%}
43424263
ins_pipe( pipe_slow );
43434264
%}
@@ -4407,6 +4328,17 @@ instruct ReplF_mem(vec dst, memory mem) %{
44074328
ins_pipe( pipe_slow );
44084329
%}
44094330

4331+
// Replicate float scalar immediate to be vector by loading from const table.
// The encoding builds an array of Matcher::vector_length(this) copies of the
// constant with vreplicate_imm(T_FLOAT, ...), passes it to $constantaddress,
// and loads Matcher::vector_length_in_bytes(this) bytes from the resulting
// address with load_vector.
4332+
instruct ReplF_imm(vec dst, immF con) %{
4333+
match(Set dst (ReplicateF con));
4334+
format %{ "replicateF $dst,$con" %}
4335+
ins_encode %{
4336+
InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, Matcher::vector_length(this)));
4337+
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4338+
%}
4339+
ins_pipe( pipe_slow );
4340+
%}
4341+
44104342
instruct ReplF_zero(vec dst, immF0 zero) %{
44114343
match(Set dst (ReplicateF zero));
44124344
format %{ "replicateF $dst,$zero" %}
@@ -4461,6 +4393,17 @@ instruct ReplD_mem(vec dst, memory mem) %{
44614393
ins_pipe( pipe_slow );
44624394
%}
44634395

4396+
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// The encoding builds an array of Matcher::vector_length(this) copies of the
// constant with vreplicate_imm(T_DOUBLE, ...), passes it to $constantaddress,
// and loads Matcher::vector_length_in_bytes(this) bytes from the resulting
// address with load_vector.
4397+
instruct ReplD_imm(vec dst, immD con) %{
4398+
match(Set dst (ReplicateD con));
4399+
format %{ "replicateD $dst,$con" %}
4400+
ins_encode %{
4401+
InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, Matcher::vector_length(this)));
4402+
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4403+
%}
4404+
ins_pipe( pipe_slow );
4405+
%}
4406+
44644407
instruct ReplD_zero(vec dst, immD0 zero) %{
44654408
match(Set dst (ReplicateD zero));
44664409
format %{ "replicateD $dst,$zero" %}

‎src/hotspot/share/asm/assembler.hpp

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -429,6 +429,31 @@ class AbstractAssembler : public ResourceObj {
429429
}
430430
return ptr;
431431
}
432+
// Emits the elements of c, in order, as one constant whose element type is bt.
// Returns the pointer obtained from start_a_const(); if that pointer is NULL
// nothing is emitted and NULL is returned. Element types other than
// T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_LONG, T_FLOAT and T_DOUBLE
// reach ShouldNotReachHere().
address array_constant(BasicType bt, GrowableArray<jvalue>* c) {
433+
// Remember the current code section; it is handed to end_a_const() below
// (presumably to switch back to it — confirm against end_a_const).
CodeSection* c1 = _code_section;
434+
int len = c->length();
435+
// Total payload size in bytes: element size for bt times element count.
int size = type2aelembytes(bt) * len;
436+
// Requested alignment is size rounded up to a power of two, capped at 8.
address ptr = start_a_const(size, MIN2(round_up_power_of_2(size), 8));
437+
if (ptr != NULL) {
438+
for (int i = 0; i < len; i++) {
439+
jvalue e = c->at(i);
440+
// Emit only the jvalue member that corresponds to bt, at its natural width.
switch(bt) {
441+
case T_BOOLEAN: emit_int8(e.z); break;
442+
case T_BYTE: emit_int8(e.b); break;
443+
case T_CHAR: emit_int16(e.c); break;
444+
case T_SHORT: emit_int16(e.s); break;
445+
case T_INT: emit_int32(e.i); break;
446+
case T_LONG: emit_int64(e.j); break;
447+
case T_FLOAT: emit_float(e.f); break;
448+
case T_DOUBLE: emit_double(e.d); break;
449+
default:
450+
ShouldNotReachHere();
451+
}
452+
}
453+
end_a_const(c1);
454+
}
455+
return ptr;
456+
}
432457

433458
// Bang stack to trigger StackOverflowError at a safe location
434459
// implementation delegates to machine-specific bang_stack_with_offset

0 commit comments

Comments
 (0)
Please sign in to comment.