@@ -2552,15 +2552,21 @@ void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
2552
2552
}
2553
2553
}
2554
2554
2555
- static inline jlong replicate8_imm(int con, int width) {
2556
- // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
2557
- assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
2558
- int bit_width = width * 8;
2559
- jlong val = con;
2560
- val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
2561
- while(bit_width < 64) {
2562
- val |= (val << bit_width);
2563
- bit_width <<= 1;
2555
+ template <class T>
2556
+ static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) {
2557
+ GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len);
2558
+ jvalue ele;
2559
+ switch (bt) {
2560
+ case T_BYTE: ele.b = con; break;
2561
+ case T_SHORT: ele.s = con; break;
2562
+ case T_INT: ele.i = con; break;
2563
+ case T_LONG: ele.j = con; break;
2564
+ case T_FLOAT: ele.f = con; break;
2565
+ case T_DOUBLE: ele.d = con; break;
2566
+ default: ShouldNotReachHere();
2567
+ }
2568
+ for (int i = 0; i < len; i++) {
2569
+ val->append(ele);
2564
2570
}
2565
2571
return val;
2566
2572
}
@@ -3824,14 +3830,7 @@ instruct loadV(vec dst, memory mem) %{
3824
3830
ins_cost(125);
3825
3831
format %{ "load_vector $dst,$mem" %}
3826
3832
ins_encode %{
3827
- switch (Matcher::vector_length_in_bytes(this)) {
3828
- case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break;
3829
- case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break;
3830
- case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break;
3831
- case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break;
3832
- case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break;
3833
- default: ShouldNotReachHere();
3834
- }
3833
+ __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
3835
3834
%}
3836
3835
ins_pipe( pipe_slow );
3837
3836
%}
@@ -4009,43 +4008,12 @@ instruct ReplB_imm(vec dst, immI con) %{
4009
4008
match(Set dst (ReplicateB con));
4010
4009
format %{ "replicateB $dst,$con" %}
4011
4010
ins_encode %{
4012
- uint vlen = Matcher::vector_length(this);
4013
- InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
4014
- if (vlen == 4) {
4015
- __ movdl($dst$$XMMRegister, const_addr);
4016
- } else {
4017
- __ movq($dst$$XMMRegister, const_addr);
4018
- if (vlen >= 16) {
4019
- if (VM_Version::supports_avx2()) {
4020
- int vlen_enc = vector_length_encoding(this);
4021
- __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4022
- } else {
4023
- assert(vlen == 16, "sanity");
4024
- __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4025
- }
4026
- }
4027
- }
4011
+ InternalAddress addr = $constantaddress(T_BYTE, vreplicate_imm(T_BYTE, $con$$constant, Matcher::vector_length(this)));
4012
+ __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4028
4013
%}
4029
4014
ins_pipe( pipe_slow );
4030
4015
%}
4031
4016
4032
- // Replicate byte scalar zero to be vector
4033
- instruct ReplB_zero(vec dst, immI_0 zero) %{
4034
- match(Set dst (ReplicateB zero));
4035
- format %{ "replicateB $dst,$zero" %}
4036
- ins_encode %{
4037
- uint vlen = Matcher::vector_length(this);
4038
- if (vlen <= 16) {
4039
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4040
- } else {
4041
- // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
4042
- int vlen_enc = vector_length_encoding(this);
4043
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4044
- }
4045
- %}
4046
- ins_pipe( fpu_reg_reg );
4047
- %}
4048
-
4049
4017
// ====================ReplicateS=======================================
4050
4018
4051
4019
instruct ReplS_reg(vec dst, rRegI src) %{
@@ -4091,39 +4059,10 @@ instruct ReplS_imm(vec dst, immI con) %{
4091
4059
match(Set dst (ReplicateS con));
4092
4060
format %{ "replicateS $dst,$con" %}
4093
4061
ins_encode %{
4094
- uint vlen = Matcher::vector_length(this);
4095
- InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
4096
- if (vlen == 2) {
4097
- __ movdl($dst$$XMMRegister, const_addr);
4098
- } else {
4099
- __ movq($dst$$XMMRegister, const_addr);
4100
- if (vlen >= 8) {
4101
- if (VM_Version::supports_avx2()) {
4102
- int vlen_enc = vector_length_encoding(this);
4103
- __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4104
- } else {
4105
- assert(vlen == 8, "sanity");
4106
- __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4107
- }
4108
- }
4109
- }
4110
- %}
4111
- ins_pipe( fpu_reg_reg );
4112
- %}
4113
-
4114
- instruct ReplS_zero(vec dst, immI_0 zero) %{
4115
- match(Set dst (ReplicateS zero));
4116
- format %{ "replicateS $dst,$zero" %}
4117
- ins_encode %{
4118
- uint vlen = Matcher::vector_length(this);
4119
- if (vlen <= 8) {
4120
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4121
- } else {
4122
- int vlen_enc = vector_length_encoding(this);
4123
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4124
- }
4062
+ InternalAddress addr = $constantaddress(T_SHORT, vreplicate_imm(T_SHORT, $con$$constant, Matcher::vector_length(this)));
4063
+ __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4125
4064
%}
4126
- ins_pipe( fpu_reg_reg );
4065
+ ins_pipe( pipe_slow );
4127
4066
%}
4128
4067
4129
4068
// ====================ReplicateI=======================================
@@ -4173,30 +4112,21 @@ instruct ReplI_imm(vec dst, immI con) %{
4173
4112
match(Set dst (ReplicateI con));
4174
4113
format %{ "replicateI $dst,$con" %}
4175
4114
ins_encode %{
4176
- uint vlen = Matcher::vector_length(this);
4177
- InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
4178
- if (vlen <= 4) {
4179
- __ movq($dst$$XMMRegister, const_addr);
4180
- if (vlen == 4) {
4181
- __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4182
- }
4183
- } else {
4184
- assert(VM_Version::supports_avx2(), "sanity");
4185
- int vlen_enc = vector_length_encoding(this);
4186
- __ movq($dst$$XMMRegister, const_addr);
4187
- __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4188
- }
4115
+ InternalAddress addr = $constantaddress(T_INT, vreplicate_imm(T_INT, $con$$constant, Matcher::vector_length(this)));
4116
+ __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4189
4117
%}
4190
4118
ins_pipe( pipe_slow );
4191
4119
%}
4192
4120
4193
- // Replicate integer (4 byte) scalar zero to be vector
4121
+ // Replicate scalar zero to be vector
4194
4122
instruct ReplI_zero(vec dst, immI_0 zero) %{
4123
+ match(Set dst (ReplicateB zero));
4124
+ match(Set dst (ReplicateS zero));
4195
4125
match(Set dst (ReplicateI zero));
4196
4126
format %{ "replicateI $dst,$zero" %}
4197
4127
ins_encode %{
4198
- uint vlen = Matcher::vector_length (this);
4199
- if (vlen <= 4 ) {
4128
+ uint vsize = Matcher::vector_length_in_bytes (this);
4129
+ if (vsize <= 16 ) {
4200
4130
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4201
4131
} else {
4202
4132
int vlen_enc = vector_length_encoding(this);
@@ -4327,17 +4257,8 @@ instruct ReplL_imm(vec dst, immL con) %{
4327
4257
match(Set dst (ReplicateL con));
4328
4258
format %{ "replicateL $dst,$con" %}
4329
4259
ins_encode %{
4330
- uint vlen = Matcher::vector_length(this);
4331
- InternalAddress const_addr = $constantaddress($con);
4332
- if (vlen == 2) {
4333
- __ movq($dst$$XMMRegister, const_addr);
4334
- __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4335
- } else {
4336
- assert(VM_Version::supports_avx2(), "sanity");
4337
- int vlen_enc = vector_length_encoding(this);
4338
- __ movq($dst$$XMMRegister, const_addr);
4339
- __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4340
- }
4260
+ InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, Matcher::vector_length(this)));
4261
+ __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4341
4262
%}
4342
4263
ins_pipe( pipe_slow );
4343
4264
%}
@@ -4407,6 +4328,17 @@ instruct ReplF_mem(vec dst, memory mem) %{
4407
4328
ins_pipe( pipe_slow );
4408
4329
%}
4409
4330
4331
+ // Replicate float scalar immediate to be vector by loading from const table.
4332
+ instruct ReplF_imm(vec dst, immF con) %{
4333
+ match(Set dst (ReplicateF con));
4334
+ format %{ "replicateF $dst,$con" %}
4335
+ ins_encode %{
4336
+ InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, Matcher::vector_length(this)));
4337
+ __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4338
+ %}
4339
+ ins_pipe( pipe_slow );
4340
+ %}
4341
+
4410
4342
instruct ReplF_zero(vec dst, immF0 zero) %{
4411
4343
match(Set dst (ReplicateF zero));
4412
4344
format %{ "replicateF $dst,$zero" %}
@@ -4461,6 +4393,17 @@ instruct ReplD_mem(vec dst, memory mem) %{
4461
4393
ins_pipe( pipe_slow );
4462
4394
%}
4463
4395
4396
+ // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
4397
+ instruct ReplD_imm(vec dst, immD con) %{
4398
+ match(Set dst (ReplicateD con));
4399
+ format %{ "replicateD $dst,$con" %}
4400
+ ins_encode %{
4401
+ InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, Matcher::vector_length(this)));
4402
+ __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
4403
+ %}
4404
+ ins_pipe( pipe_slow );
4405
+ %}
4406
+
4464
4407
instruct ReplD_zero(vec dst, immD0 zero) %{
4465
4408
match(Set dst (ReplicateD zero));
4466
4409
format %{ "replicateD $dst,$zero" %}
0 commit comments