Skip to content

Commit 7840d02

Browse files
author
Jatin Bhateja
committed Dec 18, 2019
8235824: C2: Merge AD instructions for AddReductionV and MulReductionV nodes
Reviewed-by: vlivanov, sviswanathan, jrose, kvn
1 parent ab7cfde commit 7840d02

File tree

1 file changed

+296
-638
lines changed

1 file changed

+296
-638
lines changed
 

‎src/hotspot/cpu/x86/x86.ad

+296 -638
Original file line number | Diff line number | Diff line change
@@ -4803,195 +4803,111 @@ instruct Repl8D_zero_evex(vec dst, immD0 zero) %{
48034803

48044804
// ====================REDUCTION ARITHMETIC=======================================
48054805

4806-
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4807-
predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4808-
match(Set dst (AddReductionVI src1 src2));
4809-
effect(TEMP tmp2, TEMP tmp);
4810-
format %{ "movdqu $tmp2,$src2\n\t"
4811-
"phaddd $tmp2,$tmp2\n\t"
4812-
"movd $tmp,$src1\n\t"
4813-
"paddd $tmp,$tmp2\n\t"
4814-
"movd $dst,$tmp\t! add reduction2I" %}
4815-
ins_encode %{
4816-
__ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4817-
__ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4818-
__ movdl($tmp$$XMMRegister, $src1$$Register);
4819-
__ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4820-
__ movdl($dst$$Register, $tmp$$XMMRegister);
4821-
%}
4822-
ins_pipe( pipe_slow );
4823-
%}
4806+
// =======================AddReductionVI==========================================
48244807

4825-
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4826-
predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 2);
4808+
instruct vadd2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4809+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
48274810
match(Set dst (AddReductionVI src1 src2));
48284811
effect(TEMP tmp, TEMP tmp2);
4829-
format %{ "vphaddd $tmp,$src2,$src2\n\t"
4830-
"movd $tmp2,$src1\n\t"
4831-
"vpaddd $tmp2,$tmp2,$tmp\n\t"
4832-
"movd $dst,$tmp2\t! add reduction2I" %}
4833-
ins_encode %{
4834-
int vector_len = 0;
4835-
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4836-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4837-
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4838-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4839-
%}
4840-
ins_pipe( pipe_slow );
4841-
%}
4842-
4843-
instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4844-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4845-
match(Set dst (AddReductionVI src1 src2));
4846-
effect(TEMP tmp, TEMP tmp2);
4847-
format %{ "pshufd $tmp2,$src2,0x1\n\t"
4848-
"vpaddd $tmp,$src2,$tmp2\n\t"
4849-
"movd $tmp2,$src1\n\t"
4850-
"vpaddd $tmp2,$tmp,$tmp2\n\t"
4851-
"movd $dst,$tmp2\t! add reduction2I" %}
4852-
ins_encode %{
4853-
int vector_len = 0;
4854-
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4855-
__ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4856-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4857-
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4858-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4859-
%}
4860-
ins_pipe( pipe_slow );
4861-
%}
4862-
4863-
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4864-
predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
4865-
match(Set dst (AddReductionVI src1 src2));
4866-
effect(TEMP tmp, TEMP tmp2);
4867-
format %{ "movdqu $tmp,$src2\n\t"
4868-
"phaddd $tmp,$tmp\n\t"
4869-
"phaddd $tmp,$tmp\n\t"
4870-
"movd $tmp2,$src1\n\t"
4871-
"paddd $tmp2,$tmp\n\t"
4872-
"movd $dst,$tmp2\t! add reduction4I" %}
4873-
ins_encode %{
4874-
__ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
4875-
__ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4876-
__ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4877-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4878-
__ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
4879-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4880-
%}
4881-
ins_pipe( pipe_slow );
4882-
%}
4883-
4884-
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4885-
predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 4);
4886-
match(Set dst (AddReductionVI src1 src2));
4887-
effect(TEMP tmp, TEMP tmp2);
4888-
format %{ "vphaddd $tmp,$src2,$src2\n\t"
4889-
"vphaddd $tmp,$tmp,$tmp\n\t"
4890-
"movd $tmp2,$src1\n\t"
4891-
"vpaddd $tmp2,$tmp2,$tmp\n\t"
4892-
"movd $dst,$tmp2\t! add reduction4I" %}
4893-
ins_encode %{
4894-
int vector_len = 0;
4895-
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4896-
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
4897-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4898-
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4899-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4900-
%}
4901-
ins_pipe( pipe_slow );
4902-
%}
4903-
4904-
instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4905-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 4);
4906-
match(Set dst (AddReductionVI src1 src2));
4907-
effect(TEMP tmp, TEMP tmp2);
4908-
format %{ "pshufd $tmp2,$src2,0xE\n\t"
4909-
"vpaddd $tmp,$src2,$tmp2\n\t"
4910-
"pshufd $tmp2,$tmp,0x1\n\t"
4911-
"vpaddd $tmp,$tmp,$tmp2\n\t"
4912-
"movd $tmp2,$src1\n\t"
4913-
"vpaddd $tmp2,$tmp,$tmp2\n\t"
4914-
"movd $dst,$tmp2\t! add reduction4I" %}
4915-
ins_encode %{
4916-
int vector_len = 0;
4917-
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4918-
__ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4919-
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4920-
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4921-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4922-
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4923-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4812+
format %{ "vector_add2I_reduction $dst,$src1,$src2" %}
4813+
ins_encode %{
4814+
if (UseAVX > 2) {
4815+
int vector_len = Assembler::AVX_128bit;
4816+
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4817+
__ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4818+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4819+
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4820+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4821+
} else if (VM_Version::supports_avxonly()) {
4822+
int vector_len = Assembler::AVX_128bit;
4823+
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4824+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4825+
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4826+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4827+
} else {
4828+
assert(UseSSE > 2, "required");
4829+
__ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4830+
__ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4831+
__ movdl($tmp$$XMMRegister, $src1$$Register);
4832+
__ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4833+
__ movdl($dst$$Register, $tmp$$XMMRegister);
4834+
}
49244835
%}
49254836
ins_pipe( pipe_slow );
49264837
%}
49274838

4928-
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4929-
predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 8);
4839+
instruct vadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4840+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
49304841
match(Set dst (AddReductionVI src1 src2));
49314842
effect(TEMP tmp, TEMP tmp2);
4932-
format %{ "vphaddd $tmp,$src2,$src2\n\t"
4933-
"vphaddd $tmp,$tmp,$tmp2\n\t"
4934-
"vextracti128_high $tmp2,$tmp\n\t"
4935-
"vpaddd $tmp,$tmp,$tmp2\n\t"
4936-
"movd $tmp2,$src1\n\t"
4937-
"vpaddd $tmp2,$tmp2,$tmp\n\t"
4938-
"movd $dst,$tmp2\t! add reduction8I" %}
4939-
ins_encode %{
4940-
int vector_len = 1;
4941-
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4942-
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4943-
__ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
4944-
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4945-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4946-
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4947-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4843+
format %{ "vector_add4I_reduction $dst,$src1,$src2" %}
4844+
ins_encode %{
4845+
if (UseAVX > 2) {
4846+
int vector_len = Assembler::AVX_128bit;
4847+
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4848+
__ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4849+
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4850+
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4851+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4852+
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4853+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4854+
} else if (VM_Version::supports_avxonly()) {
4855+
int vector_len = Assembler::AVX_128bit;
4856+
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4857+
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
4858+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4859+
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4860+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4861+
} else {
4862+
assert(UseSSE > 2, "required");
4863+
__ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
4864+
__ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4865+
__ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4866+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4867+
__ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
4868+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4869+
}
49484870
%}
49494871
ins_pipe( pipe_slow );
49504872
%}
49514873

4952-
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4953-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
4874+
instruct vadd8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4875+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
49544876
match(Set dst (AddReductionVI src1 src2));
49554877
effect(TEMP tmp, TEMP tmp2);
4956-
format %{ "vextracti128_high $tmp,$src2\n\t"
4957-
"vpaddd $tmp,$tmp,$src2\n\t"
4958-
"pshufd $tmp2,$tmp,0xE\n\t"
4959-
"vpaddd $tmp,$tmp,$tmp2\n\t"
4960-
"pshufd $tmp2,$tmp,0x1\n\t"
4961-
"vpaddd $tmp,$tmp,$tmp2\n\t"
4962-
"movd $tmp2,$src1\n\t"
4963-
"vpaddd $tmp2,$tmp,$tmp2\n\t"
4964-
"movd $dst,$tmp2\t! add reduction8I" %}
4965-
ins_encode %{
4966-
int vector_len = 0;
4967-
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
4968-
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4969-
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4970-
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4971-
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4972-
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4973-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4974-
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4975-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4878+
format %{ "vector_add8I_reduction $dst,$src1,$src2" %}
4879+
ins_encode %{
4880+
if (UseAVX > 2) {
4881+
int vector_len = Assembler::AVX_128bit;
4882+
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
4883+
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4884+
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4885+
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4886+
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4887+
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4888+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4889+
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4890+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4891+
} else {
4892+
assert(UseAVX > 0, "");
4893+
int vector_len = Assembler::AVX_256bit;
4894+
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4895+
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4896+
__ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
4897+
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4898+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
4899+
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4900+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
4901+
}
49764902
%}
49774903
ins_pipe( pipe_slow );
49784904
%}
49794905

4980-
instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
4981-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
4906+
instruct vadd16I_reduction_reg(rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
4907+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
49824908
match(Set dst (AddReductionVI src1 src2));
49834909
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4984-
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
4985-
"vpaddd $tmp3,$tmp3,$src2\n\t"
4986-
"vextracti128_high $tmp,$tmp3\n\t"
4987-
"vpaddd $tmp,$tmp,$tmp3\n\t"
4988-
"pshufd $tmp2,$tmp,0xE\n\t"
4989-
"vpaddd $tmp,$tmp,$tmp2\n\t"
4990-
"pshufd $tmp2,$tmp,0x1\n\t"
4991-
"vpaddd $tmp,$tmp,$tmp2\n\t"
4992-
"movd $tmp2,$src1\n\t"
4993-
"vpaddd $tmp2,$tmp,$tmp2\n\t"
4994-
"movd $dst,$tmp2\t! mul reduction16I" %}
4910+
format %{ "vector_add16I_reduction $dst,$src1,$src2" %}
49954911
ins_encode %{
49964912
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
49974913
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
@@ -5008,17 +4924,16 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVec src2, legVec
50084924
ins_pipe( pipe_slow );
50094925
%}
50104926

4927+
// =======================AddReductionVL==========================================
4928+
50114929
#ifdef _LP64
5012-
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5013-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4930+
instruct vadd2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
4931+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
50144932
match(Set dst (AddReductionVL src1 src2));
50154933
effect(TEMP tmp, TEMP tmp2);
5016-
format %{ "pshufd $tmp2,$src2,0xE\n\t"
5017-
"vpaddq $tmp,$src2,$tmp2\n\t"
5018-
"movdq $tmp2,$src1\n\t"
5019-
"vpaddq $tmp2,$tmp,$tmp2\n\t"
5020-
"movdq $dst,$tmp2\t! add reduction2L" %}
4934+
format %{ "vector_add2L_reduction $dst,$src1,$src2" %}
50214935
ins_encode %{
4936+
assert(UseAVX > 2, "required");
50224937
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
50234938
__ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
50244939
__ movdq($tmp2$$XMMRegister, $src1$$Register);
@@ -5028,18 +4943,13 @@ instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
50284943
ins_pipe( pipe_slow );
50294944
%}
50304945

5031-
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5032-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 4);
4946+
instruct vadd4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
4947+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
50334948
match(Set dst (AddReductionVL src1 src2));
50344949
effect(TEMP tmp, TEMP tmp2);
5035-
format %{ "vextracti128_high $tmp,$src2\n\t"
5036-
"vpaddq $tmp2,$tmp,$src2\n\t"
5037-
"pshufd $tmp,$tmp2,0xE\n\t"
5038-
"vpaddq $tmp2,$tmp2,$tmp\n\t"
5039-
"movdq $tmp,$src1\n\t"
5040-
"vpaddq $tmp2,$tmp2,$tmp\n\t"
5041-
"movdq $dst,$tmp2\t! add reduction4L" %}
4950+
format %{ "vector_add4L_reduction $dst,$src1,$src2" %}
50424951
ins_encode %{
4952+
assert(UseAVX > 2, "required");
50434953
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
50444954
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
50454955
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
@@ -5051,20 +4961,13 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
50514961
ins_pipe( pipe_slow );
50524962
%}
50534963

5054-
instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
5055-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
4964+
instruct vadd8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
4965+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
50564966
match(Set dst (AddReductionVL src1 src2));
50574967
effect(TEMP tmp, TEMP tmp2);
5058-
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
5059-
"vpaddq $tmp2,$tmp2,$src2\n\t"
5060-
"vextracti128_high $tmp,$tmp2\n\t"
5061-
"vpaddq $tmp2,$tmp2,$tmp\n\t"
5062-
"pshufd $tmp,$tmp2,0xE\n\t"
5063-
"vpaddq $tmp2,$tmp2,$tmp\n\t"
5064-
"movdq $tmp,$src1\n\t"
5065-
"vpaddq $tmp2,$tmp2,$tmp\n\t"
5066-
"movdq $dst,$tmp2\t! add reduction8L" %}
4968+
format %{ "vector_addL_reduction $dst,$src1,$src2" %}
50674969
ins_encode %{
4970+
assert(UseAVX > 2, "required");
50684971
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
50694972
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
50704973
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
@@ -5077,104 +4980,66 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, l
50774980
%}
50784981
ins_pipe( pipe_slow );
50794982
%}
5080-
#endif
4983+
#endif // _LP64
50814984

5082-
instruct rsadd2F_reduction_reg(regF dst, vec src2, vec tmp) %{
5083-
predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5084-
match(Set dst (AddReductionVF dst src2));
5085-
effect(TEMP dst, TEMP tmp);
5086-
format %{ "addss $dst,$src2\n\t"
5087-
"pshufd $tmp,$src2,0x01\n\t"
5088-
"addss $dst,$tmp\t! add reduction2F" %}
5089-
ins_encode %{
5090-
__ addss($dst$$XMMRegister, $src2$$XMMRegister);
5091-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5092-
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5093-
%}
5094-
ins_pipe( pipe_slow );
5095-
%}
4985+
// =======================AddReductionVF==========================================
50964986

5097-
instruct rvadd2F_reduction_reg(regF dst, vec src2, vec tmp) %{
5098-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4987+
instruct vadd2F_reduction_reg(regF dst, vec src2, vec tmp) %{
4988+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
50994989
match(Set dst (AddReductionVF dst src2));
51004990
effect(TEMP dst, TEMP tmp);
5101-
format %{ "vaddss $dst,$dst,$src2\n\t"
5102-
"pshufd $tmp,$src2,0x01\n\t"
5103-
"vaddss $dst,$dst,$tmp\t! add reduction2F" %}
4991+
format %{ "vector_add2F_reduction $dst,$dst,$src2" %}
51044992
ins_encode %{
5105-
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5106-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5107-
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
4993+
if (UseAVX > 0) {
4994+
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
4995+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4996+
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
4997+
} else {
4998+
assert(UseSSE > 0, "required");
4999+
__ addss($dst$$XMMRegister, $src2$$XMMRegister);
5000+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5001+
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5002+
}
51085003
%}
51095004
ins_pipe( pipe_slow );
51105005
%}
51115006

5112-
instruct rsadd4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5113-
predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5007+
instruct vadd4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5008+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
51145009
match(Set dst (AddReductionVF dst src2));
51155010
effect(TEMP dst, TEMP tmp);
5116-
format %{ "addss $dst,$src2\n\t"
5117-
"pshufd $tmp,$src2,0x01\n\t"
5118-
"addss $dst,$tmp\n\t"
5119-
"pshufd $tmp,$src2,0x02\n\t"
5120-
"addss $dst,$tmp\n\t"
5121-
"pshufd $tmp,$src2,0x03\n\t"
5122-
"addss $dst,$tmp\t! add reduction4F" %}
5123-
ins_encode %{
5124-
__ addss($dst$$XMMRegister, $src2$$XMMRegister);
5125-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5126-
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5127-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5128-
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5129-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5130-
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5131-
%}
5132-
ins_pipe( pipe_slow );
5133-
%}
5134-
5135-
instruct rvadd4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5136-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5137-
match(Set dst (AddReductionVF dst src2));
5138-
effect(TEMP tmp, TEMP dst);
5139-
format %{ "vaddss $dst,dst,$src2\n\t"
5140-
"pshufd $tmp,$src2,0x01\n\t"
5141-
"vaddss $dst,$dst,$tmp\n\t"
5142-
"pshufd $tmp,$src2,0x02\n\t"
5143-
"vaddss $dst,$dst,$tmp\n\t"
5144-
"pshufd $tmp,$src2,0x03\n\t"
5145-
"vaddss $dst,$dst,$tmp\t! add reduction4F" %}
5011+
format %{ "vector_add4F_reduction $dst,$dst,$src2" %}
51465012
ins_encode %{
5147-
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5148-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5149-
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5150-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5151-
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5152-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5153-
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5013+
if (UseAVX > 0) {
5014+
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5015+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5016+
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5017+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5018+
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5019+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5020+
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5021+
} else {
5022+
assert(UseSSE > 0, "required");
5023+
__ addss($dst$$XMMRegister, $src2$$XMMRegister);
5024+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5025+
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5026+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5027+
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5028+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5029+
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5030+
}
51545031
%}
51555032
ins_pipe( pipe_slow );
51565033
%}
51575034

5158-
instruct radd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5159-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5035+
5036+
instruct vadd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5037+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
51605038
match(Set dst (AddReductionVF dst src2));
51615039
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5162-
format %{ "vaddss $dst,$dst,$src2\n\t"
5163-
"pshufd $tmp,$src2,0x01\n\t"
5164-
"vaddss $dst,$dst,$tmp\n\t"
5165-
"pshufd $tmp,$src2,0x02\n\t"
5166-
"vaddss $dst,$dst,$tmp\n\t"
5167-
"pshufd $tmp,$src2,0x03\n\t"
5168-
"vaddss $dst,$dst,$tmp\n\t"
5169-
"vextractf128_high $tmp2,$src2\n\t"
5170-
"vaddss $dst,$dst,$tmp2\n\t"
5171-
"pshufd $tmp,$tmp2,0x01\n\t"
5172-
"vaddss $dst,$dst,$tmp\n\t"
5173-
"pshufd $tmp,$tmp2,0x02\n\t"
5174-
"vaddss $dst,$dst,$tmp\n\t"
5175-
"pshufd $tmp,$tmp2,0x03\n\t"
5176-
"vaddss $dst,$dst,$tmp\t! add reduction8F" %}
5040+
format %{ "vector_add8F_reduction $dst,$dst,$src2" %}
51775041
ins_encode %{
5042+
assert(UseAVX > 0, "required");
51785043
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
51795044
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
51805045
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5194,42 +5059,13 @@ instruct radd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
51945059
ins_pipe( pipe_slow );
51955060
%}
51965061

5197-
instruct radd16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5198-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
5062+
instruct vadd16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5063+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
51995064
match(Set dst (AddReductionVF dst src2));
52005065
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5201-
format %{ "vaddss $dst,$dst,$src2\n\t"
5202-
"pshufd $tmp,$src2,0x01\n\t"
5203-
"vaddss $dst,$dst,$tmp\n\t"
5204-
"pshufd $tmp,$src2,0x02\n\t"
5205-
"vaddss $dst,$dst,$tmp\n\t"
5206-
"pshufd $tmp,$src2,0x03\n\t"
5207-
"vaddss $dst,$dst,$tmp\n\t"
5208-
"vextractf32x4 $tmp2,$src2,0x1\n\t"
5209-
"vaddss $dst,$dst,$tmp2\n\t"
5210-
"pshufd $tmp,$tmp2,0x01\n\t"
5211-
"vaddss $dst,$dst,$tmp\n\t"
5212-
"pshufd $tmp,$tmp2,0x02\n\t"
5213-
"vaddss $dst,$dst,$tmp\n\t"
5214-
"pshufd $tmp,$tmp2,0x03\n\t"
5215-
"vaddss $dst,$dst,$tmp\n\t"
5216-
"vextractf32x4 $tmp2,$src2,0x2\n\t"
5217-
"vaddss $dst,$dst,$tmp2\n\t"
5218-
"pshufd $tmp,$tmp2,0x01\n\t"
5219-
"vaddss $dst,$dst,$tmp\n\t"
5220-
"pshufd $tmp,$tmp2,0x02\n\t"
5221-
"vaddss $dst,$dst,$tmp\n\t"
5222-
"pshufd $tmp,$tmp2,0x03\n\t"
5223-
"vaddss $dst,$dst,$tmp\n\t"
5224-
"vextractf32x4 $tmp2,$src2,0x3\n\t"
5225-
"vaddss $dst,$dst,$tmp2\n\t"
5226-
"pshufd $tmp,$tmp2,0x01\n\t"
5227-
"vaddss $dst,$dst,$tmp\n\t"
5228-
"pshufd $tmp,$tmp2,0x02\n\t"
5229-
"vaddss $dst,$dst,$tmp\n\t"
5230-
"pshufd $tmp,$tmp2,0x03\n\t"
5231-
"vaddss $dst,$dst,$tmp\t! add reduction16F" %}
5066+
format %{ "vector_add16F_reduction $dst,$dst,$src2" %}
52325067
ins_encode %{
5068+
assert(UseAVX > 2, "required");
52335069
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
52345070
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
52355071
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5265,48 +5101,35 @@ instruct radd16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %
52655101
ins_pipe( pipe_slow );
52665102
%}
52675103

5268-
instruct rsadd2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5269-
predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5270-
match(Set dst (AddReductionVD dst src2));
5271-
effect(TEMP tmp, TEMP dst);
5272-
format %{ "addsd $dst,$src2\n\t"
5273-
"pshufd $tmp,$src2,0xE\n\t"
5274-
"addsd $dst,$tmp\t! add reduction2D" %}
5275-
ins_encode %{
5276-
__ addsd($dst$$XMMRegister, $src2$$XMMRegister);
5277-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5278-
__ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
5279-
%}
5280-
ins_pipe( pipe_slow );
5281-
%}
5104+
// =======================AddReductionVD==========================================
52825105

5283-
instruct rvadd2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5284-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5106+
instruct vadd2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5107+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
52855108
match(Set dst (AddReductionVD dst src2));
52865109
effect(TEMP tmp, TEMP dst);
5287-
format %{ "vaddsd $dst,$dst,$src2\n\t"
5288-
"pshufd $tmp,$src2,0xE\n\t"
5289-
"vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
5110+
format %{ "vector_add2D_reduction $dst,$src2" %}
52905111
ins_encode %{
5291-
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5292-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5293-
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5112+
if (UseAVX > 0) {
5113+
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5114+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5115+
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5116+
} else {
5117+
assert(UseSSE > 0, "required");
5118+
__ addsd($dst$$XMMRegister, $src2$$XMMRegister);
5119+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5120+
__ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
5121+
}
52945122
%}
52955123
ins_pipe( pipe_slow );
52965124
%}
52975125

5298-
instruct rvadd4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
5299-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5126+
instruct vadd4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
5127+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
53005128
match(Set dst (AddReductionVD dst src2));
53015129
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5302-
format %{ "vaddsd $dst,$dst,$src2\n\t"
5303-
"pshufd $tmp,$src2,0xE\n\t"
5304-
"vaddsd $dst,$dst,$tmp\n\t"
5305-
"vextractf128 $tmp2,$src2,0x1\n\t"
5306-
"vaddsd $dst,$dst,$tmp2\n\t"
5307-
"pshufd $tmp,$tmp2,0xE\n\t"
5308-
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
5130+
format %{ "vector_add4D_reduction $dst,$dst,$src2" %}
53095131
ins_encode %{
5132+
assert(UseAVX > 0, "required");
53105133
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
53115134
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
53125135
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5318,26 +5141,13 @@ instruct rvadd4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
53185141
ins_pipe( pipe_slow );
53195142
%}
53205143

5321-
instruct rvadd8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5322-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5144+
instruct vadd8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5145+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
53235146
match(Set dst (AddReductionVD dst src2));
53245147
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5325-
format %{ "vaddsd $dst,$dst,$src2\n\t"
5326-
"pshufd $tmp,$src2,0xE\n\t"
5327-
"vaddsd $dst,$dst,$tmp\n\t"
5328-
"vextractf32x4 $tmp2,$src2,0x1\n\t"
5329-
"vaddsd $dst,$dst,$tmp2\n\t"
5330-
"pshufd $tmp,$tmp2,0xE\n\t"
5331-
"vaddsd $dst,$dst,$tmp\n\t"
5332-
"vextractf32x4 $tmp2,$src2,0x2\n\t"
5333-
"vaddsd $dst,$dst,$tmp2\n\t"
5334-
"pshufd $tmp,$tmp2,0xE\n\t"
5335-
"vaddsd $dst,$dst,$tmp\n\t"
5336-
"vextractf32x4 $tmp2,$src2,0x3\n\t"
5337-
"vaddsd $dst,$dst,$tmp2\n\t"
5338-
"pshufd $tmp,$tmp2,0xE\n\t"
5339-
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
5148+
format %{ "vector_add8D_reduction $dst,$dst,$src2" %}
53405149
ins_encode %{
5150+
assert(UseAVX > 2, "required");
53415151
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
53425152
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
53435153
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5357,107 +5167,70 @@ instruct rvadd8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %
53575167
ins_pipe( pipe_slow );
53585168
%}
53595169

5360-
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5361-
predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5362-
match(Set dst (MulReductionVI src1 src2));
5363-
effect(TEMP tmp, TEMP tmp2);
5364-
format %{ "pshufd $tmp2,$src2,0x1\n\t"
5365-
"pmulld $tmp2,$src2\n\t"
5366-
"movd $tmp,$src1\n\t"
5367-
"pmulld $tmp2,$tmp\n\t"
5368-
"movd $dst,$tmp2\t! mul reduction2I" %}
5369-
ins_encode %{
5370-
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5371-
__ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5372-
__ movdl($tmp$$XMMRegister, $src1$$Register);
5373-
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5374-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5375-
%}
5376-
ins_pipe( pipe_slow );
5377-
%}
5170+
// =======================MulReductionVI==========================================
53785171

5379-
instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5380-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5172+
instruct vmul2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5173+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
53815174
match(Set dst (MulReductionVI src1 src2));
53825175
effect(TEMP tmp, TEMP tmp2);
5383-
format %{ "pshufd $tmp2,$src2,0x1\n\t"
5384-
"vpmulld $tmp,$src2,$tmp2\n\t"
5385-
"movd $tmp2,$src1\n\t"
5386-
"vpmulld $tmp2,$tmp,$tmp2\n\t"
5387-
"movd $dst,$tmp2\t! mul reduction2I" %}
5176+
format %{ "vector_mul2I_reduction $dst,$src1,$src2" %}
53885177
ins_encode %{
5389-
int vector_len = 0;
5390-
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5391-
__ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5392-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
5393-
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5394-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5395-
%}
5396-
ins_pipe( pipe_slow );
5397-
%}
5398-
5399-
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5400-
predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5401-
match(Set dst (MulReductionVI src1 src2));
5402-
effect(TEMP tmp, TEMP tmp2);
5403-
format %{ "pshufd $tmp2,$src2,0xE\n\t"
5404-
"pmulld $tmp2,$src2\n\t"
5405-
"pshufd $tmp,$tmp2,0x1\n\t"
5406-
"pmulld $tmp2,$tmp\n\t"
5407-
"movd $tmp,$src1\n\t"
5408-
"pmulld $tmp2,$tmp\n\t"
5409-
"movd $dst,$tmp2\t! mul reduction4I" %}
5410-
ins_encode %{
5411-
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5412-
__ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5413-
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
5414-
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5415-
__ movdl($tmp$$XMMRegister, $src1$$Register);
5416-
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5417-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5178+
if (UseAVX > 0) {
5179+
int vector_len = Assembler::AVX_128bit;
5180+
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5181+
__ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5182+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
5183+
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5184+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5185+
} else {
5186+
assert(UseSSE > 3, "required");
5187+
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5188+
__ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5189+
__ movdl($tmp$$XMMRegister, $src1$$Register);
5190+
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5191+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5192+
}
54185193
%}
54195194
ins_pipe( pipe_slow );
54205195
%}
54215196

5422-
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5423-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5197+
instruct vmul4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5198+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
54245199
match(Set dst (MulReductionVI src1 src2));
54255200
effect(TEMP tmp, TEMP tmp2);
5426-
format %{ "pshufd $tmp2,$src2,0xE\n\t"
5427-
"vpmulld $tmp,$src2,$tmp2\n\t"
5428-
"pshufd $tmp2,$tmp,0x1\n\t"
5429-
"vpmulld $tmp,$tmp,$tmp2\n\t"
5430-
"movd $tmp2,$src1\n\t"
5431-
"vpmulld $tmp2,$tmp,$tmp2\n\t"
5432-
"movd $dst,$tmp2\t! mul reduction4I" %}
5201+
format %{ "vector_mul4I_reduction $dst,$src1,$src2" %}
54335202
ins_encode %{
5434-
int vector_len = 0;
5435-
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5436-
__ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5437-
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5438-
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5439-
__ movdl($tmp2$$XMMRegister, $src1$$Register);
5440-
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5441-
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5203+
if (UseAVX > 0) {
5204+
int vector_len = Assembler::AVX_128bit;
5205+
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5206+
__ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5207+
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5208+
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5209+
__ movdl($tmp2$$XMMRegister, $src1$$Register);
5210+
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5211+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5212+
} else {
5213+
assert(UseSSE > 3, "required");
5214+
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5215+
__ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5216+
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
5217+
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5218+
__ movdl($tmp$$XMMRegister, $src1$$Register);
5219+
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5220+
__ movdl($dst$$Register, $tmp2$$XMMRegister);
5221+
}
54425222
%}
54435223
ins_pipe( pipe_slow );
54445224
%}
54455225

5446-
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5447-
predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5226+
instruct vmul8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5227+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
54485228
match(Set dst (MulReductionVI src1 src2));
54495229
effect(TEMP tmp, TEMP tmp2);
5450-
format %{ "vextracti128_high $tmp,$src2\n\t"
5451-
"vpmulld $tmp,$tmp,$src2\n\t"
5452-
"pshufd $tmp2,$tmp,0xE\n\t"
5453-
"vpmulld $tmp,$tmp,$tmp2\n\t"
5454-
"pshufd $tmp2,$tmp,0x1\n\t"
5455-
"vpmulld $tmp,$tmp,$tmp2\n\t"
5456-
"movd $tmp2,$src1\n\t"
5457-
"vpmulld $tmp2,$tmp,$tmp2\n\t"
5458-
"movd $dst,$tmp2\t! mul reduction8I" %}
5230+
format %{ "vector_mul8I_reduction $dst,$src1,$src2" %}
54595231
ins_encode %{
5460-
int vector_len = 0;
5232+
assert(UseAVX > 1, "required");
5233+
int vector_len = Assembler::AVX_128bit;
54615234
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
54625235
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
54635236
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
@@ -5471,22 +5244,13 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp
54715244
ins_pipe( pipe_slow );
54725245
%}
54735246

5474-
instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
5475-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
5247+
instruct vmul16I_reduction_reg(rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
5248+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
54765249
match(Set dst (MulReductionVI src1 src2));
54775250
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5478-
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
5479-
"vpmulld $tmp3,$tmp3,$src2\n\t"
5480-
"vextracti128_high $tmp,$tmp3\n\t"
5481-
"vpmulld $tmp,$tmp,$src2\n\t"
5482-
"pshufd $tmp2,$tmp,0xE\n\t"
5483-
"vpmulld $tmp,$tmp,$tmp2\n\t"
5484-
"pshufd $tmp2,$tmp,0x1\n\t"
5485-
"vpmulld $tmp,$tmp,$tmp2\n\t"
5486-
"movd $tmp2,$src1\n\t"
5487-
"vpmulld $tmp2,$tmp,$tmp2\n\t"
5488-
"movd $dst,$tmp2\t! mul reduction16I" %}
5251+
format %{ "vector_mul16I_reduction $dst,$src1,$src2" %}
54895252
ins_encode %{
5253+
assert(UseAVX > 2, "required");
54905254
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
54915255
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
54925256
__ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
@@ -5502,17 +5266,16 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVec src2, legVec tmp,
55025266
ins_pipe( pipe_slow );
55035267
%}
55045268

5269+
// =======================MulReductionVL==========================================
5270+
55055271
#ifdef _LP64
5506-
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5507-
predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 2);
5272+
instruct vmul2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5273+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
55085274
match(Set dst (MulReductionVL src1 src2));
55095275
effect(TEMP tmp, TEMP tmp2);
5510-
format %{ "pshufd $tmp2,$src2,0xE\n\t"
5511-
"vpmullq $tmp,$src2,$tmp2\n\t"
5512-
"movdq $tmp2,$src1\n\t"
5513-
"vpmullq $tmp2,$tmp,$tmp2\n\t"
5514-
"movdq $dst,$tmp2\t! mul reduction2L" %}
5276+
format %{ "vector_mul2L_reduction $dst,$src1,$src2" %}
55155277
ins_encode %{
5278+
assert(VM_Version::supports_avx512dq(), "required");
55165279
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
55175280
__ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
55185281
__ movdq($tmp2$$XMMRegister, $src1$$Register);
@@ -5522,18 +5285,13 @@ instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
55225285
ins_pipe( pipe_slow );
55235286
%}
55245287

5525-
instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5526-
predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 4);
5288+
instruct vmul4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5289+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
55275290
match(Set dst (MulReductionVL src1 src2));
55285291
effect(TEMP tmp, TEMP tmp2);
5529-
format %{ "vextracti128_high $tmp,$src2\n\t"
5530-
"vpmullq $tmp2,$tmp,$src2\n\t"
5531-
"pshufd $tmp,$tmp2,0xE\n\t"
5532-
"vpmullq $tmp2,$tmp2,$tmp\n\t"
5533-
"movdq $tmp,$src1\n\t"
5534-
"vpmullq $tmp2,$tmp2,$tmp\n\t"
5535-
"movdq $dst,$tmp2\t! mul reduction4L" %}
5292+
format %{ "vector_mul4L_reduction $dst,$src1,$src2" %}
55365293
ins_encode %{
5294+
assert(VM_Version::supports_avx512dq(), "required");
55375295
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
55385296
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
55395297
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
@@ -5545,20 +5303,13 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
55455303
ins_pipe( pipe_slow );
55465304
%}
55475305

5548-
instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
5549-
predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 8);
5306+
instruct vmul8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
5307+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
55505308
match(Set dst (MulReductionVL src1 src2));
55515309
effect(TEMP tmp, TEMP tmp2);
5552-
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
5553-
"vpmullq $tmp2,$tmp2,$src2\n\t"
5554-
"vextracti128_high $tmp,$tmp2\n\t"
5555-
"vpmullq $tmp2,$tmp2,$tmp\n\t"
5556-
"pshufd $tmp,$tmp2,0xE\n\t"
5557-
"vpmullq $tmp2,$tmp2,$tmp\n\t"
5558-
"movdq $tmp,$src1\n\t"
5559-
"vpmullq $tmp2,$tmp2,$tmp\n\t"
5560-
"movdq $dst,$tmp2\t! mul reduction8L" %}
5310+
format %{ "vector_mul8L_reduction $dst,$src1,$src2" %}
55615311
ins_encode %{
5312+
assert(VM_Version::supports_avx512dq(), "required");
55625313
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
55635314
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
55645315
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
@@ -5573,102 +5324,63 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, l
55735324
%}
55745325
#endif
55755326

5576-
instruct rsmul2F_reduction(regF dst, vec src2, vec tmp) %{
5577-
predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5578-
match(Set dst (MulReductionVF dst src2));
5579-
effect(TEMP dst, TEMP tmp);
5580-
format %{ "mulss $dst,$src2\n\t"
5581-
"pshufd $tmp,$src2,0x01\n\t"
5582-
"mulss $dst,$tmp\t! mul reduction2F" %}
5583-
ins_encode %{
5584-
__ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5585-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5586-
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5587-
%}
5588-
ins_pipe( pipe_slow );
5589-
%}
5327+
// =======================MulReductionVF==========================================
55905328

5591-
instruct rvmul2F_reduction_reg(regF dst, vec src2, vec tmp) %{
5592-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5329+
instruct vmul2F_reduction_reg(regF dst, vec src2, vec tmp) %{
5330+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
55935331
match(Set dst (MulReductionVF dst src2));
5594-
effect(TEMP tmp, TEMP dst);
5595-
format %{ "vmulss $dst,$dst,$src2\n\t"
5596-
"pshufd $tmp,$src2,0x01\n\t"
5597-
"vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
5332+
effect(TEMP dst, TEMP tmp);
5333+
format %{ "vector_mul2F_reduction $dst,$dst,$src2" %}
55985334
ins_encode %{
5599-
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5600-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5601-
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5335+
if (UseAVX > 0) {
5336+
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5337+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5338+
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5339+
} else {
5340+
assert(UseSSE > 0, "required");
5341+
__ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5342+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5343+
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5344+
}
56025345
%}
56035346
ins_pipe( pipe_slow );
56045347
%}
56055348

5606-
instruct rsmul4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5607-
predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5349+
instruct vmul4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5350+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
56085351
match(Set dst (MulReductionVF dst src2));
56095352
effect(TEMP dst, TEMP tmp);
5610-
format %{ "mulss $dst,$src2\n\t"
5611-
"pshufd $tmp,$src2,0x01\n\t"
5612-
"mulss $dst,$tmp\n\t"
5613-
"pshufd $tmp,$src2,0x02\n\t"
5614-
"mulss $dst,$tmp\n\t"
5615-
"pshufd $tmp,$src2,0x03\n\t"
5616-
"mulss $dst,$tmp\t! mul reduction4F" %}
5617-
ins_encode %{
5618-
__ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5619-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5620-
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5621-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5622-
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5623-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5624-
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5625-
%}
5626-
ins_pipe( pipe_slow );
5627-
%}
5628-
5629-
instruct rvmul4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5630-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5631-
match(Set dst (MulReductionVF dst src2));
5632-
effect(TEMP tmp, TEMP dst);
5633-
format %{ "vmulss $dst,$dst,$src2\n\t"
5634-
"pshufd $tmp,$src2,0x01\n\t"
5635-
"vmulss $dst,$dst,$tmp\n\t"
5636-
"pshufd $tmp,$src2,0x02\n\t"
5637-
"vmulss $dst,$dst,$tmp\n\t"
5638-
"pshufd $tmp,$src2,0x03\n\t"
5639-
"vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
5353+
format %{ "vector_mul4F_reduction $dst,$dst,$src2" %}
56405354
ins_encode %{
5641-
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5642-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5643-
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5644-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5645-
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5646-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5647-
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5355+
if (UseAVX > 0) {
5356+
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5357+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5358+
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5359+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5360+
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5361+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5362+
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5363+
} else {
5364+
assert(UseSSE > 0, "required");
5365+
__ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5366+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5367+
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5368+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5369+
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5370+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5371+
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5372+
}
56485373
%}
56495374
ins_pipe( pipe_slow );
56505375
%}
56515376

5652-
instruct rvmul8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5653-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5377+
instruct vmul8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5378+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
56545379
match(Set dst (MulReductionVF dst src2));
56555380
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5656-
format %{ "vmulss $dst,$dst,$src2\n\t"
5657-
"pshufd $tmp,$src2,0x01\n\t"
5658-
"vmulss $dst,$dst,$tmp\n\t"
5659-
"pshufd $tmp,$src2,0x02\n\t"
5660-
"vmulss $dst,$dst,$tmp\n\t"
5661-
"pshufd $tmp,$src2,0x03\n\t"
5662-
"vmulss $dst,$dst,$tmp\n\t"
5663-
"vextractf128_high $tmp2,$src2\n\t"
5664-
"vmulss $dst,$dst,$tmp2\n\t"
5665-
"pshufd $tmp,$tmp2,0x01\n\t"
5666-
"vmulss $dst,$dst,$tmp\n\t"
5667-
"pshufd $tmp,$tmp2,0x02\n\t"
5668-
"vmulss $dst,$dst,$tmp\n\t"
5669-
"pshufd $tmp,$tmp2,0x03\n\t"
5670-
"vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
5381+
format %{ "vector_mul8F_reduction $dst,$dst,$src2" %}
56715382
ins_encode %{
5383+
assert(UseAVX > 0, "required");
56725384
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
56735385
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
56745386
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5688,42 +5400,13 @@ instruct rvmul8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
56885400
ins_pipe( pipe_slow );
56895401
%}
56905402

5691-
instruct rvmul16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5692-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
5403+
instruct vmul16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5404+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
56935405
match(Set dst (MulReductionVF dst src2));
56945406
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5695-
format %{ "vmulss $dst,$dst,$src2\n\t"
5696-
"pshufd $tmp,$src2,0x01\n\t"
5697-
"vmulss $dst,$dst,$tmp\n\t"
5698-
"pshufd $tmp,$src2,0x02\n\t"
5699-
"vmulss $dst,$dst,$tmp\n\t"
5700-
"pshufd $tmp,$src2,0x03\n\t"
5701-
"vmulss $dst,$dst,$tmp\n\t"
5702-
"vextractf32x4 $tmp2,$src2,0x1\n\t"
5703-
"vmulss $dst,$dst,$tmp2\n\t"
5704-
"pshufd $tmp,$tmp2,0x01\n\t"
5705-
"vmulss $dst,$dst,$tmp\n\t"
5706-
"pshufd $tmp,$tmp2,0x02\n\t"
5707-
"vmulss $dst,$dst,$tmp\n\t"
5708-
"pshufd $tmp,$tmp2,0x03\n\t"
5709-
"vmulss $dst,$dst,$tmp\n\t"
5710-
"vextractf32x4 $tmp2,$src2,0x2\n\t"
5711-
"vmulss $dst,$dst,$tmp2\n\t"
5712-
"pshufd $tmp,$tmp2,0x01\n\t"
5713-
"vmulss $dst,$dst,$tmp\n\t"
5714-
"pshufd $tmp,$tmp2,0x02\n\t"
5715-
"vmulss $dst,$dst,$tmp\n\t"
5716-
"pshufd $tmp,$tmp2,0x03\n\t"
5717-
"vmulss $dst,$dst,$tmp\n\t"
5718-
"vextractf32x4 $tmp2,$src2,0x3\n\t"
5719-
"vmulss $dst,$dst,$tmp2\n\t"
5720-
"pshufd $tmp,$tmp2,0x01\n\t"
5721-
"vmulss $dst,$dst,$tmp\n\t"
5722-
"pshufd $tmp,$tmp2,0x02\n\t"
5723-
"vmulss $dst,$dst,$tmp\n\t"
5724-
"pshufd $tmp,$tmp2,0x03\n\t"
5725-
"vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
5407+
format %{ "vector_mul16F_reduction $dst,$dst,$src2" %}
57265408
ins_encode %{
5409+
assert(UseAVX > 2, "required");
57275410
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
57285411
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
57295412
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5759,48 +5442,36 @@ instruct rvmul16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2)
57595442
ins_pipe( pipe_slow );
57605443
%}
57615444

5762-
instruct rsmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5763-
predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5445+
// =======================MulReductionVD==========================================
5446+
5447+
instruct vmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5448+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
57645449
match(Set dst (MulReductionVD dst src2));
57655450
effect(TEMP dst, TEMP tmp);
5766-
format %{ "mulsd $dst,$src2\n\t"
5767-
"pshufd $tmp,$src2,0xE\n\t"
5768-
"mulsd $dst,$tmp\t! mul reduction2D" %}
5451+
format %{ "vector_mul2D_reduction $dst,$dst,$src2" %}
57695452
ins_encode %{
5770-
__ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
5771-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5772-
__ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5453+
if (UseAVX > 0) {
5454+
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5455+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5456+
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5457+
} else {
5458+
assert(UseSSE > 0, "required");
5459+
__ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
5460+
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5461+
__ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5462+
}
57735463
%}
57745464
ins_pipe( pipe_slow );
57755465
%}
57765466

5777-
instruct rvmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5778-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5779-
match(Set dst (MulReductionVD dst src2));
5780-
effect(TEMP tmp, TEMP dst);
5781-
format %{ "vmulsd $dst,$dst,$src2\n\t"
5782-
"pshufd $tmp,$src2,0xE\n\t"
5783-
"vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
5784-
ins_encode %{
5785-
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5786-
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5787-
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5788-
%}
5789-
ins_pipe( pipe_slow );
5790-
%}
57915467

5792-
instruct rvmul4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
5793-
predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5468+
instruct vmul4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
5469+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
57945470
match(Set dst (MulReductionVD dst src2));
57955471
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5796-
format %{ "vmulsd $dst,$dst,$src2\n\t"
5797-
"pshufd $tmp,$src2,0xE\n\t"
5798-
"vmulsd $dst,$dst,$tmp\n\t"
5799-
"vextractf128_high $tmp2,$src2\n\t"
5800-
"vmulsd $dst,$dst,$tmp2\n\t"
5801-
"pshufd $tmp,$tmp2,0xE\n\t"
5802-
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
5472+
format %{ "vector_mul4D_reduction $dst,$dst,$src2" %}
58035473
ins_encode %{
5474+
assert(UseAVX > 0, "required");
58045475
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
58055476
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
58065477
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5812,26 +5483,13 @@ instruct rvmul4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
58125483
ins_pipe( pipe_slow );
58135484
%}
58145485

5815-
instruct rvmul8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5816-
predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5486+
instruct vmul8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5487+
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
58175488
match(Set dst (MulReductionVD dst src2));
58185489
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5819-
format %{ "vmulsd $dst,$dst,$src2\n\t"
5820-
"pshufd $tmp,$src2,0xE\n\t"
5821-
"vmulsd $dst,$dst,$tmp\n\t"
5822-
"vextractf32x4 $tmp2,$src2,0x1\n\t"
5823-
"vmulsd $dst,$dst,$tmp2\n\t"
5824-
"pshufd $tmp,$src2,0xE\n\t"
5825-
"vmulsd $dst,$dst,$tmp\n\t"
5826-
"vextractf32x4 $tmp2,$src2,0x2\n\t"
5827-
"vmulsd $dst,$dst,$tmp2\n\t"
5828-
"pshufd $tmp,$tmp2,0xE\n\t"
5829-
"vmulsd $dst,$dst,$tmp\n\t"
5830-
"vextractf32x4 $tmp2,$src2,0x3\n\t"
5831-
"vmulsd $dst,$dst,$tmp2\n\t"
5832-
"pshufd $tmp,$tmp2,0xE\n\t"
5833-
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
5490+
format %{ "vector_mul8D_reduction $dst,$dst,$src2" %}
58345491
ins_encode %{
5492+
assert(UseAVX > 0, "required");
58355493
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
58365494
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
58375495
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);

0 commit comments

Comments
 (0)
Please sign in to comment.