@@ -4803,195 +4803,111 @@ instruct Repl8D_zero_evex(vec dst, immD0 zero) %{
4803
4803
4804
4804
// ====================REDUCTION ARITHMETIC=======================================
4805
4805
4806
- instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4807
- predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4808
- match(Set dst (AddReductionVI src1 src2));
4809
- effect(TEMP tmp2, TEMP tmp);
4810
- format %{ "movdqu $tmp2,$src2\n\t"
4811
- "phaddd $tmp2,$tmp2\n\t"
4812
- "movd $tmp,$src1\n\t"
4813
- "paddd $tmp,$tmp2\n\t"
4814
- "movd $dst,$tmp\t! add reduction2I" %}
4815
- ins_encode %{
4816
- __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4817
- __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4818
- __ movdl($tmp$$XMMRegister, $src1$$Register);
4819
- __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4820
- __ movdl($dst$$Register, $tmp$$XMMRegister);
4821
- %}
4822
- ins_pipe( pipe_slow );
4823
- %}
4806
+ // =======================AddReductionVI==========================================
4824
4807
4825
- instruct rvadd2I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4826
- predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 2);
4808
+ instruct vadd2I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ // dst = src1 + (sum of the 2 int lanes of src2); the encoding is picked at emit time from UseAVX / UseSSE
4809
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
4827
4810
match(Set dst (AddReductionVI src1 src2));
4828
4811
effect(TEMP tmp, TEMP tmp2);
4829
- format %{ "vphaddd $tmp,$src2,$src2\n\t"
4830
- "movd $tmp2,$src1\n\t"
4831
- "vpaddd $tmp2,$tmp2,$tmp\n\t"
4832
- "movd $dst,$tmp2\t! add reduction2I" %}
4833
- ins_encode %{
4834
- int vector_len = 0;
4835
- __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4836
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4837
- __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4838
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4839
- %}
4840
- ins_pipe( pipe_slow );
4841
- %}
4842
-
4843
- instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4844
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4845
- match(Set dst (AddReductionVI src1 src2));
4846
- effect(TEMP tmp, TEMP tmp2);
4847
- format %{ "pshufd $tmp2,$src2,0x1\n\t"
4848
- "vpaddd $tmp,$src2,$tmp2\n\t"
4849
- "movd $tmp2,$src1\n\t"
4850
- "vpaddd $tmp2,$tmp,$tmp2\n\t"
4851
- "movd $dst,$tmp2\t! add reduction2I" %}
4852
- ins_encode %{
4853
- int vector_len = 0;
4854
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4855
- __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4856
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4857
- __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4858
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4859
- %}
4860
- ins_pipe( pipe_slow );
4861
- %}
4862
-
4863
- instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4864
- predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
4865
- match(Set dst (AddReductionVI src1 src2));
4866
- effect(TEMP tmp, TEMP tmp2);
4867
- format %{ "movdqu $tmp,$src2\n\t"
4868
- "phaddd $tmp,$tmp\n\t"
4869
- "phaddd $tmp,$tmp\n\t"
4870
- "movd $tmp2,$src1\n\t"
4871
- "paddd $tmp2,$tmp\n\t"
4872
- "movd $dst,$tmp2\t! add reduction4I" %}
4873
- ins_encode %{
4874
- __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
4875
- __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4876
- __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4877
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4878
- __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
4879
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4880
- %}
4881
- ins_pipe( pipe_slow );
4882
- %}
4883
-
4884
- instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4885
- predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 4);
4886
- match(Set dst (AddReductionVI src1 src2));
4887
- effect(TEMP tmp, TEMP tmp2);
4888
- format %{ "vphaddd $tmp,$src2,$src2\n\t"
4889
- "vphaddd $tmp,$tmp,$tmp\n\t"
4890
- "movd $tmp2,$src1\n\t"
4891
- "vpaddd $tmp2,$tmp2,$tmp\n\t"
4892
- "movd $dst,$tmp2\t! add reduction4I" %}
4893
- ins_encode %{
4894
- int vector_len = 0;
4895
- __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4896
- __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
4897
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4898
- __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4899
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4900
- %}
4901
- ins_pipe( pipe_slow );
4902
- %}
4903
-
4904
- instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4905
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 4);
4906
- match(Set dst (AddReductionVI src1 src2));
4907
- effect(TEMP tmp, TEMP tmp2);
4908
- format %{ "pshufd $tmp2,$src2,0xE\n\t"
4909
- "vpaddd $tmp,$src2,$tmp2\n\t"
4910
- "pshufd $tmp2,$tmp,0x1\n\t"
4911
- "vpaddd $tmp,$tmp,$tmp2\n\t"
4912
- "movd $tmp2,$src1\n\t"
4913
- "vpaddd $tmp2,$tmp,$tmp2\n\t"
4914
- "movd $dst,$tmp2\t! add reduction4I" %}
4915
- ins_encode %{
4916
- int vector_len = 0;
4917
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4918
- __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4919
- __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4920
- __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4921
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4922
- __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4923
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4812
+ format %{ "vector_add2I_reduction $dst,$src1,$src2" %}
4813
+ ins_encode %{
4814
+ if (UseAVX > 2) { // same sequence as the removed rvadd2I_reduction_reg_evex rule
4815
+ int vector_len = Assembler::AVX_128bit;
4816
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); // tmp2[0] = src2[1]
4817
+ __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp[0] = src2[0] + src2[1]
4818
+ __ movdl($tmp2$$XMMRegister, $src1$$Register); // bring the scalar accumulator into a vector register
4819
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp2[0] = lane sum + src1
4820
+ __ movdl($dst$$Register, $tmp2$$XMMRegister); // low 32 bits are the result
4821
+ } else if (VM_Version::supports_avxonly()) { // same sequence as the removed rvadd2I_reduction_reg rule
4822
+ int vector_len = Assembler::AVX_128bit;
4823
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); // horizontal add: tmp[0] = src2[0] + src2[1]
4824
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
4825
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); // tmp2[0] = src1 + lane sum
4826
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
4827
+ } else { // same sequence as the removed rsadd2I_reduction_reg rule
4828
+ assert(UseSSE > 2, "required"); // the predicate no longer tests UseSSE, so it is checked here
4829
+ __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); // work on a copy: the two-operand phaddd overwrites its destination
4830
+ __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); // tmp2[0] = src2[0] + src2[1]
4831
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
4832
+ __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); // tmp[0] = src1 + lane sum
4833
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
4834
+ }
4924
4835
%}
4925
4836
ins_pipe( pipe_slow );
4926
4837
%}
4927
4838
4928
- instruct rvadd8I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4929
- predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 8);
4839
+ instruct vadd4I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ // dst = src1 + (sum of the 4 int lanes of src2); the encoding is picked at emit time from UseAVX / UseSSE
4840
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
4930
4841
match(Set dst (AddReductionVI src1 src2));
4931
4842
effect(TEMP tmp, TEMP tmp2);
4932
- format %{ "vphaddd $tmp,$src2,$src2\n\t"
4933
- "vphaddd $tmp,$tmp,$tmp2\n\t"
4934
- "vextracti128_high $tmp2,$tmp\n\t"
4935
- "vpaddd $tmp,$tmp,$tmp2\n\t"
4936
- "movd $tmp2,$src1\n\t"
4937
- "vpaddd $tmp2,$tmp2,$tmp\n\t"
4938
- "movd $dst,$tmp2\t! add reduction8I" %}
4939
- ins_encode %{
4940
- int vector_len = 1;
4941
- __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4942
- __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4943
- __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
4944
- __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4945
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4946
- __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4947
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4843
+ format %{ "vector_add4I_reduction $dst,$src1,$src2" %}
4844
+ ins_encode %{
4845
+ if (UseAVX > 2) { // shuffle-and-add sequence (previously a separate *_evex rule)
4846
+ int vector_len = Assembler::AVX_128bit;
4847
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); // tmp2[0..1] = src2[2..3]
4848
+ __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp[0] = s0 + s2, tmp[1] = s1 + s3
4849
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); // tmp2[0] = tmp[1]
4850
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp[0] = sum of all four lanes
4851
+ __ movdl($tmp2$$XMMRegister, $src1$$Register); // bring the scalar accumulator into a vector register
4852
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp2[0] = lane sum + src1
4853
+ __ movdl($dst$$Register, $tmp2$$XMMRegister); // low 32 bits are the result
4854
+ } else if (VM_Version::supports_avxonly()) { // horizontal-add sequence for AVX without EVEX
4855
+ int vector_len = Assembler::AVX_128bit;
4856
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); // tmp[0] = s0 + s1, tmp[1] = s2 + s3
4857
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); // tmp[0] = sum of all four lanes
4858
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
4859
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); // tmp2[0] = src1 + lane sum
4860
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
4861
+ } else { // SSE-only sequence
4862
+ assert(UseSSE > 2, "required"); // the predicate no longer tests UseSSE, so it is checked here
4863
+ __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); // work on a copy: the two-operand phaddd overwrites its destination
4864
+ __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); // first pairwise add
4865
+ __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); // second pairwise add: tmp[0] = sum of all four lanes
4866
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
4867
+ __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); // tmp2[0] = src1 + lane sum
4868
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
4869
+ }
4948
4870
%}
4949
4871
ins_pipe( pipe_slow );
4950
4872
%}
4951
4873
4952
- instruct rvadd8I_reduction_reg_evex (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
4953
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
4874
+ instruct vadd8I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ // dst = src1 + (sum of the 8 int lanes of src2); the encoding is picked at emit time from UseAVX
4875
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
4954
4876
match(Set dst (AddReductionVI src1 src2));
4955
4877
effect(TEMP tmp, TEMP tmp2);
4956
- format %{ "vextracti128_high $tmp,$src2\n\t"
4957
- "vpaddd $tmp,$tmp,$src2\n\t"
4958
- "pshufd $tmp2,$tmp,0xE\n\t"
4959
- "vpaddd $tmp,$tmp,$tmp2\n\t"
4960
- "pshufd $tmp2,$tmp,0x1\n\t"
4961
- "vpaddd $tmp,$tmp,$tmp2\n\t"
4962
- "movd $tmp2,$src1\n\t"
4963
- "vpaddd $tmp2,$tmp,$tmp2\n\t"
4964
- "movd $dst,$tmp2\t! add reduction8I" %}
4965
- ins_encode %{
4966
- int vector_len = 0;
4967
- __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
4968
- __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4969
- __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4970
- __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4971
- __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4972
- __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4973
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
4974
- __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4975
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
4878
+ format %{ "vector_add8I_reduction $dst,$src1,$src2" %}
4879
+ ins_encode %{
4880
+ if (UseAVX > 2) { // fold the high half first, then reduce 128 bits (previously a separate *_evex rule)
4881
+ int vector_len = Assembler::AVX_128bit;
4882
+ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); // tmp = high 128 bits of src2 (per helper name)
4883
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); // tmp[i] = s[i] + s[i+4]
4884
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); // tmp2[0..1] = tmp[2..3]
4885
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4886
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); // tmp2[0] = tmp[1]
4887
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp[0] = sum of all eight lanes
4888
+ __ movdl($tmp2$$XMMRegister, $src1$$Register); // bring the scalar accumulator into a vector register
4889
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); // tmp2[0] = lane sum + src1
4890
+ __ movdl($dst$$Register, $tmp2$$XMMRegister); // low 32 bits are the result
4891
+ } else { // horizontal-add sequence on 256-bit registers
4892
+ assert(UseAVX > 0, "required"); // the predicate no longer tests UseAVX, so it is checked here
4893
+ int vector_len = Assembler::AVX_256bit;
4894
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); // pairwise sums within each 128-bit half
4895
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); // NOTE(review): tmp2 is read before this sequence writes it; lane 0 of each half appears to depend only on the tmp operand, so the result looks unaffected - confirm, and consider passing tmp as in the 4I rule
4896
+ __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); // tmp2 = high 128 bits of tmp (per helper name)
4897
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, Assembler::AVX_128bit); // tmp[0] = low-half sum + high-half sum
4898
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
4899
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, Assembler::AVX_128bit); // tmp2[0] = src1 + lane sum
4900
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
4901
+ }
4976
4902
%}
4977
4903
ins_pipe( pipe_slow );
4978
4904
%}
4979
4905
4980
- instruct rvadd16I_reduction_reg_evex (rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
4981
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
4906
+ instruct vadd16I_reduction_reg (rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
4907
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
4982
4908
match(Set dst (AddReductionVI src1 src2));
4983
4909
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4984
- format %{ "vextracti64x4_high $tmp3,$src2\n\t"
4985
- "vpaddd $tmp3,$tmp3,$src2\n\t"
4986
- "vextracti128_high $tmp,$tmp3\n\t"
4987
- "vpaddd $tmp,$tmp,$tmp3\n\t"
4988
- "pshufd $tmp2,$tmp,0xE\n\t"
4989
- "vpaddd $tmp,$tmp,$tmp2\n\t"
4990
- "pshufd $tmp2,$tmp,0x1\n\t"
4991
- "vpaddd $tmp,$tmp,$tmp2\n\t"
4992
- "movd $tmp2,$src1\n\t"
4993
- "vpaddd $tmp2,$tmp,$tmp2\n\t"
4994
- "movd $dst,$tmp2\t! mul reduction16I" %}
4910
+ format %{ "vector_add16I_reduction $dst,$src1,$src2" %}
4995
4911
ins_encode %{
4996
4912
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
4997
4913
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
@@ -5008,17 +4924,16 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVec src2, legVec
5008
4924
ins_pipe( pipe_slow );
5009
4925
%}
5010
4926
4927
+ // =======================AddReductionVL==========================================
4928
+
5011
4929
#ifdef _LP64
5012
- instruct rvadd2L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5013
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4930
+ instruct vadd2L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
4931
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5014
4932
match(Set dst (AddReductionVL src1 src2));
5015
4933
effect(TEMP tmp, TEMP tmp2);
5016
- format %{ "pshufd $tmp2,$src2,0xE\n\t"
5017
- "vpaddq $tmp,$src2,$tmp2\n\t"
5018
- "movdq $tmp2,$src1\n\t"
5019
- "vpaddq $tmp2,$tmp,$tmp2\n\t"
5020
- "movdq $dst,$tmp2\t! add reduction2L" %}
4934
+ format %{ "vector_add2L_reduction $dst,$src1,$src2" %}
5021
4935
ins_encode %{
4936
+ assert(UseAVX > 2, "required");
5022
4937
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5023
4938
__ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
5024
4939
__ movdq($tmp2$$XMMRegister, $src1$$Register);
@@ -5028,18 +4943,13 @@ instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
5028
4943
ins_pipe( pipe_slow );
5029
4944
%}
5030
4945
5031
- instruct rvadd4L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5032
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 4);
4946
+ instruct vadd4L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
4947
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5033
4948
match(Set dst (AddReductionVL src1 src2));
5034
4949
effect(TEMP tmp, TEMP tmp2);
5035
- format %{ "vextracti128_high $tmp,$src2\n\t"
5036
- "vpaddq $tmp2,$tmp,$src2\n\t"
5037
- "pshufd $tmp,$tmp2,0xE\n\t"
5038
- "vpaddq $tmp2,$tmp2,$tmp\n\t"
5039
- "movdq $tmp,$src1\n\t"
5040
- "vpaddq $tmp2,$tmp2,$tmp\n\t"
5041
- "movdq $dst,$tmp2\t! add reduction4L" %}
4950
+ format %{ "vector_add4L_reduction $dst,$src1,$src2" %}
5042
4951
ins_encode %{
4952
+ assert(UseAVX > 2, "required");
5043
4953
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
5044
4954
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
5045
4955
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
@@ -5051,20 +4961,13 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
5051
4961
ins_pipe( pipe_slow );
5052
4962
%}
5053
4963
5054
- instruct rvadd8L_reduction_reg (rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
5055
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
4964
+ instruct vadd8L_reduction_reg (rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{ // AddReductionVL of src1 with the 8 long lanes of src2, result in dst; part of the encoding is elided by the hunk boundary below
4965
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5056
4966
match(Set dst (AddReductionVL src1 src2));
5057
4967
effect(TEMP tmp, TEMP tmp2);
5058
- format %{ "vextracti64x4_high $tmp2,$src2\n\t"
5059
- "vpaddq $tmp2,$tmp2,$src2\n\t"
5060
- "vextracti128_high $tmp,$tmp2\n\t"
5061
- "vpaddq $tmp2,$tmp2,$tmp\n\t"
5062
- "pshufd $tmp,$tmp2,0xE\n\t"
5063
- "vpaddq $tmp2,$tmp2,$tmp\n\t"
5064
- "movdq $tmp,$src1\n\t"
5065
- "vpaddq $tmp2,$tmp2,$tmp\n\t"
5066
- "movdq $dst,$tmp2\t! add reduction8L" %}
4968
+ format %{ "vector_add8L_reduction $dst,$src1,$src2" %} // lane count included, matching the 2L and 4L rules
5067
4969
ins_encode %{
4970
+ assert(UseAVX > 2, "required"); // the predicate no longer tests UseAVX, so it is checked here
5068
4971
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
5069
4972
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
5070
4973
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
@@ -5077,104 +4980,66 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, l
5077
4980
%}
5078
4981
ins_pipe( pipe_slow );
5079
4982
%}
5080
- #endif
4983
+ #endif // _LP64
5081
4984
5082
- instruct rsadd2F_reduction_reg(regF dst, vec src2, vec tmp) %{
5083
- predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5084
- match(Set dst (AddReductionVF dst src2));
5085
- effect(TEMP dst, TEMP tmp);
5086
- format %{ "addss $dst,$src2\n\t"
5087
- "pshufd $tmp,$src2,0x01\n\t"
5088
- "addss $dst,$tmp\t! add reduction2F" %}
5089
- ins_encode %{
5090
- __ addss($dst$$XMMRegister, $src2$$XMMRegister);
5091
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5092
- __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5093
- %}
5094
- ins_pipe( pipe_slow );
5095
- %}
4985
+ // =======================AddReductionVF==========================================
5096
4986
5097
- instruct rvadd2F_reduction_reg (regF dst, vec src2, vec tmp) %{
5098
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
4987
+ instruct vadd2F_reduction_reg (regF dst, vec src2, vec tmp) %{ // dst = dst + src2[0] + src2[1], added one lane at a time in that order; AVX or SSE encoding picked at emit time
4988
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5099
4989
match(Set dst (AddReductionVF dst src2));
5100
4990
effect(TEMP dst, TEMP tmp);
5101
- format %{ "vaddss $dst,$dst,$src2\n\t"
5102
- "pshufd $tmp,$src2,0x01\n\t"
5103
- "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
4991
+ format %{ "vector_add2F_reduction $dst,$dst,$src2" %}
5104
4992
ins_encode %{
5105
- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5106
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5107
- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
4993
+ if (UseAVX > 0) { // three-operand AVX scalar adds
4994
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); // dst += src2[0]
4995
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); // tmp[0] = src2[1]
4996
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[1]
4997
+ } else { // same steps with two-operand SSE scalar adds
4998
+ assert(UseSSE > 0, "required"); // the predicate no longer tests UseSSE, so it is checked here
4999
+ __ addss($dst$$XMMRegister, $src2$$XMMRegister); // dst += src2[0]
5000
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); // tmp[0] = src2[1]
5001
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[1]
5002
+ }
5108
5003
%}
5109
5004
ins_pipe( pipe_slow );
5110
5005
%}
5111
5006
5112
- instruct rsadd4F_reduction_reg (regF dst, vec src2, vec tmp) %{
5113
- predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5007
+ instruct vadd4F_reduction_reg (regF dst, vec src2, vec tmp) %{ // dst = dst + src2[0] + src2[1] + src2[2] + src2[3], added one lane at a time in that order; AVX or SSE encoding picked at emit time
5008
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5114
5009
match(Set dst (AddReductionVF dst src2));
5115
5010
effect(TEMP dst, TEMP tmp);
5116
- format %{ "addss $dst,$src2\n\t"
5117
- "pshufd $tmp,$src2,0x01\n\t"
5118
- "addss $dst,$tmp\n\t"
5119
- "pshufd $tmp,$src2,0x02\n\t"
5120
- "addss $dst,$tmp\n\t"
5121
- "pshufd $tmp,$src2,0x03\n\t"
5122
- "addss $dst,$tmp\t! add reduction4F" %}
5123
- ins_encode %{
5124
- __ addss($dst$$XMMRegister, $src2$$XMMRegister);
5125
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5126
- __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5127
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5128
- __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5129
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5130
- __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
5131
- %}
5132
- ins_pipe( pipe_slow );
5133
- %}
5134
-
5135
- instruct rvadd4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5136
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5137
- match(Set dst (AddReductionVF dst src2));
5138
- effect(TEMP tmp, TEMP dst);
5139
- format %{ "vaddss $dst,dst,$src2\n\t"
5140
- "pshufd $tmp,$src2,0x01\n\t"
5141
- "vaddss $dst,$dst,$tmp\n\t"
5142
- "pshufd $tmp,$src2,0x02\n\t"
5143
- "vaddss $dst,$dst,$tmp\n\t"
5144
- "pshufd $tmp,$src2,0x03\n\t"
5145
- "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
5011
+ format %{ "vector_add4F_reduction $dst,$dst,$src2" %}
5146
5012
ins_encode %{
5147
- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5148
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5149
- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5150
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5151
- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5152
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5153
- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5013
+ if (UseAVX > 0) { // three-operand AVX scalar adds
5014
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); // dst += src2[0]
5015
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); // tmp[0] = src2[1]
5016
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[1]
5017
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); // tmp[0] = src2[2]
5018
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[2]
5019
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); // tmp[0] = src2[3]
5020
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[3]
5021
+ } else { // same steps with two-operand SSE scalar adds
5022
+ assert(UseSSE > 0, "required"); // the predicate no longer tests UseSSE, so it is checked here
5023
+ __ addss($dst$$XMMRegister, $src2$$XMMRegister); // dst += src2[0]
5024
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); // tmp[0] = src2[1]
5025
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[1]
5026
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); // tmp[0] = src2[2]
5027
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[2]
5028
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); // tmp[0] = src2[3]
5029
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister); // dst += src2[3]
5030
+ }
5154
5031
%}
5155
5032
ins_pipe( pipe_slow );
5156
5033
%}
5157
5034
5158
- instruct radd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5159
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5035
+
5036
+ instruct vadd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5037
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5160
5038
match(Set dst (AddReductionVF dst src2));
5161
5039
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5162
- format %{ "vaddss $dst,$dst,$src2\n\t"
5163
- "pshufd $tmp,$src2,0x01\n\t"
5164
- "vaddss $dst,$dst,$tmp\n\t"
5165
- "pshufd $tmp,$src2,0x02\n\t"
5166
- "vaddss $dst,$dst,$tmp\n\t"
5167
- "pshufd $tmp,$src2,0x03\n\t"
5168
- "vaddss $dst,$dst,$tmp\n\t"
5169
- "vextractf128_high $tmp2,$src2\n\t"
5170
- "vaddss $dst,$dst,$tmp2\n\t"
5171
- "pshufd $tmp,$tmp2,0x01\n\t"
5172
- "vaddss $dst,$dst,$tmp\n\t"
5173
- "pshufd $tmp,$tmp2,0x02\n\t"
5174
- "vaddss $dst,$dst,$tmp\n\t"
5175
- "pshufd $tmp,$tmp2,0x03\n\t"
5176
- "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
5040
+ format %{ "vector_add8F_reduction $dst,$dst,$src2" %}
5177
5041
ins_encode %{
5042
+ assert(UseAVX > 0, "required");
5178
5043
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5179
5044
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5180
5045
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5194,42 +5059,13 @@ instruct radd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5194
5059
ins_pipe( pipe_slow );
5195
5060
%}
5196
5061
5197
- instruct radd16F_reduction_reg (regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5198
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
5062
+ instruct vadd16F_reduction_reg (regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5063
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
5199
5064
match(Set dst (AddReductionVF dst src2));
5200
5065
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5201
- format %{ "vaddss $dst,$dst,$src2\n\t"
5202
- "pshufd $tmp,$src2,0x01\n\t"
5203
- "vaddss $dst,$dst,$tmp\n\t"
5204
- "pshufd $tmp,$src2,0x02\n\t"
5205
- "vaddss $dst,$dst,$tmp\n\t"
5206
- "pshufd $tmp,$src2,0x03\n\t"
5207
- "vaddss $dst,$dst,$tmp\n\t"
5208
- "vextractf32x4 $tmp2,$src2,0x1\n\t"
5209
- "vaddss $dst,$dst,$tmp2\n\t"
5210
- "pshufd $tmp,$tmp2,0x01\n\t"
5211
- "vaddss $dst,$dst,$tmp\n\t"
5212
- "pshufd $tmp,$tmp2,0x02\n\t"
5213
- "vaddss $dst,$dst,$tmp\n\t"
5214
- "pshufd $tmp,$tmp2,0x03\n\t"
5215
- "vaddss $dst,$dst,$tmp\n\t"
5216
- "vextractf32x4 $tmp2,$src2,0x2\n\t"
5217
- "vaddss $dst,$dst,$tmp2\n\t"
5218
- "pshufd $tmp,$tmp2,0x01\n\t"
5219
- "vaddss $dst,$dst,$tmp\n\t"
5220
- "pshufd $tmp,$tmp2,0x02\n\t"
5221
- "vaddss $dst,$dst,$tmp\n\t"
5222
- "pshufd $tmp,$tmp2,0x03\n\t"
5223
- "vaddss $dst,$dst,$tmp\n\t"
5224
- "vextractf32x4 $tmp2,$src2,0x3\n\t"
5225
- "vaddss $dst,$dst,$tmp2\n\t"
5226
- "pshufd $tmp,$tmp2,0x01\n\t"
5227
- "vaddss $dst,$dst,$tmp\n\t"
5228
- "pshufd $tmp,$tmp2,0x02\n\t"
5229
- "vaddss $dst,$dst,$tmp\n\t"
5230
- "pshufd $tmp,$tmp2,0x03\n\t"
5231
- "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
5066
+ format %{ "vector_add16F_reduction $dst,$dst,$src2" %}
5232
5067
ins_encode %{
5068
+ assert(UseAVX > 2, "required");
5233
5069
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5234
5070
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5235
5071
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5265,48 +5101,35 @@ instruct radd16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %
5265
5101
ins_pipe( pipe_slow );
5266
5102
%}
5267
5103
5268
- instruct rsadd2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5269
- predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5270
- match(Set dst (AddReductionVD dst src2));
5271
- effect(TEMP tmp, TEMP dst);
5272
- format %{ "addsd $dst,$src2\n\t"
5273
- "pshufd $tmp,$src2,0xE\n\t"
5274
- "addsd $dst,$tmp\t! add reduction2D" %}
5275
- ins_encode %{
5276
- __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
5277
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5278
- __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
5279
- %}
5280
- ins_pipe( pipe_slow );
5281
- %}
5104
+ // =======================AddReductionVD==========================================
5282
5105
5283
- instruct rvadd2D_reduction_reg (regD dst, vec src2, vec tmp) %{
5284
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5106
+ instruct vadd2D_reduction_reg (regD dst, vec src2, vec tmp) %{ // dst = dst + low double of src2 + high double of src2, added in that order; AVX or SSE encoding picked at emit time
5107
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5285
5108
match(Set dst (AddReductionVD dst src2));
5286
5109
effect(TEMP tmp, TEMP dst);
5287
- format %{ "vaddsd $dst,$dst,$src2\n\t"
5288
- "pshufd $tmp,$src2,0xE\n\t"
5289
- "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
5110
+ format %{ "vector_add2D_reduction $dst,$dst,$src2" %} // operand list matches the 4D/8D and float reduction rules
5290
5111
ins_encode %{
5291
- __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5292
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5293
- __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5112
+ if (UseAVX > 0) { // three-operand AVX scalar adds
5113
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); // dst += low double of src2
5114
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); // move the high 64 bits of src2 into the low 64 bits of tmp
5115
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); // dst += high double of src2
5116
+ } else { // same steps with two-operand SSE scalar adds
5117
+ assert(UseSSE > 0, "required"); // the predicate no longer tests UseSSE, so it is checked here
5118
+ __ addsd($dst$$XMMRegister, $src2$$XMMRegister); // dst += low double of src2
5119
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); // move the high 64 bits of src2 into the low 64 bits of tmp
5120
+ __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); // dst += high double of src2
5121
+ }
5294
5122
%}
5295
5123
ins_pipe( pipe_slow );
5296
5124
%}
5297
5125
5298
- instruct rvadd4D_reduction_reg (regD dst, vec src2, vec tmp, vec tmp2) %{
5299
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5126
+ instruct vadd4D_reduction_reg (regD dst, vec src2, vec tmp, vec tmp2) %{
5127
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5300
5128
match(Set dst (AddReductionVD dst src2));
5301
5129
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5302
- format %{ "vaddsd $dst,$dst,$src2\n\t"
5303
- "pshufd $tmp,$src2,0xE\n\t"
5304
- "vaddsd $dst,$dst,$tmp\n\t"
5305
- "vextractf128 $tmp2,$src2,0x1\n\t"
5306
- "vaddsd $dst,$dst,$tmp2\n\t"
5307
- "pshufd $tmp,$tmp2,0xE\n\t"
5308
- "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
5130
+ format %{ "vector_add4D_reduction $dst,$dst,$src2" %}
5309
5131
ins_encode %{
5132
+ assert(UseAVX > 0, "required");
5310
5133
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5311
5134
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5312
5135
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5318,26 +5141,13 @@ instruct rvadd4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
5318
5141
ins_pipe( pipe_slow );
5319
5142
%}
5320
5143
5321
- instruct rvadd8D_reduction_reg (regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5322
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5144
+ instruct vadd8D_reduction_reg (regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5145
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5323
5146
match(Set dst (AddReductionVD dst src2));
5324
5147
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5325
- format %{ "vaddsd $dst,$dst,$src2\n\t"
5326
- "pshufd $tmp,$src2,0xE\n\t"
5327
- "vaddsd $dst,$dst,$tmp\n\t"
5328
- "vextractf32x4 $tmp2,$src2,0x1\n\t"
5329
- "vaddsd $dst,$dst,$tmp2\n\t"
5330
- "pshufd $tmp,$tmp2,0xE\n\t"
5331
- "vaddsd $dst,$dst,$tmp\n\t"
5332
- "vextractf32x4 $tmp2,$src2,0x2\n\t"
5333
- "vaddsd $dst,$dst,$tmp2\n\t"
5334
- "pshufd $tmp,$tmp2,0xE\n\t"
5335
- "vaddsd $dst,$dst,$tmp\n\t"
5336
- "vextractf32x4 $tmp2,$src2,0x3\n\t"
5337
- "vaddsd $dst,$dst,$tmp2\n\t"
5338
- "pshufd $tmp,$tmp2,0xE\n\t"
5339
- "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
5148
+ format %{ "vector_add8D_reduction $dst,$dst,$src2" %}
5340
5149
ins_encode %{
5150
+ assert(UseAVX > 2, "required");
5341
5151
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5342
5152
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5343
5153
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5357,107 +5167,70 @@ instruct rvadd8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %
5357
5167
ins_pipe( pipe_slow );
5358
5168
%}
5359
5169
5360
- instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5361
- predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5362
- match(Set dst (MulReductionVI src1 src2));
5363
- effect(TEMP tmp, TEMP tmp2);
5364
- format %{ "pshufd $tmp2,$src2,0x1\n\t"
5365
- "pmulld $tmp2,$src2\n\t"
5366
- "movd $tmp,$src1\n\t"
5367
- "pmulld $tmp2,$tmp\n\t"
5368
- "movd $dst,$tmp2\t! mul reduction2I" %}
5369
- ins_encode %{
5370
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5371
- __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5372
- __ movdl($tmp$$XMMRegister, $src1$$Register);
5373
- __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5374
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
5375
- %}
5376
- ins_pipe( pipe_slow );
5377
- %}
5170
+ // =======================MulReductionVI==========================================
5378
5171
5379
- instruct rvmul2I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5380
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5172
+ instruct vmul2I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5173
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5381
5174
match(Set dst (MulReductionVI src1 src2));
5382
5175
effect(TEMP tmp, TEMP tmp2);
5383
- format %{ "pshufd $tmp2,$src2,0x1\n\t"
5384
- "vpmulld $tmp,$src2,$tmp2\n\t"
5385
- "movd $tmp2,$src1\n\t"
5386
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
5387
- "movd $dst,$tmp2\t! mul reduction2I" %}
5176
+ format %{ "vector_mul2I_reduction $dst,$src1,$src2" %}
5388
5177
ins_encode %{
5389
- int vector_len = 0;
5390
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5391
- __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5392
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
5393
- __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5394
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
5395
- %}
5396
- ins_pipe( pipe_slow );
5397
- %}
5398
-
5399
- instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5400
- predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5401
- match(Set dst (MulReductionVI src1 src2));
5402
- effect(TEMP tmp, TEMP tmp2);
5403
- format %{ "pshufd $tmp2,$src2,0xE\n\t"
5404
- "pmulld $tmp2,$src2\n\t"
5405
- "pshufd $tmp,$tmp2,0x1\n\t"
5406
- "pmulld $tmp2,$tmp\n\t"
5407
- "movd $tmp,$src1\n\t"
5408
- "pmulld $tmp2,$tmp\n\t"
5409
- "movd $dst,$tmp2\t! mul reduction4I" %}
5410
- ins_encode %{
5411
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5412
- __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5413
- __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
5414
- __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5415
- __ movdl($tmp$$XMMRegister, $src1$$Register);
5416
- __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5417
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
5178
+ if (UseAVX > 0) {
5179
+ int vector_len = Assembler::AVX_128bit;
5180
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5181
+ __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5182
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
5183
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5184
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
5185
+ } else {
5186
+ assert(UseSSE > 3, "required");
5187
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5188
+ __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5189
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
5190
+ __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5191
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
5192
+ }
5418
5193
%}
5419
5194
ins_pipe( pipe_slow );
5420
5195
%}
5421
5196
5422
- instruct rvmul4I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5423
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5197
+ instruct vmul4I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5198
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5424
5199
match(Set dst (MulReductionVI src1 src2));
5425
5200
effect(TEMP tmp, TEMP tmp2);
5426
- format %{ "pshufd $tmp2,$src2,0xE\n\t"
5427
- "vpmulld $tmp,$src2,$tmp2\n\t"
5428
- "pshufd $tmp2,$tmp,0x1\n\t"
5429
- "vpmulld $tmp,$tmp,$tmp2\n\t"
5430
- "movd $tmp2,$src1\n\t"
5431
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
5432
- "movd $dst,$tmp2\t! mul reduction4I" %}
5201
+ format %{ "vector_mul4I_reduction $dst,$src1,$src2" %}
5433
5202
ins_encode %{
5434
- int vector_len = 0;
5435
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5436
- __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5437
- __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5438
- __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5439
- __ movdl($tmp2$$XMMRegister, $src1$$Register);
5440
- __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5441
- __ movdl($dst$$Register, $tmp2$$XMMRegister);
5203
+ if (UseAVX > 0) {
5204
+ int vector_len = Assembler::AVX_128bit;
5205
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5206
+ __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5207
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5208
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5209
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
5210
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5211
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
5212
+ } else {
5213
+ assert(UseSSE > 3, "required");
5214
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5215
+ __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5216
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
5217
+ __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5218
+ __ movdl($tmp$$XMMRegister, $src1$$Register);
5219
+ __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5220
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
5221
+ }
5442
5222
%}
5443
5223
ins_pipe( pipe_slow );
5444
5224
%}
5445
5225
5446
- instruct rvmul8I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5447
- predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5226
+ instruct vmul8I_reduction_reg (rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{
5227
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5448
5228
match(Set dst (MulReductionVI src1 src2));
5449
5229
effect(TEMP tmp, TEMP tmp2);
5450
- format %{ "vextracti128_high $tmp,$src2\n\t"
5451
- "vpmulld $tmp,$tmp,$src2\n\t"
5452
- "pshufd $tmp2,$tmp,0xE\n\t"
5453
- "vpmulld $tmp,$tmp,$tmp2\n\t"
5454
- "pshufd $tmp2,$tmp,0x1\n\t"
5455
- "vpmulld $tmp,$tmp,$tmp2\n\t"
5456
- "movd $tmp2,$src1\n\t"
5457
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
5458
- "movd $dst,$tmp2\t! mul reduction8I" %}
5230
+ format %{ "vector_mul8I_reduction $dst,$src1,$src2" %}
5459
5231
ins_encode %{
5460
- int vector_len = 0;
5232
+ assert(UseAVX > 1, "required");
5233
+ int vector_len = Assembler::AVX_128bit;
5461
5234
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
5462
5235
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
5463
5236
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
@@ -5471,22 +5244,13 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp
5471
5244
ins_pipe( pipe_slow );
5472
5245
%}
5473
5246
5474
- instruct rvmul16I_reduction_reg (rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
5475
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
5247
+ instruct vmul16I_reduction_reg (rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{
5248
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
5476
5249
match(Set dst (MulReductionVI src1 src2));
5477
5250
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5478
- format %{ "vextracti64x4_high $tmp3,$src2\n\t"
5479
- "vpmulld $tmp3,$tmp3,$src2\n\t"
5480
- "vextracti128_high $tmp,$tmp3\n\t"
5481
- "vpmulld $tmp,$tmp,$src2\n\t"
5482
- "pshufd $tmp2,$tmp,0xE\n\t"
5483
- "vpmulld $tmp,$tmp,$tmp2\n\t"
5484
- "pshufd $tmp2,$tmp,0x1\n\t"
5485
- "vpmulld $tmp,$tmp,$tmp2\n\t"
5486
- "movd $tmp2,$src1\n\t"
5487
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
5488
- "movd $dst,$tmp2\t! mul reduction16I" %}
5251
+ format %{ "vector_mul16I_reduction $dst,$src1,$src2" %}
5489
5252
ins_encode %{
5253
+ assert(UseAVX > 2, "required");
5490
5254
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
5491
5255
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
5492
5256
__ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
@@ -5502,17 +5266,16 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVec src2, legVec tmp,
5502
5266
ins_pipe( pipe_slow );
5503
5267
%}
5504
5268
5269
+ // =======================MulReductionVL==========================================
5270
+
5505
5271
#ifdef _LP64
5506
- instruct rvmul2L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5507
- predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 2);
5272
+ instruct vmul2L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5273
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5508
5274
match(Set dst (MulReductionVL src1 src2));
5509
5275
effect(TEMP tmp, TEMP tmp2);
5510
- format %{ "pshufd $tmp2,$src2,0xE\n\t"
5511
- "vpmullq $tmp,$src2,$tmp2\n\t"
5512
- "movdq $tmp2,$src1\n\t"
5513
- "vpmullq $tmp2,$tmp,$tmp2\n\t"
5514
- "movdq $dst,$tmp2\t! mul reduction2L" %}
5276
+ format %{ "vector_mul2L_reduction $dst,$src1,$src2" %}
5515
5277
ins_encode %{
5278
+ assert(VM_Version::supports_avx512dq(), "required");
5516
5279
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5517
5280
__ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
5518
5281
__ movdq($tmp2$$XMMRegister, $src1$$Register);
@@ -5522,18 +5285,13 @@ instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
5522
5285
ins_pipe( pipe_slow );
5523
5286
%}
5524
5287
5525
- instruct rvmul4L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5526
- predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 4);
5288
+ instruct vmul4L_reduction_reg (rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{
5289
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5527
5290
match(Set dst (MulReductionVL src1 src2));
5528
5291
effect(TEMP tmp, TEMP tmp2);
5529
- format %{ "vextracti128_high $tmp,$src2\n\t"
5530
- "vpmullq $tmp2,$tmp,$src2\n\t"
5531
- "pshufd $tmp,$tmp2,0xE\n\t"
5532
- "vpmullq $tmp2,$tmp2,$tmp\n\t"
5533
- "movdq $tmp,$src1\n\t"
5534
- "vpmullq $tmp2,$tmp2,$tmp\n\t"
5535
- "movdq $dst,$tmp2\t! mul reduction4L" %}
5292
+ format %{ "vector_mul4L_reduction $dst,$src1,$src2" %}
5536
5293
ins_encode %{
5294
+ assert(VM_Version::supports_avx512dq(), "required");
5537
5295
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
5538
5296
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
5539
5297
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
@@ -5545,20 +5303,13 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp
5545
5303
ins_pipe( pipe_slow );
5546
5304
%}
5547
5305
5548
- instruct rvmul8L_reduction_reg (rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
5549
- predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 8);
5306
+ instruct vmul8L_reduction_reg (rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{
5307
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5550
5308
match(Set dst (MulReductionVL src1 src2));
5551
5309
effect(TEMP tmp, TEMP tmp2);
5552
- format %{ "vextracti64x4_high $tmp2,$src2\n\t"
5553
- "vpmullq $tmp2,$tmp2,$src2\n\t"
5554
- "vextracti128_high $tmp,$tmp2\n\t"
5555
- "vpmullq $tmp2,$tmp2,$tmp\n\t"
5556
- "pshufd $tmp,$tmp2,0xE\n\t"
5557
- "vpmullq $tmp2,$tmp2,$tmp\n\t"
5558
- "movdq $tmp,$src1\n\t"
5559
- "vpmullq $tmp2,$tmp2,$tmp\n\t"
5560
- "movdq $dst,$tmp2\t! mul reduction8L" %}
5310
+ format %{ "vector_mul8L_reduction $dst,$src1,$src2" %}
5561
5311
ins_encode %{
5312
+ assert(VM_Version::supports_avx512dq(), "required");
5562
5313
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
5563
5314
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
5564
5315
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
@@ -5573,102 +5324,63 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, l
5573
5324
%}
5574
5325
#endif
5575
5326
5576
- instruct rsmul2F_reduction(regF dst, vec src2, vec tmp) %{
5577
- predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5578
- match(Set dst (MulReductionVF dst src2));
5579
- effect(TEMP dst, TEMP tmp);
5580
- format %{ "mulss $dst,$src2\n\t"
5581
- "pshufd $tmp,$src2,0x01\n\t"
5582
- "mulss $dst,$tmp\t! mul reduction2F" %}
5583
- ins_encode %{
5584
- __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5585
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5586
- __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5587
- %}
5588
- ins_pipe( pipe_slow );
5589
- %}
5327
+ // =======================MulReductionVF==========================================
5590
5328
5591
- instruct rvmul2F_reduction_reg (regF dst, vec src2, vec tmp) %{
5592
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5329
+ instruct vmul2F_reduction_reg (regF dst, vec src2, vec tmp) %{
5330
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5593
5331
match(Set dst (MulReductionVF dst src2));
5594
- effect(TEMP tmp, TEMP dst);
5595
- format %{ "vmulss $dst,$dst,$src2\n\t"
5596
- "pshufd $tmp,$src2,0x01\n\t"
5597
- "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
5332
+ effect(TEMP dst, TEMP tmp);
5333
+ format %{ "vector_mul2F_reduction $dst,$dst,$src2" %}
5598
5334
ins_encode %{
5599
- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5600
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5601
- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5335
+ if (UseAVX > 0) {
5336
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5337
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5338
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5339
+ } else {
5340
+ assert(UseSSE > 0, "required");
5341
+ __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5342
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5343
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5344
+ }
5602
5345
%}
5603
5346
ins_pipe( pipe_slow );
5604
5347
%}
5605
5348
5606
- instruct rsmul4F_reduction_reg (regF dst, vec src2, vec tmp) %{
5607
- predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5349
+ instruct vmul4F_reduction_reg (regF dst, vec src2, vec tmp) %{
5350
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5608
5351
match(Set dst (MulReductionVF dst src2));
5609
5352
effect(TEMP dst, TEMP tmp);
5610
- format %{ "mulss $dst,$src2\n\t"
5611
- "pshufd $tmp,$src2,0x01\n\t"
5612
- "mulss $dst,$tmp\n\t"
5613
- "pshufd $tmp,$src2,0x02\n\t"
5614
- "mulss $dst,$tmp\n\t"
5615
- "pshufd $tmp,$src2,0x03\n\t"
5616
- "mulss $dst,$tmp\t! mul reduction4F" %}
5617
- ins_encode %{
5618
- __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5619
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5620
- __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5621
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5622
- __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5623
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5624
- __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5625
- %}
5626
- ins_pipe( pipe_slow );
5627
- %}
5628
-
5629
- instruct rvmul4F_reduction_reg(regF dst, vec src2, vec tmp) %{
5630
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5631
- match(Set dst (MulReductionVF dst src2));
5632
- effect(TEMP tmp, TEMP dst);
5633
- format %{ "vmulss $dst,$dst,$src2\n\t"
5634
- "pshufd $tmp,$src2,0x01\n\t"
5635
- "vmulss $dst,$dst,$tmp\n\t"
5636
- "pshufd $tmp,$src2,0x02\n\t"
5637
- "vmulss $dst,$dst,$tmp\n\t"
5638
- "pshufd $tmp,$src2,0x03\n\t"
5639
- "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
5353
+ format %{ "vector_mul4F_reduction $dst,$dst,$src2" %}
5640
5354
ins_encode %{
5641
- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5642
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5643
- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5644
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5645
- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5646
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5647
- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5355
+ if (UseAVX > 0) {
5356
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5357
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5358
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5359
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5360
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5361
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5362
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5363
+ } else {
5364
+ assert(UseSSE > 0, "required");
5365
+ __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5366
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5367
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5368
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5369
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5370
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5371
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5372
+ }
5648
5373
%}
5649
5374
ins_pipe( pipe_slow );
5650
5375
%}
5651
5376
5652
- instruct rvmul8F_reduction_reg (regF dst, vec src2, vec tmp, vec tmp2) %{
5653
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5377
+ instruct vmul8F_reduction_reg (regF dst, vec src2, vec tmp, vec tmp2) %{
5378
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5654
5379
match(Set dst (MulReductionVF dst src2));
5655
5380
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5656
- format %{ "vmulss $dst,$dst,$src2\n\t"
5657
- "pshufd $tmp,$src2,0x01\n\t"
5658
- "vmulss $dst,$dst,$tmp\n\t"
5659
- "pshufd $tmp,$src2,0x02\n\t"
5660
- "vmulss $dst,$dst,$tmp\n\t"
5661
- "pshufd $tmp,$src2,0x03\n\t"
5662
- "vmulss $dst,$dst,$tmp\n\t"
5663
- "vextractf128_high $tmp2,$src2\n\t"
5664
- "vmulss $dst,$dst,$tmp2\n\t"
5665
- "pshufd $tmp,$tmp2,0x01\n\t"
5666
- "vmulss $dst,$dst,$tmp\n\t"
5667
- "pshufd $tmp,$tmp2,0x02\n\t"
5668
- "vmulss $dst,$dst,$tmp\n\t"
5669
- "pshufd $tmp,$tmp2,0x03\n\t"
5670
- "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
5381
+ format %{ "vector_mul8F_reduction $dst,$dst,$src2" %}
5671
5382
ins_encode %{
5383
+ assert(UseAVX > 0, "required");
5672
5384
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5673
5385
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5674
5386
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5688,42 +5400,13 @@ instruct rvmul8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{
5688
5400
ins_pipe( pipe_slow );
5689
5401
%}
5690
5402
5691
- instruct rvmul16F_reduction_reg (regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5692
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16);
5403
+ instruct vmul16F_reduction_reg (regF dst, legVec src2, legVec tmp, legVec tmp2) %{
5404
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); // vector_length(src2) == 16
5693
5405
match(Set dst (MulReductionVF dst src2));
5694
5406
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5695
- format %{ "vmulss $dst,$dst,$src2\n\t"
5696
- "pshufd $tmp,$src2,0x01\n\t"
5697
- "vmulss $dst,$dst,$tmp\n\t"
5698
- "pshufd $tmp,$src2,0x02\n\t"
5699
- "vmulss $dst,$dst,$tmp\n\t"
5700
- "pshufd $tmp,$src2,0x03\n\t"
5701
- "vmulss $dst,$dst,$tmp\n\t"
5702
- "vextractf32x4 $tmp2,$src2,0x1\n\t"
5703
- "vmulss $dst,$dst,$tmp2\n\t"
5704
- "pshufd $tmp,$tmp2,0x01\n\t"
5705
- "vmulss $dst,$dst,$tmp\n\t"
5706
- "pshufd $tmp,$tmp2,0x02\n\t"
5707
- "vmulss $dst,$dst,$tmp\n\t"
5708
- "pshufd $tmp,$tmp2,0x03\n\t"
5709
- "vmulss $dst,$dst,$tmp\n\t"
5710
- "vextractf32x4 $tmp2,$src2,0x2\n\t"
5711
- "vmulss $dst,$dst,$tmp2\n\t"
5712
- "pshufd $tmp,$tmp2,0x01\n\t"
5713
- "vmulss $dst,$dst,$tmp\n\t"
5714
- "pshufd $tmp,$tmp2,0x02\n\t"
5715
- "vmulss $dst,$dst,$tmp\n\t"
5716
- "pshufd $tmp,$tmp2,0x03\n\t"
5717
- "vmulss $dst,$dst,$tmp\n\t"
5718
- "vextractf32x4 $tmp2,$src2,0x3\n\t"
5719
- "vmulss $dst,$dst,$tmp2\n\t"
5720
- "pshufd $tmp,$tmp2,0x01\n\t"
5721
- "vmulss $dst,$dst,$tmp\n\t"
5722
- "pshufd $tmp,$tmp2,0x02\n\t"
5723
- "vmulss $dst,$dst,$tmp\n\t"
5724
- "pshufd $tmp,$tmp2,0x03\n\t"
5725
- "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
5407
+ format %{ "vector_mul16F_reduction $dst,$dst,$src2" %}
5726
5408
ins_encode %{
5409
+ assert(UseAVX > 2, "required");
5727
5410
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5728
5411
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5729
5412
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5759,48 +5442,36 @@ instruct rvmul16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2)
5759
5442
ins_pipe( pipe_slow );
5760
5443
%}
5761
5444
5762
- instruct rsmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5763
- predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5445
+ // =======================MulReductionVD==========================================
5446
+
5447
+ instruct vmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5448
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); // vector_length(src2) == 2
5764
5449
match(Set dst (MulReductionVD dst src2));
5765
5450
effect(TEMP dst, TEMP tmp);
5766
- format %{ "mulsd $dst,$src2\n\t"
5767
- "pshufd $tmp,$src2,0xE\n\t"
5768
- "mulsd $dst,$tmp\t! mul reduction2D" %}
5451
+ format %{ "vector_mul2D_reduction $dst,$dst,$src2" %}
5769
5452
ins_encode %{
5770
- __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
5771
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5772
- __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5453
+ if (UseAVX > 0) {
5454
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5455
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5456
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5457
+ } else {
5458
+ assert(UseSSE > 0, "required");
5459
+ __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
5460
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5461
+ __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5462
+ }
5773
5463
%}
5774
5464
ins_pipe( pipe_slow );
5775
5465
%}
5776
5466
5777
- instruct rvmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{
5778
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2);
5779
- match(Set dst (MulReductionVD dst src2));
5780
- effect(TEMP tmp, TEMP dst);
5781
- format %{ "vmulsd $dst,$dst,$src2\n\t"
5782
- "pshufd $tmp,$src2,0xE\n\t"
5783
- "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
5784
- ins_encode %{
5785
- __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5786
- __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5787
- __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5788
- %}
5789
- ins_pipe( pipe_slow );
5790
- %}
5791
5467
5792
- instruct rvmul4D_reduction_reg (regD dst, vec src2, vec tmp, vec tmp2) %{
5793
- predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4);
5468
+ instruct vmul4D_reduction_reg (regD dst, vec src2, vec tmp, vec tmp2) %{
5469
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); // vector_length(src2) == 4
5794
5470
match(Set dst (MulReductionVD dst src2));
5795
5471
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5796
- format %{ "vmulsd $dst,$dst,$src2\n\t"
5797
- "pshufd $tmp,$src2,0xE\n\t"
5798
- "vmulsd $dst,$dst,$tmp\n\t"
5799
- "vextractf128_high $tmp2,$src2\n\t"
5800
- "vmulsd $dst,$dst,$tmp2\n\t"
5801
- "pshufd $tmp,$tmp2,0xE\n\t"
5802
- "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
5472
+ format %{ "vector_mul4D_reduction $dst,$dst,$src2" %}
5803
5473
ins_encode %{
5474
+ assert(UseAVX > 0, "required");
5804
5475
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5805
5476
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5806
5477
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5812,26 +5483,13 @@ instruct rvmul4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{
5812
5483
ins_pipe( pipe_slow );
5813
5484
%}
5814
5485
5815
- instruct rvmul8D_reduction_reg (regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5816
- predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8);
5486
+ instruct vmul8D_reduction_reg (regD dst, legVec src2, legVec tmp, legVec tmp2) %{
5487
+ predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); // vector_length(src2) == 8
5817
5488
match(Set dst (MulReductionVD dst src2));
5818
5489
effect(TEMP tmp, TEMP dst, TEMP tmp2);
5819
- format %{ "vmulsd $dst,$dst,$src2\n\t"
5820
- "pshufd $tmp,$src2,0xE\n\t"
5821
- "vmulsd $dst,$dst,$tmp\n\t"
5822
- "vextractf32x4 $tmp2,$src2,0x1\n\t"
5823
- "vmulsd $dst,$dst,$tmp2\n\t"
5824
- "pshufd $tmp,$src2,0xE\n\t"
5825
- "vmulsd $dst,$dst,$tmp\n\t"
5826
- "vextractf32x4 $tmp2,$src2,0x2\n\t"
5827
- "vmulsd $dst,$dst,$tmp2\n\t"
5828
- "pshufd $tmp,$tmp2,0xE\n\t"
5829
- "vmulsd $dst,$dst,$tmp\n\t"
5830
- "vextractf32x4 $tmp2,$src2,0x3\n\t"
5831
- "vmulsd $dst,$dst,$tmp2\n\t"
5832
- "pshufd $tmp,$tmp2,0xE\n\t"
5833
- "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
5490
+ format %{ "vector_mul8D_reduction $dst,$dst,$src2" %}
5834
5491
ins_encode %{
5492
+ assert(UseAVX > 0, "required");
5835
5493
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5836
5494
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5837
5495
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
0 commit comments