Skip to content

Commit c4518e2

Browse files
vamsi-parasaSandhya Viswanathan
authored and
Sandhya Viswanathan
committedJan 11, 2022
8278868: Add x86 vectorization support for Long.bitCount()
Reviewed-by: jbhateja, sviswanathan, kvn
1 parent 6714184 commit c4518e2

File tree

11 files changed

+237
-6
lines changed

11 files changed

+237
-6
lines changed
 

‎src/hotspot/cpu/x86/assembler_x86.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -4829,6 +4829,14 @@ void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
48294829
emit_int16(0x55, (0xC0 | encode));
48304830
}
48314831

4832+
// Emits the register-to-register form of VPOPCNTQ: dst <- popcount(src).
// The instruction is forced to EVEX encoding with the 66 prefix, the 0F 38
// opcode map and vex_w set; the opcode byte is 0x55 (the same opcode byte
// vpopcntd emits). vector_len is forwarded unchanged to InstructionAttr.
// Requires the AVX512_VPOPCNTDQ CPU feature.
void Assembler::vpopcntq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");

  InstructionAttr attributes(vector_len,
                             /* vex_w */ true,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);
  attributes.set_is_evex_instruction();

  // The prefix helper returns the register bits that are OR-ed into the byte
  // following the opcode; 0xC0 sets that byte's two top bits.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x55, (0xC0 | reg_bits));
}
4839+
48324840
void Assembler::popf() {
48334841
emit_int8((unsigned char)0x9D);
48344842
}

‎src/hotspot/cpu/x86/assembler_x86.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -1869,6 +1869,7 @@ class Assembler : public AbstractAssembler {
18691869
void popcntl(Register dst, Register src);
18701870

18711871
void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1872+
void vpopcntq(XMMRegister dst, XMMRegister src, int vector_len);
18721873

18731874
#ifdef _LP64
18741875
void popcntq(Register dst, Address src);

‎src/hotspot/cpu/x86/x86.ad

+15
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,7 @@ const bool Matcher::match_rule_supported(int opcode) {
14051405
}
14061406
break;
14071407
case Op_PopCountVI:
1408+
case Op_PopCountVL:
14081409
if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
14091410
return false;
14101411
}
@@ -8590,6 +8591,20 @@ instruct vpopcountI(vec dst, vec src) %{
85908591
ins_pipe( pipe_slow );
85918592
%}
85928593

8594+
// Vector Long.bitCount(): matches PopCountVL and emits vpopcntq on $src
// followed by evpmovqd applied in place to $dst.
instruct vpopcountL(vec dst, vec src) %{
  match(Set dst (PopCountVL src));
  format %{ "vpopcntq $dst,$src\t! vector popcount packedL" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");

    // Both instructions use the vector length encoding derived from the source operand.
    const int src_vlen_enc = vector_length_encoding(this, $src);

    // Count the bits into $dst first ...
    __ vpopcntq($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    // ... then convert $dst in place with evpmovqd (presumably narrowing each
    // 64-bit count to 32 bits, since Long.bitCount() yields an int - confirm).
    __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
8607+
85938608
// --------------------------------- Bitwise Ternary Logic ----------------------------------
85948609

85958610
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{

‎src/hotspot/share/adlc/formssel.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -4235,7 +4235,7 @@ bool MatchRule::is_vector() const {
42354235
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
42364236
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
42374237
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
4238-
"FmaVD","FmaVF","PopCountVI","VectorLongToMask",
4238+
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
42394239
// Next are vector mask ops.
42404240
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
42414241
// Next are not supported currently.

‎src/hotspot/share/opto/classes.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -272,6 +272,7 @@ macro(Phi)
272272
macro(PopCountI)
273273
macro(PopCountL)
274274
macro(PopCountVI)
275+
macro(PopCountVL)
275276
macro(PrefetchAllocation)
276277
macro(Proj)
277278
macro(RShiftI)

‎src/hotspot/share/opto/superword.cpp

+21-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -2553,7 +2553,7 @@ void SuperWord::output() {
25532553
opc == Op_AbsF || opc == Op_AbsD ||
25542554
opc == Op_AbsI || opc == Op_AbsL ||
25552555
opc == Op_NegF || opc == Op_NegD ||
2556-
opc == Op_PopCountI) {
2556+
opc == Op_PopCountI || opc == Op_PopCountL) {
25572557
assert(n->req() == 2, "only one input expected");
25582558
Node* in = vector_opd(p, 1);
25592559
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
@@ -2928,6 +2928,7 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
29282928
}
29292929
return true;
29302930
}
2931+
29312932
if (VectorNode::is_muladds2i(use)) {
29322933
// MulAddS2I takes shorts and produces ints - hence the special checks
29332934
// on alignment and size.
@@ -2943,6 +2944,24 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
29432944
}
29442945
return true;
29452946
}
2947+
2948+
if (VectorNode::is_vpopcnt_long(use)) {
2949+
// VPOPCNT_LONG takes long and produces int - hence the special checks
2950+
// on alignment and size.
2951+
if (u_pk->size() != d_pk->size()) {
2952+
return false;
2953+
}
2954+
for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
2955+
Node* ui = u_pk->at(i);
2956+
Node* di = d_pk->at(i);
2957+
if (alignment(ui) * 2 != alignment(di)) {
2958+
return false;
2959+
}
2960+
}
2961+
return true;
2962+
}
2963+
2964+
29462965
if (u_pk->size() != d_pk->size())
29472966
return false;
29482967
for (uint i = 0; i < u_pk->size(); i++) {

‎src/hotspot/share/opto/vectornode.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ int VectorNode::opcode(int sopc, BasicType bt) {
154154
// Unimplemented for subword types since bit count changes
155155
// depending on size of lane (and sign bit).
156156
return (bt == T_INT ? Op_PopCountVI : 0);
157+
case Op_PopCountL:
158+
return Op_PopCountVL;
157159
case Op_LShiftI:
158160
switch (bt) {
159161
case T_BOOLEAN:
@@ -297,6 +299,16 @@ bool VectorNode::is_muladds2i(Node* n) {
297299
return false;
298300
}
299301

302+
// Returns true exactly when n is a PopCountL node (opcode Op_PopCountL).
bool VectorNode::is_vpopcnt_long(Node* n) {
  return n->Opcode() == Op_PopCountL;
}
308+
309+
310+
311+
300312
bool VectorNode::is_roundopD(Node* n) {
301313
if (n->Opcode() == Op_RoundDoubleMode) {
302314
return true;
@@ -531,6 +543,7 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
531543
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
532544

533545
case Op_PopCountVI: return new PopCountVINode(n1, vt);
546+
case Op_PopCountVL: return new PopCountVLNode(n1, vt);
534547
case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt);
535548
case Op_RotateRightV: return new RotateRightVNode(n1, n2, vt);
536549

‎src/hotspot/share/opto/vectornode.hpp

+9
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class VectorNode : public TypeNode {
9393
static bool is_type_transition_short_to_int(Node* n);
9494
static bool is_type_transition_to_int(Node* n);
9595
static bool is_muladds2i(Node* n);
96+
static bool is_vpopcnt_long(Node* n);
9697
static bool is_roundopD(Node* n);
9798
static bool is_scalar_rotate(Node* n);
9899
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
@@ -505,6 +506,14 @@ class PopCountVINode : public VectorNode {
505506
virtual int Opcode() const;
506507
};
507508

509+
//------------------------------PopCountVLNode---------------------------------
510+
// Vector popcount long bits
511+
class PopCountVLNode : public VectorNode {
512+
public:
513+
PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
514+
virtual int Opcode() const;
515+
};
516+
508517
//------------------------------SqrtVFNode--------------------------------------
509518
// Vector Sqrt float
510519
class SqrtVFNode : public VectorNode {

‎src/hotspot/share/runtime/vmStructs.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -1767,6 +1767,7 @@
17671767
declare_c2_type(DivVFNode, VectorNode) \
17681768
declare_c2_type(DivVDNode, VectorNode) \
17691769
declare_c2_type(PopCountVINode, VectorNode) \
1770+
declare_c2_type(PopCountVLNode, VectorNode) \
17701771
declare_c2_type(LShiftVBNode, VectorNode) \
17711772
declare_c2_type(LShiftVSNode, VectorNode) \
17721773
declare_c2_type(LShiftVINode, VectorNode) \
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
/**
25+
* @test
26+
* @summary Test vectorization of popcount for Long
27+
* @requires vm.cpu.features ~= ".*avx512_vpopcntdq.*"
28+
* @requires vm.compiler2.enabled
29+
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64"
30+
* @library /test/lib /
31+
* @run driver compiler.vectorization.TestPopCountVectorLong
32+
*/
33+
34+
package compiler.vectorization;
35+
import compiler.lib.ir_framework.*;
36+
import java.util.Random;
37+
38+
39+
public class TestPopCountVectorLong {
40+
private long[] input;
41+
private int[] output;
42+
private static final int LEN = 1024;
43+
private Random rng;
44+
45+
public static void main(String args[]) {
46+
TestFramework.run(TestPopCountVectorLong.class);
47+
}
48+
49+
public TestPopCountVectorLong() {
50+
input = new long[LEN];
51+
output = new int[LEN];
52+
rng = new Random(42);
53+
for (int i = 0; i < LEN; ++i) {
54+
input[i] = rng.nextLong();
55+
}
56+
}
57+
58+
@Test // needs to be run in (fast) debug mode
59+
@Warmup(10000)
60+
@IR(counts = {"PopCountVL", ">= 1"}) // Atleast one PopCountVL node is generated if vectorization is successful
61+
public void vectorizeBitCount() {
62+
for (int i = 0; i < LEN; ++i) {
63+
output[i] = Long.bitCount(input[i]);
64+
}
65+
checkResult();
66+
}
67+
68+
public void checkResult() {
69+
for (int i = 0; i < LEN; ++i) {
70+
int expected = Long.bitCount(input[i]);
71+
if (output[i] != expected) {
72+
throw new RuntimeException("Invalid result: output[" + i + "] = " + output[i] + " != " + expected);
73+
}
74+
}
75+
}
76+
}
77+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package org.openjdk.bench.vm.compiler;
24+
25+
import org.openjdk.jmh.annotations.*;
26+
import org.openjdk.jmh.infra.*;
27+
import java.util.concurrent.TimeUnit;
28+
import java.util.random.RandomGenerator;
29+
import java.util.random.RandomGeneratorFactory;
30+
31+
@BenchmarkMode(Mode.AverageTime)
32+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
33+
@State(Scope.Thread)
34+
public abstract class VectorBitCount {
35+
@Param({"1024"})
36+
public int SIZE;
37+
38+
@Param("0")
39+
private int seed;
40+
private RandomGenerator rng = RandomGeneratorFactory.getDefault().create(seed);
41+
private int[] bufferRandInts;
42+
private long[] bufferRandLongs;
43+
private int[] bitCounts;
44+
@Setup
45+
public void init() {
46+
bufferRandInts = new int[SIZE];
47+
bufferRandLongs = new long[SIZE];
48+
bitCounts = new int[SIZE];
49+
50+
for (int i = 0; i < SIZE; i++) {
51+
bufferRandInts[i] = rng.nextInt();
52+
bufferRandLongs[i] = rng.nextLong();
53+
}
54+
}
55+
56+
@Benchmark
57+
public int[] intBitCount() {
58+
for (int i = 0; i < SIZE; i++) {
59+
bitCounts[i] = Integer.bitCount(bufferRandInts[i]);
60+
}
61+
return bitCounts;
62+
}
63+
64+
@Benchmark
65+
public int[] longBitCount() {
66+
for (int i = 0; i < SIZE; i++) {
67+
bitCounts[i] = Long.bitCount(bufferRandLongs[i]);
68+
}
69+
return bitCounts;
70+
}
71+
72+
73+
@Fork(value = 1, jvmArgsPrepend = {
74+
"-XX:+UseSuperWord"
75+
})
76+
public static class WithSuperword extends VectorBitCount {
77+
78+
}
79+
80+
@Fork(value = 1, jvmArgsPrepend = {
81+
"-XX:-UseSuperWord"
82+
})
83+
public static class NoSuperword extends VectorBitCount {
84+
}
85+
86+
}
87+

0 commit comments

Comments
 (0)
Please sign in to comment.