Skip to content

Commit 69d296d

Browse files
committed Jan 17, 2022
8279833: Loop optimization issue in String.encodeUTF8_UTF16
Backport-of: c3d0a94040d9bd0f4b99da97b89fbfce252a41c0
1 parent 6f644e0 commit 69d296d

File tree

2 files changed

+84
-43
lines changed

2 files changed

+84
-43
lines changed
 

‎src/java.base/share/classes/java/lang/String.java

+7 -4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1994, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -1284,14 +1284,17 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
12841284
int sp = 0;
12851285
int sl = val.length >> 1;
12861286
byte[] dst = new byte[sl * 3];
1287-
char c;
1288-
while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
1287+
while (sp < sl) {
12891288
// ascii fast loop;
1289+
char c = StringUTF16.getChar(val, sp);
1290+
if (c >= '\u0080') {
1291+
break;
1292+
}
12901293
dst[dp++] = (byte)c;
12911294
sp++;
12921295
}
12931296
while (sp < sl) {
1294-
c = StringUTF16.getChar(val, sp++);
1297+
char c = StringUTF16.getChar(val, sp++);
12951298
if (c < 0x80) {
12961299
dst[dp++] = (byte)c;
12971300
} else if (c < 0x800) {
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -30,59 +30,97 @@
3030

3131
@BenchmarkMode(Mode.AverageTime)
3232
@OutputTimeUnit(TimeUnit.NANOSECONDS)
33-
@Fork(value = 3, jvmArgs = "-Xmx1g")
33+
@Fork(value = 3)
3434
@Warmup(iterations = 5, time = 2)
3535
@Measurement(iterations = 5, time = 3)
3636
@State(Scope.Thread)
3737
public class StringEncode {
3838

39-
@BenchmarkMode(Mode.AverageTime)
40-
@OutputTimeUnit(TimeUnit.NANOSECONDS)
41-
@Fork(value = 3, jvmArgs = "-Xmx1g")
42-
@Warmup(iterations = 5, time = 2)
43-
@Measurement(iterations = 5, time = 2)
44-
@State(Scope.Thread)
45-
public static class WithCharset {
39+
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
40+
private String charsetName;
41+
private Charset charset;
42+
private String asciiString;
43+
private String utf16String;
44+
private String longUtf16String;
45+
private String longUtf16StartString;
4646

47-
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
48-
private String charsetName;
47+
@Setup
48+
public void setup() {
49+
charset = Charset.forName(charsetName);
50+
asciiString = "ascii string";
51+
utf16String = "UTF-\uFF11\uFF16 string";
52+
longUtf16String = """
53+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
54+
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
55+
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
56+
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
57+
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
58+
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
59+
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
60+
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
61+
Suspendisse potenti.
4962
50-
private Charset charset;
51-
private String asciiString;
52-
private String utf16String;
63+
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
64+
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
65+
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
66+
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
67+
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
68+
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
69+
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
70+
tristique mollis odio blandit quis. Vivamus posuere.
71+
\uFF11
72+
""";
73+
longUtf16StartString = """
74+
\uFF11
75+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
76+
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
77+
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
78+
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
79+
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
80+
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
81+
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
82+
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
83+
Suspendisse potenti.
5384
54-
@Setup
55-
public void setup() {
56-
charset = Charset.forName(charsetName);
57-
asciiString = "ascii string";
58-
utf16String = "UTF-\uFF11\uFF16 string";
59-
}
85+
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
86+
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
87+
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
88+
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
89+
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
90+
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
91+
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
92+
tristique mollis odio blandit quis. Vivamus posuere.
93+
""";
94+
}
6095

61-
@Benchmark
62-
public void encodeCharsetName(Blackhole bh) throws Exception {
63-
bh.consume(asciiString.getBytes(charsetName));
64-
bh.consume(utf16String.getBytes(charsetName));
65-
}
96+
@Benchmark
97+
public byte[] encodeAsciiCharsetName() throws Exception {
98+
return asciiString.getBytes(charset);
99+
}
100+
101+
@Benchmark
102+
public byte[] encodeAscii() throws Exception {
103+
return asciiString.getBytes(charset);
104+
}
66105

67-
@Benchmark
68-
public void encodeCharset(Blackhole bh) throws Exception {
69-
bh.consume(asciiString.getBytes(charset));
70-
bh.consume(utf16String.getBytes(charset));
71-
}
106+
@Benchmark
107+
public void encodeMix(Blackhole bh) throws Exception {
108+
bh.consume(asciiString.getBytes(charset));
109+
bh.consume(utf16String.getBytes(charset));
72110
}
73111

74-
private String asciiDefaultString;
75-
private String utf16DefaultString;
112+
@Benchmark
113+
public byte[] encodeUTF16LongEnd() throws Exception {
114+
return longUtf16String.getBytes(charset);
115+
}
76116

77-
@Setup
78-
public void setup() {
79-
asciiDefaultString = "ascii string";
80-
utf16DefaultString = "UTF-\uFF11\uFF16 string";
117+
@Benchmark
118+
public byte[] encodeUTF16LongStart() throws Exception {
119+
return longUtf16StartString.getBytes(charset);
81120
}
82121

83122
@Benchmark
84-
public void encodeDefault(Blackhole bh) throws Exception {
85-
bh.consume(asciiDefaultString.getBytes());
86-
bh.consume(utf16DefaultString.getBytes());
123+
public byte[] encodeUTF16() throws Exception {
124+
return utf16String.getBytes(charset);
87125
}
88126
}

0 commit comments

Comments
 (0)