Skip to content

Commit cbc3fee

Browse files
author
Stuart Marks
committed Dec 18, 2020
8258259: Unicode linebreak matching behavior is incorrect; backout JDK-8235812
Reviewed-by: naoto
1 parent 7320e05 commit cbc3fee

File tree

2 files changed

+29
-77
lines changed

2 files changed

+29
-77
lines changed
 

‎src/java.base/share/classes/java/util/regex/Pattern.java

+11 -67
Original file line number | Diff line number | Diff line change
@@ -2063,7 +2063,7 @@ private Node expr(Node end) {
20632063
Node prev = null;
20642064
Node firstTail = null;
20652065
Branch branch = null;
2066-
BranchConn branchConn = null;
2066+
Node branchConn = null;
20672067

20682068
for (;;) {
20692069
Node node = sequence(end);
@@ -2211,24 +2211,7 @@ private Node sequence(Node end) {
22112211
break;
22122212
}
22132213

2214-
if (node instanceof LineEnding) {
2215-
LineEnding le = (LineEnding)node;
2216-
node = closureOfLineEnding(le);
2217-
2218-
if (node != le) {
2219-
// LineEnding was replaced with an anonymous group
2220-
if (head == null)
2221-
head = node;
2222-
else
2223-
tail.next = node;
2224-
// Double return: Tail was returned in root
2225-
tail = root;
2226-
continue;
2227-
}
2228-
} else {
2229-
node = closure(node);
2230-
}
2231-
2214+
node = closure(node);
22322215
/* save the top dot-greedy nodes (.*, .+) as well
22332216
if (node instanceof GreedyCharProperty &&
22342217
((GreedyCharProperty)node).cp instanceof Dot) {
@@ -3096,31 +3079,18 @@ private Node group0() {
30963079
if (saveTCNCount < topClosureNodes.size())
30973080
topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
30983081

3099-
return groupWithClosure(node, head, tail, capturingGroup);
3100-
}
3101-
3102-
/**
3103-
* Transforms a Group with quantifiers into some special constructs
3104-
* (such as Branch or Loop/GroupCurly), if necessary.
3105-
*
3106-
* This method is applied either to actual groups or to the Unicode
3107-
* linebreak (aka \\R) represented as an anonymous group.
3108-
*/
3109-
private Node groupWithClosure(Node node, Node head, Node tail,
3110-
boolean capturingGroup)
3111-
{
31123082
if (node instanceof Ques) {
31133083
Ques ques = (Ques) node;
31143084
if (ques.type == Qtype.POSSESSIVE) {
31153085
root = node;
31163086
return node;
31173087
}
3118-
BranchConn branchConn = new BranchConn();
3119-
tail = tail.next = branchConn;
3088+
tail.next = new BranchConn();
3089+
tail = tail.next;
31203090
if (ques.type == Qtype.GREEDY) {
3121-
head = new Branch(head, null, branchConn);
3091+
head = new Branch(head, null, tail);
31223092
} else { // Reluctant quantifier
3123-
head = new Branch(null, head, branchConn);
3093+
head = new Branch(null, head, tail);
31243094
}
31253095
root = tail;
31263096
return head;
@@ -3297,31 +3267,6 @@ private Node curly(Node prev, int cmin) {
32973267
return new Curly(prev, cmin, MAX_REPS, qtype);
32983268
}
32993269

3300-
/**
3301-
* Processing repetition of a Unicode linebreak \\R.
3302-
*/
3303-
private Node closureOfLineEnding(LineEnding le) {
3304-
int ch = peek();
3305-
if (ch != '?' && ch != '*' && ch != '+' && ch != '{') {
3306-
return le;
3307-
}
3308-
3309-
// Replace the LineEnding with an anonymous group
3310-
// (?:\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029])
3311-
Node grHead = createGroup(true);
3312-
Node grTail = root;
3313-
BranchConn branchConn = new BranchConn();
3314-
branchConn.next = grTail;
3315-
Node slice = new Slice(new int[] {0x0D, 0x0A});
3316-
slice.next = branchConn;
3317-
Node chClass = newCharProperty(x -> x == 0x0A || x == 0x0B ||
3318-
x == 0x0C || x == 0x0D || x == 0x85 || x == 0x2028 ||
3319-
x == 0x2029);
3320-
chClass.next = branchConn;
3321-
grHead.next = new Branch(slice, chClass, branchConn);
3322-
return groupWithClosure(closure(grHead), grHead, grTail, false);
3323-
}
3324-
33253270
/**
33263271
* Processes repetition. If the next character peeked is a quantifier
33273272
* then new nodes must be appended to handle the repetition.
@@ -4777,19 +4722,18 @@ boolean study(TreeInfo info) {
47774722
static final class Branch extends Node {
47784723
Node[] atoms = new Node[2];
47794724
int size = 2;
4780-
BranchConn conn;
4781-
Branch(Node first, Node second, BranchConn branchConn) {
4725+
Node conn;
4726+
Branch(Node first, Node second, Node branchConn) {
47824727
conn = branchConn;
47834728
atoms[0] = first;
47844729
atoms[1] = second;
47854730
}
47864731

47874732
void add(Node node) {
47884733
if (size >= atoms.length) {
4789-
int len = ArraysSupport.newLength(size,
4790-
1, /* minimum growth */
4791-
size /* preferred growth */);
4792-
atoms = Arrays.copyOf(atoms, len);
4734+
Node[] tmp = new Node[atoms.length*2];
4735+
System.arraycopy(atoms, 0, tmp, 0, atoms.length);
4736+
atoms = tmp;
47934737
}
47944738
atoms[size++] = node;
47954739
}

‎test/jdk/java/util/regex/RegExTest.java

+18 -10
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
3737
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
3838
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
39-
* 8216332 8214245 8237599 8241055 8247546
39+
* 8216332 8214245 8237599 8241055 8247546 8258259
4040
*
4141
* @library /test/lib
4242
* @library /lib/testlibrary/java/lang
@@ -5063,7 +5063,15 @@ private static void surrogatePairWithCanonEq() {
50635063
report("surrogatePairWithCanonEq");
50645064
}
50655065

5066-
// This test is for 8235812
5066+
private static String s2x(String s) {
5067+
StringBuilder sb = new StringBuilder();
5068+
for (char ch : s.toCharArray()) {
5069+
sb.append(String.format("\\u%04x", (int)ch));
5070+
}
5071+
return sb.toString();
5072+
}
5073+
5074+
// This test is for 8235812, with cases excluded by 8258259
50675075
private static void lineBreakWithQuantifier() {
50685076
// key: pattern
50695077
// value: lengths of input that must match the pattern
@@ -5073,22 +5081,22 @@ private static void lineBreakWithQuantifier() {
50735081
Map.entry("\\R+", List.of(1, 2, 3)),
50745082
Map.entry("\\R{0}", List.of(0)),
50755083
Map.entry("\\R{1}", List.of(1)),
5076-
Map.entry("\\R{2}", List.of(2)),
5077-
Map.entry("\\R{3}", List.of(3)),
5084+
// Map.entry("\\R{2}", List.of(2)), // 8258259
5085+
// Map.entry("\\R{3}", List.of(3)), // 8258259
50785086
Map.entry("\\R{0,}", List.of(0, 1, 2, 3)),
50795087
Map.entry("\\R{1,}", List.of(1, 2, 3)),
5080-
Map.entry("\\R{2,}", List.of(2, 3)),
5081-
Map.entry("\\R{3,}", List.of(3)),
5088+
// Map.entry("\\R{2,}", List.of(2, 3)), // 8258259
5089+
// Map.entry("\\R{3,}", List.of(3)), // 8258259
50825090
Map.entry("\\R{0,0}", List.of(0)),
50835091
Map.entry("\\R{0,1}", List.of(0, 1)),
50845092
Map.entry("\\R{0,2}", List.of(0, 1, 2)),
50855093
Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)),
50865094
Map.entry("\\R{1,1}", List.of(1)),
50875095
Map.entry("\\R{1,2}", List.of(1, 2)),
50885096
Map.entry("\\R{1,3}", List.of(1, 2, 3)),
5089-
Map.entry("\\R{2,2}", List.of(2)),
5090-
Map.entry("\\R{2,3}", List.of(2, 3)),
5091-
Map.entry("\\R{3,3}", List.of(3)),
5097+
// Map.entry("\\R{2,2}", List.of(2)), // 8258259
5098+
// Map.entry("\\R{2,3}", List.of(2, 3)), // 8258259
5099+
// Map.entry("\\R{3,3}", List.of(3)), // 8258259
50925100
Map.entry("\\R", List.of(1)),
50935101
Map.entry("\\R\\R", List.of(2)),
50945102
Map.entry("\\R\\R\\R", List.of(3))
@@ -5131,7 +5139,7 @@ private static void lineBreakWithQuantifier() {
51315139
if (!m.reset(in).matches()) {
51325140
failCount++;
51335141
System.err.println("Expected to match '" +
5134-
in + "' =~ /" + p + "/");
5142+
s2x(in) + "' =~ /" + p + "/");
51355143
}
51365144
}
51375145
}

0 commit comments

Comments
 (0)
Please sign in to comment.