Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8281315: Unicode, (?i) flag and backreference throwing IndexOutOfBounds Exception #7501

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/java.base/share/classes/java/util/regex/Pattern.java
Original file line number Diff line number Diff line change
@@ -5060,22 +5060,28 @@ boolean match(Matcher matcher, int i, CharSequence seq) {
int j = matcher.groups[groupIndex];
int k = matcher.groups[groupIndex+1];

int groupSize = k - j;
int groupSizeChars = k - j; //Group size in chars

// If the referenced group didn't match, neither can this
if (j < 0)
return false;

// If there isn't enough input left no match
if (i + groupSize > matcher.to) {
if (i + groupSizeChars > matcher.to) {
matcher.hitEnd = true;
return false;
}

// Check each new char to make sure it matches what the group
// referenced matched last time around
int x = i;
for (int index=0; index<groupSize; index++) {

// groupCodepoints starts as the group's size in chars, which is only
// an upper bound on its number of code points; it is decremented by
// one each time a 2-char (supplementary) code point is seen.
int groupCodepoints = groupSizeChars;

for (int index=0; index<groupCodepoints; index++) {
int c1 = Character.codePointAt(seq, x);
int c2 = Character.codePointAt(seq, j);
if (c1 != c2) {
@@ -5093,9 +5099,15 @@ boolean match(Matcher matcher, int i, CharSequence seq) {
}
x += Character.charCount(c1);
j += Character.charCount(c2);

if(c1 >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
// The loop bound was initialised from the size in chars, so
// drop it by one for each 2-char code point consumed.
groupCodepoints--;
}
}

return next.match(matcher, i+groupSize, seq);
return next.match(matcher, i+groupSizeChars, seq);
}
boolean study(TreeInfo info) {
info.maxValid = false;
9 changes: 9 additions & 0 deletions test/jdk/java/util/regex/RegExTest.java
Original file line number Diff line number Diff line change
@@ -4556,4 +4556,13 @@ public static void badIntersectionSyntax() {
Pattern.compile(pattern));
assertTrue(e.getMessage().contains("Bad intersection syntax"));
}

// Regression test for 8281315: a case-insensitive backreference over
// supplementary (surrogate-pair) characters must match rather than throw.
@Test
public static void iOOBForCIBackrefs() {
    // Three copies of U+1F495, each encoded as a surrogate pair.
    String input = "\ud83d\udc95\ud83d\udc95\ud83d\udc95";
    var matcher = Pattern.compile("(?i)(.)\\1{2,}").matcher(input);
    assertTrue(matcher.find());
}
}