Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 91 additions & 88 deletions src/main/java/org/apache/xmlbeans/impl/regex/RegexParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ static class ReferencePosition {
int context = S_NORMAL;
int parennumber = 1;
boolean hasBackReferences;
Vector references = null;
Vector<ReferencePosition> references = null;

public RegexParser() {
this.setLocale(Locale.getDefault());
Expand Down Expand Up @@ -112,7 +112,7 @@ synchronized Token parse(String regex, int options) throws ParseException {
throw ex("parser.parse.1", this.offset);
if (this.references != null) {
for (int i = 0; i < this.references.size(); i ++) {
ReferencePosition position = (ReferencePosition)this.references.elementAt(i);
ReferencePosition position = this.references.elementAt(i);
if (this.parennumber <= position.refNumber)
throw ex("parser.parse.2", position.position);
}
Expand Down Expand Up @@ -431,7 +431,7 @@ Token processCondition() throws ParseException {
if ('1' <= ch && ch <= '9') {
refno = ch-'0';
this.hasBackReferences = true;
if (this.references == null) this.references = new Vector();
if (this.references == null) this.references = new Vector<>();
this.references.addElement(new ReferencePosition(refno, this.offset));
this.offset ++;
if (this.regex.charAt(this.offset) != ')') throw ex("parser.factor.1", this.offset);
Expand Down Expand Up @@ -543,7 +543,7 @@ Token processBackreference() throws ParseException {
int refnum = this.chardata-'0';
Token tok = Token.createBackReference(refnum);
this.hasBackReferences = true;
if (this.references == null) this.references = new Vector();
if (this.references == null) this.references = new Vector<>();
this.references.addElement(new ReferencePosition(refnum, this.offset-2));
this.next();
return tok;
Expand Down Expand Up @@ -604,9 +604,9 @@ Token parseFactor() throws ParseException {
min = ch -'0';
while (off < this.regexlen
&& (ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
min = min*10 +ch-'0';
if (min < 0)
if (min > (Integer.MAX_VALUE - (ch-'0')) / 10)
throw ex("parser.quantifier.5", this.offset);
min = min*10 +ch-'0';
}
}
else {
Expand All @@ -625,9 +625,9 @@ else if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
while (off < this.regexlen
&& (ch = this.regex.charAt(off++)) >= '0'
&& ch <= '9') {
max = max*10 +ch-'0';
if (max < 0)
if (max > (Integer.MAX_VALUE - (ch-'0')) / 10)
throw ex("parser.quantifier.5", this.offset);
max = max*10 +ch-'0';
}

if (min > max)
Expand Down Expand Up @@ -976,89 +976,92 @@ int decodeEscaped() throws ParseException {
case 'r': c = '\r'; break; // CARRIAGE RETURN U+000D
case 't': c = '\t'; break; // HORIZONTAL TABULATION U+0009
//case 'v': c = 0x0b; break; // VERTICAL TABULATION U+000B
case 'x':
this.next();
if (this.read() != T_CHAR) throw ex("parser.descape.1", this.offset-1);
if (this.chardata == '{') {
int v1 = 0;
int uv = 0;
do {
this.next();
if (this.read() != T_CHAR) throw ex("parser.descape.1", this.offset-1);
if ((v1 = hexChar(this.chardata)) < 0)
break;
if (uv > uv*16) throw ex("parser.descape.2", this.offset-1);
uv = uv*16+v1;
} while (true);
if (this.chardata != '}') throw ex("parser.descape.3", this.offset-1);
if (uv > Token.UTF16_MAX) throw ex("parser.descape.4", this.offset-1);
c = uv;
} else {
int v1 = 0;
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
int uv = v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
c = uv;
}
break;

case 'u':
int v1 = 0;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
int uv = v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
c = uv;
break;

case 'v':
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset-1);
uv = uv*16+v1;
if (uv > Token.UTF16_MAX) throw ex("parser.descappe.4", this.offset-1);
c = uv;
break;
case 'x': {
this.next();
if (this.read() != T_CHAR) throw ex("parser.descape.1", this.offset - 1);
if (this.chardata == '{') {
int v1 = 0;
int uv = 0;
do {
this.next();
if (this.read() != T_CHAR) throw ex("parser.descape.1", this.offset - 1);
if ((v1 = hexChar(this.chardata)) < 0)
break;
if (uv > uv * 16) throw ex("parser.descape.2", this.offset - 1);
uv = uv * 16 + v1;
} while (true);
if (this.chardata != '}') throw ex("parser.descape.3", this.offset - 1);
if (uv > Token.UTF16_MAX) throw ex("parser.descape.4", this.offset - 1);
c = uv;
} else {
int v1 = 0;
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset - 1);
int uv = v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset - 1);
uv = uv * 16 + v1;
c = uv;
}
break;
}
case 'u': {
int v1 = 0;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset - 1);
int uv1 = v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset - 1);
uv1 = uv1 * 16 + v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset - 1);
uv1 = uv1 * 16 + v1;
this.next();
if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
throw ex("parser.descape.1", this.offset - 1);
uv1 = uv1 * 16 + v1;
c = uv1;
break;
}
case 'v': {
    // Escape followed by exactly six hexadecimal digits. Each digit is read
    // from the next token and folded, most significant digit first, into a
    // single numeric value.
    int codePoint = 0;
    for (int digit = 0; digit < 6; digit++) {
        this.next();
        int nibble;
        // Anything other than a plain character that is a hex digit is rejected.
        if (this.read() != T_CHAR || (nibble = hexChar(this.chardata)) < 0)
            throw ex("parser.descape.1", this.offset - 1);
        codePoint = codePoint * 16 + nibble;
    }
    // Values above Token.UTF16_MAX are rejected with the same message key as
    // the equivalent range check in the "\x{...}" branch. The key used to be
    // misspelled "parser.descappe.4", which matches no other key in this
    // method. NOTE(review): presumably the misspelled key had no entry in the
    // message catalogue - confirm against the properties file.
    if (codePoint > Token.UTF16_MAX) throw ex("parser.descape.4", this.offset - 1);
    c = codePoint;
    break;
}
case 'A':
case 'Z':
case 'z':
throw ex("parser.descape.5", this.offset-2);
throw ex("parser.descape.5", this.offset-2);
default:
}
return c;
Expand Down
19 changes: 19 additions & 0 deletions src/test/java/misc/checkin/RegularExpressionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@

package misc.checkin;

import org.apache.xmlbeans.impl.regex.ParseException;
import org.apache.xmlbeans.impl.regex.RegularExpression;
import org.junit.jupiter.api.Test;

import java.util.Random;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

public class RegularExpressionTest {
Expand All @@ -44,6 +46,23 @@ void testLookbehindRangeAtInputEnd() {
assertFalse(new RegularExpression("x(?<=[a-c])").matches("xc"));
}

@Test
void testQuantifierOverflow() {
    // Regression guard for {min,max} counts that exceed Integer.MAX_VALUE.
    // The int accumulator used to overflow and was only checked for a negative
    // result after the multiply, so values that wrapped around to a
    // non-negative number were accepted: "a{4294967296}" behaved like "a{0}"
    // (matching the empty string), "a{1,4294967298}" behaved like "a{1,2}",
    // and larger counts such as "a{99999999999}" exhausted the heap at match time.
    final String[] overflowing = {
        "a{4294967296}",
        "a{4294967297}",
        "a{99999999999}",
        "a{1,4294967298}",
    };
    for (final String pattern : overflowing) {
        assertThrows(ParseException.class, () -> new RegularExpression(pattern));
    }

    // Counts up to and including Integer.MAX_VALUE fit in an int and must
    // still be accepted by the parser.
    final String[] representable = {
        "a{2147483647}",
        "a{0,2147483647}",
    };
    for (final String pattern : representable) {
        new RegularExpression(pattern);
    }

    final RegularExpression bounded = new RegularExpression("a{2,4}");
    assertTrue(bounded.matches("aaa"));
}


private static final String AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
private static final Random rnd = new Random();
Expand Down
Loading