diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java b/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java
index 9a27991571..28548e7cb6 100755
--- a/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java
+++ b/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java
@@ -336,25 +336,35 @@ public Xml.Tag visitElement(XMLParser.ElementContext ctx) {
beforeTagDelimiterPrefix = prefix(ctx.SLASH_CLOSE());
advanceCursor(ctx.SLASH_CLOSE().getSymbol().getStopIndex() + 1);
} else {
- beforeTagDelimiterPrefix = prefix(ctx.CLOSE(0));
- advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1);
+ beforeTagDelimiterPrefix = ctx.CLOSE(0) == null ? "" : prefix(ctx.CLOSE(0));
+ if (ctx.CLOSE(0) != null) {
+ advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1);
+ }
content = ctx.content().stream()
.map(this::visit)
.map(Content.class::cast)
.collect(toList());
- String closeTagPrefix = prefix(ctx.OPEN(1));
- advanceCursor(codePointCursor + 2);
-
- closeTag = new Xml.Tag.Closing(
- randomId(),
- closeTagPrefix,
- Markers.EMPTY,
- convert(ctx.Name(1), (n, p) -> n.getText()),
- prefix(ctx.CLOSE(1))
- );
- advanceCursor(codePointCursor + 1);
+ // ANTLR may synthesize missing closing-tag tokens during error recovery
+ // on malformed input; tolerate any combination of null OPEN/Name/CLOSE.
+ if (ctx.OPEN(1) != null || ctx.Name(1) != null || ctx.CLOSE(1) != null) {
+ String closeTagPrefix = ctx.OPEN(1) == null ? "" : prefix(ctx.OPEN(1));
+ advanceCursor(codePointCursor + 2);
+
+ String closeName = ctx.Name(1) == null ? "" :
+ convert(ctx.Name(1), (n, p) -> n.getText());
+ String afterCloseName = ctx.CLOSE(1) == null ? "" : prefix(ctx.CLOSE(1));
+
+ closeTag = new Xml.Tag.Closing(
+ randomId(),
+ closeTagPrefix,
+ Markers.EMPTY,
+ closeName,
+ afterCloseName
+ );
+ advanceCursor(codePointCursor + 1);
+ }
}
return new Xml.Tag(randomId(), prefix, Markers.EMPTY, name, attributes,
@@ -468,15 +478,23 @@ private String prefix(Token token) {
/**
- * Advance both the cursor and the code point cursor
+ * Advance both the cursor and the code point cursor to {@code newCodePointIndex} and return the
+ * cursor; a target at or before the current code point position changes nothing. A target past the
+ * end of source (synthesized token from ANTLR error recovery on malformed input) clamps to the end.
*/
@SuppressWarnings("UnusedReturnValue")
private int advanceCursor(int newCodePointIndex) {
if (newCodePointIndex <= codePointCursor) {
return cursor;
}
- cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor);
- codePointCursor = newCodePointIndex;
+ try {
+ cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor);
+ codePointCursor = newCodePointIndex;
+ } catch (IndexOutOfBoundsException e) {
+ int reachableCodePoints = source.codePointCount(cursor, source.length());
+ cursor = source.length();
+ codePointCursor += reachableCodePoints;
+ }
return cursor;
}
diff --git a/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java b/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java
index a330218459..cbdc79bfcf 100755
--- a/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java
+++ b/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java
@@ -765,4 +765,42 @@ void utf8SurrogatePairsSimple() {
)
);
}
+
+ @Issue("https://github.com/openrewrite/rewrite/issues/7554")
+ @Test
+ void malformedMissingRootCloseDoesNotThrow() {
+ // Inner element is never closed before EOF. ANTLR error recovery used to drive advanceCursor past EOF
+ // (IndexOutOfBoundsException); parsing must now yield a ParseError that prints the input unchanged.
+ SourceFile parsed = XmlParser.builder().build()
+ .parse(new InMemoryExecutionContext(t -> {
+ }), "\n\n wrong format\n")
+ .findFirst().orElseThrow();
+ assertThat(parsed).isInstanceOf(ParseError.class);
+ assertThat(parsed.printAll()).isEqualTo("\n\n wrong format\n");
+ }
+
+ @Issue("https://github.com/openrewrite/rewrite/issues/7554")
+ @Test
+ void malformedUnterminatedEndTagDoesNotThrow() {
+ // End-tag missing its '>' before EOF.
+ SourceFile parsed = XmlParser.builder().build()
+ .parse(new InMemoryExecutionContext(t -> {
+ }), " {
+ }), "McFarland & Company")
+ .findFirst().orElseThrow();
+ assertThat(parsed).isInstanceOf(ParseError.class);
+ assertThat(parsed.printAll()).isEqualTo("McFarland & Company");
+ }
}