From 500261fd09193d7bf0c930cadf3328a645020f61 Mon Sep 17 00:00:00 2001 From: Tim te Beek Date: Sun, 3 May 2026 23:24:43 +0200 Subject: [PATCH] XML: harden parser against malformed input crashes ANTLR error recovery synthesizes closing-tag tokens when input ends with an unclosed element, which caused `XmlParserVisitor` to throw `IndexOutOfBoundsException` from `advanceCursor` and `NullPointerException` when accessing `Name(1)`. Clamp `advanceCursor` to the source length and tolerate null `CLOSE(0)`/`OPEN(1)`/`Name(1)`/`CLOSE(1)` so these inputs fall back to a `ParseError` (preserving original text) instead of crashing. See https://github.com/openrewrite/rewrite/issues/7554#issuecomment-4367184049 --- .../xml/internal/XmlParserVisitor.java | 50 +++++++++++++------ .../org/openrewrite/xml/XmlParserTest.java | 38 ++++++++++++++ 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java b/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java index 9a27991571c..28548e7cb63 100755 --- a/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java +++ b/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java @@ -336,25 +336,35 @@ public Xml.Tag visitElement(XMLParser.ElementContext ctx) { beforeTagDelimiterPrefix = prefix(ctx.SLASH_CLOSE()); advanceCursor(ctx.SLASH_CLOSE().getSymbol().getStopIndex() + 1); } else { - beforeTagDelimiterPrefix = prefix(ctx.CLOSE(0)); - advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1); + beforeTagDelimiterPrefix = ctx.CLOSE(0) == null ? 
"" : prefix(ctx.CLOSE(0)); + if (ctx.CLOSE(0) != null) { + advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1); + } content = ctx.content().stream() .map(this::visit) .map(Content.class::cast) .collect(toList()); - String closeTagPrefix = prefix(ctx.OPEN(1)); - advanceCursor(codePointCursor + 2); - - closeTag = new Xml.Tag.Closing( - randomId(), - closeTagPrefix, - Markers.EMPTY, - convert(ctx.Name(1), (n, p) -> n.getText()), - prefix(ctx.CLOSE(1)) - ); - advanceCursor(codePointCursor + 1); + // ANTLR may synthesize missing closing-tag tokens during error recovery + // on malformed input; tolerate any combination of null OPEN/Name/CLOSE. + if (ctx.OPEN(1) != null || ctx.Name(1) != null || ctx.CLOSE(1) != null) { + String closeTagPrefix = ctx.OPEN(1) == null ? "" : prefix(ctx.OPEN(1)); + advanceCursor(codePointCursor + 2); + + String closeName = ctx.Name(1) == null ? "" : + convert(ctx.Name(1), (n, p) -> n.getText()); + String afterCloseName = ctx.CLOSE(1) == null ? "" : prefix(ctx.CLOSE(1)); + + closeTag = new Xml.Tag.Closing( + randomId(), + closeTagPrefix, + Markers.EMPTY, + closeName, + afterCloseName + ); + advanceCursor(codePointCursor + 1); + } } return new Xml.Tag(randomId(), prefix, Markers.EMPTY, name, attributes, @@ -468,15 +478,23 @@ private String prefix(Token token) { /** - * Advance both the cursor and the code point cursor + * Advance both the cursor and the code point cursor. + * Clamps to the end of source if a synthesized token (from ANTLR error recovery + * on malformed input) would advance past the end of the source string. 
*/ @SuppressWarnings("UnusedReturnValue") private int advanceCursor(int newCodePointIndex) { if (newCodePointIndex <= codePointCursor) { return cursor; } - cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor); - codePointCursor = newCodePointIndex; + try { + cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor); + codePointCursor = newCodePointIndex; + } catch (IndexOutOfBoundsException e) { + int reachableCodePoints = source.codePointCount(cursor, source.length()); + cursor = source.length(); + codePointCursor += reachableCodePoints; + } return cursor; } diff --git a/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java b/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java index a3302184598..cbdc79bfcfc 100755 --- a/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java +++ b/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java @@ -765,4 +765,42 @@ void utf8SurrogatePairsSimple() { ) ); } + + @Issue("https://github.com/openrewrite/rewrite/issues/7554") + @Test + void malformedMissingRootCloseDoesNotThrow() { + // Inner element is never closed before EOF. Previously threw IndexOutOfBoundsException + // from advanceCursor when ANTLR error recovery synthesized closing-tag tokens past EOF. + SourceFile parsed = XmlParser.builder().build() + .parse(new InMemoryExecutionContext(t -> { + }), "\n\n wrong format\n") + .findFirst().orElseThrow(); + assertThat(parsed).isInstanceOf(ParseError.class); + assertThat(parsed.printAll()).isEqualTo("\n\n wrong format\n"); + } + + @Issue("https://github.com/openrewrite/rewrite/issues/7554") + @Test + void malformedUnterminatedEndTagDoesNotThrow() { + // End-tag missing its '>' before EOF. 
+ SourceFile parsed = XmlParser.builder().build() + .parse(new InMemoryExecutionContext(t -> { + }), " { + }), "McFarland & Company") + .findFirst().orElseThrow(); + assertThat(parsed).isInstanceOf(ParseError.class); + assertThat(parsed.printAll()).isEqualTo("McFarland & Company"); + } }