diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java b/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java index 9a27991571..28548e7cb6 100755 --- a/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java +++ b/rewrite-xml/src/main/java/org/openrewrite/xml/internal/XmlParserVisitor.java @@ -336,25 +336,35 @@ public Xml.Tag visitElement(XMLParser.ElementContext ctx) { beforeTagDelimiterPrefix = prefix(ctx.SLASH_CLOSE()); advanceCursor(ctx.SLASH_CLOSE().getSymbol().getStopIndex() + 1); } else { - beforeTagDelimiterPrefix = prefix(ctx.CLOSE(0)); - advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1); + beforeTagDelimiterPrefix = ctx.CLOSE(0) == null ? "" : prefix(ctx.CLOSE(0)); + if (ctx.CLOSE(0) != null) { + advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1); + } content = ctx.content().stream() .map(this::visit) .map(Content.class::cast) .collect(toList()); - String closeTagPrefix = prefix(ctx.OPEN(1)); - advanceCursor(codePointCursor + 2); - - closeTag = new Xml.Tag.Closing( - randomId(), - closeTagPrefix, - Markers.EMPTY, - convert(ctx.Name(1), (n, p) -> n.getText()), - prefix(ctx.CLOSE(1)) - ); - advanceCursor(codePointCursor + 1); + // ANTLR may synthesize missing closing-tag tokens during error recovery + // on malformed input; tolerate any combination of null OPEN/Name/CLOSE. + if (ctx.OPEN(1) != null || ctx.Name(1) != null || ctx.CLOSE(1) != null) { + String closeTagPrefix = ctx.OPEN(1) == null ? "" : prefix(ctx.OPEN(1)); + advanceCursor(codePointCursor + 2); + + String closeName = ctx.Name(1) == null ? "" : + convert(ctx.Name(1), (n, p) -> n.getText()); + String afterCloseName = ctx.CLOSE(1) == null ? "" : prefix(ctx.CLOSE(1)); + + closeTag = new Xml.Tag.Closing( + randomId(), + closeTagPrefix, + Markers.EMPTY, + closeName, + afterCloseName + ); + advanceCursor(codePointCursor + 1); + } } return new Xml.Tag(randomId(), prefix, Markers.EMPTY, name, attributes, @@ -468,15 +478,23 @@ private String prefix(Token token) { /** - * Advance both the cursor and the code point cursor + * Advance both the cursor and the code point cursor. + * Clamps to the end of source if a synthesized token (from ANTLR error recovery + * on malformed input) would advance past the end of the source string. */ @SuppressWarnings("UnusedReturnValue") private int advanceCursor(int newCodePointIndex) { if (newCodePointIndex <= codePointCursor) { return cursor; } - cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor); - codePointCursor = newCodePointIndex; + try { + cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor); + codePointCursor = newCodePointIndex; + } catch (IndexOutOfBoundsException e) { + int reachableCodePoints = source.codePointCount(cursor, source.length()); + cursor = source.length(); + codePointCursor += reachableCodePoints; + } return cursor; } diff --git a/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java b/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java index a330218459..cbdc79bfcf 100755 --- a/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java +++ b/rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java @@ -765,4 +765,42 @@ void utf8SurrogatePairsSimple() { ) ); } + + @Issue("https://github.com/openrewrite/rewrite/issues/7554") + @Test + void malformedMissingRootCloseDoesNotThrow() { + // Inner element is never closed before EOF. Previously threw IndexOutOfBoundsException + // from advanceCursor when ANTLR error recovery synthesized closing-tag tokens past EOF. + SourceFile parsed = XmlParser.builder().build() + .parse(new InMemoryExecutionContext(t -> { + }), "\n\n wrong format\n") + .findFirst().orElseThrow(); + assertThat(parsed).isInstanceOf(ParseError.class); + assertThat(parsed.printAll()).isEqualTo("\n\n wrong format\n"); + } + + @Issue("https://github.com/openrewrite/rewrite/issues/7554") + @Test + void malformedUnterminatedEndTagDoesNotThrow() { + // End-tag missing its '>' before EOF. + SourceFile parsed = XmlParser.builder().build() + .parse(new InMemoryExecutionContext(t -> { + }), " { + }), "McFarland & Company") + .findFirst().orElseThrow(); + assertThat(parsed).isInstanceOf(ParseError.class); + assertThat(parsed.printAll()).isEqualTo("McFarland & Company"); + } }