Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -336,25 +336,35 @@ public Xml.Tag visitElement(XMLParser.ElementContext ctx) {
beforeTagDelimiterPrefix = prefix(ctx.SLASH_CLOSE());
advanceCursor(ctx.SLASH_CLOSE().getSymbol().getStopIndex() + 1);
} else {
beforeTagDelimiterPrefix = prefix(ctx.CLOSE(0));
advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1);
beforeTagDelimiterPrefix = ctx.CLOSE(0) == null ? "" : prefix(ctx.CLOSE(0));
if (ctx.CLOSE(0) != null) {
advanceCursor(ctx.CLOSE(0).getSymbol().getStopIndex() + 1);
}

content = ctx.content().stream()
.map(this::visit)
.map(Content.class::cast)
.collect(toList());

String closeTagPrefix = prefix(ctx.OPEN(1));
advanceCursor(codePointCursor + 2);

closeTag = new Xml.Tag.Closing(
randomId(),
closeTagPrefix,
Markers.EMPTY,
convert(ctx.Name(1), (n, p) -> n.getText()),
prefix(ctx.CLOSE(1))
);
advanceCursor(codePointCursor + 1);
// ANTLR may synthesize missing closing-tag tokens during error recovery
// on malformed input; tolerate any combination of null OPEN/Name/CLOSE.
if (ctx.OPEN(1) != null || ctx.Name(1) != null || ctx.CLOSE(1) != null) {
String closeTagPrefix = ctx.OPEN(1) == null ? "" : prefix(ctx.OPEN(1));
advanceCursor(codePointCursor + 2);

String closeName = ctx.Name(1) == null ? "" :
convert(ctx.Name(1), (n, p) -> n.getText());
String afterCloseName = ctx.CLOSE(1) == null ? "" : prefix(ctx.CLOSE(1));

closeTag = new Xml.Tag.Closing(
randomId(),
closeTagPrefix,
Markers.EMPTY,
closeName,
afterCloseName
);
advanceCursor(codePointCursor + 1);
}
}

return new Xml.Tag(randomId(), prefix, Markers.EMPTY, name, attributes,
Expand Down Expand Up @@ -468,15 +478,23 @@ private String prefix(Token token) {


/**
 * Advance both the cursor and the code point cursor.
 * <p>
 * Clamps to the end of source if a synthesized token (from ANTLR error recovery
 * on malformed input) would advance past the end of the source string.
 *
 * @param newCodePointIndex the code point index to move to; values at or before
 *                          the current code point cursor leave both cursors unchanged
 * @return the cursor position in {@code source} after the move
 */
@SuppressWarnings("UnusedReturnValue")
private int advanceCursor(int newCodePointIndex) {
    if (newCodePointIndex <= codePointCursor) {
        return cursor;
    }
    // The advance must happen only inside the try block: an unguarded
    // offsetByCodePoints call here would throw before the clamp below could run.
    try {
        cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor);
        codePointCursor = newCodePointIndex;
    } catch (IndexOutOfBoundsException ignored) {
        // Requested index lies beyond the source: stop at the end, moving the
        // code point cursor only by the code points that actually remain.
        int reachableCodePoints = source.codePointCount(cursor, source.length());
        cursor = source.length();
        codePointCursor += reachableCodePoints;
    }
    return cursor;
}

Expand Down
38 changes: 38 additions & 0 deletions rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -765,4 +765,42 @@ void utf8SurrogatePairsSimple() {
)
);
}

@Issue("https://github.com/openrewrite/rewrite/issues/7554")
@Test
void malformedMissingRootCloseDoesNotThrow() {
    // <inner> is still open when EOF is reached. This input used to fail with an
    // IndexOutOfBoundsException in advanceCursor, because ANTLR error recovery
    // synthesized closing-tag tokens positioned past the end of the source.
    String malformedXml = "<root>\n<inner>\n wrong format\n</root>";
    InMemoryExecutionContext ctx = new InMemoryExecutionContext(t -> {
    });

    SourceFile result = XmlParser.builder().build()
            .parse(ctx, malformedXml)
            .findFirst()
            .orElseThrow();

    assertThat(result).isInstanceOf(ParseError.class);
    // The original text must survive the round trip unchanged.
    assertThat(result.printAll()).isEqualTo(malformedXml);
}

@Issue("https://github.com/openrewrite/rewrite/issues/7554")
@Test
void malformedUnterminatedEndTagDoesNotThrow() {
    // The closing tag reaches EOF without its terminating '>'.
    String malformedXml = "<a></a";
    InMemoryExecutionContext ctx = new InMemoryExecutionContext(t -> {
    });

    SourceFile result = XmlParser.builder().build()
            .parse(ctx, malformedXml)
            .findFirst()
            .orElseThrow();

    assertThat(result).isInstanceOf(ParseError.class);
    // The original text must survive the round trip unchanged.
    assertThat(result.printAll()).isEqualTo(malformedXml);
}

@Issue("https://github.com/openrewrite/rewrite/issues/7554")
@Test
void malformedBareAmpersandDoesNotThrow() {
    // An '&' that does not start an entity reference is not valid XML. The parser
    // is expected to fall back to a ParseError that keeps the original text,
    // rather than silently dropping the offending character.
    String malformedXml = "<a>McFarland & Company</a>";
    InMemoryExecutionContext ctx = new InMemoryExecutionContext(t -> {
    });

    SourceFile result = XmlParser.builder().build()
            .parse(ctx, malformedXml)
            .findFirst()
            .orElseThrow();

    assertThat(result).isInstanceOf(ParseError.class);
    assertThat(result.printAll()).isEqualTo(malformedXml);
}
}
Loading