Skip to content

Commit a36416d

Browse files
timtebeek and claude authored
Fix HCL parser for comments with CR-only line endings (#6532)
* Support CR-only line endings in HCL line comments

  The HCL lexer's LINE_COMMENT rule only supported LF (\n) or CRLF (\r\n) line endings, but not CR-only (\r) line endings (classic Mac-style). This caused parsing failures for files with CR-only line endings after comments, such as "# comment\rlocals { a = 1 }".

  The fix changes the lexer rule from:

      LINE_COMMENT : ('//' | '#') ~[\r\n]* '\r'? ('\n' | EOF) -> channel(HIDDEN);

  to:

      LINE_COMMENT : ('//' | '#') ~[\r\n]* ('\r\n' | '\r' | '\n' | EOF) -> channel(HIDDEN);

  This now accepts all three line ending styles: CRLF, LF, and CR.

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* Add comprehensive tests for HCL comments at file boundaries

  Add extensive test coverage for HCL comments at the start and end of Terraform/HCL files, including:
  - Comments as the only content in a file
  - Comments at the very first character of files
  - Comments at the end without trailing newlines
  - Multiple comments at start/end
  - Block comments at file boundaries
  - CRLF and CR-only line endings
  - Unicode and emoji in comments
  - Realistic Terraform patterns with header/footer comments
  - Edge cases with heredocs and comments

  The CR-only line ending tests pass after the lexer grammar fix.

  One test is disabled pending further investigation:
  - emojiInComment: Emoji (surrogate pair) in comments at the start of the file causes cursor tracking issues due to a mismatch between ANTLR character indices and the parser visitor's code point tracking.

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* Restore original headers

* Copy the `antlrGeneration` approach from rewrite-toml

* Handle all newlines in NEWLINE

* Shorten LINE_COMMENT

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 718b88c commit a36416d

9 files changed

Lines changed: 1087 additions & 317 deletions

File tree

rewrite-hcl/build.gradle.kts

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,9 @@ plugins {
22
id("org.openrewrite.build.language-library")
33
}
44

5-
// run manually with `./gradlew rewrite-hcl:generateAntlrSources` when you need to regenerate
6-
// be sure to use `implementation("org.antlr:antlr4:4.13.2")` below when generating
5+
val antlrGeneration by configurations.creating {
6+
extendsFrom(configurations.implementation.get())
7+
}
78
tasks.register<JavaExec>("generateAntlrSources") {
89
mainClass.set("org.antlr.v4.Tool")
910

@@ -13,7 +14,7 @@ tasks.register<JavaExec>("generateAntlrSources") {
1314
"-visitor"
1415
) + fileTree("src/main/antlr").matching { include("**/*.g4") }.map { it.path }
1516

16-
classpath = sourceSets["main"].runtimeClasspath
17+
classpath = antlrGeneration
1718

1819
finalizedBy("licenseFormat")
1920
}
@@ -28,6 +29,10 @@ dependencies {
2829
implementation("org.antlr:antlr4-runtime:4.13.2")
2930
implementation("io.micrometer:micrometer-core:1.9.+")
3031

32+
antlrGeneration("org.antlr:antlr4:4.13.2"){
33+
exclude(group = "com.ibm.icu", module = "icu4j")
34+
}
35+
3136
testImplementation(project(":rewrite-test"))
3237
testImplementation("org.junit-pioneer:junit-pioneer:2.0.0")
3338
}

rewrite-hcl/src/main/antlr/HCLLexer.g4

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,10 +56,10 @@ Identifier
5656
5757
// Lexical Elements - Comments and Whitespace
5858
// https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#comments-and-whitespace
59-
WS : [ \t\r\u000C]+ -> channel(HIDDEN);
59+
WS : [ \t\u000C]+ -> channel(HIDDEN);
6060
COMMENT : '/*' .*? '*/' -> channel(HIDDEN);
61-
LINE_COMMENT : ('//' | '#') ~[\r\n]* '\r'? ('\n' | EOF) -> channel(HIDDEN);
62-
NEWLINE : '\n' -> channel(HIDDEN);
61+
LINE_COMMENT : ('//' | '#') ~[\r\n]* -> channel(HIDDEN);
62+
NEWLINE : ('\r\n' | '\r' | '\n') -> channel(HIDDEN);
6363
6464
fragment LetterOrDigit
6565
: Letter

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/grammar/HCLLexer.interp

Lines changed: 3 additions & 1 deletion
Large diffs are not rendered by default.

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/grammar/HCLLexer.java

Lines changed: 303 additions & 305 deletions
Large diffs are not rendered by default.

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/grammar/HCLLexer.tokens

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ HP_COMMENT=48
4949
HP_LINE_COMMENT=49
5050
HTemplateLiteral=50
5151
HTemplateLiteralChar=51
52+
H_NEWLINE=52
5253
'if'=3
5354
'in'=4
5455
'null'=6
@@ -81,3 +82,4 @@ HTemplateLiteralChar=51
8182
'%'=41
8283
'...'=42
8384
'~'=43
85+
'\n'=52

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/grammar/HCLParser.interp

Lines changed: 3 additions & 1 deletion
Large diffs are not rendered by default.

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/grammar/HCLParser.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ public class HCLParser extends Parser {
3939
OR=27, NEQ=28, GT=29, QUESTION=30, RBRACK=31, RPAREN=32, MUL=33, NOT=34,
4040
LEQ=35, DOT=36, DIV=37, GEQ=38, ARROW=39, COMMA=40, MOD=41, ELLIPSIS=42,
4141
TILDE=43, TEMPLATE_INTERPOLATION_START=44, TemplateStringLiteral=45, TemplateStringLiteralChar=46,
42-
HP_WS=47, HP_COMMENT=48, HP_LINE_COMMENT=49, HTemplateLiteral=50, HTemplateLiteralChar=51;
42+
HP_WS=47, HP_COMMENT=48, HP_LINE_COMMENT=49, HTemplateLiteral=50, HTemplateLiteralChar=51,
43+
H_NEWLINE=52;
4344
public static final int
4445
RULE_configFile = 0, RULE_body = 1, RULE_bodyContent = 2, RULE_attribute = 3,
4546
RULE_block = 4, RULE_blockLabel = 5, RULE_expression = 6, RULE_exprTerm = 7,
@@ -74,7 +75,8 @@ private static String[] makeLiteralNames() {
7475
null, null, null, null, null, null, null, null, "'+'", "'&&'", "'=='",
7576
"'<'", "'::'", "':'", "'['", "'('", "'-'", "'||'", "'!='", "'>'", "'?'",
7677
"']'", "')'", "'*'", "'!'", "'<='", "'.'", "'/'", "'>='", "'=>'", "','",
77-
"'%'", "'...'", "'~'"
78+
"'%'", "'...'", "'~'", null, null, null, null, null, null, null, null,
79+
"'\\n'"
7880
};
7981
}
8082
private static final String[] _LITERAL_NAMES = makeLiteralNames();
@@ -87,7 +89,7 @@ private static String[] makeSymbolicNames() {
8789
"NEQ", "GT", "QUESTION", "RBRACK", "RPAREN", "MUL", "NOT", "LEQ", "DOT",
8890
"DIV", "GEQ", "ARROW", "COMMA", "MOD", "ELLIPSIS", "TILDE", "TEMPLATE_INTERPOLATION_START",
8991
"TemplateStringLiteral", "TemplateStringLiteralChar", "HP_WS", "HP_COMMENT",
90-
"HP_LINE_COMMENT", "HTemplateLiteral", "HTemplateLiteralChar"
92+
"HP_LINE_COMMENT", "HTemplateLiteral", "HTemplateLiteralChar", "H_NEWLINE"
9193
};
9294
}
9395
private static final String[] _SYMBOLIC_NAMES = makeSymbolicNames();
@@ -3511,7 +3513,7 @@ private boolean exprTerm_sempred(ExprTermContext _localctx, int predIndex) {
35113513
}
35123514

35133515
public static final String _serializedATN =
3514-
"\u0004\u00013\u0187\u0002\u0000\u0007\u0000\u0002\u0001\u0007\u0001\u0002"+
3516+
"\u0004\u00014\u0187\u0002\u0000\u0007\u0000\u0002\u0001\u0007\u0001\u0002"+
35153517
"\u0002\u0007\u0002\u0002\u0003\u0007\u0003\u0002\u0004\u0007\u0004\u0002"+
35163518
"\u0005\u0007\u0005\u0002\u0006\u0007\u0006\u0002\u0007\u0007\u0007\u0002"+
35173519
"\b\u0007\b\u0002\t\u0007\t\u0002\n\u0007\n\u0002\u000b\u0007\u000b\u0002"+

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/grammar/HCLParser.tokens

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ HP_COMMENT=48
4949
HP_LINE_COMMENT=49
5050
HTemplateLiteral=50
5151
HTemplateLiteralChar=51
52+
H_NEWLINE=52
5253
'if'=3
5354
'in'=4
5455
'null'=6
@@ -81,3 +82,4 @@ HTemplateLiteralChar=51
8182
'%'=41
8283
'...'=42
8384
'~'=43
85+
'\n'=52

0 commit comments

Comments
 (0)