Skip to content

Commit e9be26b

Browse files
Fix comment serialization: multi-token NL_OR_COMMENT and classification (#134) (#281)
NewLineOrCommentRule.serialize() now concatenates all child tokens instead of only reading the first, fixing silently dropped comments in multi-token new_line_or_comment nodes. BinaryOpRule distinguishes its trailing new_line_or_comment (which can absorb body-level comments from the grammar) from expression-internal comments, routing absorbed standalone comments to __comments__ and keeping expression-internal ones in __inline_comments__. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b8f1dfe commit e9be26b

6 files changed

Lines changed: 217 additions & 2 deletions

File tree

hcl2/rules/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ def serialize(
8585
attribute_names.add(child.identifier.serialize(options))
8686
result.update(child.serialize(options))
8787
if options.with_comments:
88-
# collect in-line comments from attribute assignments, expressions etc
8988
inline_comments.extend(child.expression.inline_comments())
89+
comments.extend(child.expression.absorbed_comments())
9090

9191
if isinstance(child, NewLineOrCommentRule) and options.with_comments:
9292
child_comments = child.to_list()

hcl2/rules/expressions.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,38 @@ def binary_term(self) -> BinaryTermRule:
242242
"""Return the binary term (operator + right-hand operand)."""
243243
return self._children[1]
244244

245+
@property
246+
def _trailing_nl(self) -> Optional[NewLineOrCommentRule]:
247+
"""Return the trailing new_line_or_comment child, if present."""
248+
child = self._children[2]
249+
if isinstance(child, NewLineOrCommentRule):
250+
return child
251+
return None
252+
253+
def inline_comments(self):
254+
"""Collect inline comments, excluding absorbed body-level comments."""
255+
trailing = self._trailing_nl
256+
result = []
257+
for child in self._children:
258+
if isinstance(child, NewLineOrCommentRule):
259+
# Trailing NL_OR_COMMENT with a leading newline contains
260+
# body-level comments absorbed by the grammar, not inline ones.
261+
if child is trailing and not child.is_inline:
262+
continue
263+
comments = child.to_list()
264+
if comments is not None:
265+
result.extend(comments)
266+
elif isinstance(child, InlineCommentMixIn):
267+
result.extend(child.inline_comments())
268+
return result
269+
270+
def absorbed_comments(self):
271+
"""Return body-level comments absorbed into the trailing NL_OR_COMMENT."""
272+
trailing = self._trailing_nl
273+
if trailing is not None and not trailing.is_inline:
274+
return trailing.to_list() or []
275+
return []
276+
245277
def serialize(
246278
self, options=SerializationOptions(), context=SerializationContext()
247279
) -> Any:

hcl2/rules/whitespace.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,17 @@ def serialize(
2626
self, options=SerializationOptions(), context=SerializationContext()
2727
) -> Any:
2828
"""Serialize to the raw comment/newline string."""
29-
return self.token.serialize()
29+
return "".join(child.serialize() for child in self._children)
30+
31+
@property
32+
def is_inline(self) -> bool:
33+
"""True if this comment is on the same line as preceding code.
34+
35+
A raw string starting with ``\\n`` means the comment sits on its own
36+
line (standalone). One starting with ``#``, ``//``, or ``/*`` is
37+
inline — it follows code on the same line.
38+
"""
39+
return not self.serialize().startswith("\n")
3040

3141
def to_list(
3242
self, options: SerializationOptions = SerializationOptions()
@@ -91,3 +101,11 @@ def inline_comments(self):
91101
result.extend(child.inline_comments())
92102

93103
return result
104+
105+
def absorbed_comments(self):
106+
"""Return body-level comments absorbed by grammar into this expression.
107+
108+
Default: empty. ``BinaryOpRule`` overrides this because its trailing
109+
``new_line_or_comment?`` can swallow the next body-level comment.
110+
"""
111+
return []
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"resource": [
3+
{
4+
"\"aws_instance\"": {
5+
"\"web\"": {
6+
"ami": "\"abc-123\"",
7+
"instance_type": "\"t2.micro\"",
8+
"count": "${1 + 2}",
9+
"tags": {
10+
"Name": "\"web\"",
11+
"Env": "\"prod\""
12+
},
13+
"enabled": "true",
14+
"nested": [
15+
{
16+
"key": "\"value\"",
17+
"__comments__": [
18+
{
19+
"value": "comment inside nested block"
20+
}
21+
],
22+
"__is_block__": true
23+
}
24+
],
25+
"__comments__": [
26+
{
27+
"value": "standalone comment inside block"
28+
},
29+
{
30+
"value": "hash standalone comment"
31+
},
32+
{
33+
"value": "absorbed standalone after binary_op"
34+
},
35+
{
36+
"value": "multi-line\n block comment"
37+
}
38+
],
39+
"__inline_comments__": [
40+
{
41+
"value": "comment inside object"
42+
},
43+
{
44+
"value": "inline after value"
45+
}
46+
],
47+
"__is_block__": true
48+
}
49+
}
50+
}
51+
],
52+
"__comments__": [
53+
{
54+
"value": "top-level standalone comment"
55+
}
56+
]
57+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// top-level standalone comment
2+
resource "aws_instance" "web" {
3+
ami = "abc-123"
4+
5+
// standalone comment inside block
6+
instance_type = "t2.micro"
7+
8+
# hash standalone comment
9+
count = 1 + 2
10+
# absorbed standalone after binary_op
11+
12+
tags = {
13+
Name = "web"
14+
# comment inside object
15+
Env = "prod" # inline after value
16+
}
17+
18+
/*
19+
multi-line
20+
block comment
21+
*/
22+
enabled = true
23+
24+
nested {
25+
// comment inside nested block
26+
key = "value"
27+
}
28+
}

test/integration/test_specialized.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,86 @@ def test_full_round_trip(self):
150150
self.assertEqual(reserialized, serialized)
151151

152152

153+
class TestCommentSerialization(TestCase):
154+
"""Test that comments are correctly classified during HCL → JSON serialization.
155+
156+
Covers:
157+
- Standalone comments (// and #) at body level → __comments__
158+
- Standalone comments absorbed by binary_op grammar → __comments__
159+
- Comments inside expressions (objects) → __inline_comments__
160+
- Multi-line block comments → __comments__
161+
- Comments in nested blocks
162+
- Top-level comments
163+
"""
164+
165+
maxDiff = None
166+
_OPTIONS = SerializationOptions(with_comments=True)
167+
168+
def test_comment_classification(self):
169+
hcl_path = SPECIAL_DIR / "comments.tf"
170+
json_path = SPECIAL_DIR / "comments.json"
171+
172+
actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS)
173+
expected = json.loads(json_path.read_text())
174+
175+
self.assertEqual(actual, expected)
176+
177+
def test_top_level_comments(self):
178+
actual = _parse_and_serialize("// file header\nx = 1\n", options=self._OPTIONS)
179+
self.assertEqual(actual["__comments__"], [{"value": "file header"}])
180+
181+
def test_standalone_in_body(self):
182+
actual = _parse_and_serialize(
183+
'resource "a" "b" {\n # standalone\n x = 1\n}\n',
184+
options=self._OPTIONS,
185+
)
186+
block = actual["resource"][0]['"a"']['"b"']
187+
self.assertEqual(block["__comments__"], [{"value": "standalone"}])
188+
self.assertNotIn("__inline_comments__", block)
189+
190+
def test_absorbed_after_binary_op(self):
191+
actual = _parse_and_serialize(
192+
"x {\n a = 1 + 2\n # absorbed\n b = 3\n}\n",
193+
options=self._OPTIONS,
194+
)
195+
block = actual["x"][0]
196+
self.assertIn({"value": "absorbed"}, block["__comments__"])
197+
self.assertNotIn("__inline_comments__", block)
198+
199+
def test_inline_after_binary_op(self):
200+
actual = _parse_and_serialize(
201+
"x {\n a = 1 + 2 # inline\n b = 3\n}\n",
202+
options=self._OPTIONS,
203+
)
204+
block = actual["x"][0]
205+
self.assertEqual(block["__inline_comments__"], [{"value": "inline"}])
206+
207+
def test_comment_inside_object(self):
208+
actual = _parse_and_serialize(
209+
"x {\n m = {\n # inside\n k = 1\n }\n}\n",
210+
options=self._OPTIONS,
211+
)
212+
block = actual["x"][0]
213+
self.assertEqual(block["__inline_comments__"], [{"value": "inside"}])
214+
self.assertNotIn("__comments__", block)
215+
216+
def test_multiline_block_comment(self):
217+
actual = _parse_and_serialize(
218+
"x {\n /*\n multi\n line\n */\n a = 1\n}\n",
219+
options=self._OPTIONS,
220+
)
221+
block = actual["x"][0]
222+
self.assertEqual(block["__comments__"], [{"value": "multi\n line"}])
223+
224+
def test_no_comments_without_option(self):
225+
actual = _parse_and_serialize(
226+
"// comment\nx = 1\n",
227+
options=SerializationOptions(with_comments=False),
228+
)
229+
self.assertNotIn("__comments__", actual)
230+
self.assertNotIn("__inline_comments__", actual)
231+
232+
153233
class TestHeredocs(TestCase):
154234
"""Test heredoc serialization, flattening, restoration, and round-trips.
155235

0 commit comments

Comments
 (0)