Skip to content

Commit e42f880

Browse files
Add template directives support (%{if}, %{for}) in quoted strings (#276)
1 parent 70a5307 commit e42f880

14 files changed

Lines changed: 1010 additions & 9 deletions

CLAUDE.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,11 @@ The **Direct** pipeline (`parse_to_tree` → `transform` → `to_lark` → `reco
5252
| `rules/containers.py` | `TupleRule`, `ObjectRule`, `ObjectElemRule`, `ObjectElemKeyRule` |
5353
| `rules/expressions.py` | `ExprTermRule`, `BinaryOpRule`, `UnaryOpRule`, `ConditionalRule` |
5454
| `rules/literal_rules.py` | `IntLitRule`, `FloatLitRule`, `IdentifierRule`, `KeywordRule` |
55-
| `rules/strings.py` | `StringRule`, `InterpolationRule`, `HeredocTemplateRule` |
55+
| `rules/strings.py` | `StringRule`, `InterpolationRule`, `HeredocTemplateRule`, `TemplateStringRule` |
5656
| `rules/functions.py` | `FunctionCallRule`, `ArgumentsRule` |
5757
| `rules/indexing.py` | `GetAttrRule`, `SqbIndexRule`, splat rules |
5858
| `rules/for_expressions.py` | `ForTupleExprRule`, `ForObjectExprRule`, `ForIntroRule`, `ForCondRule` |
59+
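| `rules/directives.py` | `TemplateIfRule`, `TemplateForRule`, and the flat directive rules `TemplateIfStartRule`, `TemplateElseRule`, `TemplateEndifRule`, `TemplateForStartRule`, `TemplateEndforRule` |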
5960
| `rules/whitespace.py` | `NewLineOrCommentRule`, `InlineCommentMixIn` |
6061

6162
## Public API (`api.py`)

hcl2/deserializer.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Deserialize Python dicts (or JSON) into LarkElement trees."""
2+
23
import json
34
import re
45
from abc import ABC, abstractmethod
@@ -189,6 +190,13 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule:
189190
return IdentifierRule([NAME(value)])
190191

191192
def _deserialize_string(self, value: str) -> StringRule:
193+
# If the string contains template directives, delegate to parser
194+
inner = value[1:-1] if value.startswith('"') and value.endswith('"') else value
195+
# Check for unescaped %{ (i.e. %{ not preceded by another %)
196+
stripped = inner.replace("%%{", "")
197+
if "%{" in stripped:
198+
return self._deserialize_string_via_parser(value)
199+
192200
result = []
193201
# split string into individual parts based on lark grammar
194202
# e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
@@ -210,6 +218,23 @@ def _deserialize_string(self, value: str) -> StringRule:
210218

211219
return StringRule([DBLQUOTE(), *result, DBLQUOTE()])
212220

221+
def _deserialize_string_via_parser(self, value: str) -> StringRule:
222+
"""Deserialize a string containing template directives by parsing it."""
223+
# Ensure the value is quoted
224+
if not (value.startswith('"') and value.endswith('"')):
225+
value = f'"{value}"'
226+
snippet = f"temp = {value}"
227+
parsed_tree = _get_parser().parse(snippet)
228+
rules_tree = self._transformer.transform(parsed_tree)
229+
# Extract the string from: start -> body -> attribute -> expression -> string
230+
expr = rules_tree.body.children[0].expression
231+
# The expression is an ExprTermRule wrapping a StringRule
232+
for child in expr.children:
233+
if isinstance(child, StringRule):
234+
return child
235+
# Fallback: shouldn't happen; returns the enclosing expression (not a StringRule) as-is
236+
return expr # type: ignore[return-value]
237+
213238
def _deserialize_string_part(self, value: str) -> StringPartRule:
214239
if value.startswith("$${") and value.endswith("}"):
215240
return StringPartRule([ESCAPED_INTERPOLATION(value)])

hcl2/hcl2.lark

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,16 @@ IF : "if"
1010
IN : "in"
1111
FOR : "for"
1212
FOR_EACH : "for_each"
13+
ELSE : "else"
14+
ENDIF : "endif"
15+
ENDFOR : "endfor"
1316

1417

1518
// Literals
1619
NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/
1720
ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/
18-
STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/
21+
ESCAPED_DIRECTIVE.2: /%%\{[^}]*\}/
22+
STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)(?!%%\{)(?!%\{)[^"\\]|\\.|(?:\$(?!\$?\{))|(?:%(?!%?\{)))+/
1923
DECIMAL : "0".."9"
2024
NEGATIVE_DECIMAL : "-" DECIMAL
2125
EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+
@@ -52,10 +56,18 @@ DOT : "."
5256
EQ : /[ \t]*=(?!=|>)/
5357
COLON : /[ \t]*:(?!:)/
5458
DBLQUOTE : "\""
59+
// Escaped-quote string inside template directives: matches \"content\"
60+
// (Lark escape-processes the regex, so the compiled pattern is \\"(...)\\" —
61+
// i.e. one literal backslash + quote as delimiters.)
62+
TEMPLATE_STRING.3 : /\\\\"(?:[^"\\\\]|\\\\.)*\\\\"/
5563

5664
// Interpolation
5765
INTERP_START : "${"
5866

67+
// Template Directives
68+
DIRECTIVE_START : "%{"
69+
STRIP_MARKER : "~"
70+
5971
// Splat Operators
6072
ATTR_SPLAT : ".*"
6173
FULL_SPLAT_START : "[*]"
@@ -90,19 +102,32 @@ new_line_or_comment: ( NL_OR_COMMENT )+
90102

91103
// Basic literals and identifiers
92104
identifier : NAME
93-
keyword: IN | FOR | IF | FOR_EACH
105+
keyword: IN | FOR | IF | FOR_EACH | ELSE | ENDIF | ENDFOR
94106
int_lit: INT_LITERAL
95107
float_lit: FLOAT_LITERAL
96108
string: DBLQUOTE string_part* DBLQUOTE
97109
string_part: STRING_CHARS
98110
| ESCAPED_INTERPOLATION
111+
| ESCAPED_DIRECTIVE
99112
| interpolation
113+
| template_if_start
114+
| template_else
115+
| template_endif
116+
| template_for_start
117+
| template_endfor
100118

101119
// Expressions
102120
?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional
103121
| or_expr
104122
interpolation: INTERP_START expression RBRACE
105123

124+
// Template directives (flat rules — transformer assembles if/for structure)
125+
template_if_start: DIRECTIVE_START STRIP_MARKER? IF expression STRIP_MARKER? RBRACE
126+
template_else: DIRECTIVE_START STRIP_MARKER? ELSE STRIP_MARKER? RBRACE
127+
template_endif: DIRECTIVE_START STRIP_MARKER? ENDIF STRIP_MARKER? RBRACE
128+
template_for_start: DIRECTIVE_START STRIP_MARKER? FOR identifier (COMMA identifier)? IN expression STRIP_MARKER? RBRACE
129+
template_endfor: DIRECTIVE_START STRIP_MARKER? ENDFOR STRIP_MARKER? RBRACE
130+
106131
// Operator precedence ladder (lowest to highest)
107132
// Each level uses left recursion for left-associativity.
108133
// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain
@@ -160,6 +185,7 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR
160185
| float_lit
161186
| int_lit
162187
| string
188+
| template_string
163189
| tuple
164190
| object
165191
| identifier
@@ -173,6 +199,8 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR
173199
| for_tuple_expr
174200
| for_object_expr
175201

202+
template_string : TEMPLATE_STRING
203+
176204
// Collections
177205
tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB
178206
object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE

hcl2/reconstructor.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,20 @@
11
"""Reconstruct HCL2 text from a Lark Tree AST."""
2+
23
from typing import List, Optional, Union
34

45
from lark import Tree, Token
56
from hcl2.rules import tokens
67
from hcl2.rules.base import BlockRule
78
from hcl2.rules.containers import ObjectElemRule
9+
from hcl2.rules.directives import (
10+
TemplateIfRule,
11+
TemplateForRule,
12+
TemplateIfStartRule,
13+
TemplateElseRule,
14+
TemplateEndifRule,
15+
TemplateForStartRule,
16+
TemplateEndforRule,
17+
)
818
from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule
919
from hcl2.rules.literal_rules import IdentifierRule
1020
from hcl2.rules.strings import StringRule
@@ -34,6 +44,16 @@ class HCLReconstructor:
3444
"PLUS",
3545
}
3646

47+
_directive_rule_names = {
48+
TemplateIfStartRule.lark_name(),
49+
TemplateElseRule.lark_name(),
50+
TemplateEndifRule.lark_name(),
51+
TemplateForStartRule.lark_name(),
52+
TemplateEndforRule.lark_name(),
53+
TemplateIfRule.lark_name(),
54+
TemplateForRule.lark_name(),
55+
}
56+
3757
def __init__(self):
3858
self._last_was_space = True
3959
self._current_indent = 0
@@ -100,6 +120,41 @@ def _should_add_space_before(
100120
return False
101121
return True
102122

123+
# Template directive spacing: %{~ keyword ~} patterns
124+
if parent_rule_name in self._directive_rule_names:
125+
# After DIRECTIVE_START: space before a keyword, but none before a strip marker
126+
if self._last_token_name == tokens.DIRECTIVE_START.lark_name():
127+
# No space before strip marker
128+
if token_type == tokens.STRIP_MARKER.lark_name():
129+
return False
130+
return True
131+
# Space after STRIP_MARKER (before keyword)
132+
if self._last_token_name == tokens.STRIP_MARKER.lark_name():
133+
# After strip marker: space before keyword, no space before RBRACE
134+
if token_type == tokens.RBRACE.lark_name():
135+
return False
136+
return True
137+
# Space after keywords
138+
if self._last_token_name in [
139+
tokens.FOR.lark_name(),
140+
tokens.IN.lark_name(),
141+
tokens.IF.lark_name(),
142+
]:
143+
return True
144+
# Space before IN keyword (after identifier)
145+
if token_type == tokens.IN.lark_name():
146+
return True
147+
# Space before STRIP_MARKER (before closing })
148+
if token_type == tokens.STRIP_MARKER.lark_name():
149+
return True
150+
# Space before RBRACE (closing directive, no strip marker)
151+
if token_type == tokens.RBRACE.lark_name():
152+
return True
153+
# Space after COMMA in for directives
154+
if self._last_token_name == tokens.COMMA.lark_name():
155+
return True
156+
return False
157+
103158
if token_type in [
104159
tokens.FOR.lark_name(),
105160
tokens.IN.lark_name(),

0 commit comments

Comments
 (0)