Skip to content

Commit f41bc52

Browse files
committed
fix(suggest-rule): require path boundary in move-transform verification
Per code review: verifySuggestedRule used strings.HasPrefix without a trailing-separator check, so a "move" rule with from="agg/py" would falsely pass verification for source="agg/python/foo.py" — the LLM's intended rule and the verified rule diverge silently. After the prefix the next character must now be either nothing (whole-path rename) or "/" (subpath rename). Test coverage on the suggester also expanded per review: - renderRuleYAML golden tests for each transform type (the YAML the user copies into config) plus the empty-source-repo branch. - handleSuggestRule branch coverage: 405/503/429/400/502, the Verified=false warning path, and the askLLMForRule default-fill path (name, branch, dest_repo, commit_strategy). - Move-boundary regression test plus an exact-match-at-boundary case.
1 parent 739ab56 commit f41bc52

2 files changed

Lines changed: 385 additions & 1 deletion

File tree

services/operator_suggest_rule.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,16 @@ func verifySuggestedRule(s *llmSuggestedRule, sourcePath, targetPath string) (bo
229229
if !strings.HasPrefix(sourcePath, from) {
230230
return false, "", fmt.Errorf("source path does not start with %q", from)
231231
}
232-
rel := strings.TrimPrefix(strings.TrimPrefix(sourcePath, from), "/")
232+
// Boundary check: the prefix must end at a path separator (or be the
233+
// whole path). Without this, from="agg/py" would falsely "match"
234+
// sourcePath="agg/python/foo.py" and produce a bogus rel of
235+
// "thon/foo.py" — verification would pass for a rule that means
236+
// something entirely different from what the LLM intended.
237+
rest := sourcePath[len(from):]
238+
if rest != "" && !strings.HasPrefix(rest, "/") {
239+
return false, "", fmt.Errorf("source path %q does not match move from %q at a path boundary", sourcePath, from)
240+
}
241+
rel := strings.TrimPrefix(rest, "/")
233242
computed := strings.TrimSuffix(s.TransformTo, "/") + "/" + rel
234243
computed = strings.TrimSuffix(computed, "/")
235244
return computed == targetPath, computed, nil
Lines changed: 375 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,375 @@
1+
package services
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"errors"
7+
"net/http"
8+
"net/http/httptest"
9+
"strings"
10+
"testing"
11+
"time"
12+
13+
"github.com/grove-platform/github-copier/configs"
14+
)
15+
16+
// ── renderRuleYAML golden tests ──
17+
18+
func TestRenderRuleYAML_Move(t *testing.T) {
19+
got := renderRuleYAML(&llmSuggestedRule{
20+
Name: "mflix-java-spring-server",
21+
DestRepo: "mongodb/sample-app-java-mflix",
22+
DestBranch: "main",
23+
TransformType: "move",
24+
TransformFrom: "mflix/server/java-spring",
25+
TransformTo: "server",
26+
CommitStrategy: "pull_request",
27+
}, operatorSuggestRuleRequest{
28+
SourceRepo: "mongodb/docs-content",
29+
})
30+
want := `- name: "mflix-java-spring-server"
31+
source:
32+
repo: "mongodb/docs-content"
33+
destination:
34+
repo: "mongodb/sample-app-java-mflix"
35+
branch: "main"
36+
transformations:
37+
- move: { from: "mflix/server/java-spring", to: "server" }
38+
commit_strategy:
39+
type: "pull_request"
40+
`
41+
if got != want {
42+
t.Errorf("move YAML mismatch\n--- got ---\n%s--- want ---\n%s", got, want)
43+
}
44+
}
45+
46+
func TestRenderRuleYAML_Copy(t *testing.T) {
47+
got := renderRuleYAML(&llmSuggestedRule{
48+
Name: "mflix-readme",
49+
DestRepo: "mongodb/sample-app-java-mflix",
50+
DestBranch: "main",
51+
TransformType: "copy",
52+
TransformFrom: "mflix/README-JAVA-SPRING.md",
53+
TransformTo: "README.md",
54+
CommitStrategy: "pull_request",
55+
}, operatorSuggestRuleRequest{SourceRepo: "mongodb/docs-content"})
56+
want := `- name: "mflix-readme"
57+
source:
58+
repo: "mongodb/docs-content"
59+
destination:
60+
repo: "mongodb/sample-app-java-mflix"
61+
branch: "main"
62+
transformations:
63+
- copy: { from: "mflix/README-JAVA-SPRING.md", to: "README.md" }
64+
commit_strategy:
65+
type: "pull_request"
66+
`
67+
if got != want {
68+
t.Errorf("copy YAML mismatch\n--- got ---\n%s--- want ---\n%s", got, want)
69+
}
70+
}
71+
72+
func TestRenderRuleYAML_Glob(t *testing.T) {
73+
got := renderRuleYAML(&llmSuggestedRule{
74+
Name: "agg-python",
75+
DestRepo: "org/shared-examples",
76+
DestBranch: "main",
77+
TransformType: "glob",
78+
Pattern: "agg/python/**/*.py",
79+
TransformTempl: "shared/python/${relative_path}",
80+
CommitStrategy: "direct",
81+
}, operatorSuggestRuleRequest{SourceRepo: "org/aggregations"})
82+
want := `- name: "agg-python"
83+
source:
84+
repo: "org/aggregations"
85+
destination:
86+
repo: "org/shared-examples"
87+
branch: "main"
88+
transformations:
89+
- glob:
90+
pattern: "agg/python/**/*.py"
91+
transform: "shared/python/${relative_path}"
92+
commit_strategy:
93+
type: "direct"
94+
`
95+
if got != want {
96+
t.Errorf("glob YAML mismatch\n--- got ---\n%s--- want ---\n%s", got, want)
97+
}
98+
}
99+
100+
func TestRenderRuleYAML_Regex(t *testing.T) {
101+
got := renderRuleYAML(&llmSuggestedRule{
102+
Name: "tutorials-versioned",
103+
DestRepo: "org/docs-site",
104+
DestBranch: "main",
105+
TransformType: "regex",
106+
Pattern: `tutorials/v(?P<ver>[0-9]+)/(?P<slug>.+)\.mdx`,
107+
TransformTempl: "docs/${slug}-v${ver}.mdx",
108+
CommitStrategy: "pull_request",
109+
}, operatorSuggestRuleRequest{SourceRepo: "org/tutorials"})
110+
want := `- name: "tutorials-versioned"
111+
source:
112+
repo: "org/tutorials"
113+
destination:
114+
repo: "org/docs-site"
115+
branch: "main"
116+
transformations:
117+
- regex:
118+
pattern: "tutorials/v(?P<ver>[0-9]+)/(?P<slug>.+)\\.mdx"
119+
transform: "docs/${slug}-v${ver}.mdx"
120+
commit_strategy:
121+
type: "pull_request"
122+
`
123+
if got != want {
124+
t.Errorf("regex YAML mismatch\n--- got ---\n%s--- want ---\n%s", got, want)
125+
}
126+
}
127+
128+
// When the operator UI didn't send a source_repo, the YAML must omit the
129+
// entire source: block — the writer fills it in by hand.
130+
func TestRenderRuleYAML_OmitsSourceBlockWhenSourceRepoEmpty(t *testing.T) {
131+
got := renderRuleYAML(&llmSuggestedRule{
132+
Name: "x",
133+
DestRepo: "org/dst",
134+
DestBranch: "main",
135+
TransformType: "move",
136+
TransformFrom: "a",
137+
TransformTo: "b",
138+
CommitStrategy: "pull_request",
139+
}, operatorSuggestRuleRequest{}) // SourceRepo intentionally empty
140+
if strings.Contains(got, "source:") {
141+
t.Errorf("source: block should be omitted when SourceRepo is empty\nyaml:\n%s", got)
142+
}
143+
if !strings.Contains(got, "destination:") {
144+
t.Errorf("destination: block must always be present\nyaml:\n%s", got)
145+
}
146+
}
147+
148+
// ── verifySuggestedRule: move-transform path-boundary test ──
149+
//
150+
// Reviewer flagged that strings.HasPrefix without a "/" boundary check would
151+
// pass verification for from="agg/py" against source="agg/python/foo.py".
152+
// This test pins the corrected behaviour: such mismatches must reject.
153+
154+
func TestVerifySuggestedRule_Move_RejectsNonBoundaryPrefix(t *testing.T) {
155+
s := &llmSuggestedRule{
156+
TransformType: "move",
157+
TransformFrom: "agg/py",
158+
TransformTo: "shared/py",
159+
}
160+
ok, _, err := verifySuggestedRule(s, "agg/python/models/user.py", "shared/py/thon/models/user.py")
161+
if ok {
162+
t.Fatal("verifier must not accept from=agg/py for source=agg/python/...; that's a substring match, not a path-component match")
163+
}
164+
if err == nil {
165+
t.Fatal("want a boundary-mismatch error, got nil")
166+
}
167+
if !strings.Contains(err.Error(), "boundary") {
168+
t.Errorf("error should mention boundary, got %q", err.Error())
169+
}
170+
}
171+
172+
func TestVerifySuggestedRule_Move_AcceptsExactMatchAtBoundary(t *testing.T) {
173+
// Source equals from exactly (single-file directory rename).
174+
s := &llmSuggestedRule{
175+
TransformType: "move",
176+
TransformFrom: "agg/python",
177+
TransformTo: "shared/python",
178+
}
179+
ok, computed, err := verifySuggestedRule(s, "agg/python", "shared/python")
180+
if err != nil || !ok {
181+
t.Fatalf("exact-match boundary case failed: ok=%v computed=%q err=%v", ok, computed, err)
182+
}
183+
}
184+
185+
// ── handleSuggestRule branch coverage ──
186+
187+
// fakeLLMClient records prompts and returns canned responses.
188+
type fakeLLMClient struct {
189+
respJSON string
190+
err error
191+
lastSys string
192+
lastUser string
193+
}
194+
195+
func (f *fakeLLMClient) GenerateJSON(_ context.Context, sys, user string) (string, error) {
196+
f.lastSys = sys
197+
f.lastUser = user
198+
return f.respJSON, f.err
199+
}
200+
func (f *fakeLLMClient) ProviderName() string { return "fake" }
201+
func (f *fakeLLMClient) Ping(_ context.Context) error { return nil }
202+
func (f *fakeLLMClient) GetBaseURL() string { return "" }
203+
func (f *fakeLLMClient) SetBaseURL(_ string) {}
204+
func (f *fakeLLMClient) GetActiveModel() string { return "fake-model" }
205+
func (f *fakeLLMClient) SetActiveModel(_ string) {}
206+
func (f *fakeLLMClient) ListModels(_ context.Context) ([]LLMModel, error) {
207+
return nil, nil
208+
}
209+
func (f *fakeLLMClient) PullModel(_ context.Context, _ string, _ func(LLMPullProgress)) error {
210+
return ErrModelManagementNotSupported
211+
}
212+
func (f *fakeLLMClient) DeleteModel(_ context.Context, _ string) error {
213+
return ErrModelManagementNotSupported
214+
}
215+
216+
func newSuggestRuleTestUI(llm LLMClient) *operatorUI {
217+
return &operatorUI{
218+
cfg: &configs.Config{},
219+
container: &ServiceContainer{},
220+
llm: llm,
221+
ghCache: newGHAuthCache(5 * time.Minute),
222+
suggestLimiter: newTokenBucket(30, time.Hour),
223+
}
224+
}
225+
226+
func suggestRequest(t *testing.T, body string) *http.Request {
227+
t.Helper()
228+
r := httptest.NewRequest(http.MethodPost, "/operator/api/suggest-rule", strings.NewReader(body))
229+
r.Header.Set("Authorization", "Bearer pat-test")
230+
return r
231+
}
232+
233+
func TestHandleSuggestRule_MethodNotAllowed(t *testing.T) {
234+
o := newSuggestRuleTestUI(&fakeLLMClient{})
235+
r := httptest.NewRequest(http.MethodGet, "/operator/api/suggest-rule", nil)
236+
w := httptest.NewRecorder()
237+
o.handleSuggestRule(w, r)
238+
if w.Code != http.StatusMethodNotAllowed {
239+
t.Errorf("status = %d, want 405", w.Code)
240+
}
241+
}
242+
243+
func TestHandleSuggestRule_LLMNotInitialized(t *testing.T) {
244+
o := newSuggestRuleTestUI(nil)
245+
w := httptest.NewRecorder()
246+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"a","target_path":"b"}`))
247+
if w.Code != http.StatusServiceUnavailable {
248+
t.Errorf("status = %d, want 503", w.Code)
249+
}
250+
}
251+
252+
func TestHandleSuggestRule_RateLimited(t *testing.T) {
253+
o := newSuggestRuleTestUI(&fakeLLMClient{respJSON: `{}`})
254+
o.suggestLimiter = newTokenBucket(1, time.Hour)
255+
// First call consumes the only token.
256+
w := httptest.NewRecorder()
257+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"a","target_path":"b"}`))
258+
// Second call must be rate limited regardless of body validity.
259+
w = httptest.NewRecorder()
260+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"a","target_path":"b"}`))
261+
if w.Code != http.StatusTooManyRequests {
262+
t.Errorf("status = %d, want 429", w.Code)
263+
}
264+
if w.Header().Get("Retry-After") == "" {
265+
t.Error("missing Retry-After header on 429")
266+
}
267+
}
268+
269+
func TestHandleSuggestRule_InvalidJSON(t *testing.T) {
270+
o := newSuggestRuleTestUI(&fakeLLMClient{})
271+
w := httptest.NewRecorder()
272+
o.handleSuggestRule(w, suggestRequest(t, `{not-json`))
273+
if w.Code != http.StatusBadRequest {
274+
t.Errorf("status = %d, want 400", w.Code)
275+
}
276+
}
277+
278+
func TestHandleSuggestRule_MissingRequiredFields(t *testing.T) {
279+
o := newSuggestRuleTestUI(&fakeLLMClient{})
280+
w := httptest.NewRecorder()
281+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"only"}`))
282+
if w.Code != http.StatusBadRequest {
283+
t.Errorf("status = %d, want 400", w.Code)
284+
}
285+
}
286+
287+
func TestHandleSuggestRule_LLMError(t *testing.T) {
288+
o := newSuggestRuleTestUI(&fakeLLMClient{err: errors.New("upstream nope")})
289+
w := httptest.NewRecorder()
290+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"a","target_path":"b"}`))
291+
if w.Code != http.StatusBadGateway {
292+
t.Errorf("status = %d, want 502", w.Code)
293+
}
294+
}
295+
296+
func TestHandleSuggestRule_LLMReturnsInvalidJSON(t *testing.T) {
297+
o := newSuggestRuleTestUI(&fakeLLMClient{respJSON: "not json at all"})
298+
w := httptest.NewRecorder()
299+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"a","target_path":"b"}`))
300+
if w.Code != http.StatusBadGateway {
301+
t.Errorf("status = %d, want 502 when LLM emits invalid JSON", w.Code)
302+
}
303+
}
304+
305+
func TestHandleSuggestRule_VerifiedFalseSetsWarning(t *testing.T) {
306+
// LLM returns a "move" rule that doesn't actually produce target_path —
307+
// verification fails, the response should include the warning + verify_error.
308+
llmResp := `{
309+
"name":"x",
310+
"destination_repo":"org/dst",
311+
"transform_type":"move",
312+
"transform_from":"a/b",
313+
"transform_to":"x/y"
314+
}`
315+
o := newSuggestRuleTestUI(&fakeLLMClient{respJSON: llmResp})
316+
317+
w := httptest.NewRecorder()
318+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"a/b/file.txt","target_path":"completely/different.txt"}`))
319+
320+
if w.Code != http.StatusOK {
321+
t.Fatalf("status = %d, want 200 (verification failure is non-fatal)", w.Code)
322+
}
323+
var resp operatorSuggestRuleResponse
324+
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
325+
t.Fatalf("decode: %v", err)
326+
}
327+
if resp.Verified {
328+
t.Errorf("verified should be false")
329+
}
330+
if resp.Warning == "" {
331+
t.Errorf("warning should be set when verification fails")
332+
}
333+
if resp.ComputedPath == "" {
334+
t.Errorf("computed_path should be populated even on verification failure")
335+
}
336+
}
337+
338+
func TestHandleSuggestRule_AppliesDefaultsFromAskLLMForRule(t *testing.T) {
339+
// LLM omits destination_branch, commit_strategy, name, destination_repo —
340+
// askLLMForRule fills them in. Run an end-to-end suggest call and inspect
341+
// the rendered YAML to confirm defaults landed.
342+
llmResp := `{
343+
"transform_type":"copy",
344+
"transform_from":"src/file.txt",
345+
"transform_to":"dst/file.txt"
346+
}`
347+
o := newSuggestRuleTestUI(&fakeLLMClient{respJSON: llmResp})
348+
349+
w := httptest.NewRecorder()
350+
o.handleSuggestRule(w, suggestRequest(t, `{"source_path":"src/file.txt","target_path":"dst/file.txt","target_repo":"org/from-request"}`))
351+
352+
if w.Code != http.StatusOK {
353+
t.Fatalf("status = %d, body=%s", w.Code, w.Body.String())
354+
}
355+
var resp operatorSuggestRuleResponse
356+
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
357+
t.Fatalf("decode: %v", err)
358+
}
359+
// destination_repo defaulted from target_repo in the request.
360+
if !strings.Contains(resp.RuleYAML, `repo: "org/from-request"`) {
361+
t.Errorf("dest repo default missing from yaml:\n%s", resp.RuleYAML)
362+
}
363+
// destination_branch defaulted to "main".
364+
if !strings.Contains(resp.RuleYAML, `branch: "main"`) {
365+
t.Errorf("default branch missing from yaml:\n%s", resp.RuleYAML)
366+
}
367+
// commit_strategy defaulted to "pull_request".
368+
if !strings.Contains(resp.RuleYAML, `type: "pull_request"`) {
369+
t.Errorf("default commit strategy missing from yaml:\n%s", resp.RuleYAML)
370+
}
371+
// name defaulted to "generated-rule".
372+
if !strings.Contains(resp.RuleYAML, `name: "generated-rule"`) {
373+
t.Errorf("default name missing from yaml:\n%s", resp.RuleYAML)
374+
}
375+
}

0 commit comments

Comments
 (0)