Skip to content

Commit 935f2bf

Browse files
authored
Merge pull request #62 from agent-ecosystem/reduce-contamination-warnings
Focus contamination warn on PLC potential
2 parents 1fe10b0 + cc6263d commit 935f2bf

8 files changed

Lines changed: 224 additions & 39 deletions

File tree

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ Detects cross-language contamination — skills where code examples in one langu
273273
Contamination Analysis
274274
Contamination level: medium (score: 0.35)
275275
Primary language category: javascript
276-
⚠ Language mismatch: python, shell (2 categories differ from primary)
276+
⚠ Language mismatch: python (1 category differ from primary)
277277
ℹ Multi-interface tool detected: mongodb
278278
Scope breadth: 4
279279

@@ -282,7 +282,7 @@ References Contamination Analysis
282282
Scope breadth: 0
283283
```
284284
285-
Contamination scoring considers three factors: multi-interface tools (0.3 weight), language mismatch across code blocks (0.4 weight), and scope breadth (0.3 weight). Reference files in `references/` are analyzed in aggregate. Use `--per-file` to see a breakdown by individual reference file.
285+
Contamination scoring considers three factors: multi-interface tools (0.3 weight), application language mismatch across code blocks (0.4 weight), and scope breadth (0.3 weight). Auxiliary languages (shell, config formats, query languages, markup) are excluded from the mismatch calculation since they don't cause syntactic confusion with application languages. Reference files in `references/` are analyzed in aggregate. Use `--per-file` to see a breakdown by individual reference file.
286286
287287
### check
288288
@@ -790,10 +790,10 @@ Detects cross-language contamination — where code examples in one language cou
790790

791791
- **Multi-interface tools**: detects tools with many language bindings (MongoDB, AWS, Docker, Kubernetes, Redis, etc.) by scanning the skill name and content
792792
- **Language categories**: maps code block languages to broad categories (shell, javascript, python, java, systems, config, etc.)
793-
- **Language mismatch**: code blocks spanning different language categories
793+
- **Language mismatch**: code blocks spanning different application language categories (auxiliary categories like shell, config, query, and markup are excluded)
794794
- **Technology references**: framework/runtime mentions (Node.js, Django, Flask, Spring, Rails, etc.)
795795
- **Scope breadth**: number of distinct technology categories referenced
796-
- **Contamination score**: 3-factor formula — multi_interface (0.3) + mismatch (0.4) + breadth (0.3), capped at 1.0
796+
- **Contamination score**: 3-factor formula — multi_interface (0.3) + application language mismatch (0.4) + breadth (0.3), capped at 1.0
797797
- **Contamination level**: high (≥0.5), medium (≥0.2), low (<0.2)
798798

799799
### LLM scoring (`score evaluate`)

cmd/root.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
"github.com/agent-ecosystem/skill-validator/types"
1212
)
1313

14-
const version = "v1.5.2"
14+
const version = "v1.5.3"
1515

1616
var (
1717
outputFormat string

contamination/contamination.go

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,19 @@ func Analyze(name, content string, codeLanguages []string) *types.ContaminationR
138138
}
139139
scopeBreadth := len(allScopes)
140140

141-
// Detect language mismatch
141+
// Detect language mismatch (application↔application only).
142+
// Auxiliary categories (shell, config, query, markup) are expected alongside
143+
// any primary language and do not cause syntactic confusion, so they are
144+
// excluded from the mismatch set. The primary application category is
145+
// determined separately from the overall primary, since a skill's most
146+
// common language may be auxiliary (e.g. bash) while still mixing multiple
147+
// application languages.
142148
primaryCategory := findPrimaryCategory(codeLanguages)
149+
primaryAppCategory := findPrimaryApplicationCategory(codeLanguages)
143150
mismatchedCategories := make(map[string]bool)
144-
if primaryCategory != "" {
151+
if primaryAppCategory != "" {
145152
for cat := range langCategories {
146-
if cat != primaryCategory {
153+
if cat != primaryAppCategory && applicationCategories[cat] {
147154
mismatchedCategories[cat] = true
148155
}
149156
}
@@ -283,3 +290,42 @@ func findPrimaryCategory(codeLanguages []string) string {
283290
}
284291
return primary
285292
}
293+
294+
// findPrimaryApplicationCategory returns the most common application language
295+
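// category (ignoring auxiliary categories like shell, config, query, markup).
// It returns "" when no application language is present; ties go to the
// category encountered first in codeLanguages.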
296+
func findPrimaryApplicationCategory(codeLanguages []string) string {
297+
if len(codeLanguages) == 0 {
298+
return ""
299+
}
300+
301+
counts := make(map[string]int)
302+
var order []string
303+
seen := make(map[string]bool)
304+
for _, lang := range codeLanguages {
305+
langLower := strings.ToLower(lang)
306+
for category, members := range languageCategories {
307+
if members[langLower] && applicationCategories[category] {
308+
counts[category]++
309+
if !seen[category] {
310+
seen[category] = true
311+
order = append(order, category)
312+
}
313+
break
314+
}
315+
}
316+
}
317+
318+
if len(counts) == 0 {
319+
return ""
320+
}
321+
322+
maxCount := 0
323+
primary := ""
324+
for _, cat := range order {
325+
if counts[cat] > maxCount {
326+
maxCount = counts[cat]
327+
primary = cat
328+
}
329+
}
330+
return primary
331+
}

contamination/contamination_test.go

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ func TestAnalyze_LanguageMismatch(t *testing.T) {
4242
if r.PrimaryCategory != "python" {
4343
t.Errorf("expected primary category python, got %s", r.PrimaryCategory)
4444
}
45-
if len(r.MismatchedCategories) == 0 {
46-
t.Error("expected mismatched categories")
45+
// Only application↔application mismatches are reported; bash (shell) is excluded
46+
if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "javascript" {
47+
t.Errorf("expected mismatched categories [javascript], got %v", r.MismatchedCategories)
4748
}
4849
}
4950

@@ -172,24 +173,21 @@ func TestMismatchWeight(t *testing.T) {
172173
}
173174

174175
func TestAnalyze_AuxiliaryOnlyMismatches(t *testing.T) {
175-
// python + bash + yaml: auxiliary mismatches should score low
176+
// python + bash + yaml: auxiliary categories are not mismatches
176177
languages := []string{"python", "python", "bash", "yaml"}
177178
r := Analyze("deploy-skill", "Deploy with bash and config.", languages)
178-
if !r.LanguageMismatch {
179-
t.Error("expected language mismatch")
179+
if r.LanguageMismatch {
180+
t.Error("expected no language mismatch when only auxiliary categories differ")
180181
}
181-
// 2 auxiliary mismatches × 0.25 = 0.50 weighted → 0.4 × (0.50/3) ≈ 0.067
182-
// No multi-interface tool, scope breadth = 3 → factor3 = 0.3 * (1/4) = 0.075
183-
// Total ≈ 0.142, should be low
184-
if r.ContaminationLevel != "low" {
185-
t.Errorf("expected low contamination for python+bash+yaml, got %s (score=%f)", r.ContaminationLevel, r.ContaminationScore)
182+
if len(r.MismatchedCategories) != 0 {
183+
t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories)
186184
}
187-
// Verify weights are populated
188-
if w, ok := r.MismatchWeights["shell"]; !ok || w != 0.25 {
189-
t.Errorf("expected shell weight 0.25, got %f (ok=%v)", w, ok)
185+
if len(r.MismatchWeights) != 0 {
186+
t.Errorf("expected no mismatch weights, got %v", r.MismatchWeights)
190187
}
191-
if w, ok := r.MismatchWeights["config"]; !ok || w != 0.25 {
192-
t.Errorf("expected config weight 0.25, got %f (ok=%v)", w, ok)
188+
// Score should be very low with no mismatches
189+
if r.ContaminationLevel != "low" {
190+
t.Errorf("expected low contamination for python+bash+yaml, got %s (score=%f)", r.ContaminationLevel, r.ContaminationScore)
193191
}
194192
}
195193

@@ -213,16 +211,65 @@ func TestAnalyze_ApplicationOnlyMismatches(t *testing.T) {
213211
}
214212

215213
func TestAnalyze_MixedMismatches(t *testing.T) {
216-
// java + config + shell + markup: 3 auxiliary mismatches
214+
// java + config + shell + markup: java is the only application language and the rest are auxiliary, so there is no app↔app mismatch
217215
languages := []string{"java", "java", "yaml", "bash", "html"}
218216
r := Analyze("spring-boot", "Spring Boot app with config.", languages)
217+
if r.LanguageMismatch {
218+
t.Error("expected no language mismatch when only auxiliary categories differ from primary")
219+
}
220+
if len(r.MismatchedCategories) != 0 {
221+
t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories)
222+
}
223+
}
224+
225+
func TestAnalyze_AppAndAuxMixed(t *testing.T) {
226+
// python + javascript + bash + yaml: only javascript is an app mismatch
227+
languages := []string{"python", "python", "javascript", "bash", "yaml"}
228+
r := Analyze("mixed-skill", "Some content.", languages)
219229
if !r.LanguageMismatch {
220-
t.Error("expected language mismatch")
230+
t.Error("expected language mismatch for app↔app")
231+
}
232+
if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "javascript" {
233+
t.Errorf("expected mismatched categories [javascript], got %v", r.MismatchedCategories)
234+
}
235+
// bash and yaml should not appear in weights
236+
if _, ok := r.MismatchWeights["shell"]; ok {
237+
t.Error("shell should not be in mismatch weights")
238+
}
239+
if _, ok := r.MismatchWeights["config"]; ok {
240+
t.Error("config should not be in mismatch weights")
241+
}
242+
}
243+
244+
func TestAnalyze_AuxPrimaryWithAppMismatch(t *testing.T) {
245+
// bash appears most often (overall primary is shell/auxiliary),
246+
// but javascript and python are both present → app↔app mismatch
247+
languages := []string{"bash", "bash", "bash", "javascript", "python"}
248+
r := Analyze("scripty-skill", "Some content.", languages)
249+
if r.PrimaryCategory != "shell" {
250+
t.Errorf("expected overall primary category shell, got %s", r.PrimaryCategory)
251+
}
252+
if !r.LanguageMismatch {
253+
t.Error("expected language mismatch between javascript and python")
254+
}
255+
// javascript and python tie at one block each; the tie goes to the first-encountered application category (javascript), so python is the only mismatch
256+
if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "python" {
257+
t.Errorf("expected mismatched categories [python], got %v", r.MismatchedCategories)
258+
}
259+
}
260+
261+
func TestAnalyze_PurelyAuxiliary(t *testing.T) {
262+
// Only auxiliary languages — no application languages at all
263+
languages := []string{"bash", "yaml", "json", "sh"}
264+
r := Analyze("config-skill", "Just config and shell.", languages)
265+
if r.LanguageMismatch {
266+
t.Error("expected no language mismatch with only auxiliary languages")
267+
}
268+
if len(r.MismatchedCategories) != 0 {
269+
t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories)
221270
}
222-
// 3 auxiliary mismatches × 0.25 = 0.75 weighted → 0.4 × (0.75/3) = 0.1
223-
// Should be significantly lower than old score of 0.4 × (3/3) = 0.4
224-
if r.ContaminationScore >= 0.4 {
225-
t.Errorf("expected score < 0.4 for java+config+shell+markup, got %f", r.ContaminationScore)
271+
if r.ContaminationScore >= 0.2 {
272+
t.Errorf("expected low score for purely auxiliary languages, got %f", r.ContaminationScore)
226273
}
227274
}
228275

orchestrate/orchestrate_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,54 @@ func TestRunContaminationAnalysis_RichSkill(t *testing.T) {
435435
}
436436
}
437437

438+
func TestRunContaminationAnalysis_RichSkill_AppMismatch(t *testing.T) {
439+
dir := fixtureDir(t, "rich-skill")
440+
r := RunContaminationAnalysis(dir)
441+
cr := r.ContaminationReport
442+
if cr == nil {
443+
t.Fatal("expected ContaminationReport")
444+
}
445+
// rich-skill has bash+javascript+python+yaml: javascript↔python is an app↔app mismatch
446+
if !cr.LanguageMismatch {
447+
t.Error("expected language mismatch for javascript↔python")
448+
}
449+
// Auxiliary categories (shell, config) should NOT appear in mismatches
450+
for _, cat := range cr.MismatchedCategories {
451+
if cat == "shell" || cat == "config" {
452+
t.Errorf("auxiliary category %q should not be in mismatched categories", cat)
453+
}
454+
}
455+
456+
var buf bytes.Buffer
457+
report.Print(&buf, r, false)
458+
output := buf.String()
459+
if !strings.Contains(output, "Language mismatch") {
460+
t.Error("expected Language mismatch warning in output for app↔app mismatch")
461+
}
462+
}
463+
464+
func TestRunContaminationAnalysis_AuxiliaryOnly_NoMismatch(t *testing.T) {
465+
dir := fixtureDir(t, "auxiliary-only-skill")
466+
r := RunContaminationAnalysis(dir)
467+
cr := r.ContaminationReport
468+
if cr == nil {
469+
t.Fatal("expected ContaminationReport")
470+
}
471+
if cr.LanguageMismatch {
472+
t.Error("expected no language mismatch for auxiliary-only languages")
473+
}
474+
if len(cr.MismatchedCategories) != 0 {
475+
t.Errorf("expected no mismatched categories, got %v", cr.MismatchedCategories)
476+
}
477+
478+
var buf bytes.Buffer
479+
report.Print(&buf, r, false)
480+
output := buf.String()
481+
if strings.Contains(output, "Language mismatch") {
482+
t.Error("Language mismatch warning should not appear for auxiliary-only languages")
483+
}
484+
}
485+
438486
func TestRunContaminationAnalysis_BrokenDir(t *testing.T) {
439487
dir := t.TempDir()
440488
r := RunContaminationAnalysis(dir)

report/json_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) {
400400
CodeLanguages: []string{"python", "javascript", "bash"},
401401
LanguageCategories: []string{"python", "javascript", "shell"},
402402
PrimaryCategory: "python",
403-
MismatchedCategories: []string{"javascript", "shell"},
403+
MismatchedCategories: []string{"javascript"},
404404
LanguageMismatch: true,
405405
TechReferences: []string{"javascript", "python"},
406406
ScopeBreadth: 4,
@@ -446,8 +446,8 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) {
446446
}
447447

448448
mismatched := ca["mismatched_categories"].([]any)
449-
if len(mismatched) != 2 {
450-
t.Errorf("mismatched_categories length = %d, want 2", len(mismatched))
449+
if len(mismatched) != 1 {
450+
t.Errorf("mismatched_categories length = %d, want 1", len(mismatched))
451451
}
452452
}
453453

report/report_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -556,10 +556,10 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) {
556556
ContaminationLevel: "medium",
557557
ContaminationScore: 0.35,
558558
ScopeBreadth: 3,
559-
CodeLanguages: []string{"python", "bash"},
560-
LanguageCategories: []string{"python", "shell"},
559+
CodeLanguages: []string{"python", "javascript"},
560+
LanguageCategories: []string{"python", "javascript"},
561561
PrimaryCategory: "python",
562-
MismatchedCategories: []string{"shell"},
562+
MismatchedCategories: []string{"javascript"},
563563
LanguageMismatch: true,
564564
},
565565
}
@@ -571,8 +571,8 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) {
571571
if !strings.Contains(output, colorYellow+"medium") {
572572
t.Error("expected yellow color for medium level")
573573
}
574-
if !strings.Contains(output, "Language mismatch: shell") {
575-
t.Error("expected language mismatch warning with shell")
574+
if !strings.Contains(output, "Language mismatch: javascript") {
575+
t.Error("expected language mismatch warning with javascript")
576576
}
577577
if !strings.Contains(output, "1 category differ from primary") {
578578
t.Error("expected singular category count")
@@ -590,7 +590,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) {
590590
CodeLanguages: []string{"python", "javascript", "bash", "ruby"},
591591
LanguageCategories: []string{"python", "javascript", "shell", "ruby"},
592592
PrimaryCategory: "python",
593-
MismatchedCategories: []string{"javascript", "ruby", "shell"},
593+
MismatchedCategories: []string{"javascript", "ruby"},
594594
LanguageMismatch: true,
595595
MultiInterfaceTools: []string{"mongodb"},
596596
},
@@ -606,7 +606,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) {
606606
if !strings.Contains(output, "Multi-interface tool detected: mongodb") {
607607
t.Error("expected multi-interface tool warning")
608608
}
609-
if !strings.Contains(output, "3 categories differ") {
609+
if !strings.Contains(output, "2 categories differ") {
610610
t.Error("expected plural categories count")
611611
}
612612
if !strings.Contains(output, "Scope breadth: 5") {
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
---
2+
name: auxiliary-only-skill
3+
description: A skill that uses only shell and config code blocks for testing contamination.
4+
---
5+
# Auxiliary Only Skill
6+
7+
This skill uses bash commands and config files but no application languages.
8+
9+
## Setup
10+
11+
Install the required tools:
12+
13+
```bash
14+
brew install jq yq
15+
```
16+
17+
## Configuration
18+
19+
Create a config file:
20+
21+
```yaml
22+
server:
23+
host: localhost
24+
port: 8080
25+
```
26+
27+
Alternatively use JSON:
28+
29+
```json
30+
{
31+
"server": {
32+
"host": "localhost",
33+
"port": 8080
34+
}
35+
}
36+
```
37+
38+
## Running
39+
40+
Start the service:
41+
42+
```sh
43+
./start.sh --config config.yaml
44+
```

0 commit comments

Comments
 (0)