Skip to content

Commit d1d8e2e

Browse files
committed
fix: keyword stuffing detection checks for minimum average words per segment
1 parent 0256ab5 commit d1d8e2e

2 files changed

Lines changed: 20 additions & 0 deletions

File tree

structure/frontmatter.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ const (
117117
// maxShortSegmentPct is the percentage of comma segments that must be
118118
// "short" (≤3 words) for the comma-list heuristic to fire.
119119
maxShortSegmentPct = 60
120+
121+
// minAvgWordsPerSegment is the average number of words per comma-separated
122+
// segment at or above which a sentence is treated as prose with inline
123+
// lists rather than a keyword dump, even if many segments are short.
124+
minAvgWordsPerSegment = 3
120125
)
121126

122127
func checkDescriptionKeywordStuffing(ctx types.ResultContext, desc string) []types.Result {
@@ -165,12 +170,19 @@ func checkDescriptionKeywordStuffing(ctx types.ResultContext, desc string) []typ
165170
}
166171
if len(segments) >= minCommaSegments {
167172
shortCount := 0
173+
totalWords := 0
168174
for _, seg := range segments {
169175
words := strings.Fields(strings.TrimSpace(seg))
176+
totalWords += len(words)
170177
if len(words) <= 3 {
171178
shortCount++
172179
}
173180
}
181+
// Sentences averaging at least minAvgWordsPerSegment words per segment
182+
// are treated as prose, not keyword dumps, even if many segments are short.
183+
if totalWords >= minAvgWordsPerSegment*len(segments) {
184+
continue
185+
}
174186
if shortCount*100/len(segments) >= maxShortSegmentPct {
175187
return []types.Result{ctx.Warnf(
176188
"description has %d comma-separated segments, most very short — "+

structure/frontmatter_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,14 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) {
237237
requireResultContaining(t, results, types.Warning, "comma-separated segments")
238238
})
239239

240+
t.Run("prose with inline enumeration is fine (issue #71)", func(t *testing.T) {
241+
desc := "Helps agents write and edit interface copy (microcopy) for digital products — buttons, labels, error messages, forms, onboarding flows, empty states, and help text. Use this skill whenever you need to produce or improve any text that appears in an app, website, or software UI. It applies four core quality standards (purposeful, concise, conversational, and clear) and ships with accessibility guidelines, research-backed readability benchmarks, error-message patterns, tone adaptation frameworks, and fillable templates."
242+
s := makeSkill("/tmp/my-skill", "my-skill", desc)
243+
results := CheckFrontmatter(s, Options{})
244+
requireNoResultContaining(t, results, types.Warning, "keyword")
245+
requireNoResultContaining(t, results, types.Warning, "comma-separated")
246+
})
247+
240248
t.Run("description with abbreviations splits correctly", func(t *testing.T) {
241249
desc := "Use for e.g. vector search and embedding workflows. Supports multiple backends, distributed indexing, and query optimization."
242250
s := makeSkill("/tmp/my-skill", "my-skill", desc)

0 commit comments

Comments
 (0)