Merge pull request #16 from agent-ecosystem/v0.9.1-bugfixes

dacharyc · web-flow · commit 6ce6a6a4b2dd · 2026-04-07T19:37:08.000-04:00
v0.9.1 bugfixes
diff --git a/eslint.config.js b/eslint.config.js
@@ -15,6 +15,6 @@ export default tseslint.config(
     },
   },
   {
-    ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/'],
+    ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/', 'docs/.vitepress/dist/'],
   },
 );
diff --git a/src/checks/content-discoverability/llms-txt-valid.ts b/src/checks/content-discoverability/llms-txt-valid.ts
@@ -12,7 +12,7 @@ interface ValidationResult {
 
 /** Extract markdown links from text: [name](url) */
 export function extractMarkdownLinks(content: string): Array<{ name: string; url: string }> {
-  const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
+  const linkRegex = /\[([^\]]+)\]\(([^\s)]+)(?:\s+["'][^"']*["'])?\)/g;
   const links: Array<{ name: string; url: string }> = [];
   let match;
   while ((match = linkRegex.exec(content)) !== null) {
diff --git a/src/checks/page-size/content-start-position.ts b/src/checks/page-size/content-start-position.ts
@@ -20,10 +20,16 @@ const INLINE_SCRIPT_TOKENS =
   /function\s*\(|=>\s*\{|document\.|window\.|localStorage|\.addEventListener|\.getElementById|\.querySelector|\.setAttribute|self\.\\/;
 const NAV_MAX_LENGTH = 40;
 
-/** Measure how much of a line is markdown link syntax: `[text](url)` or `[![img](src)](url)` */
+/** Measure how much of a line is markdown link syntax: `[text](url)` */
 function linkDensity(line: string): number {
-  // Match plain links [text](url) and image links [![alt](src)](url)
-  const links = line.match(/\[(?:[^[\]]*|!\[[^\]]*\]\([^)]*\))*\]\([^)]*\)/g);
+  // Simple [text](url) pattern without nested-group repetition.
+  // The previous regex (/\[(?:[^[\]]*|!\[...)*\]\(...)/) used a repeated
+  // alternation group to handle image links, which caused catastrophic
+  // backtracking on lines containing bracket characters from non-markdown
+  // content (JSON-LD, Next.js RSC payloads, etc.). The simple pattern
+  // is sufficient for density estimation since image-link syntax is rare
+  // in the nav/boilerplate lines this function is designed to classify.
+  const links = line.match(/\[[^\]]+\]\([^)]+\)/g);
   if (!links) return 0;
   return links.join('').length / line.length;
 }
diff --git a/test/unit/checks/llms-txt-valid.test.ts b/test/unit/checks/llms-txt-valid.test.ts
@@ -21,6 +21,27 @@ describe('extractMarkdownLinks', () => {
   it('returns empty array for no links', () => {
     expect(extractMarkdownLinks('Just some text')).toHaveLength(0);
   });
+
+  it('strips title attributes from single-line links', () => {
+    const content = '- [Cloud](/path/to/page/ "View the Cloud version")';
+    const links = extractMarkdownLinks(content);
+    expect(links).toHaveLength(1);
+    expect(links[0]).toEqual({ name: 'Cloud', url: '/path/to/page/' });
+  });
+
+  it('strips title attributes from multi-line links', () => {
+    const content = '- [Cloud](/path/to/page/\n   "View the Cloud version")';
+    const links = extractMarkdownLinks(content);
+    expect(links).toHaveLength(1);
+    expect(links[0]).toEqual({ name: 'Cloud', url: '/path/to/page/' });
+  });
+
+  it('strips single-quoted title attributes', () => {
+    const content = "- [Docs](/docs/ 'Documentation')";
+    const links = extractMarkdownLinks(content);
+    expect(links).toHaveLength(1);
+    expect(links[0]).toEqual({ name: 'Docs', url: '/docs/' });
+  });
 });
 
 describe('llms-txt-valid', () => {

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,6 @@ export default tseslint.config(`
`15`	`15`	`},`
`16`	`16`	`},`
`17`	`17`	`{`
`18`		`- ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/'],`
	`18`	`+ ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/', 'docs/.vitepress/dist/'],`
`19`	`19`	`},`
`20`	`20`	`);`