Skip to content

Commit 6ce6a6a

Browse files
authored
Merge pull request #16 from agent-ecosystem/v0.9.1-bugfixes
v0.9.1 bugfixes
2 parents 60092e5 + 8e90b17 commit 6ce6a6a

4 files changed

Lines changed: 32 additions & 5 deletions

File tree

eslint.config.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,6 @@ export default tseslint.config(
1515
},
1616
},
1717
{
18-
ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/'],
18+
ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/', 'docs/.vitepress/dist/'],
1919
},
2020
);

src/checks/content-discoverability/llms-txt-valid.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ interface ValidationResult {
1212

1313
/** Extract markdown links from text: [name](url) */
1414
export function extractMarkdownLinks(content: string): Array<{ name: string; url: string }> {
15-
const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
15+
const linkRegex = /\[([^\]]+)\]\(([^\s)]+)(?:\s+["'][^"']*["'])?\)/g;
1616
const links: Array<{ name: string; url: string }> = [];
1717
let match;
1818
while ((match = linkRegex.exec(content)) !== null) {

src/checks/page-size/content-start-position.ts

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,16 @@ const INLINE_SCRIPT_TOKENS =
2020
/function\s*\(|=>\s*\{|document\.|window\.|localStorage|\.addEventListener|\.getElementById|\.querySelector|\.setAttribute|self\.\\/;
2121
const NAV_MAX_LENGTH = 40;
2222

23-
/** Measure how much of a line is markdown link syntax: `[text](url)` or `[![img](src)](url)` */
23+
/** Measure how much of a line is markdown link syntax: `[text](url)` */
2424
function linkDensity(line: string): number {
25-
// Match plain links [text](url) and image links [![alt](src)](url)
26-
const links = line.match(/\[(?:[^[\]]*|!\[[^\]]*\]\([^)]*\))*\]\([^)]*\)/g);
25+
// Simple [text](url) pattern without nested-group repetition.
26+
// The previous regex (/\[(?:[^[\]]*|!\[...)*\]\(...)/) used a repeated
27+
// alternation group to handle image links, which caused catastrophic
28+
// backtracking on lines containing bracket characters from non-markdown
29+
// content (JSON-LD, Next.js RSC payloads, etc.). The simple pattern
30+
// is sufficient for density estimation since image-link syntax is rare
31+
// in the nav/boilerplate lines this function is designed to classify.
32+
const links = line.match(/\[[^\]]+\]\([^)]+\)/g);
2733
if (!links) return 0;
2834
return links.join('').length / line.length;
2935
}

test/unit/checks/llms-txt-valid.test.ts

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,27 @@ describe('extractMarkdownLinks', () => {
2121
it('returns empty array for no links', () => {
2222
expect(extractMarkdownLinks('Just some text')).toHaveLength(0);
2323
});
24+
25+
it('strips title attributes from single-line links', () => {
26+
const content = '- [Cloud](/path/to/page/ "View the Cloud version")';
27+
const links = extractMarkdownLinks(content);
28+
expect(links).toHaveLength(1);
29+
expect(links[0]).toEqual({ name: 'Cloud', url: '/path/to/page/' });
30+
});
31+
32+
it('strips title attributes from multi-line links', () => {
33+
const content = '- [Cloud](/path/to/page/\n "View the Cloud version")';
34+
const links = extractMarkdownLinks(content);
35+
expect(links).toHaveLength(1);
36+
expect(links[0]).toEqual({ name: 'Cloud', url: '/path/to/page/' });
37+
});
38+
39+
it('strips single-quoted title attributes', () => {
40+
const content = "- [Docs](/docs/ 'Documentation')";
41+
const links = extractMarkdownLinks(content);
42+
expect(links).toHaveLength(1);
43+
expect(links[0]).toEqual({ name: 'Docs', url: '/docs/' });
44+
});
2445
});
2546

2647
describe('llms-txt-valid', () => {

0 commit comments

Comments
 (0)