Skip to content

Commit fa90af8

Browse files
committed
fix(website): updated askai markdown documentation
1 parent 5e9bdfe commit fa90af8

File tree

1 file changed

+101
-33
lines changed

1 file changed

+101
-33
lines changed

packages/website/docs/v4/askai-markdown-indexing.mdx

Lines changed: 101 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -82,19 +82,30 @@ For users who need advanced customization or want to understand the underlying c
8282
indexName: "my-markdown-index",
8383
pathsToMatch: ["https://example.com/docs/**"],
8484
recordExtractor: ({ $, url, helpers }) => {
85-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
85+
// Target only the main content, excluding navigation
86+
const text = helpers.markdown(
87+
"main > *:not(nav):not(header):not(.breadcrumb)",
88+
);
89+
8690
if (text === "") return [];
8791

88-
// Extract language or other attributes as needed. Optional
8992
const language = $("html").attr("lang") || "en";
9093

94+
// Extract a cleaner title by stripping the site-name suffix (" - Algolia" here; adjust to match your site)
95+
const rawTitle = $("head > title").text();
96+
const title = rawTitle.replace(/ - Algolia$/, "");
97+
98+
// Get the main heading for better searchability
99+
const h1 = $("main h1").first().text();
100+
91101
return helpers.splitTextIntoRecords({
92102
text,
93103
baseRecord: {
94104
url,
95105
objectID: url,
96-
title: $("head > title").text(),
97-
lang: language, // Add more attributes as needed
106+
title: title || h1,
107+
heading: h1, // Add main heading as separate field
108+
lang: language,
98109
},
99110
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
100111
// Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -110,12 +121,15 @@ For users who need advanced customization or want to understand the underlying c
110121
```js
111122
// initialIndexSettings: { ...,
112123
"my-markdown-index": {
113-
attributesForFaceting: ["lang"], // Add more if you extract more attributes
124+
attributesForFaceting: ["lang"],
114125
ignorePlurals: true,
115-
minProximity: 4,
126+
minProximity: 1,
116127
removeStopWords: false,
117-
searchableAttributes: ["unordered(title)", "unordered(text)"],
118-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
128+
searchableAttributes: ["title", "heading", "unordered(text)"],
129+
removeWordsIfNoResults: "lastWords",
130+
attributesToHighlight: ["title", "text"],
131+
typoTolerance: false,
132+
advancedSyntax: false,
119133
},
120134
// ...},
121135
```
@@ -397,20 +411,30 @@ import TabItem from '@theme/TabItem';
397411
indexName: "my-markdown-index",
398412
pathsToMatch: ["https://example.com/**"],
399413
recordExtractor: ({ $, url, helpers }) => {
400-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
414+
// Target only the main content, excluding navigation
415+
const text = helpers.markdown(
416+
"main > *:not(nav):not(header):not(.breadcrumb)",
417+
);
418+
401419
if (text === "") return [];
402420

403-
// Customize selectors or meta extraction as needed. Optional
404421
const language = $("html").attr("lang") || "en";
405422

423+
// Extract a cleaner title by stripping the site-name suffix (" - Algolia" here; adjust to match your site)
424+
const rawTitle = $("head > title").text();
425+
const title = rawTitle.replace(/ - Algolia$/, "");
426+
427+
// Get the main heading for better searchability
428+
const h1 = $("main h1").first().text();
429+
406430
return helpers.splitTextIntoRecords({
407431
text,
408432
baseRecord: {
409433
url,
410434
objectID: url,
411-
title: $("head > title").text(),
412-
// Add more optional attributes to the record
413-
lang: language
435+
title: title || h1,
436+
heading: h1, // Add main heading as separate field
437+
lang: language,
414438
},
415439
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
416440
// Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -424,10 +448,13 @@ import TabItem from '@theme/TabItem';
424448
"my-markdown-index": {
425449
attributesForFaceting: ["lang"], // Recommended if you add more attributes outside of objectID
426450
ignorePlurals: true,
427-
minProximity: 4,
451+
minProximity: 1,
428452
removeStopWords: false,
429-
searchableAttributes: ["unordered(title)", "unordered(text)"],
430-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
453+
searchableAttributes: ["title", "heading", "unordered(text)"],
454+
removeWordsIfNoResults: "lastWords",
455+
attributesToHighlight: ["title", "text"],
456+
typoTolerance: false,
457+
advancedSyntax: false,
431458
},
432459
// ...},
433460
```
@@ -446,7 +473,11 @@ import TabItem from '@theme/TabItem';
446473
indexName: "my-markdown-index",
447474
pathsToMatch: ["https://example.com/docs/**"],
448475
recordExtractor: ({ $, url, helpers }) => {
449-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
476+
// Target only the main content, excluding navigation
477+
const text = helpers.markdown(
478+
"main > *:not(nav):not(header):not(.breadcrumb)",
479+
);
480+
450481
if (text === "") return [];
451482

452483
// Extract meta tag values. These are required for Docusaurus
@@ -457,12 +488,20 @@ import TabItem from '@theme/TabItem';
457488
const docusaurus_tag =
458489
$('meta[name="docsearch:docusaurus_tag"]').attr("content") || "";
459490

491+
// Extract a cleaner title by stripping the site-name suffix (" - Algolia" here; adjust to match your site)
492+
const rawTitle = $("head > title").text();
493+
const title = rawTitle.replace(/ - Algolia$/, "");
494+
495+
// Get the main heading for better searchability
496+
const h1 = $("main h1").first().text();
497+
460498
return helpers.splitTextIntoRecords({
461499
text,
462500
baseRecord: {
463501
url,
464502
objectID: url,
465-
title: $("head > title").text(),
503+
title: title || h1,
504+
heading: h1, // Add main heading as separate field
466505
lang: language, // Required for Docusaurus
467506
language, // Required for Docusaurus
468507
version: version.split(","), // in case there are multiple versions. Required for Docusaurus
@@ -483,10 +522,13 @@ import TabItem from '@theme/TabItem';
483522
"my-markdown-index": {
484523
attributesForFaceting: ["lang", "language", "version", "docusaurus_tag"], // Required for Docusaurus
485524
ignorePlurals: true,
486-
minProximity: 4,
525+
minProximity: 1,
487526
removeStopWords: false,
488-
searchableAttributes: ["unordered(title)", "unordered(text)"],
489-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
527+
searchableAttributes: ["title", "heading", "unordered(text)"],
528+
removeWordsIfNoResults: "lastWords",
529+
attributesToHighlight: ["title", "text"],
530+
typoTolerance: false,
531+
advancedSyntax: false,
490532
},
491533
// ...},
492534
```
@@ -505,19 +547,29 @@ import TabItem from '@theme/TabItem';
505547
indexName: "my-markdown-index",
506548
pathsToMatch: ["https://example.com/docs/**"],
507549
recordExtractor: ({ $, url, helpers }) => {
508-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
550+
// Target only the main content, excluding navigation
551+
const text = helpers.markdown(
552+
"main > *:not(nav):not(header):not(.breadcrumb)",
553+
);
554+
509555
if (text === "") return [];
510556

511-
// Extract meta tag values. These are required for VitePress
512557
const language = $("html").attr("lang") || "en";
513558

559+
// Extract a cleaner title by stripping the site-name suffix (" - Algolia" here; adjust to match your site)
560+
const rawTitle = $("head > title").text();
561+
const title = rawTitle.replace(/ - Algolia$/, "");
562+
563+
// Get the main heading for better searchability
564+
const h1 = $("main h1").first().text();
514565

515566
return helpers.splitTextIntoRecords({
516567
text,
517568
baseRecord: {
518569
url,
519-
title: $("head > title").text(),
520570
objectID: url,
571+
title: title || h1,
572+
heading: h1, // Add main heading as separate field
521573
lang: language, // Required for VitePress
522574
},
523575
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
@@ -532,10 +584,13 @@ import TabItem from '@theme/TabItem';
532584
"my-markdown-index": {
533585
attributesForFaceting: ["lang"], // Required for VitePress
534586
ignorePlurals: true,
535-
minProximity: 4,
587+
minProximity: 1,
536588
removeStopWords: false,
537-
searchableAttributes: ["unordered(title)", "unordered(text)"],
538-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
589+
searchableAttributes: ["title", "heading", "unordered(text)"],
590+
removeWordsIfNoResults: "lastWords",
591+
attributesToHighlight: ["title", "text"],
592+
typoTolerance: false,
593+
advancedSyntax: false,
539594
},
540595
// ...},
541596
```
@@ -554,19 +609,29 @@ import TabItem from '@theme/TabItem';
554609
indexName: "my-markdown-index",
555610
pathsToMatch: ["https://example.com/docs/**"],
556611
recordExtractor: ({ $, url, helpers }) => {
557-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
612+
// Target only the main content, excluding navigation
613+
const text = helpers.markdown(
614+
"main > *:not(nav):not(header):not(.breadcrumb)",
615+
);
616+
558617
if (text === "") return [];
559618

560-
// Extract meta tag values. These are required for Astro/StarLight
561619
const language = $("html").attr("lang") || "en";
562620

621+
// Extract a cleaner title by stripping the site-name suffix (" - Algolia" here; adjust to match your site)
622+
const rawTitle = $("head > title").text();
623+
const title = rawTitle.replace(/ - Algolia$/, "");
624+
625+
// Get the main heading for better searchability
626+
const h1 = $("main h1").first().text();
563627

564628
return helpers.splitTextIntoRecords({
565629
text,
566630
baseRecord: {
567631
url,
568-
title: $("head > title").text(),
569632
objectID: url,
633+
title: title || h1,
634+
heading: h1, // Add main heading as separate field
570635
lang: language, // Required for Astro/StarLight
571636
},
572637
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
@@ -581,10 +646,13 @@ import TabItem from '@theme/TabItem';
581646
"my-markdown-index": {
582647
attributesForFaceting: ["lang"], // Required for Astro/StarLight
583648
ignorePlurals: true,
584-
minProximity: 4,
649+
minProximity: 1,
585650
removeStopWords: false,
586-
searchableAttributes: ["unordered(title)", "unordered(text)"],
587-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
651+
searchableAttributes: ["title", "heading", "unordered(text)"],
652+
removeWordsIfNoResults: "lastWords",
653+
attributesToHighlight: ["title", "text"],
654+
typoTolerance: false,
655+
advancedSyntax: false,
588656
},
589657
// ...},
590658
```

0 commit comments

Comments
 (0)