From fa90af8700726d60d346341f19758bcf79753063 Mon Sep 17 00:00:00 2001 From: NatanTechOfNY Date: Thu, 23 Oct 2025 15:26:45 -0400 Subject: [PATCH 1/2] fix(website): updated askai markdown documentation --- .../docs/v4/askai-markdown-indexing.mdx | 134 +++++++++++++----- 1 file changed, 101 insertions(+), 33 deletions(-) diff --git a/packages/website/docs/v4/askai-markdown-indexing.mdx b/packages/website/docs/v4/askai-markdown-indexing.mdx index 5b645ebde..f955c3ff1 100644 --- a/packages/website/docs/v4/askai-markdown-indexing.mdx +++ b/packages/website/docs/v4/askai-markdown-indexing.mdx @@ -82,19 +82,30 @@ For users who need advanced customization or want to understand the underlying c indexName: "my-markdown-index", pathsToMatch: ["https://example.com/docs/**"], recordExtractor: ({ $, url, helpers }) => { - const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.) + // Target only the main content, excluding navigation + const text = helpers.markdown( + "main > *:not(nav):not(header):not(.breadcrumb)", + ); + if (text === "") return []; - // Extract language or other attributes as needed. Optional const language = $("html").attr("lang") || "en"; + // Extract cleaner title (without " - Algolia" suffix) + const rawTitle = $("head > title").text(); + const title = rawTitle.replace(/ - Algolia$/, ""); + + // Get the main heading for better searchability + const h1 = $("main h1").first().text(); + return helpers.splitTextIntoRecords({ text, baseRecord: { url, objectID: url, - title: $("head > title").text(), - lang: language, // Add more attributes as needed + title: title || h1, + heading: h1, // Add main heading as separate field + lang: language, }, maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records. // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost. @@ -110,12 +121,15 @@ For users who need advanced customization or want to understand the underlying c ```js // initialIndexSettings: { ..., "my-markdown-index": { - attributesForFaceting: ["lang"], // Add more if you extract more attributes + attributesForFaceting: ["lang"], ignorePlurals: true, - minProximity: 4, + minProximity: 1, removeStopWords: false, - searchableAttributes: ["unordered(title)", "unordered(text)"], - removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback. + searchableAttributes: ["title", "heading", "unordered(text)"], + removeWordsIfNoResults: "lastWords", + attributesToHighlight: ["title", "text"], + typoTolerance: false, + advancedSyntax: false, }, // ...}, ``` @@ -397,20 +411,30 @@ import TabItem from '@theme/TabItem'; indexName: "my-markdown-index", pathsToMatch: ["https://example.com/**"], recordExtractor: ({ $, url, helpers }) => { - const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.) + // Target only the main content, excluding navigation + const text = helpers.markdown( + "main > *:not(nav):not(header):not(.breadcrumb)", + ); + if (text === "") return []; - // Customize selectors or meta extraction as needed. Optional const language = $("html").attr("lang") || "en"; + // Extract cleaner title (without " - Algolia" suffix) + const rawTitle = $("head > title").text(); + const title = rawTitle.replace(/ - Algolia$/, ""); + + // Get the main heading for better searchability + const h1 = $("main h1").first().text(); + return helpers.splitTextIntoRecords({ text, baseRecord: { url, objectID: url, - title: $("head > title").text(), - // Add more optional attributes to the record - lang: language + title: title || h1, + heading: h1, // Add main heading as separate field + lang: language, }, maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records. // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost. @@ -424,10 +448,13 @@ import TabItem from '@theme/TabItem'; "my-markdown-index": { attributesForFaceting: ["lang"], // Recommended if you add more attributes outside of objectID ignorePlurals: true, - minProximity: 4, + minProximity: 1, removeStopWords: false, - searchableAttributes: ["unordered(title)", "unordered(text)"], - removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback. + searchableAttributes: ["title", "heading", "unordered(text)"], + removeWordsIfNoResults: "lastWords", + attributesToHighlight: ["title", "text"], + typoTolerance: false, + advancedSyntax: false, }, // ...}, ``` @@ -446,7 +473,11 @@ import TabItem from '@theme/TabItem'; indexName: "my-markdown-index", pathsToMatch: ["https://example.com/docs/**"], recordExtractor: ({ $, url, helpers }) => { - const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.) + // Target only the main content, excluding navigation + const text = helpers.markdown( + "main > *:not(nav):not(header):not(.breadcrumb)", + ); + if (text === "") return []; // Extract meta tag values. These are required for Docusaurus @@ -457,12 +488,20 @@ import TabItem from '@theme/TabItem'; const docusaurus_tag = $('meta[name="docsearch:docusaurus_tag"]').attr("content") || ""; + // Extract cleaner title (without " - Algolia" suffix) + const rawTitle = $("head > title").text(); + const title = rawTitle.replace(/ - Algolia$/, ""); + + // Get the main heading for better searchability + const h1 = $("main h1").first().text(); + return helpers.splitTextIntoRecords({ text, baseRecord: { url, objectID: url, - title: $("head > title").text(), + title: title || h1, + heading: h1, // Add main heading as separate field lang: language, // Required for Docusaurus language, // Required for Docusaurus version: version.split(","), // in case there are multiple versions. Required for Docusaurus @@ -483,10 +522,13 @@ import TabItem from '@theme/TabItem'; "my-markdown-index": { attributesForFaceting: ["lang", "language", "version", "docusaurus_tag"], // Required for Docusaurus ignorePlurals: true, - minProximity: 4, + minProximity: 1, removeStopWords: false, - searchableAttributes: ["unordered(title)", "unordered(text)"], - removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback. + searchableAttributes: ["title", "heading", "unordered(text)"], + removeWordsIfNoResults: "lastWords", + attributesToHighlight: ["title", "text"], + typoTolerance: false, + advancedSyntax: false, }, // ...}, ``` @@ -505,19 +547,29 @@ import TabItem from '@theme/TabItem'; indexName: "my-markdown-index", pathsToMatch: ["https://example.com/docs/**"], recordExtractor: ({ $, url, helpers }) => { - const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.) + // Target only the main content, excluding navigation + const text = helpers.markdown( + "main > *:not(nav):not(header):not(.breadcrumb)", + ); + if (text === "") return []; - // Extract meta tag values. These are required for VitePress const language = $("html").attr("lang") || "en"; + // Extract cleaner title (without " - Algolia" suffix) + const rawTitle = $("head > title").text(); + const title = rawTitle.replace(/ - Algolia$/, ""); + + // Get the main heading for better searchability + const h1 = $("main h1").first().text(); return helpers.splitTextIntoRecords({ text, baseRecord: { url, - title: $("head > title").text(), objectID: url, + title: title || h1, + heading: h1, // Add main heading as separate field lang: language, // Required for VitePress }, maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records. @@ -532,10 +584,13 @@ import TabItem from '@theme/TabItem'; "my-markdown-index": { attributesForFaceting: ["lang"], // Required for VitePress ignorePlurals: true, - minProximity: 4, + minProximity: 1, removeStopWords: false, - searchableAttributes: ["unordered(title)", "unordered(text)"], - removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback. + searchableAttributes: ["title", "heading", "unordered(text)"], + removeWordsIfNoResults: "lastWords", + attributesToHighlight: ["title", "text"], + typoTolerance: false, + advancedSyntax: false, }, // ...}, ``` @@ -554,19 +609,29 @@ import TabItem from '@theme/TabItem'; indexName: "my-markdown-index", pathsToMatch: ["https://example.com/docs/**"], recordExtractor: ({ $, url, helpers }) => { - const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.) + // Target only the main content, excluding navigation + const text = helpers.markdown( + "main > *:not(nav):not(header):not(.breadcrumb)", + ); + if (text === "") return []; - // Extract meta tag values. These are required for Astro/StarLight const language = $("html").attr("lang") || "en"; + // Extract cleaner title (without " - Algolia" suffix) + const rawTitle = $("head > title").text(); + const title = rawTitle.replace(/ - Algolia$/, ""); + + // Get the main heading for better searchability + const h1 = $("main h1").first().text(); return helpers.splitTextIntoRecords({ text, baseRecord: { url, - title: $("head > title").text(), objectID: url, + title: title || h1, + heading: h1, // Add main heading as separate field lang: language, // Required for Astro/StarLight }, maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records. @@ -581,10 +646,13 @@ import TabItem from '@theme/TabItem'; "my-markdown-index": { attributesForFaceting: ["lang"], // Required for Astro/StarLight ignorePlurals: true, - minProximity: 4, + minProximity: 1, removeStopWords: false, - searchableAttributes: ["unordered(title)", "unordered(text)"], - removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback. + searchableAttributes: ["title", "heading", "unordered(text)"], + removeWordsIfNoResults: "lastWords", + attributesToHighlight: ["title", "text"], + typoTolerance: false, + advancedSyntax: false, }, // ...}, ``` From 8570785cad69339d18516c136dd54d4a87efb23a Mon Sep 17 00:00:00 2001 From: NatanTechOfNY Date: Thu, 23 Oct 2025 15:35:09 -0400 Subject: [PATCH 2/2] fix(website): updated askai markdown documentation --- .../docs/v4/askai-markdown-indexing.mdx | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/packages/website/docs/v4/askai-markdown-indexing.mdx b/packages/website/docs/v4/askai-markdown-indexing.mdx index f955c3ff1..c95b07337 100644 --- a/packages/website/docs/v4/askai-markdown-indexing.mdx +++ b/packages/website/docs/v4/askai-markdown-indexing.mdx @@ -91,9 +91,7 @@ For users who need advanced customization or want to understand the underlying c const language = $("html").attr("lang") || "en"; - // Extract cleaner title (without " - Algolia" suffix) - const rawTitle = $("head > title").text(); - const title = rawTitle.replace(/ - Algolia$/, ""); + const title = $("head > title").text(); // Get the main heading for better searchability const h1 = $("main h1").first().text(); @@ -420,9 +418,7 @@ import TabItem from '@theme/TabItem'; const language = $("html").attr("lang") || "en"; - // Extract cleaner title (without " - Algolia" suffix) - const rawTitle = $("head > title").text(); - const title = rawTitle.replace(/ - Algolia$/, ""); + const title = $("head > title").text(); // Get the main heading for better searchability const h1 = $("main h1").first().text(); @@ -488,9 +484,7 @@ import TabItem from '@theme/TabItem'; const docusaurus_tag = $('meta[name="docsearch:docusaurus_tag"]').attr("content") || ""; - // Extract cleaner title (without " - Algolia" suffix) - const rawTitle = $("head > title").text(); - const title = rawTitle.replace(/ - Algolia$/, ""); + const title = $("head > title").text(); // Get the main heading for better searchability const h1 = $("main h1").first().text(); @@ -556,9 +550,7 @@ import TabItem from '@theme/TabItem'; const language = $("html").attr("lang") || "en"; - // Extract cleaner title (without " - Algolia" suffix) - const rawTitle = $("head > title").text(); - const title = rawTitle.replace(/ - Algolia$/, ""); + const title = $("head > title").text(); // Get the main heading for better searchability const h1 = $("main h1").first().text(); @@ -618,9 +610,7 @@ import TabItem from '@theme/TabItem'; const language = $("html").attr("lang") || "en"; - // Extract cleaner title (without " - Algolia" suffix) - const rawTitle = $("head > title").text(); - const title = rawTitle.replace(/ - Algolia$/, ""); + const title = $("head > title").text(); // Get the main heading for better searchability const h1 = $("main h1").first().text();