From fa90af8700726d60d346341f19758bcf79753063 Mon Sep 17 00:00:00 2001
From: NatanTechOfNY <natanyagudayev@gmail.com>
Date: Thu, 23 Oct 2025 15:26:45 -0400
Subject: [PATCH 1/2] fix(website): improve askai markdown extractor and index settings

---
 .../docs/v4/askai-markdown-indexing.mdx       | 134 +++++++++++++-----
 1 file changed, 101 insertions(+), 33 deletions(-)

diff --git a/packages/website/docs/v4/askai-markdown-indexing.mdx b/packages/website/docs/v4/askai-markdown-indexing.mdx
index 5b645ebde..f955c3ff1 100644
--- a/packages/website/docs/v4/askai-markdown-indexing.mdx
+++ b/packages/website/docs/v4/askai-markdown-indexing.mdx
@@ -82,19 +82,30 @@ For users who need advanced customization or want to understand the underlying c
   indexName: "my-markdown-index",
   pathsToMatch: ["https://example.com/docs/**"],
   recordExtractor: ({ $, url, helpers }) => {
-    const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
+    // Target only the main content, excluding navigation
+    const text = helpers.markdown(
+      "main > *:not(nav):not(header):not(.breadcrumb)",
+    );
+
     if (text === "") return [];
 
-    // Extract language or other attributes as needed. Optional
     const language = $("html").attr("lang") || "en";
 
+    // Extract cleaner title (without " - Algolia" suffix)
+    const rawTitle = $("head > title").text();
+    const title = rawTitle.replace(/ - Algolia$/, "");
+
+    // Get the main heading for better searchability
+    const h1 = $("main h1").first().text();
+
     return helpers.splitTextIntoRecords({
       text,
       baseRecord: {
         url,
         objectID: url,
-        title: $("head > title").text(),
-        lang: language, // Add more attributes as needed
+        title: title || h1,
+        heading: h1, // Add main heading as separate field
+        lang: language,
       },
       maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
       // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -110,12 +121,15 @@ For users who need advanced customization or want to understand the underlying c
 ```js
 // initialIndexSettings: { ...,
 "my-markdown-index": {
-  attributesForFaceting: ["lang"], // Add more if you extract more attributes
+  attributesForFaceting: ["lang"],
   ignorePlurals: true,
-  minProximity: 4,
+  minProximity: 1,
   removeStopWords: false,
-  searchableAttributes: ["unordered(title)", "unordered(text)"],
-  removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
+  searchableAttributes: ["title", "heading", "unordered(text)"],
+  removeWordsIfNoResults: "lastWords",
+  attributesToHighlight: ["title", "text"],
+  typoTolerance: false,
+  advancedSyntax: false,
 },
 // ...},
 ```
@@ -397,20 +411,30 @@ import TabItem from '@theme/TabItem';
   indexName: "my-markdown-index",
   pathsToMatch: ["https://example.com/**"],
   recordExtractor: ({ $, url, helpers }) => {
-    const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
+    // Target only the main content, excluding navigation
+    const text = helpers.markdown(
+      "main > *:not(nav):not(header):not(.breadcrumb)",
+    );
+
     if (text === "") return [];
 
-    // Customize selectors or meta extraction as needed. Optional
     const language = $("html").attr("lang") || "en";
 
+    // Extract cleaner title (without " - Algolia" suffix)
+    const rawTitle = $("head > title").text();
+    const title = rawTitle.replace(/ - Algolia$/, "");
+
+    // Get the main heading for better searchability
+    const h1 = $("main h1").first().text();
+
     return helpers.splitTextIntoRecords({
       text,
       baseRecord: {
         url,
         objectID: url,
-        title: $("head > title").text(),
-        // Add more optional attributes to the record
-        lang: language
+        title: title || h1,
+        heading: h1, // Add main heading as separate field
+        lang: language,
       },
       maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
       // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -424,10 +448,13 @@ import TabItem from '@theme/TabItem';
 "my-markdown-index": {
   attributesForFaceting: ["lang"], // Recommended if you add more attributes outside of objectID
   ignorePlurals: true,
-  minProximity: 4,
+  minProximity: 1,
   removeStopWords: false,
-  searchableAttributes: ["unordered(title)", "unordered(text)"],
-  removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
+  searchableAttributes: ["title", "heading", "unordered(text)"],
+  removeWordsIfNoResults: "lastWords",
+  attributesToHighlight: ["title", "text"],
+  typoTolerance: false,
+  advancedSyntax: false,
 },
 // ...},
 ```
@@ -446,7 +473,11 @@ import TabItem from '@theme/TabItem';
   indexName: "my-markdown-index",
   pathsToMatch: ["https://example.com/docs/**"],
   recordExtractor: ({ $, url, helpers }) => {
-    const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
+    // Target only the main content, excluding navigation
+    const text = helpers.markdown(
+      "main > *:not(nav):not(header):not(.breadcrumb)",
+    );
+
     if (text === "") return [];
 
     // Extract meta tag values. These are required for Docusaurus
@@ -457,12 +488,20 @@ import TabItem from '@theme/TabItem';
     const docusaurus_tag =
       $('meta[name="docsearch:docusaurus_tag"]').attr("content") || "";
 
+    // Extract cleaner title (without " - Algolia" suffix)
+    const rawTitle = $("head > title").text();
+    const title = rawTitle.replace(/ - Algolia$/, "");
+
+    // Get the main heading for better searchability
+    const h1 = $("main h1").first().text();
+
     return helpers.splitTextIntoRecords({
       text,
       baseRecord: {
         url,
         objectID: url,
-        title: $("head > title").text(),
+        title: title || h1,
+        heading: h1, // Add main heading as separate field
         lang: language, // Required for Docusaurus
         language, // Required for Docusaurus
         version: version.split(","), // in case there are multiple versions. Required for Docusaurus
@@ -483,10 +522,13 @@ import TabItem from '@theme/TabItem';
 "my-markdown-index": {
   attributesForFaceting: ["lang", "language", "version", "docusaurus_tag"], // Required for Docusaurus
   ignorePlurals: true,
-  minProximity: 4,
+  minProximity: 1,
   removeStopWords: false,
-  searchableAttributes: ["unordered(title)", "unordered(text)"],
-  removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
+  searchableAttributes: ["title", "heading", "unordered(text)"],
+  removeWordsIfNoResults: "lastWords",
+  attributesToHighlight: ["title", "text"],
+  typoTolerance: false,
+  advancedSyntax: false,
 },
 // ...},
 ```
@@ -505,19 +547,29 @@ import TabItem from '@theme/TabItem';
   indexName: "my-markdown-index",
   pathsToMatch: ["https://example.com/docs/**"],
   recordExtractor: ({ $, url, helpers }) => {
-    const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
+    // Target only the main content, excluding navigation
+    const text = helpers.markdown(
+      "main > *:not(nav):not(header):not(.breadcrumb)",
+    );
+
     if (text === "") return [];
 
-    // Extract meta tag values. These are required for VitePress
     const language = $("html").attr("lang") || "en";
 
+    // Extract cleaner title (without " - Algolia" suffix)
+    const rawTitle = $("head > title").text();
+    const title = rawTitle.replace(/ - Algolia$/, "");
+
+    // Get the main heading for better searchability
+    const h1 = $("main h1").first().text();
 
     return helpers.splitTextIntoRecords({
       text,
       baseRecord: {
         url,
-        title: $("head > title").text(),
         objectID: url,
+        title: title || h1,
+        heading: h1, // Add main heading as separate field
         lang: language, // Required for VitePress
       },
       maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
@@ -532,10 +584,13 @@ import TabItem from '@theme/TabItem';
 "my-markdown-index": {
   attributesForFaceting: ["lang"], // Required for VitePress
   ignorePlurals: true,
-  minProximity: 4,
+  minProximity: 1,
   removeStopWords: false,
-  searchableAttributes: ["unordered(title)", "unordered(text)"],
-  removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
+  searchableAttributes: ["title", "heading", "unordered(text)"],
+  removeWordsIfNoResults: "lastWords",
+  attributesToHighlight: ["title", "text"],
+  typoTolerance: false,
+  advancedSyntax: false,
 },
 // ...},
 ```
@@ -554,19 +609,29 @@ import TabItem from '@theme/TabItem';
   indexName: "my-markdown-index",
   pathsToMatch: ["https://example.com/docs/**"],
   recordExtractor: ({ $, url, helpers }) => {
-    const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
+    // Target only the main content, excluding navigation
+    const text = helpers.markdown(
+      "main > *:not(nav):not(header):not(.breadcrumb)",
+    );
+
     if (text === "") return [];
 
-    // Extract meta tag values. These are required for Astro/StarLight
     const language = $("html").attr("lang") || "en";
 
+    // Extract cleaner title (without " - Algolia" suffix)
+    const rawTitle = $("head > title").text();
+    const title = rawTitle.replace(/ - Algolia$/, "");
+
+    // Get the main heading for better searchability
+    const h1 = $("main h1").first().text();
 
     return helpers.splitTextIntoRecords({
       text,
       baseRecord: {
         url,
-        title: $("head > title").text(),
         objectID: url,
+        title: title || h1,
+        heading: h1, // Add main heading as separate field
         lang: language, // Required for Astro/StarLight
       },
       maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
@@ -581,10 +646,13 @@ import TabItem from '@theme/TabItem';
 "my-markdown-index": {
   attributesForFaceting: ["lang"], // Required for Astro/StarLight
   ignorePlurals: true,
-  minProximity: 4,
+  minProximity: 1,
   removeStopWords: false,
-  searchableAttributes: ["unordered(title)", "unordered(text)"],
-  removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
+  searchableAttributes: ["title", "heading", "unordered(text)"],
+  removeWordsIfNoResults: "lastWords",
+  attributesToHighlight: ["title", "text"],
+  typoTolerance: false,
+  advancedSyntax: false,
 },
 // ...},
 ```

From 8570785cad69339d18516c136dd54d4a87efb23a Mon Sep 17 00:00:00 2001
From: NatanTechOfNY <natanyagudayev@gmail.com>
Date: Thu, 23 Oct 2025 15:35:09 -0400
Subject: [PATCH 2/2] fix(website): drop Algolia title-suffix stripping in askai docs

---
 .../docs/v4/askai-markdown-indexing.mdx       | 20 +++++--------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/packages/website/docs/v4/askai-markdown-indexing.mdx b/packages/website/docs/v4/askai-markdown-indexing.mdx
index f955c3ff1..c95b07337 100644
--- a/packages/website/docs/v4/askai-markdown-indexing.mdx
+++ b/packages/website/docs/v4/askai-markdown-indexing.mdx
@@ -91,9 +91,7 @@ For users who need advanced customization or want to understand the underlying c
 
     const language = $("html").attr("lang") || "en";
 
-    // Extract cleaner title (without " - Algolia" suffix)
-    const rawTitle = $("head > title").text();
-    const title = rawTitle.replace(/ - Algolia$/, "");
+    const title = $("head > title").text();
 
     // Get the main heading for better searchability
     const h1 = $("main h1").first().text();
@@ -420,9 +418,7 @@ import TabItem from '@theme/TabItem';
 
     const language = $("html").attr("lang") || "en";
 
-    // Extract cleaner title (without " - Algolia" suffix)
-    const rawTitle = $("head > title").text();
-    const title = rawTitle.replace(/ - Algolia$/, "");
+    const title = $("head > title").text();
 
     // Get the main heading for better searchability
     const h1 = $("main h1").first().text();
@@ -488,9 +484,7 @@ import TabItem from '@theme/TabItem';
     const docusaurus_tag =
       $('meta[name="docsearch:docusaurus_tag"]').attr("content") || "";
 
-    // Extract cleaner title (without " - Algolia" suffix)
-    const rawTitle = $("head > title").text();
-    const title = rawTitle.replace(/ - Algolia$/, "");
+    const title = $("head > title").text();
 
     // Get the main heading for better searchability
     const h1 = $("main h1").first().text();
@@ -556,9 +550,7 @@ import TabItem from '@theme/TabItem';
 
     const language = $("html").attr("lang") || "en";
 
-    // Extract cleaner title (without " - Algolia" suffix)
-    const rawTitle = $("head > title").text();
-    const title = rawTitle.replace(/ - Algolia$/, "");
+    const title = $("head > title").text();
 
     // Get the main heading for better searchability
     const h1 = $("main h1").first().text();
@@ -618,9 +610,7 @@ import TabItem from '@theme/TabItem';
 
     const language = $("html").attr("lang") || "en";
 
-    // Extract cleaner title (without " - Algolia" suffix)
-    const rawTitle = $("head > title").text();
-    const title = rawTitle.replace(/ - Algolia$/, "");
+    const title = $("head > title").text();
 
     // Get the main heading for better searchability
     const h1 = $("main h1").first().text();