@@ -82,19 +82,30 @@ For users who need advanced customization or want to understand the underlying c
8282 indexName: " my-markdown-index" ,
8383 pathsToMatch: [" https://example.com/docs/**" ],
8484 recordExtractor : ({ $, url, helpers }) => {
85- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
85+ // Convert only the direct children of <main>, skipping nav, header, and .breadcrumb elements (adjust the selector to match your site's markup)
86+ const text = helpers .markdown (
87+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
88+ );
89+
8690 if (text === " " ) return [];
8791
88- // Extract language or other attributes as needed. Optional
8992 const language = $ (" html" ).attr (" lang" ) || " en" ;
9093
94+ // Strip the site-name suffix from the page title (" - Algolia" here; change the pattern to match your own site's <title> suffix)
95+ const rawTitle = $ (" head > title" ).text ();
96+ const title = rawTitle .replace (/ - Algolia$ / , " " );
97+
98+ // Get the main heading for better searchability
99+ const h1 = $ (" main h1" ).first ().text ();
100+
91101 return helpers .splitTextIntoRecords ({
92102 text,
93103 baseRecord: {
94104 url,
95105 objectID: url,
96- title: $ (" head > title" ).text (),
97- lang: language, // Add more attributes as needed
106+ title: title || h1,
107+ heading: h1, // Add main heading as separate field
108+ lang: language,
98109 },
99110 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
100111 // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -110,12 +121,15 @@ For users who need advanced customization or want to understand the underlying c
110121``` js
111122// initialIndexSettings: { ...,
112123" my-markdown-index" : {
113- attributesForFaceting: [" lang" ], // Add more if you extract more attributes
124+ attributesForFaceting: [" lang" ],
114125 ignorePlurals: true ,
115- minProximity: 4 ,
126+ minProximity: 1 ,
116127 removeStopWords: false ,
117- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
118- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
128+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
129+ removeWordsIfNoResults: " lastWords" ,
130+ attributesToHighlight: [" title" , " text" ],
131+ typoTolerance: false ,
132+ advancedSyntax: false ,
119133},
120134// ...},
121135```
@@ -397,20 +411,30 @@ import TabItem from '@theme/TabItem';
397411 indexName: " my-markdown-index" ,
398412 pathsToMatch: [" https://example.com/**" ],
399413 recordExtractor : ({ $, url, helpers }) => {
400- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
414+ // Convert only the direct children of <main>, skipping nav, header, and .breadcrumb elements (adjust the selector to match your site's markup)
415+ const text = helpers .markdown (
416+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
417+ );
418+
401419 if (text === " " ) return [];
402420
403- // Customize selectors or meta extraction as needed. Optional
404421 const language = $ (" html" ).attr (" lang" ) || " en" ;
405422
423+ // Strip the site-name suffix from the page title (" - Algolia" here; change the pattern to match your own site's <title> suffix)
424+ const rawTitle = $ (" head > title" ).text ();
425+ const title = rawTitle .replace (/ - Algolia$ / , " " );
426+
427+ // Get the main heading for better searchability
428+ const h1 = $ (" main h1" ).first ().text ();
429+
406430 return helpers .splitTextIntoRecords ({
407431 text,
408432 baseRecord: {
409433 url,
410434 objectID: url,
411- title: $ ( " head > title " ). text () ,
412- // Add more optional attributes to the record
413- lang: language
435+ title: title || h1 ,
436+ heading : h1, // Add main heading as separate field
437+ lang: language,
414438 },
415439 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
416440 // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -424,10 +448,13 @@ import TabItem from '@theme/TabItem';
424448" my-markdown-index" : {
425449 attributesForFaceting: [" lang" ], // Recommended if you add more attributes outside of objectID
426450 ignorePlurals: true ,
427- minProximity: 4 ,
451+ minProximity: 1 ,
428452 removeStopWords: false ,
429- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
430- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
453+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
454+ removeWordsIfNoResults: " lastWords" ,
455+ attributesToHighlight: [" title" , " text" ],
456+ typoTolerance: false ,
457+ advancedSyntax: false ,
431458},
432459// ...},
433460```
@@ -446,7 +473,11 @@ import TabItem from '@theme/TabItem';
446473 indexName: " my-markdown-index" ,
447474 pathsToMatch: [" https://example.com/docs/**" ],
448475 recordExtractor : ({ $, url, helpers }) => {
449- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
476+ // Convert only the direct children of <main>, skipping nav, header, and .breadcrumb elements (adjust the selector to match your site's markup)
477+ const text = helpers .markdown (
478+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
479+ );
480+
450481 if (text === " " ) return [];
451482
452483 // Extract meta tag values. These are required for Docusaurus
@@ -457,12 +488,20 @@ import TabItem from '@theme/TabItem';
457488 const docusaurus_tag =
458489 $ (' meta[name="docsearch:docusaurus_tag"]' ).attr (" content" ) || " " ;
459490
491+ // Strip the site-name suffix from the page title (" - Algolia" here; change the pattern to match your own site's <title> suffix)
492+ const rawTitle = $ (" head > title" ).text ();
493+ const title = rawTitle .replace (/ - Algolia$ / , " " );
494+
495+ // Get the main heading for better searchability
496+ const h1 = $ (" main h1" ).first ().text ();
497+
460498 return helpers .splitTextIntoRecords ({
461499 text,
462500 baseRecord: {
463501 url,
464502 objectID: url,
465- title: $ (" head > title" ).text (),
503+ title: title || h1,
504+ heading: h1, // Add main heading as separate field
466505 lang: language, // Required for Docusaurus
467506 language, // Required for Docusaurus
468507 version: version .split (" ," ), // in case there are multiple versions. Required for Docusaurus
@@ -483,10 +522,13 @@ import TabItem from '@theme/TabItem';
483522" my-markdown-index" : {
484523 attributesForFaceting: [" lang" , " language" , " version" , " docusaurus_tag" ], // Required for Docusaurus
485524 ignorePlurals: true ,
486- minProximity: 4 ,
525+ minProximity: 1 ,
487526 removeStopWords: false ,
488- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
489- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
527+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
528+ removeWordsIfNoResults: " lastWords" ,
529+ attributesToHighlight: [" title" , " text" ],
530+ typoTolerance: false ,
531+ advancedSyntax: false ,
490532},
491533// ...},
492534```
@@ -505,19 +547,29 @@ import TabItem from '@theme/TabItem';
505547 indexName: " my-markdown-index" ,
506548 pathsToMatch: [" https://example.com/docs/**" ],
507549 recordExtractor : ({ $, url, helpers }) => {
508- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
550+ // Convert only the direct children of <main>, skipping nav, header, and .breadcrumb elements (adjust the selector to match your site's markup)
551+ const text = helpers .markdown (
552+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
553+ );
554+
509555 if (text === " " ) return [];
510556
511- // Extract meta tag values. These are required for VitePress
512557 const language = $ (" html" ).attr (" lang" ) || " en" ;
513558
559+ // Strip the site-name suffix from the page title (" - Algolia" here; change the pattern to match your own site's <title> suffix)
560+ const rawTitle = $ (" head > title" ).text ();
561+ const title = rawTitle .replace (/ - Algolia$ / , " " );
562+
563+ // Get the main heading for better searchability
564+ const h1 = $ (" main h1" ).first ().text ();
514565
515566 return helpers .splitTextIntoRecords ({
516567 text,
517568 baseRecord: {
518569 url,
519- title: $ (" head > title" ).text (),
520570 objectID: url,
571+ title: title || h1,
572+ heading: h1, // Add main heading as separate field
521573 lang: language, // Required for VitePress
522574 },
523575 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
@@ -532,10 +584,13 @@ import TabItem from '@theme/TabItem';
532584" my-markdown-index" : {
533585 attributesForFaceting: [" lang" ], // Required for VitePress
534586 ignorePlurals: true ,
535- minProximity: 4 ,
587+ minProximity: 1 ,
536588 removeStopWords: false ,
537- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
538- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
589+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
590+ removeWordsIfNoResults: " lastWords" ,
591+ attributesToHighlight: [" title" , " text" ],
592+ typoTolerance: false ,
593+ advancedSyntax: false ,
539594},
540595// ...},
541596```
@@ -554,19 +609,29 @@ import TabItem from '@theme/TabItem';
554609 indexName: " my-markdown-index" ,
555610 pathsToMatch: [" https://example.com/docs/**" ],
556611 recordExtractor : ({ $, url, helpers }) => {
557- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
612+ // Convert only the direct children of <main>, skipping nav, header, and .breadcrumb elements (adjust the selector to match your site's markup)
613+ const text = helpers .markdown (
614+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
615+ );
616+
558617 if (text === " " ) return [];
559618
560- // Extract meta tag values. These are required for Astro/StarLight
561619 const language = $ (" html" ).attr (" lang" ) || " en" ;
562620
621+ // Strip the site-name suffix from the page title (" - Algolia" here; change the pattern to match your own site's <title> suffix)
622+ const rawTitle = $ (" head > title" ).text ();
623+ const title = rawTitle .replace (/ - Algolia$ / , " " );
624+
625+ // Get the main heading for better searchability
626+ const h1 = $ (" main h1" ).first ().text ();
563627
564628 return helpers .splitTextIntoRecords ({
565629 text,
566630 baseRecord: {
567631 url,
568- title: $ (" head > title" ).text (),
569632 objectID: url,
633+ title: title || h1,
634+ heading: h1, // Add main heading as separate field
570635 lang: language, // Required for Astro/StarLight
571636 },
572637 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
@@ -581,10 +646,13 @@ import TabItem from '@theme/TabItem';
581646" my-markdown-index" : {
582647 attributesForFaceting: [" lang" ], // Required for Astro/StarLight
583648 ignorePlurals: true ,
584- minProximity: 4 ,
649+ minProximity: 1 ,
585650 removeStopWords: false ,
586- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
587- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
651+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
652+ removeWordsIfNoResults: " lastWords" ,
653+ attributesToHighlight: [" title" , " text" ],
654+ typoTolerance: false ,
655+ advancedSyntax: false ,
588656},
589657// ...},
590658```
0 commit comments