@@ -6,9 +6,9 @@ import type {
66} from "../scraper/fetcher" ;
77import { ContentProcessingPipeline } from "../scraper/middleware/ContentProcessorPipeline" ;
88import {
9- HtmlDomParserMiddleware ,
109 HtmlMetadataExtractorMiddleware ,
1110 HtmlSanitizerMiddleware ,
11+ HtmlSelectProcessorMiddleware , // Import the new middleware
1212 HtmlToMarkdownMiddleware ,
1313 MarkdownMetadataExtractorMiddleware ,
1414} from "../scraper/middleware/components" ;
@@ -30,6 +30,15 @@ export interface FetchUrlToolOptions {
3030 * @default true
3131 */
3232 followRedirects ?: boolean ;
33+
34+ /**
35+ * Determines the HTML processing strategy.
36+ * - 'fetch': Use a simple DOM parser (faster, but limited JavaScript support).
37+ * - 'playwright': Use a headless browser (slower, but full JavaScript support).
38+ * - 'auto': Automatically select the best strategy (currently equivalent to 'playwright').
39+ * @default 'auto'
40+ */
41+ scrapeMode ?: "fetch" | "playwright" | "auto" ;
3342}
3443
3544/**
@@ -57,7 +66,7 @@ export class FetchUrlTool {
5766 * @throws {ToolError } If fetching or processing fails
5867 */
5968 async execute ( options : FetchUrlToolOptions ) : Promise < string > {
60- const { url } = options ;
69+ const { url, scrapeMode = "auto" } = options ; // Destructure scrapeMode with default
6170
6271 // Check all fetchers first (helpful for testing and future extensions)
6372 const canFetchResults = this . fetchers . map ( ( f ) => f . canFetch ( url ) ) ;
@@ -104,17 +113,18 @@ export class FetchUrlTool {
104113 followRedirects : options . followRedirects ?? true ,
105114 excludeSelectors : undefined , // Not currently configurable via this tool
106115 ignoreErrors : false ,
116+ scrapeMode : scrapeMode , // Pass the scrapeMode
107117 } satisfies ScraperOptions ,
108118 } ;
109119
110120 let pipeline : ContentProcessingPipeline ;
111121 if ( initialContext . contentType . startsWith ( "text/html" ) ) {
112122 // Updated HTML pipeline for FetchUrlTool
113123 pipeline = new ContentProcessingPipeline ( [
114- new HtmlDomParserMiddleware ( ) ,
124+ new HtmlSelectProcessorMiddleware ( ) , // Use the selector middleware
115125 new HtmlMetadataExtractorMiddleware ( ) , // Keep for potential future use, though title isn't returned
116126 // No Link Extractor needed
117- new HtmlSanitizerMiddleware ( ) , // Renamed instantiation, use default selectors
127+ new HtmlSanitizerMiddleware ( ) , // Use default selectors
118128 new HtmlToMarkdownMiddleware ( ) ,
119129 ] ) ;
120130 } else if (
0 commit comments