File tree Expand file tree Collapse file tree 7 files changed +48
-11
lines changed
Expand file tree Collapse file tree 7 files changed +48
-11
lines changed Original file line number Diff line number Diff line change 1- import { type DOMWindow , JSDOM } from "jsdom" ;
1+ import type { DOMWindow } from "jsdom" ;
2+ import { createJSDOM } from "../../../utils/dom" ;
23import { logger } from "../../../utils/logger" ;
34import type { ContentProcessingContext , ContentProcessorMiddleware } from "../types" ;
45
@@ -26,7 +27,8 @@ export class HtmlDomParserMiddleware implements ContentProcessorMiddleware {
2627 let domWindow : DOMWindow | undefined ;
2728 try {
2829 logger . debug ( `Parsing HTML content from ${ context . source } ` ) ;
29- domWindow = new JSDOM ( htmlString , {
30+ // Use createJSDOM factory
31+ domWindow = createJSDOM ( htmlString , {
3032 url : context . source , // Provide the source URL to JSDOM
3133 // Consider adding other JSDOM options if needed, e.g., runScripts: "dangerously"
3234 } ) . window ;
Original file line number Diff line number Diff line change 1- import { JSDOM } from "jsdom" ;
1+ import { createJSDOM } from "../../../utils/dom" ; // Replace JSDOM import with createJSDOM
22import { logger } from "../../../utils/logger" ;
33import type { FetchOptions , RawContent } from "../../fetcher/types" ;
44import { executeJsInSandbox } from "../../utils/sandbox" ;
@@ -145,7 +145,8 @@ export class HtmlJsExecutorMiddleware implements ContentProcessorMiddleware {
145145 context . content = result . finalHtml ;
146146
147147 // Optionally, update the DOM object as well for potential custom middleware use
148- context . dom = new JSDOM ( result . finalHtml , {
148+ // Use createJSDOM factory
149+ context . dom = createJSDOM ( result . finalHtml , {
149150 url : context . source ,
150151 } ) . window ;
151152
Original file line number Diff line number Diff line change @@ -39,7 +39,7 @@ const createMockContext = (
3939 options : { ...createMockScraperOptions ( source ) , ...options } ,
4040 } ;
4141 if ( htmlContent && contentType . startsWith ( "text/html" ) ) {
42- context . dom = new JSDOM ( htmlContent ) . window ;
42+ context . dom = new JSDOM ( htmlContent , { url : source } ) . window ;
4343 }
4444 return context ;
4545} ;
Original file line number Diff line number Diff line change 1- import { JSDOM } from "jsdom" ;
21import { type Browser , type Page , chromium } from "playwright" ;
2+ import { createJSDOM } from "../../../utils/dom" ;
33import { logger } from "../../../utils/logger" ;
44import type { ContentProcessingContext , ContentProcessorMiddleware } from "../types" ;
55
@@ -113,7 +113,9 @@ export class HtmlPlaywrightMiddleware implements ContentProcessorMiddleware {
113113
114114 try {
115115 logger . debug ( `Parsing Playwright-rendered HTML with JSDOM for ${ context . source } ` ) ;
116- const domWindow = new JSDOM ( renderedHtml , {
116+ // No manual VirtualConsole setup is needed here; createJSDOM supplies a default one
117+ // Use createJSDOM factory
118+ const domWindow = createJSDOM ( renderedHtml , {
117119 url : context . source , // Provide the source URL to JSDOM
118120 } ) . window ;
119121
Original file line number Diff line number Diff line change 11import { createContext , runInContext } from "node:vm" ;
2- import { type DOMWindow , JSDOM } from "jsdom" ;
2+ import type { JSDOM } from "jsdom" ;
3+ import { createJSDOM } from "../../utils/dom" ;
34import { logger } from "../../utils/logger" ;
45
56/**
@@ -44,8 +45,8 @@ export async function executeJsInSandbox(
4445
4546 try {
4647 logger . debug ( `Creating JSDOM sandbox for ${ url } ` ) ;
47- // Create JSDOM instance *without* running scripts immediately
48- jsdom = new JSDOM ( html , {
48+ // Create JSDOM instance using the factory, which includes default virtualConsole
49+ jsdom = createJSDOM ( html , {
4950 url,
5051 runScripts : "outside-only" , // We'll run scripts manually in the VM
5152 pretendToBeVisual : true , // Helps with some scripts expecting a visual environment
Original file line number Diff line number Diff line change @@ -5,6 +5,7 @@ import remarkHtml from "remark-html";
55import remarkParse from "remark-parse" ;
66import TurndownService from "turndown" ;
77import { unified } from "unified" ;
8+ import { createJSDOM } from "../utils/dom" ;
89import { logger } from "../utils/logger" ;
910import { fullTrim } from "../utils/string" ;
1011import { ContentSplitterError , MinimumChunkSizeError } from "./errors" ;
@@ -335,7 +336,8 @@ export class SemanticMarkdownSplitter implements DocumentSplitter {
335336 * Parse HTML
336337 */
337338 private async parseHtml ( html : string ) : Promise < Document > {
338- const { window } = new JSDOM ( html ) ;
339+ // Use createJSDOM which includes default options like virtualConsole
340+ const { window } = createJSDOM ( html ) ;
339341 return window . document ;
340342 }
341343}
Original file line number Diff line number Diff line change 1+ import { JSDOM , VirtualConsole } from "jsdom" ;
2+ import type { ConstructorOptions } from "jsdom" ;
3+
/**
 * Builds a VirtualConsole that discards everything it receives.
 *
 * The console is never connected to a real console, and a no-op listener is
 * registered for each standard console channel so emitted events are dropped.
 */
function createSilentVirtualConsole(): VirtualConsole {
  const silentConsole = new VirtualConsole();
  const discard = (): void => {};
  for (const channel of ["error", "warn", "info", "debug", "log"] as const) {
    silentConsole.on(channel, discard);
  }
  return silentConsole;
}

/**
 * Creates a JSDOM instance with a pre-configured virtual console to suppress console noise.
 * This utility simplifies the setup of JSDOM by providing a standard configuration.
 *
 * @param html - The HTML content to parse.
 * @param options - Optional JSDOM configuration options. Every provided option is passed
 *   through unchanged. A provided `virtualConsole` replaces the silent default; an absent
 *   or `undefined` `virtualConsole` falls back to the silent default.
 * @returns A JSDOM instance.
 */
export function createJSDOM(html: string, options?: ConstructorOptions): JSDOM {
  // Resolve the console explicitly instead of relying on object spread: spreading
  // `options` last would copy an explicit `virtualConsole: undefined` over the
  // default, and jsdom would then fall back to its own console-forwarding behavior.
  const virtualConsole = options?.virtualConsole ?? createSilentVirtualConsole();

  // Caller options still win for every other key; only the console is normalized.
  const finalOptions: ConstructorOptions = { ...options, virtualConsole };

  return new JSDOM(html, finalOptions);
}
You can’t perform that action at this time.
0 commit comments