Skip to content

Commit 61e41be

Browse files
committed
fix: silence JSDOM virtual console output
Replaces direct JSDOM instantiation with a factory function that configures a silent virtual console by default. Fixes #53
1 parent cfa6709 commit 61e41be

File tree

7 files changed

+48
-11
lines changed

7 files changed

+48
-11
lines changed

src/scraper/middleware/components/HtmlDomParserMiddleware.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import { type DOMWindow, JSDOM } from "jsdom";
1+
import type { DOMWindow } from "jsdom";
2+
import { createJSDOM } from "../../../utils/dom";
23
import { logger } from "../../../utils/logger";
34
import type { ContentProcessingContext, ContentProcessorMiddleware } from "../types";
45

@@ -26,7 +27,8 @@ export class HtmlDomParserMiddleware implements ContentProcessorMiddleware {
2627
let domWindow: DOMWindow | undefined;
2728
try {
2829
logger.debug(`Parsing HTML content from ${context.source}`);
29-
domWindow = new JSDOM(htmlString, {
30+
// Use createJSDOM factory
31+
domWindow = createJSDOM(htmlString, {
3032
url: context.source, // Provide the source URL to JSDOM
3133
// Consider adding other JSDOM options if needed, e.g., runScripts: "dangerously"
3234
}).window;

src/scraper/middleware/components/HtmlJsExecutorMiddleware.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { JSDOM } from "jsdom";
1+
import { createJSDOM } from "../../../utils/dom"; // Replace JSDOM import with createJSDOM
22
import { logger } from "../../../utils/logger";
33
import type { FetchOptions, RawContent } from "../../fetcher/types";
44
import { executeJsInSandbox } from "../../utils/sandbox";
@@ -145,7 +145,8 @@ export class HtmlJsExecutorMiddleware implements ContentProcessorMiddleware {
145145
context.content = result.finalHtml;
146146

147147
// Optionally, update the DOM object as well for potential custom middleware use
148-
context.dom = new JSDOM(result.finalHtml, {
148+
// Use createJSDOM factory
149+
context.dom = createJSDOM(result.finalHtml, {
149150
url: context.source,
150151
}).window;
151152

src/scraper/middleware/components/HtmlMetadataExtractorMiddleware.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ const createMockContext = (
3939
options: { ...createMockScraperOptions(source), ...options },
4040
};
4141
if (htmlContent && contentType.startsWith("text/html")) {
42-
context.dom = new JSDOM(htmlContent).window;
42+
context.dom = new JSDOM(htmlContent, { url: source }).window;
4343
}
4444
return context;
4545
};

src/scraper/middleware/components/HtmlPlaywrightMiddleware.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import { JSDOM } from "jsdom";
21
import { type Browser, type Page, chromium } from "playwright";
2+
import { createJSDOM } from "../../../utils/dom";
33
import { logger } from "../../../utils/logger";
44
import type { ContentProcessingContext, ContentProcessorMiddleware } from "../types";
55

@@ -113,7 +113,9 @@ export class HtmlPlaywrightMiddleware implements ContentProcessorMiddleware {
113113

114114
try {
115115
logger.debug(`Parsing Playwright-rendered HTML with JSDOM for ${context.source}`);
116-
const domWindow = new JSDOM(renderedHtml, {
116+
// Remove manual VirtualConsole setup
117+
// Use createJSDOM factory
118+
const domWindow = createJSDOM(renderedHtml, {
117119
url: context.source, // Provide the source URL to JSDOM
118120
}).window;
119121

src/scraper/utils/sandbox.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { createContext, runInContext } from "node:vm";
2-
import { type DOMWindow, JSDOM } from "jsdom";
2+
import type { JSDOM } from "jsdom";
3+
import { createJSDOM } from "../../utils/dom";
34
import { logger } from "../../utils/logger";
45

56
/**
@@ -44,8 +45,8 @@ export async function executeJsInSandbox(
4445

4546
try {
4647
logger.debug(`Creating JSDOM sandbox for ${url}`);
47-
// Create JSDOM instance *without* running scripts immediately
48-
jsdom = new JSDOM(html, {
48+
// Create JSDOM instance using the factory, which includes default virtualConsole
49+
jsdom = createJSDOM(html, {
4950
url,
5051
runScripts: "outside-only", // We'll run scripts manually in the VM
5152
pretendToBeVisual: true, // Helps with some scripts expecting a visual environment

src/splitter/SemanticMarkdownSplitter.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import remarkHtml from "remark-html";
55
import remarkParse from "remark-parse";
66
import TurndownService from "turndown";
77
import { unified } from "unified";
8+
import { createJSDOM } from "../utils/dom";
89
import { logger } from "../utils/logger";
910
import { fullTrim } from "../utils/string";
1011
import { ContentSplitterError, MinimumChunkSizeError } from "./errors";
@@ -335,7 +336,8 @@ export class SemanticMarkdownSplitter implements DocumentSplitter {
335336
* Parse HTML
336337
*/
337338
private async parseHtml(html: string): Promise<Document> {
338-
const { window } = new JSDOM(html);
339+
// Use createJSDOM which includes default options like virtualConsole
340+
const { window } = createJSDOM(html);
339341
return window.document;
340342
}
341343
}

src/utils/dom.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { JSDOM, VirtualConsole } from "jsdom";
2+
import type { ConstructorOptions } from "jsdom";
3+
4+
/**
 * Creates a JSDOM instance whose console output is suppressed by default.
 *
 * A fresh `VirtualConsole` with no-op listeners is installed unless the caller
 * supplies its own `virtualConsole` in `options`. All other options are passed
 * through to the `JSDOM` constructor unchanged.
 *
 * @param html - The HTML content to parse.
 * @param options - Optional JSDOM configuration options. A defined
 *   `options.virtualConsole` replaces the silent default; a missing or
 *   `undefined` one keeps it.
 * @returns A JSDOM instance.
 */
export function createJSDOM(html: string, options?: ConstructorOptions): JSDOM {
  const silentConsole = new VirtualConsole();
  // Swallow every console channel so page output never reaches the real console.
  silentConsole.on("error", () => {});
  silentConsole.on("warn", () => {});
  silentConsole.on("info", () => {});
  silentConsole.on("debug", () => {});
  silentConsole.on("log", () => {});

  const finalOptions: ConstructorOptions = {
    ...options,
    // Resolve after the spread: an explicitly present `virtualConsole: undefined`
    // key would otherwise overwrite the silent console and re-enable output.
    virtualConsole: options?.virtualConsole ?? silentConsole,
  };

  return new JSDOM(html, finalOptions);
}

0 commit comments

Comments
 (0)