11import type { DOMWindow } from "jsdom" ;
2- import { type Mock , beforeEach , describe , expect , it , vi } from "vitest" ;
2+ import {
3+ type Mock ,
4+ type MockedObject ,
5+ beforeEach ,
6+ describe ,
7+ expect ,
8+ it ,
9+ vi ,
10+ } from "vitest" ;
11+ import type { ContentFetcher , RawContent } from "../../fetcher/types" ;
312import { executeJsInSandbox } from "../../utils/sandbox" ;
4- import type { SandboxExecutionResult } from "../../utils/sandbox" ;
13+ import type {
14+ SandboxExecutionOptions ,
15+ SandboxExecutionResult ,
16+ } from "../../utils/sandbox" ;
517import type { ContentProcessingContext } from "../types" ;
618import { HtmlJsExecutorMiddleware } from "./HtmlJsExecutorMiddleware" ;
719
@@ -15,10 +27,17 @@ describe("HtmlJsExecutorMiddleware", () => {
1527 let mockContext : ContentProcessingContext ;
1628 let mockNext : Mock ;
1729 let mockSandboxResult : SandboxExecutionResult ;
30+ let mockFetcher : MockedObject < ContentFetcher > ;
1831
1932 beforeEach ( ( ) => {
2033 vi . resetAllMocks ( ) ;
2134
35+ // Create a mock fetcher
36+ mockFetcher = vi . mocked < ContentFetcher > ( {
37+ canFetch : vi . fn ( ) ,
38+ fetch : vi . fn ( ) ,
39+ } ) ;
40+
2241 mockContext = {
2342 source : "http://example.com" ,
2443 content : "" , // Will be set in tests
@@ -31,8 +50,10 @@ describe("HtmlJsExecutorMiddleware", () => {
3150 url : "http://example.com" , // Can reuse context.source
3251 library : "test-lib" ,
3352 version : "1.0.0" ,
53+ signal : undefined , // Initialize signal
3454 // Add other optional ScraperOptions properties if needed for specific tests
3555 } ,
56+ fetcher : mockFetcher ,
3657 // dom property might be added by the middleware
3758 } ;
3859 mockNext = vi . fn ( ) . mockResolvedValue ( undefined ) ;
@@ -53,10 +74,14 @@ describe("HtmlJsExecutorMiddleware", () => {
5374 await middleware . process ( mockContext , mockNext ) ;
5475
5576 expect ( executeJsInSandbox ) . toHaveBeenCalledOnce ( ) ;
56- expect ( executeJsInSandbox ) . toHaveBeenCalledWith ( {
57- html : "<p>Initial</p><script></script>" ,
58- url : "http://example.com" ,
59- } ) ;
77+ // Verify fetchScriptContent is passed as a function
78+ expect ( executeJsInSandbox ) . toHaveBeenCalledWith (
79+ expect . objectContaining ( {
80+ html : "<p>Initial</p><script></script>" ,
81+ url : "http://example.com" ,
82+ fetchScriptContent : expect . any ( Function ) ,
83+ } ) ,
84+ ) ;
6085 } ) ;
6186
6287 it ( "should update context.content with finalHtml from sandbox result" , async ( ) => {
@@ -130,10 +155,14 @@ describe("HtmlJsExecutorMiddleware", () => {
130155 await middleware . process ( mockContext , mockNext ) ;
131156
132157 expect ( executeJsInSandbox ) . toHaveBeenCalledOnce ( ) ;
133- expect ( executeJsInSandbox ) . toHaveBeenCalledWith ( {
134- html : initialHtml ,
135- url : "http://example.com" ,
136- } ) ;
158+ // Updated assertion to expect fetchScriptContent
159+ expect ( executeJsInSandbox ) . toHaveBeenCalledWith (
160+ expect . objectContaining ( {
161+ html : initialHtml ,
162+ url : "http://example.com" ,
163+ fetchScriptContent : expect . any ( Function ) ,
164+ } ) ,
165+ ) ;
137166 expect ( mockNext ) . toHaveBeenCalledOnce ( ) ;
138167 } ) ;
139168
@@ -152,4 +181,112 @@ describe("HtmlJsExecutorMiddleware", () => {
152181 ) ;
153182 expect ( mockNext ) . not . toHaveBeenCalled ( ) ; // Should not proceed if middleware itself fails
154183 } ) ;
184+
185+ // --- Tests for fetchScriptContent callback logic ---
186+
// Happy path: the fetchScriptContent callback that the middleware hands to
// executeJsInSandbox is expected to delegate to context.fetcher.fetch and to
// return the fetched payload as a string, without recording any error.
187+ it ( "fetchScriptContent callback should use context.fetcher to fetch script" , async ( ) => {
188+ mockContext . content = "<p>Initial</p><script src='ext.js'></script>" ;
189+ const middleware = new HtmlJsExecutorMiddleware ( ) ;
190+ const mockScriptContent = "console.log('fetched');" ;
// The fetcher returns the script as a Buffer; the final assertion compares the
// callback result against the plain string, so a Buffer-to-string conversion
// is expected to happen inside the callback.
191+ const mockRawContent : RawContent = {
192+ content : Buffer . from ( mockScriptContent ) ,
193+ mimeType : "application/javascript" ,
194+ source : "http://example.com/ext.js" ,
195+ } ;
196+ mockFetcher . fetch . mockResolvedValue ( mockRawContent ) ;
197+
198+ await middleware . process ( mockContext , mockNext ) ;
199+
200+ // Get the options passed to the sandbox mock
// (first argument of the first recorded call; the cast to Mock presumes
// executeJsInSandbox is module-mocked outside this view — confirm).
201+ const sandboxOptions = ( executeJsInSandbox as Mock ) . mock
202+ . calls [ 0 ] [ 0 ] as SandboxExecutionOptions ;
203+ expect ( sandboxOptions . fetchScriptContent ) . toBeDefined ( ) ;
204+
205+ // Invoke the callback to test its behavior
// The non-null assertion is backed by the toBeDefined() check just above.
206+ const fetchedContent = await sandboxOptions . fetchScriptContent ! (
207+ "http://example.com/ext.js" ,
208+ ) ;
209+
// The fetch options are pinned exactly: an undefined signal and redirects enabled.
210+ expect ( mockFetcher . fetch ) . toHaveBeenCalledWith ( "http://example.com/ext.js" , {
211+ signal : undefined ,
212+ followRedirects : true ,
213+ } ) ;
214+ expect ( fetchedContent ) . toBe ( mockScriptContent ) ;
215+ expect ( mockContext . errors ) . toHaveLength ( 0 ) ; // No errors expected during fetch
216+ } ) ;
217+
// Failure path: when context.fetcher.fetch rejects, the callback is expected to
// resolve to null (not throw) and to append exactly one error to context.errors
// that names the script URL and keeps the original error as its cause.
218+ it ( "fetchScriptContent callback should handle fetcher errors" , async ( ) => {
219+ mockContext . content = "<p>Initial</p><script src='ext.js'></script>" ;
220+ const middleware = new HtmlJsExecutorMiddleware ( ) ;
221+ const fetchError = new Error ( "Network Failed" ) ;
222+ mockFetcher . fetch . mockRejectedValue ( fetchError ) ;
223+
224+ await middleware . process ( mockContext , mockNext ) ;
225+
// Pull the options object from the first recorded sandbox call, then invoke
// the callback directly, since the sandbox itself is not exercised here.
226+ const sandboxOptions = ( executeJsInSandbox as Mock ) . mock
227+ . calls [ 0 ] [ 0 ] as SandboxExecutionOptions ;
// NOTE(review): unlike the happy-path test there is no toBeDefined() check
// before this non-null assertion; a missing callback would surface as a TypeError.
228+ const fetchedContent = await sandboxOptions . fetchScriptContent ! (
229+ "http://example.com/ext.js" ,
230+ ) ;
231+
232+ expect ( mockFetcher . fetch ) . toHaveBeenCalledWith ( "http://example.com/ext.js" , {
233+ signal : undefined ,
234+ followRedirects : true ,
235+ } ) ;
// The rejection must be converted into a null result plus a recorded error.
236+ expect ( fetchedContent ) . toBeNull ( ) ;
237+ expect ( mockContext . errors ) . toHaveLength ( 1 ) ;
238+ expect ( mockContext . errors [ 0 ] . message ) . toContain (
239+ "Failed to fetch external script http://example.com/ext.js: Network Failed" ,
240+ ) ;
// Identity check: the recorded error's cause must be the very Error the fetcher rejected with.
241+ expect ( mockContext . errors [ 0 ] . cause ) . toBe ( fetchError ) ;
242+ } ) ;
243+
// MIME guard: when the fetcher succeeds but reports a non-JavaScript MIME type,
// the callback is expected to return null and record a single "Skipping
// execution" error instead of handing the content back for execution.
244+ it ( "fetchScriptContent callback should handle non-JS MIME types" , async ( ) => {
245+ mockContext . content = "<p>Initial</p><script src='style.css'></script>" ;
246+ const middleware = new HtmlJsExecutorMiddleware ( ) ;
// Here the content is a plain string (the happy-path test uses a Buffer).
247+ const mockRawContent : RawContent = {
248+ content : "body { color: red; }" ,
249+ mimeType : "text/css" , // Incorrect MIME type
250+ source : "http://example.com/style.css" ,
251+ } ;
252+ mockFetcher . fetch . mockResolvedValue ( mockRawContent ) ;
253+
254+ await middleware . process ( mockContext , mockNext ) ;
255+
// Take the options from the first recorded sandbox call and drive the callback by hand.
256+ const sandboxOptions = ( executeJsInSandbox as Mock ) . mock
257+ . calls [ 0 ] [ 0 ] as SandboxExecutionOptions ;
258+ const fetchedContent = await sandboxOptions . fetchScriptContent ! (
259+ "http://example.com/style.css" ,
260+ ) ;
261+
// The fetch itself is still expected to happen; the rejection is based on the
// MIME type of the response, not on the URL.
262+ expect ( mockFetcher . fetch ) . toHaveBeenCalledWith ( "http://example.com/style.css" , {
263+ signal : undefined ,
264+ followRedirects : true ,
265+ } ) ;
266+ expect ( fetchedContent ) . toBeNull ( ) ;
267+ expect ( mockContext . errors ) . toHaveLength ( 1 ) ;
268+ expect ( mockContext . errors [ 0 ] . message ) . toContain (
269+ "Skipping execution of external script http://example.com/style.css due to unexpected MIME type: text/css" ,
270+ ) ;
271+ } ) ;
272+
// No-fetcher path: with context.fetcher unset, the callback is still expected to
// be supplied to the sandbox, but invoking it must return null without calling
// any fetcher and without adding an entry to context.errors.
273+ it ( "fetchScriptContent callback should handle missing fetcher in context" , async ( ) => {
274+ mockContext . content = "<p>Initial</p><script src='ext.js'></script>" ;
275+ mockContext . fetcher = undefined ; // Remove fetcher for this test
276+ const middleware = new HtmlJsExecutorMiddleware ( ) ;
277+
278+ await middleware . process ( mockContext , mockNext ) ;
279+
// Take the options from the first recorded sandbox call and drive the callback by hand.
280+ const sandboxOptions = ( executeJsInSandbox as Mock ) . mock
281+ . calls [ 0 ] [ 0 ] as SandboxExecutionOptions ;
282+ const fetchedContent = await sandboxOptions . fetchScriptContent ! (
283+ "http://example.com/ext.js" ,
284+ ) ;
285+
// mockFetcher is the object that was detached from the context above; asserting
// on it confirms the callback did not reach the previously configured fetcher.
286+ expect ( mockFetcher . fetch ) . not . toHaveBeenCalled ( ) ; // Fetcher should not be called
287+ expect ( fetchedContent ) . toBeNull ( ) ;
288+ expect ( mockContext . errors ) . toHaveLength ( 0 ) ; // Only logs a warning, doesn't add error
289+ // We can't easily verify logger.warn was called without mocking it again here,
290+ // but the null return and lack of fetch call imply the check worked.
291+ } ) ;
155292} ) ;
0 commit comments