Skip to content

Commit 924c7e6

Browse files
committed
refactor: rename HtmlSmartProcessorMiddleware to HtmlSelectProcessorMiddleware
1 parent e8e4beb commit 924c7e6

File tree

4 files changed

+10
-10
lines changed

4 files changed

+10
-10
lines changed

src/scraper/middleware/components/HtmlSmartProcessorMiddleware.test.ts renamed to src/scraper/middleware/components/HtmlSelectProcessorMiddleware.test.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
22
import type { ContentProcessingContext } from "../types";
33
import { HtmlDomParserMiddleware } from "./HtmlDomParserMiddleware";
44
import { HtmlPlaywrightMiddleware } from "./HtmlPlaywrightMiddleware";
5-
import { HtmlSmartProcessorMiddleware } from "./HtmlSmartProcessorMiddleware";
5+
import { HtmlSelectProcessorMiddleware } from "./HtmlSelectProcessorMiddleware";
66

77
// Mock the underlying processors
88
vi.mock("./HtmlDomParserMiddleware");
@@ -12,7 +12,7 @@ vi.mock("./HtmlPlaywrightMiddleware");
1212
const MockedHtmlDomParserMiddleware = vi.mocked(HtmlDomParserMiddleware);
1313
const MockedHtmlPlaywrightMiddleware = vi.mocked(HtmlPlaywrightMiddleware);
1414

15-
describe("HtmlSmartProcessorMiddleware", () => {
15+
describe("HtmlSelectProcessorMiddleware", () => {
1616
const mockNext = vi.fn().mockResolvedValue(undefined);
1717

1818
beforeEach(() => {
@@ -45,7 +45,7 @@ describe("HtmlSmartProcessorMiddleware", () => {
4545

4646
it("should use HtmlDomParserMiddleware when scrapeMode is 'fetch'", async () => {
4747
const context = createContext("fetch");
48-
const middleware = new HtmlSmartProcessorMiddleware();
48+
const middleware = new HtmlSelectProcessorMiddleware();
4949

5050
await middleware.process(context, mockNext);
5151

@@ -65,7 +65,7 @@ describe("HtmlSmartProcessorMiddleware", () => {
6565

6666
it("should use HtmlPlaywrightMiddleware when scrapeMode is 'playwright'", async () => {
6767
const context = createContext("playwright");
68-
const middleware = new HtmlSmartProcessorMiddleware();
68+
const middleware = new HtmlSelectProcessorMiddleware();
6969

7070
await middleware.process(context, mockNext);
7171

@@ -85,7 +85,7 @@ describe("HtmlSmartProcessorMiddleware", () => {
8585

8686
it("should use HtmlPlaywrightMiddleware when scrapeMode is 'auto'", async () => {
8787
const context = createContext("auto");
88-
const middleware = new HtmlSmartProcessorMiddleware();
88+
const middleware = new HtmlSelectProcessorMiddleware();
8989

9090
await middleware.process(context, mockNext);
9191

@@ -105,7 +105,7 @@ describe("HtmlSmartProcessorMiddleware", () => {
105105

106106
it("should default to 'auto' (Playwright) when scrapeMode is undefined", async () => {
107107
const context = createContext(undefined); // Explicitly undefined
108-
const middleware = new HtmlSmartProcessorMiddleware();
108+
const middleware = new HtmlSelectProcessorMiddleware();
109109

110110
await middleware.process(context, mockNext);
111111

src/scraper/middleware/components/HtmlSmartProcessorMiddleware.ts renamed to src/scraper/middleware/components/HtmlSelectProcessorMiddleware.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import { HtmlPlaywrightMiddleware } from "./HtmlPlaywrightMiddleware";
1111
* - 'auto': Currently defaults to using `HtmlPlaywrightMiddleware`. Future implementations
1212
* might add more sophisticated logic for 'auto' mode.
1313
*/
14-
export class HtmlSmartProcessorMiddleware implements ContentProcessorMiddleware {
14+
export class HtmlSelectProcessorMiddleware implements ContentProcessorMiddleware {
1515
private readonly domProcessor: HtmlDomParserMiddleware;
1616
private readonly playwrightProcessor: HtmlPlaywrightMiddleware;
1717

src/scraper/middleware/components/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export * from "./HtmlLinkExtractorMiddleware";
44
export * from "./HtmlMetadataExtractorMiddleware";
55
export * from "./HtmlPlaywrightMiddleware";
66
export * from "./HtmlSanitizerMiddleware";
7-
export * from "./HtmlSmartProcessorMiddleware";
7+
export * from "./HtmlSelectProcessorMiddleware";
88
export * from "./HtmlToMarkdownMiddleware";
99
export * from "./MarkdownLinkExtractorMiddleware";
1010
export * from "./MarkdownMetadataExtractorMiddleware";

src/scraper/strategies/WebScraperStrategy.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
HtmlLinkExtractorMiddleware,
1111
HtmlMetadataExtractorMiddleware,
1212
HtmlSanitizerMiddleware,
13-
HtmlSmartProcessorMiddleware, // Import the new middleware
13+
HtmlSelectProcessorMiddleware, // Import the new middleware
1414
HtmlToMarkdownMiddleware,
1515
MarkdownLinkExtractorMiddleware,
1616
MarkdownMetadataExtractorMiddleware,
@@ -99,7 +99,7 @@ export class WebScraperStrategy extends BaseScraperStrategy {
9999
if (initialContext.contentType.startsWith("text/html")) {
100100
// Updated HTML pipeline order
101101
pipeline = new ContentProcessingPipeline([
102-
new HtmlSmartProcessorMiddleware(), // Use the smart processor
102+
new HtmlSelectProcessorMiddleware(), // Select the DOM parser or Playwright processor based on scrapeMode
103103
new HtmlMetadataExtractorMiddleware(),
104104
new HtmlLinkExtractorMiddleware(), // Extract links before cleaning
105105
new HtmlSanitizerMiddleware(),

0 commit comments

Comments
 (0)