Skip to content

Commit da16170

Browse files
committed
refactor: improve type organization and method signatures
BREAKING CHANGE: DocumentStore and VectorStoreService method signatures have changed

- Reorganize types across domains:
  * Move domain-specific types closer to their implementations
  * Keep only shared types in src/types/index.ts
  * Add domain prefixes to type names for clarity
- Standardize method signatures:
  * Replace filter objects with explicit library/version parameters
  * Make parameter order consistent across all methods
  * Update all tests to match new signatures
- Improve type naming:
  * Rename DocContent -> Document
  * Rename PageResult -> ScrapedPage
  * Rename ScrapeOptions -> ScraperOptions
  * Rename ScrapingProgress -> ScraperProgress
  * Rename SearchResult -> StoreSearchResult
  * Rename VersionInfo -> LibraryVersion
  * Rename SplitterOptions -> MarkdownSplitterOptions

The changes improve code organization, make dependencies clearer, and provide a more consistent and explicit API across the codebase.
1 parent f6c3baa commit da16170

20 files changed: +242 additions, -207 deletions

src/pipeline/DocumentProcessingPipeline.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { DocumentProcessingPipeline } from "./DocumentProcessingPipeline";
22
import { ScraperRegistry, ScraperService } from "../scraper";
33
import { VectorStoreService } from "../store";
44
import { logger } from "../utils/logger";
5-
import type { ScrapingProgress, ScrapeOptions } from "../types";
5+
import type { ScraperProgress, ScraperOptions } from "../scraper/types";
66
import { describe, it, expect, beforeEach, vi } from "vitest";
77
import { DocumentProcessingError, PipelineStateError } from "./errors";
88

@@ -38,7 +38,7 @@ vi.mock("../utils/logger", () => ({
3838
describe("DocumentProcessingPipeline", () => {
3939
let pipeline: DocumentProcessingPipeline;
4040

41-
const mockScrapeOptions: ScrapeOptions = {
41+
const mockScrapeOptions: ScraperOptions = {
4242
url: "https://example.com",
4343
library: "test-lib",
4444
version: "1.0.0",
@@ -47,7 +47,7 @@ describe("DocumentProcessingPipeline", () => {
4747
subpagesOnly: true,
4848
};
4949

50-
const mockProgress: ScrapingProgress = {
50+
const mockProgress: ScraperProgress = {
5151
currentUrl: "https://example.com/page",
5252
pagesScraped: 1,
5353
maxPages: 10,
@@ -164,7 +164,7 @@ describe("DocumentProcessingPipeline", () => {
164164
});
165165

166166
it("should handle stop during processing", async () => {
167-
let progressCallback: ((progress: ScrapingProgress) => void) | undefined;
167+
let progressCallback: ((progress: ScraperProgress) => void) | undefined;
168168

169169
mockScrape.mockImplementation((_, callback) => {
170170
progressCallback = callback;

src/pipeline/DocumentProcessingPipeline.ts

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
import { ScraperRegistry, ScraperService } from "../scraper";
22
import type { VectorStoreService } from "../store";
3-
import type {
4-
DocumentPipeline,
5-
DocumentPipelineCallbacks,
6-
ScrapeOptions,
7-
ScrapingProgress,
8-
} from "../types";
3+
import type { DocumentPipeline, DocumentPipelineCallbacks } from "./types";
4+
import type { ScraperOptions, ScraperProgress } from "../scraper/types";
95
import { logger } from "../utils/logger";
106
import { DocumentProcessingError, PipelineStateError } from "./errors";
117

@@ -44,14 +40,14 @@ export class DocumentProcessingPipeline implements DocumentPipeline {
4440
* Initiates document processing pipeline.
4541
* Coordinates scraping and storage operations with progress tracking
4642
*/
47-
async process(options: ScrapeOptions): Promise<void> {
43+
async process(options: ScraperOptions): Promise<void> {
4844
if (this.isProcessing) {
4945
throw new PipelineStateError("Pipeline is already processing");
5046
}
5147

5248
this.isProcessing = true;
5349
try {
54-
await this.scraperService.scrape(options, (progress: ScrapingProgress) =>
50+
await this.scraperService.scrape(options, (progress: ScraperProgress) =>
5551
this.handleScrapingProgress(progress)
5652
);
5753
logger.info("✅ Pipeline processing complete");
@@ -72,7 +68,7 @@ export class DocumentProcessingPipeline implements DocumentPipeline {
7268
}
7369

7470
private async handleScrapingProgress(
75-
progress: ScrapingProgress
71+
progress: ScraperProgress
7672
): Promise<void> {
7773
if (!this.isProcessing) return;
7874

src/pipeline/types.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import type { Document } from "../types";
2+
import type { ScraperOptions, ScraperProgress } from "../scraper/types";
3+
4+
/**
5+
* Interface for document processing pipeline implementations
6+
*/
7+
export interface DocumentPipeline {
8+
process(options: ScraperOptions): Promise<void>;
9+
setCallbacks(callbacks: DocumentPipelineCallbacks): void;
10+
stop(): Promise<void>;
11+
}
12+
13+
/**
14+
* Callbacks for pipeline progress and error handling
15+
*/
16+
export interface DocumentPipelineCallbacks {
17+
onProgress?: (progress: ScraperProgress) => Promise<void>;
18+
onError?: (error: Error, document?: Document) => Promise<void>;
19+
}

src/scraper/HtmlScraper.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { validateUrl } from "../utils/url";
55
import { logger } from "../utils/logger";
66
import createDOMPurify, { type WindowLike } from "dompurify";
77
import { Window } from "happy-dom";
8-
import type { PageResult } from "../types";
8+
import type { ScrapedPage } from "./types";
99

1010
export type RetryOptions = {
1111
maxRetries?: number;
@@ -162,7 +162,7 @@ export class HtmlScraper {
162162
* Performs single attempt at scraping page content and converting to markdown.
163163
* Handles HTML sanitization and URL normalization
164164
*/
165-
public async scrapePage(url: string): Promise<PageResult> {
165+
public async scrapePage(url: string): Promise<ScrapedPage> {
166166
validateUrl(url);
167167

168168
const { data } = await scrapeIt<{
@@ -238,7 +238,7 @@ export class HtmlScraper {
238238
public async scrapePageWithRetry(
239239
url: string,
240240
options?: RetryOptions
241-
): Promise<PageResult> {
241+
): Promise<ScrapedPage> {
242242
validateRetryOptions(options);
243243
const maxRetries = options?.maxRetries ?? this.MAX_RETRIES;
244244
const baseDelay = options?.baseDelay ?? this.BASE_DELAY;

src/scraper/ScraperRegistry.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { ScraperStrategy } from "../types";
1+
import type { ScraperStrategy } from "./types";
22
import { validateUrl } from "../utils/url";
33
import { DefaultScraperStrategy } from "./strategies/DefaultScraperStrategy";
44
import { GitHubScraperStrategy } from "./strategies/GitHubScraperStrategy";

src/scraper/ScraperService.ts

Lines changed: 22 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,36 @@
1-
import type {
2-
ScrapeOptions,
3-
ProgressCallback,
4-
ScrapingProgress,
5-
} from "../types";
6-
import { ScraperError } from "../utils/errors";
7-
import { logger } from "../utils/logger";
81
import type { ScraperRegistry } from "./ScraperRegistry";
2+
import type { ScraperOptions, ScraperProgress } from "./types";
3+
import { ScraperError } from "../utils/errors";
4+
import type { ProgressCallback } from "../types";
95

106
/**
11-
* Orchestrates web scraping operations using registered scraping strategies.
12-
* Delegates scraping to appropriate strategies based on URL patterns and provides
13-
* a unified error handling layer, wrapping domain-specific errors into ScraperErrors
14-
* for consistent error management throughout the application.
7+
* Orchestrates document scraping operations using registered scraping strategies.
8+
* Automatically selects appropriate strategy based on URL patterns.
159
*/
1610
export class ScraperService {
17-
constructor(private registry: ScraperRegistry) {}
11+
private registry: ScraperRegistry;
12+
13+
constructor(registry: ScraperRegistry) {
14+
this.registry = registry;
15+
}
1816

1917
/**
20-
* Executes scraping using appropriate strategy for given URL.
21-
* Provides progress tracking and error handling wrapper
18+
* Scrapes content from the provided URL using the appropriate strategy.
19+
* Reports progress via callback and handles errors.
2220
*/
2321
async scrape(
24-
options: ScrapeOptions,
25-
progressCallback?: ProgressCallback<ScrapingProgress>
22+
options: ScraperOptions,
23+
progressCallback?: ProgressCallback<ScraperProgress>
2624
): Promise<void> {
27-
try {
28-
const strategy = this.registry.getStrategy(options.url);
29-
await strategy.scrape(options, progressCallback);
30-
} catch (error) {
31-
logger.error(
32-
`❌ Scraping failed: ${error instanceof Error ? error.message : "Unknown error"}`
25+
// Find strategy for this URL
26+
const strategy = this.registry.getStrategy(options.url);
27+
if (!strategy) {
28+
throw new ScraperError(
29+
`No scraper strategy found for URL: ${options.url}`,
30+
false
3331
);
34-
// Wrap non-ScraperError errors in ScraperError
35-
if (!(error instanceof ScraperError)) {
36-
throw new ScraperError(
37-
`Failed to scrape ${options.url}: ${error instanceof Error ? error.message : "Unknown error"}`,
38-
false,
39-
error as Error
40-
);
41-
}
42-
throw error;
4332
}
33+
34+
await strategy.scrape(options, progressCallback);
4435
}
4536
}

0 commit comments

Comments (0)