Skip to content

Commit ba8a6f1

Browse files
committed
feat: Add comprehensive logging system
- Create logger utility in src/utils/logger.ts
- Add logging to scraping operations with progress tracking
- Add logging to store operations for document indexing
- Add logging to search operations
- Improve error visibility with detailed retry logging
1 parent 9557014 commit ba8a6f1

File tree

8 files changed

+153
-48
lines changed

8 files changed

+153
-48
lines changed

ARCHITECTURE.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,34 @@ Tools that involve long-running operations support progress reporting through ca
9797
- CLI: Console output with progress information
9898
- MCP: Structured progress updates through the MCP protocol
9999

100+
### 5. Logging Strategy
101+
102+
The project uses a centralized logging system through `utils/logger.ts` that maps to console methods. The logging follows a hierarchical approach:
103+
104+
1. **Tools Layer (Highest)**
105+
106+
- Primary user-facing operations
107+
- Final results and overall progress
108+
- Example: Search queries and result counts
109+
110+
2. **Core Components (Middle)**
111+
112+
- Unique operational logs
113+
- Store creation and management
114+
- Example: Vector store operations
115+
116+
3. **Strategy Layer (Lowest)**
117+
- Detailed progress (page crawling)
118+
- Error conditions and retries
119+
- Example: Individual page scraping status
120+
121+
This hierarchy ensures:
122+
123+
- Clear operation visibility
124+
- No duplicate logging between layers
125+
- Consistent emoji usage for better readability
126+
- Error logging preserved at all levels for debugging
127+
100128
## Benefits
101129

102130
1. **Maintainability**

src/scraper/index.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type {
33
ScrapingProgressCallback,
44
DocContent,
55
} from "../types";
6+
import { logger } from "../utils/logger";
67
import { validateUrl } from "../utils/url";
78
import { DefaultScraperStrategy } from "./strategies/default-strategy";
89
import { NpmScraperStrategy } from "./strategies/npm-strategy";
@@ -51,7 +52,15 @@ export class DocumentationScraperDispatcher {
5152
// Validate config URL before proceeding
5253
validateUrl(config.url);
5354
const strategy = this.determineStrategy(config.url);
54-
return strategy.scrape(config, progressCallback);
55+
56+
try {
57+
return await strategy.scrape(config, progressCallback);
58+
} catch (error) {
59+
logger.error(
60+
`❌ Scraping failed: ${error instanceof Error ? error.message : "Unknown error"}`
61+
);
62+
throw error;
63+
}
5564
}
5665
}
5766

src/scraper/strategies/default-strategy.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import TurndownService from "turndown";
22
import scrapeIt from "scrape-it";
33
import { ScraperError } from "../../utils/errors";
44
import { validateUrl } from "../../utils/url";
5+
import { logger } from "../../utils/logger";
56
import type {
67
ScraperConfig,
78
PageResult,
@@ -148,6 +149,9 @@ export class DefaultScraperStrategy implements ScraperStrategy {
148149
// Only retry on 4xx errors
149150
if (status !== undefined && status >= 400 && status < 500) {
150151
for (let attempt = 1; attempt < this.MAX_RETRIES; attempt++) {
152+
logger.warn(
153+
`⚠️ Retry ${attempt}/${this.MAX_RETRIES - 1} for ${url} (Status: ${status})`
154+
);
151155
try {
152156
await this.delay(this.BASE_DELAY * 2 ** attempt);
153157
return await this.scrapePageContent(url);
@@ -162,6 +166,12 @@ export class DefaultScraperStrategy implements ScraperStrategy {
162166
);
163167
}
164168
// Otherwise continue to next retry
169+
const retryStatus = (
170+
retryError as { response?: { status: number } }
171+
)?.response?.status;
172+
logger.warn(
173+
`⚠️ Attempt ${attempt} failed (Status: ${retryStatus || "unknown"})`
174+
);
165175
}
166176
}
167177
}
@@ -214,6 +224,10 @@ export class DefaultScraperStrategy implements ScraperStrategy {
214224
this.pageCount++;
215225
this.reportProgress(normalizedUrl, depth);
216226

227+
logger.info(
228+
`🌐 Crawling page ${this.pageCount}/${config.maxPages} (depth ${depth}/${config.maxDepth}): ${normalizedUrl}`
229+
);
230+
217231
const result = await this.scrapePageContentWithRetry(url, config);
218232
const results = [result];
219233

src/store/index.ts

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { OpenAIEmbeddings } from "@langchain/openai";
33
import path from "node:path";
44
import fs from "node:fs/promises";
55
import semver from "semver";
6+
import { logger } from "../utils/logger";
67
import type { SearchResult, VectorStoreProgressCallback } from "../types";
78
import type { Document } from "@langchain/core/documents";
89
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
@@ -46,6 +47,7 @@ export class VectorStoreManager {
4647
library: string,
4748
version: string
4849
): Promise<MemoryVectorStore> {
50+
logger.info(`💾 Creating store for ${library}@${version}`);
4951
const storePath = this.getStorePath(library, version);
5052
await fs.mkdir(storePath, { recursive: true });
5153

@@ -56,6 +58,9 @@ export class VectorStoreManager {
5658
library: string,
5759
targetVersion?: string
5860
): Promise<string | null> {
61+
logger.info(
62+
`🔍 Finding best version for ${library}${targetVersion ? `@${targetVersion}` : ""}`
63+
);
5964
try {
6065
const libraryPath = path.join(this.baseDir, library);
6166
const versions = await fs.readdir(libraryPath);
@@ -79,13 +84,24 @@ export class VectorStoreManager {
7984
)
8085
).filter((v): v is string => v !== null);
8186

82-
if (validVersions.length === 0) return null;
87+
if (validVersions.length === 0) {
88+
logger.warn(`⚠️ No valid versions found for ${library}`);
89+
return null;
90+
}
91+
92+
const result = !targetVersion
93+
? semver.maxSatisfying(validVersions, "*")
94+
: semver.maxSatisfying(validVersions, `<=${targetVersion}`);
8395

84-
if (!targetVersion) {
85-
return semver.maxSatisfying(validVersions, "*") || null;
96+
if (result) {
97+
logger.info(`✅ Found version ${result} for ${library}`);
98+
} else {
99+
logger.warn(
100+
`⚠️ No matching version found for ${library}${targetVersion ? `@${targetVersion}` : ""}`
101+
);
86102
}
87103

88-
return semver.maxSatisfying(validVersions, `<=${targetVersion}`) || null;
104+
return result || null;
89105
} catch (error) {
90106
if ((error as NodeJS.ErrnoException).code === "ENOENT") {
91107
return null;
@@ -95,6 +111,7 @@ export class VectorStoreManager {
95111
}
96112

97113
async clearStore(library: string, version: string): Promise<void> {
114+
logger.info(`🗑️ Clearing store for ${library}@${version}`);
98115
const storePath = this.getStorePath(library, version);
99116
const storeFile = path.join(storePath, STORE_FILENAME);
100117
try {
@@ -112,6 +129,9 @@ export class VectorStoreManager {
112129
version: string,
113130
document: Document
114131
): Promise<void> {
132+
logger.info(
133+
`📚 Adding document: ${document.metadata.title} for ${library}@${version}`
134+
);
115135
const store = await this.createStore(library, version);
116136
const splitter = new RecursiveCharacterTextSplitter({
117137
chunkSize: 4000,
@@ -125,6 +145,7 @@ export class VectorStoreManager {
125145

126146
// Split document into smaller chunks
127147
const splitDocs = await splitter.splitDocuments([document]);
148+
logger.info(`📄 Split document into ${splitDocs.length} chunks`);
128149

129150
// Report progress
130151
if (this.onProgress) {
@@ -174,20 +195,23 @@ export class VectorStoreManager {
174195
k: limit * 2,
175196
searchType: "similarity",
176197
});
177-
const results = await retriever.invoke(query);
178-
const rerankedResults = await BM25Retriever.fromDocuments(results, {
179-
k: limit,
180-
includeScore: true,
181-
}).invoke(query);
198+
const initialResults = await retriever.invoke(query);
199+
const rerankedResults = await BM25Retriever.fromDocuments(
200+
initialResults,
201+
{
202+
k: limit,
203+
includeScore: true,
204+
}
205+
).invoke(query);
182206

183207
return rerankedResults.map((doc) => ({
184208
content: doc.pageContent,
185209
score: (doc.metadata.bm25Score as number) ?? 0,
186210
metadata: {
187-
url: doc.metadata.url,
188-
title: doc.metadata.title,
189-
library: doc.metadata.library,
190-
version: doc.metadata.version,
211+
url: doc.metadata.url as string,
212+
title: doc.metadata.title as string,
213+
library: doc.metadata.library as string,
214+
version: doc.metadata.version as string,
191215
},
192216
}));
193217
} catch (error) {

src/tools/library.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { VectorStoreManager } from "../store/index.js";
2+
import { logger } from "../utils/logger";
23

34
export interface ListLibrariesOptions {
45
store: VectorStoreManager;
@@ -29,15 +30,16 @@ export const listLibraries = async (
2930
): Promise<ListLibrariesResult> => {
3031
const { store } = options;
3132
const rawLibraries = await store.listLibraries();
32-
return {
33-
libraries: rawLibraries.map(({ library, versions }) => ({
34-
name: library,
35-
versions: versions.map((v) => ({
36-
version: v.version,
37-
indexed: v.indexed,
38-
})),
33+
34+
const libraries = rawLibraries.map(({ library, versions }) => ({
35+
name: library,
36+
versions: versions.map((v) => ({
37+
version: v.version,
38+
indexed: v.indexed,
3939
})),
40-
};
40+
}));
41+
42+
return { libraries };
4143
};
4244

4345
export const findVersion = async (

src/tools/scrape.ts

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type {
66
ProgressResponse,
77
} from "../types/index.js";
88
import { Document } from "@langchain/core/documents";
9+
import { logger } from "../utils/logger";
910

1011
export interface ScrapeOptions {
1112
url: string;
@@ -35,6 +36,10 @@ export const scrape = async (options: ScrapeOptions): Promise<ScrapeResult> => {
3536
subpagesOnly,
3637
} = options;
3738

39+
logger.info(
40+
`🕷️ Starting documentation scrape for ${library}@${version} from ${url}`
41+
);
42+
3843
const scraper = new DocumentationScraperDispatcher({
3944
onProgress,
4045
});
@@ -48,31 +53,41 @@ export const scrape = async (options: ScrapeOptions): Promise<ScrapeResult> => {
4853
subpagesOnly,
4954
};
5055

51-
// Clear existing vector store data before scraping
52-
await store.clearStore(library, version);
56+
try {
57+
// Clear existing vector store data before scraping
58+
await store.clearStore(library, version);
5359

54-
const results = await scraper.scrape(config);
60+
const results = await scraper.scrape(config);
61+
let totalDocuments = 0;
5562

56-
let totalDocuments = 0;
63+
// Convert each result to a Document and add it to the store
64+
for (const result of results) {
65+
const doc = new Document({
66+
pageContent: result.content,
67+
metadata: {
68+
url: result.metadata.url,
69+
title: result.metadata.title,
70+
library,
71+
version,
72+
},
73+
});
5774

58-
// Convert each result to a Document and add it to the store
59-
for (const result of results) {
60-
const doc = new Document({
61-
pageContent: result.content,
62-
metadata: {
63-
url: result.metadata.url,
64-
title: result.metadata.title,
65-
library,
66-
version,
67-
},
68-
});
75+
await store.addDocument(library, version, doc);
76+
totalDocuments++;
77+
}
6978

70-
await store.addDocument(library, version, doc);
71-
totalDocuments++;
72-
}
79+
logger.info(
80+
`✨ Scraping completed: ${results.length} pages retrieved and indexed`
81+
);
7382

74-
return {
75-
pagesScraped: results.length,
76-
documentsIndexed: totalDocuments,
77-
};
83+
return {
84+
pagesScraped: results.length,
85+
documentsIndexed: totalDocuments,
86+
};
87+
} catch (error) {
88+
logger.error(
89+
`❌ Scraping failed: ${error instanceof Error ? error.message : "Unknown error"}`
90+
);
91+
throw error;
92+
}
7893
};

src/tools/search.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type { VectorStoreManager } from "../store/index.js";
22
import type { SearchResult } from "../types/index.js";
3+
import { logger } from "../utils/logger";
34

45
export interface SearchOptions {
56
library: string;
@@ -18,9 +19,16 @@ export const search = async (
1819
): Promise<SearchToolResult> => {
1920
const { library, version, query, limit, store } = options;
2021

21-
const results = await store.search(library, version, query, limit);
22+
logger.info(`🔍 Searching ${library}@${version} for: ${query}`);
2223

23-
return {
24-
results,
25-
};
24+
try {
25+
const results = await store.search(library, version, query, limit);
26+
logger.info(`✅ Found ${results.length} matching results`);
27+
return { results };
28+
} catch (error) {
29+
logger.error(
30+
`❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
31+
);
32+
throw error;
33+
}
2634
};

src/utils/logger.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
export const logger = {
2+
info: (message: string) => console.log(message),
3+
warn: (message: string) => console.warn(message),
4+
error: (message: string) => console.error(message),
5+
};

0 commit comments

Comments (0)