Skip to content

Commit 4e04aa7

Browse files
committed
feat: improve search capabilities with PostgreSQL integration
Add SQL functions for efficient document search. Use PostgreSQL's built-in text search instead of BM25. Update store interfaces and service implementations. Add indexes for better query performance.
1 parent 14b02bd commit 4e04aa7

File tree

8 files changed

+1529
-1472
lines changed

8 files changed

+1529
-1472
lines changed

init-db.sql

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,100 @@
1-
-- Install the extension
1+
-- Install required extensions
CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;

-- Connect to our database
\c docs_mcp;

-- Extensions are installed per database, so make sure the vector extension is
-- also available in docs_mcp, where the documents table and its vector column
-- are created. IF NOT EXISTS makes this a no-op when it is already installed.
CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
66

7-
-- Create necessary tables for LangChain
7+
-- Create necessary tables for LangChain with improved search capabilities
CREATE TABLE IF NOT EXISTS documents (
    id             uuid         PRIMARY KEY DEFAULT gen_random_uuid(),  -- generated when not supplied
    library        VARCHAR(255) NOT NULL,                               -- filter key used by search/delete
    version        VARCHAR(100) NOT NULL,                               -- filter key used by search/delete
    content        text,                                                -- raw document text
    metadata       jsonb,                                               -- free-form document metadata
    embedding      vector(1536),                                        -- 1536-dimensional embedding
    content_search tsvector                                             -- full-text search vector
);

-- Create indexes for improved search performance

-- Equality filters on library and version
CREATE INDEX IF NOT EXISTS idx_documents_library
    ON documents (library);

CREATE INDEX IF NOT EXISTS idx_documents_version
    ON documents (version);

-- GIN index backing full-text matches against content_search
CREATE INDEX IF NOT EXISTS idx_documents_content_search
    ON documents USING GIN (content_search);
22+
23+
-- add_document: insert a single row into documents and return its id.
--
-- Parameters:
--   p_library    value stored in documents.library
--   p_version    value stored in documents.version
--   p_content    document text; also used to build the full-text search vector
--   p_metadata   JSONB stored as-is in documents.metadata
--   p_embedding  1536-dimensional vector stored in documents.embedding
--
-- Returns: the id of the newly inserted row.
CREATE OR REPLACE FUNCTION add_document(
    p_library VARCHAR(255),
    p_version VARCHAR(100),
    p_content TEXT,
    p_metadata JSONB,
    p_embedding vector(1536)
) RETURNS UUID AS $$
DECLARE
    new_doc_id    UUID;
    search_vector tsvector;
BEGIN
    -- Derive the full-text vector from the content using the 'english' configuration.
    search_vector := to_tsvector('english', p_content);

    INSERT INTO documents (
        library,
        version,
        content,
        metadata,
        embedding,
        content_search
    )
    VALUES (
        p_library,
        p_version,
        p_content,
        p_metadata,
        p_embedding,
        search_vector
    )
    RETURNING id INTO new_doc_id;

    RETURN new_doc_id;
END;
$$ LANGUAGE plpgsql;
41+
42+
-- search_documents: search documents by keyword or by vector similarity.
--
-- Parameters (all optional):
--   p_library    when not NULL, only rows with this library are considered
--   p_version    when not NULL, only rows with this version are considered
--   p_query      text passed to to_tsquery('english', ...); used for filtering
--                and ranking only when p_embedding is NULL
--   p_embedding  when not NULL, rows are ranked by 1 - (embedding <=> p_embedding)
--                and the p_query text filter is not applied
--   p_limit      maximum number of rows returned (default 5)
--
-- Returns: id, library, version, content, metadata and a similarity score,
-- best score first. Rows whose score is NULL (for example a NULL embedding, or
-- when neither p_query nor p_embedding is given) are sorted after scored rows.
--
-- NOTE(review): to_tsquery expects tsquery syntax (e.g. 'foo & bar') and raises
-- an error on free-form text; callers passing raw user input may want
-- plainto_tsquery / websearch_to_tsquery instead — confirm against callers.
CREATE OR REPLACE FUNCTION search_documents(
    p_library VARCHAR(255) DEFAULT NULL,
    p_version VARCHAR(100) DEFAULT NULL,
    p_query TEXT DEFAULT NULL,
    p_embedding vector(1536) DEFAULT NULL,
    p_limit INT DEFAULT 5
) RETURNS TABLE (
    id UUID,
    library VARCHAR(255),
    version VARCHAR(100),
    content TEXT,
    metadata JSONB,
    similarity FLOAT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        d.id,
        d.library,
        d.version,
        d.content,
        d.metadata,
        CASE
            WHEN p_embedding IS NOT NULL THEN 1 - (d.embedding <=> p_embedding)
            ELSE ts_rank(d.content_search, to_tsquery('english', p_query))
        END AS similarity
    FROM documents d
    WHERE
        (p_library IS NULL OR d.library = p_library)
        AND (p_version IS NULL OR d.version = p_version)
        AND (
            (p_query IS NULL OR d.content_search @@ to_tsquery('english', p_query))
            OR (p_embedding IS NOT NULL)
        )
    -- DESC sorts NULLs first by default in PostgreSQL; without NULLS LAST, rows
    -- with no computable score would outrank every real match and fill the LIMIT.
    ORDER BY similarity DESC NULLS LAST
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
81+
82+
-- delete_documents: remove documents for a library, optionally for one version.
--
-- Parameters:
--   p_library  library whose rows are deleted
--   p_version  when not NULL, only rows with this version are deleted;
--              when NULL (the default), every version of the library is deleted
--
-- Returns: the number of rows deleted.
CREATE OR REPLACE FUNCTION delete_documents(
    p_library VARCHAR(255),
    p_version VARCHAR(100) DEFAULT NULL
) RETURNS INTEGER AS $$
DECLARE
    removed_rows INTEGER;
BEGIN
    DELETE FROM documents AS d
    WHERE d.library = p_library
      AND (p_version IS NULL OR d.version = p_version);

    -- ROW_COUNT reports how many rows the DELETE above removed.
    GET DIAGNOSTICS removed_rows = ROW_COUNT;

    RETURN removed_rows;
END;
$$ LANGUAGE plpgsql;

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"scripts": {
1111
"build": "tsup",
1212
"cli": "node dist/cli.js",
13-
"dev:cli": "run-p \"build -- --watch\" \"cli\"",
13+
"dev:cli": "npm run build && node dist/cli.js",
1414
"server": "node --watch dist/server.js",
1515
"dev:server": "run-p \"build -- --watch\" \"server\"",
1616
"test": "vitest",

0 commit comments

Comments
 (0)