Skip to content

Commit 55d9ae2

Browse files
committed
fix: keep short txt and md chunks
1 parent e3d3f5b commit 55d9ae2

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

native/src/parser.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -682,7 +682,7 @@ fn chunk_by_lines(content: &str, language: &Language) -> Vec<CodeChunk> {
682682
let end = std::cmp::min(start + lines_per_chunk, total_lines);
683683
let sub_content: String = lines[start..end].join("\n");
684684

685-
if sub_content.len() >= MIN_CHUNK_SIZE {
685+
if !sub_content.trim().is_empty() {
686686
chunks.push(CodeChunk {
687687
content: sub_content,
688688
start_line: start as u32 + 1,

tests/native.test.ts

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,10 +66,22 @@ const add = (a, b) => a + b;
6666
expect(chunks.length).toBeGreaterThanOrEqual(1);
6767
});
6868

69-
it("should return empty array for unparseable content", () => {
69+
it("should chunk plain text files", () => {
7070
const chunks = parseFile("data.txt", "just plain text");
7171

7272
expect(chunks).toBeInstanceOf(Array);
73+
expect(chunks.length).toBeGreaterThanOrEqual(1);
74+
expect(chunks[0]?.content).toContain("just plain text");
75+
expect(chunks[0]?.chunkType).toBe("block");
76+
});
77+
78+
it("should chunk markdown files", () => {
79+
const content = "# Project KB\n\nProject knowledge base delta.";
80+
const chunks = parseFile("README.md", content);
81+
82+
expect(chunks.length).toBeGreaterThanOrEqual(1);
83+
expect(chunks[0]?.content).toContain("Project knowledge base delta");
84+
expect(chunks[0]?.chunkType).toBe("block");
7385
});
7486

7587
it("should parse PHP files", () => {

0 commit comments

Comments
 (0)