Skip to content

Commit 6a8a8dd

Browse files
committed
refactor: keep embedding n_seq_max internal
1 parent e5122c5 commit 6a8a8dd

1 file changed

Lines changed: 1 addition & 15 deletions

File tree

llama_cpp/llama.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ def __init__(
7575
n_ctx: int = 512,
7676
n_batch: int = 512,
7777
n_ubatch: int = 512,
78-
n_seq_max: Optional[int] = None,
7978
n_threads: Optional[int] = None,
8079
n_threads_batch: Optional[int] = None,
8180
rope_scaling_type: Optional[
@@ -161,9 +160,6 @@ def __init__(
161160
n_ctx: Text context, 0 = from model
162161
n_batch: Prompt processing maximum batch size
163162
n_ubatch: Physical batch size
164-
n_seq_max: Maximum number of sequences. If None, embedding contexts
165-
use min(n_batch, llama_max_parallel_sequences()) and
166-
non-embedding contexts use the llama.cpp default.
167163
n_threads: Number of threads to use for generation
168164
n_threads_batch: Number of threads to use for batch processing
169165
rope_scaling_type: RoPE scaling type, from `enum llama_rope_scaling_type`. ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -401,16 +397,7 @@ def __init__(
401397
self.context_params.n_batch = self.n_batch
402398
self.context_params.n_ubatch = min(self.n_batch, n_ubatch)
403399

404-
if n_seq_max is not None:
405-
n_seq_max_limit = llama_cpp.llama_max_parallel_sequences()
406-
if n_seq_max <= 0:
407-
raise ValueError("n_seq_max must be greater than 0")
408-
if n_seq_max > n_seq_max_limit:
409-
raise ValueError(
410-
f"n_seq_max must be less than or equal to {n_seq_max_limit}"
411-
)
412-
self.context_params.n_seq_max = n_seq_max
413-
elif embedding:
400+
if embedding:
414401
self.context_params.n_seq_max = min(
415402
self.n_batch,
416403
llama_cpp.llama_max_parallel_sequences(),
@@ -2119,7 +2106,6 @@ def __getstate__(self):
21192106
n_ctx=self.context_params.n_ctx,
21202107
n_batch=self.n_batch,
21212108
n_ubatch=self.context_params.n_ubatch,
2122-
n_seq_max=self.context_params.n_seq_max,
21232109
n_threads=self.context_params.n_threads,
21242110
n_threads_batch=self.context_params.n_threads_batch,
21252111
rope_scaling_type=self.context_params.rope_scaling_type,

0 commit comments

Comments
 (0)