Skip to content

Commit 1959e03

Browse files
authored
Merge branch 'Maximilian-Winter:master' into fix-gbnf-generation-trailing-bracket
2 parents 7c8a3a7 + 226e194 commit 1959e03

File tree

4 files changed

+64
-8
lines changed

4 files changed

+64
-8
lines changed

src/llama_cpp_agent/messages_formatter.py

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ class MessagesFormatterType(Enum):
2828
AUTOCODER = 15
2929
GEMMA_2 = 16
3030
DEEP_SEEK_CODER_2 = 17
31+
PHI_4 = 18
32+
DEEPSEEK_R1_DISTILL_QWEN = 19
33+
MISTRAL_SMALL_3 = 20
3134

3235
@dataclass
3336
class PromptMarkers:
@@ -137,6 +140,14 @@ def _format_response(
137140
Roles.tool: PromptMarkers("", ""),
138141
}
139142

143+
mistral_small3_prompt_markers = {
144+
Roles.system: PromptMarkers("""[SYSTEM_PROMPT]""", """[/SYSTEM_PROMPT]"""),
145+
Roles.user: PromptMarkers("""[INST]""", """ [/INST]"""),
146+
Roles.assistant: PromptMarkers("""""", """"""),
147+
Roles.tool: PromptMarkers("", ""),
148+
}
149+
150+
140151
chatml_prompt_markers = {
141152
Roles.system: PromptMarkers("""<|im_start|>system\n""", """<|im_end|>\n"""),
142153
Roles.user: PromptMarkers("""<|im_start|>user\n""", """<|im_end|>\n"""),
@@ -161,7 +172,7 @@ def _format_response(
161172
llama_3_prompt_markers = {
162173
Roles.system: PromptMarkers("""<|start_header_id|>system<|end_header_id|>\n""", """<|eot_id|>"""),
163174
Roles.user: PromptMarkers("""<|start_header_id|>user<|end_header_id|>\n""", """<|eot_id|>"""),
164-
Roles.assistant: PromptMarkers("""<|start_header_id|>assistant<|end_header_id|>\n\n""", """<|eot_id|>"""),
175+
Roles.assistant: PromptMarkers("""<|start_header_id|>assistant<|end_header_id|>\n""", """<|eot_id|>"""),
165176
Roles.tool: PromptMarkers("""<|start_header_id|>function_calling_results<|end_header_id|>\n""", """<|eot_id|>"""),
166177
}
167178

@@ -181,7 +192,7 @@ def _format_response(
181192
gemma_2_prompt_markers = {
182193
Roles.system: PromptMarkers("""""", """\n\n"""),
183194
Roles.user: PromptMarkers("""<start_of_turn>user\n""", """<end_of_turn>\n"""),
184-
Roles.assistant: PromptMarkers("""<start_of_turn>model\n\n""", """<end_of_turn>\n"""),
195+
Roles.assistant: PromptMarkers("""<start_of_turn>model\n""", """<end_of_turn>\n"""),
185196
Roles.tool: PromptMarkers("", ""),
186197
}
187198
code_ds_prompt_markers = {
@@ -243,6 +254,18 @@ def _format_response(
243254
Roles.assistant: PromptMarkers("""Assistant: """, """<|end▁of▁sentence|>"""),
244255
Roles.tool: PromptMarkers("", ""),
245256
}
257+
phi_4_chat_prompt_markers = {
258+
Roles.system: PromptMarkers("""<|im_start|>system<|im_sep|>\n""", """<|im_end|>"""),
259+
Roles.user: PromptMarkers("""<|im_start|>user<|im_sep|>\n""", """<|im_end|>\n"""),
260+
Roles.assistant: PromptMarkers("""<|im_start|>assistant<|im_sep|>""", """<|im_end|>\n"""),
261+
Roles.tool: PromptMarkers("", ""),
262+
}
263+
deepseek_r1_distill_qwen_chat_prompt_markers = {
264+
Roles.system: PromptMarkers("""<|begin▁of▁sentence|>""", ""),
265+
Roles.user: PromptMarkers("""<|User|>""", ""),
266+
Roles.assistant: PromptMarkers("""<|Assistant|>""", ""),
267+
Roles.tool: PromptMarkers("", ""),
268+
}
246269

247270
"""
248271
### Instruction:
@@ -253,6 +276,15 @@ def _format_response(
253276
mixtral_prompt_markers,
254277
True,
255278
["</s>"],
279+
strip_prompt=False, #added
280+
)
281+
282+
mistral_small3_formatter = MessagesFormatter(
283+
"",
284+
mistral_small3_prompt_markers,
285+
True,
286+
["</s>"],
287+
strip_prompt=False, #added
256288
)
257289

258290
chatml_formatter = MessagesFormatter(
@@ -284,7 +316,7 @@ def _format_response(
284316
False,
285317
["assistant", "<|eot_id|>"],
286318
use_user_role_for_function_call_result=False,
287-
strip_prompt=True,
319+
strip_prompt=False,
288320
)
289321

290322
synthia_formatter = MessagesFormatter(
@@ -348,6 +380,14 @@ def _format_response(
348380
use_user_role_for_function_call_result=True,
349381
)
350382

383+
phi_4_chat_formatter = MessagesFormatter(
384+
"",
385+
phi_4_chat_prompt_markers,
386+
True,
387+
["<|im_end|>", "<|endoftext|>"],
388+
use_user_role_for_function_call_result=True,
389+
)
390+
351391
open_interpreter_chat_formatter = MessagesFormatter(
352392
"You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n",
353393
open_interpreter_chat_prompt_markers,
@@ -369,7 +409,8 @@ def _format_response(
369409
"",
370410
gemma_2_prompt_markers,
371411
True,
372-
["<end_of_turn>", "<start_of_turn>"]
412+
["<end_of_turn>", "<start_of_turn>"],
413+
strip_prompt=False, #added
373414
)
374415

375416
deep_seek_coder_2_chat_formatter = MessagesFormatter(
@@ -380,6 +421,14 @@ def _format_response(
380421
bos_token="<|begin▁of▁sentence|>",
381422
eos_token="<|end▁of▁sentence|>",
382423
)
424+
deepseek_r1_distill_qwen_chat_formatter = MessagesFormatter(
425+
"",
426+
deepseek_r1_distill_qwen_chat_prompt_markers,
427+
True,
428+
["<|end▁of▁sentence|>"],
429+
bos_token="<|begin▁of▁sentence|>",
430+
eos_token="<|end▁of▁sentence|>",
431+
)
383432

384433
predefined_formatter = {
385434
MessagesFormatterType.MISTRAL: mixtral_formatter,
@@ -398,7 +447,10 @@ def _format_response(
398447
MessagesFormatterType.OPEN_INTERPRETER: open_interpreter_chat_formatter,
399448
MessagesFormatterType.AUTOCODER: autocoder_chat_formatter,
400449
MessagesFormatterType.GEMMA_2: gemma_2_chat_formatter,
401-
MessagesFormatterType.DEEP_SEEK_CODER_2: deep_seek_coder_2_chat_formatter
450+
MessagesFormatterType.DEEP_SEEK_CODER_2: deep_seek_coder_2_chat_formatter,
451+
MessagesFormatterType.PHI_4: phi_4_chat_formatter,
452+
MessagesFormatterType.DEEPSEEK_R1_DISTILL_QWEN: deepseek_r1_distill_qwen_chat_formatter,
453+
MessagesFormatterType.MISTRAL_SMALL_3: mistral_small3_formatter,
402454
}
403455

404456

src/llama_cpp_agent/providers/llama_cpp_python.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def create_completion(
151151
if grammar in self.grammar_cache:
152152
grammar = self.grammar_cache[grammar]
153153
else:
154-
self.grammar_cache[grammar] = LlamaGrammar.from_string(grammar)
154+
self.grammar_cache[grammar] = LlamaGrammar.from_string(grammar, verbose=self.llama_model.verbose)
155155
grammar = self.grammar_cache[grammar]
156156

157157
settings_dictionary = deepcopy(settings.as_dict())
@@ -179,7 +179,7 @@ def create_chat_completion(
179179
if grammar in self.grammar_cache:
180180
grammar = self.grammar_cache[grammar]
181181
else:
182-
self.grammar_cache[grammar] = LlamaGrammar.from_string(grammar)
182+
self.grammar_cache[grammar] = LlamaGrammar.from_string(grammar, verbose=self.llama_model.verbose)
183183
grammar = self.grammar_cache[grammar]
184184
settings_dictionary = deepcopy(settings.as_dict())
185185
settings_dictionary["max_tokens"] = settings_dictionary.pop("n_predict")

src/llama_cpp_agent/providers/llama_cpp_server.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def create_completion(
200200
)
201201
data = response.json()
202202

203-
returned_data = {"choices": [{"text": data["content"]}]}
203+
returned_data = data # This follows the same structure used by agent
204204
return returned_data
205205

206206
def create_chat_completion(
@@ -309,7 +309,11 @@ def prepare_generation_settings(
309309
if not self.llama_cpp_python_server:
310310
settings_dictionary["mirostat"] = settings_dictionary.pop("mirostat_mode")
311311
if self.llama_cpp_python_server:
312+
# Max tokens shouldn't be -1
312313
settings_dictionary["max_tokens"] = settings_dictionary.pop("n_predict")
314+
if settings_dictionary["max_tokens"] == -1:
315+
settings_dictionary["max_tokens"] = 8192 # A good value for non-limited responses
316+
# But tests should be done in case this value stops structured output generation
313317

314318
settings_dictionary["stop"] = settings_dictionary.pop(
315319
"additional_stop_sequences"

src/llama_cpp_agent/py.typed

Whitespace-only changes.

0 commit comments

Comments
 (0)