feat: send full conversation history with each request so the model has context

JoshuaPubNub · JoshuaPubNub · commit 8e8b16350aad · 2026-05-15T13:00:27.000+01:00
Previously every send was a single-turn POST: the agent only saw the
new user message, so it couldn't remember the rest of the chat.

Now the frontend bundles the active conversation's full message history
(filtered to user+assistant turns, dropping the thinking placeholder)
into a 'messages' array on the request payload. The handler receives
this and forwards it to vLLM as a multi-turn chat: the system prompt,
the prior turns, then the current user message (with any attached image
parts bound to that last user message).

The backend already supports this as of v1.1.0 via the new schema
(either 'prompt' or 'messages' is accepted).
diff --git a/src/main.js b/src/main.js
@@ -300,7 +300,10 @@ async function sendPrompt(promptText, images) {
   setBusy(true);
   try {
     const client = await getClient();
-    const payload = { prompt: promptText, max_tokens: state.maxTokens };
+    const history = c.messages
+      .filter((m) => (m.role === 'user' || m.role === 'assistant') && !m.thinking)
+      .map((m) => ({ role: m.role, content: m.content }));
+    const payload = { messages: history, max_tokens: state.maxTokens };
     const parts = [textPart(JSON.stringify(payload), 'request')];
     for (let i = 0; i < (images || []).length; i++) {
       const bin = atob(images[i]);