Skip to content

Commit aeb7a54

Browse files
authored
fix(proxy): inject stream_options.include_usage for Ollama endpoints (#1592)
Ollama and Ollama Cloud streaming responses returned 0 tokens because they require stream_options.include_usage to emit usage data in the final SSE chunk. Extend the existing injection (added for OpenAI and OpenRouter in #1567) to cover ollama and ollama-cloud endpoint keys. Closes #1585
1 parent 67f8852 commit aeb7a54

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"manifest": patch
3+
---
4+
5+
fix(proxy): capture streaming token usage for Ollama and Ollama Cloud providers

packages/backend/src/routing/proxy/__tests__/provider-client.spec.ts

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1414,6 +1414,34 @@ describe('ProviderClient', () => {
14141414
expect(sentBody.stream_options).toEqual({ include_usage: true });
14151415
});
14161416

1417+
it('injects stream_options.include_usage for Ollama streaming requests', async () => {
1418+
mockFetch.mockResolvedValue(new Response('{}', { status: 200 }));
1419+
await client.forward({
1420+
provider: 'ollama',
1421+
apiKey: '',
1422+
model: 'llama3',
1423+
body: { messages: [{ role: 'user', content: 'Hello' }] },
1424+
stream: true,
1425+
});
1426+
1427+
const sentBody = JSON.parse(mockFetch.mock.calls[0][1].body);
1428+
expect(sentBody.stream_options).toEqual({ include_usage: true });
1429+
});
1430+
1431+
it('injects stream_options.include_usage for Ollama Cloud streaming requests', async () => {
1432+
mockFetch.mockResolvedValue(new Response('{}', { status: 200 }));
1433+
await client.forward({
1434+
provider: 'ollama-cloud',
1435+
apiKey: 'ollama-key',
1436+
model: 'llama3',
1437+
body: { messages: [{ role: 'user', content: 'Hello' }] },
1438+
stream: true,
1439+
});
1440+
1441+
const sentBody = JSON.parse(mockFetch.mock.calls[0][1].body);
1442+
expect(sentBody.stream_options).toEqual({ include_usage: true });
1443+
});
1444+
14171445
it('does not inject stream_options for non-streaming OpenAI requests', async () => {
14181446
mockFetch.mockResolvedValue(new Response('{}', { status: 200 }));
14191447
await client.forward({

packages/backend/src/routing/proxy/provider-client.ts

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,13 @@ export class ProviderClient {
123123
// Inject stream_options.include_usage so providers always send token
124124
// usage in streaming responses — needed for both DB logging and
125125
// downstream clients (e.g. OpenClaw context management).
126-
if (stream && (endpointKey === 'openai' || endpointKey === 'openrouter')) {
126+
if (
127+
stream &&
128+
(endpointKey === 'openai' ||
129+
endpointKey === 'openrouter' ||
130+
endpointKey === 'ollama' ||
131+
endpointKey === 'ollama-cloud')
132+
) {
127133
const existing =
128134
typeof sanitized.stream_options === 'object' && sanitized.stream_options !== null
129135
? (sanitized.stream_options as Record<string, unknown>)

0 commit comments

Comments (0)