Skip to content

Commit 1d28da5

Browse files
committed
fix(voice-call): block Twilio webhook replay and stale transitions
1 parent 4663d68 commit 1d28da5

18 files changed

Lines changed: 513 additions & 40 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
1010

1111
### Fixes
1212

13+
- Security/Voice Call: harden Twilio webhook replay handling by preserving provider event IDs through normalization, adding bounded replay dedupe, and enforcing per-call turn-token matching for call-state transitions. This ships in the next npm release. Thanks @jiseoung for reporting.
1314
- Security/Commands: enforce sender-only matching for `commands.allowFrom` by blocking conversation-shaped `From` identities (`channel:`, `group:`, `thread:`, `@g.us`) while preserving direct-message fallback when sender fields are missing. Ships in the next npm release. Thanks @jiseoung.
1415
- Config/Kilo Gateway: Kilo provider flow now surfaces an updated list of models. (#24921) Thanks @gumadeiras.
1516
- Security/Sandbox: enforce `tools.exec.applyPatch.workspaceOnly` and `tools.fs.workspaceOnly` for `apply_patch` in sandbox-mounted paths so writes/deletes cannot escape the workspace boundary via mounts like `/agent` unless explicitly opted out (`tools.exec.applyPatch.workspaceOnly=false`). This ships in the next npm release. Thanks @tdjackey for reporting.

docs/plugins/voice-call.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,12 @@ headers are trusted.
177177
`webhookSecurity.trustedProxyIPs` only trusts forwarded headers when the request
178178
remote IP matches the list.
179179

180+
Webhook replay protection is enabled for Twilio and Plivo. Replayed valid webhook
181+
requests are acknowledged, but their side effects are not applied a second time.
182+
183+
Twilio conversation turns include a per-turn token in `<Gather>` callbacks, so
184+
stale/replayed speech callbacks cannot satisfy a newer pending transcript turn.
185+
180186
Example with a stable public host:
181187

182188
```json5

extensions/voice-call/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,5 +175,7 @@ Actions:
175175
## Notes
176176

177177
- Uses webhook signature verification for Twilio/Telnyx/Plivo.
178+
- Adds replay protection for Twilio and Plivo webhooks (valid duplicate callbacks are ignored safely).
179+
- Twilio speech turns include a per-turn token so stale/replayed callbacks cannot complete a newer turn.
178180
- `responseModel` / `responseSystemPrompt` control AI auto-responses.
179181
- Media streaming requires `ws` and OpenAI Realtime API key.

extensions/voice-call/src/manager.test.ts

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,16 @@ import type {
1717
} from "./types.js";
1818

1919
class FakeProvider implements VoiceCallProvider {
20-
readonly name = "plivo" as const;
20+
readonly name: "plivo" | "twilio";
2121
readonly playTtsCalls: PlayTtsInput[] = [];
2222
readonly hangupCalls: HangupCallInput[] = [];
2323
readonly startListeningCalls: StartListeningInput[] = [];
2424
readonly stopListeningCalls: StopListeningInput[] = [];
2525

26+
constructor(name: "plivo" | "twilio" = "plivo") {
27+
this.name = name;
28+
}
29+
2630
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
2731
return { ok: true };
2832
}
@@ -319,6 +323,61 @@ describe("CallManager", () => {
319323
expect(provider.stopListeningCalls).toHaveLength(1);
320324
});
321325

326+
it("ignores speech events with mismatched turnToken while waiting for transcript", async () => {
327+
const { manager, provider } = createManagerHarness(
328+
{
329+
transcriptTimeoutMs: 5000,
330+
},
331+
new FakeProvider("twilio"),
332+
);
333+
334+
const started = await manager.initiateCall("+15550000004");
335+
expect(started.success).toBe(true);
336+
337+
markCallAnswered(manager, started.callId, "evt-turn-token-answered");
338+
339+
const turnPromise = manager.continueCall(started.callId, "Prompt");
340+
await new Promise((resolve) => setTimeout(resolve, 0));
341+
342+
const expectedTurnToken = provider.startListeningCalls[0]?.turnToken;
343+
expect(typeof expectedTurnToken).toBe("string");
344+
345+
manager.processEvent({
346+
id: "evt-turn-token-bad",
347+
type: "call.speech",
348+
callId: started.callId,
349+
providerCallId: "request-uuid",
350+
timestamp: Date.now(),
351+
transcript: "stale replay",
352+
isFinal: true,
353+
turnToken: "wrong-token",
354+
});
355+
356+
const pendingState = await Promise.race([
357+
turnPromise.then(() => "resolved"),
358+
new Promise<"pending">((resolve) => setTimeout(() => resolve("pending"), 0)),
359+
]);
360+
expect(pendingState).toBe("pending");
361+
362+
manager.processEvent({
363+
id: "evt-turn-token-good",
364+
type: "call.speech",
365+
callId: started.callId,
366+
providerCallId: "request-uuid",
367+
timestamp: Date.now(),
368+
transcript: "final answer",
369+
isFinal: true,
370+
turnToken: expectedTurnToken,
371+
});
372+
373+
const turnResult = await turnPromise;
374+
expect(turnResult.success).toBe(true);
375+
expect(turnResult.transcript).toBe("final answer");
376+
377+
const call = manager.getCall(started.callId);
378+
expect(call?.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
379+
});
380+
322381
it("tracks latency metadata across multiple closed-loop turns", async () => {
323382
const { manager, provider } = createManagerHarness({
324383
transcriptTimeoutMs: 5000,

extensions/voice-call/src/manager/context.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export type TranscriptWaiter = {
66
resolve: (text: string) => void;
77
reject: (err: Error) => void;
88
timeout: NodeJS.Timeout;
9+
turnToken?: string;
910
};
1011

1112
export type CallManagerRuntimeState = {

extensions/voice-call/src/manager/events.test.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,49 @@ describe("processEvent (functional)", () => {
234234
expect(() => processEvent(ctx, event)).not.toThrow();
235235
expect(ctx.activeCalls.size).toBe(0);
236236
});
237+
238+
it("deduplicates by dedupeKey even when event IDs differ", () => {
239+
const now = Date.now();
240+
const ctx = createContext();
241+
ctx.activeCalls.set("call-dedupe", {
242+
callId: "call-dedupe",
243+
providerCallId: "provider-dedupe",
244+
provider: "plivo",
245+
direction: "outbound",
246+
state: "answered",
247+
from: "+15550000000",
248+
to: "+15550000001",
249+
startedAt: now,
250+
transcript: [],
251+
processedEventIds: [],
252+
metadata: {},
253+
});
254+
ctx.providerCallIdMap.set("provider-dedupe", "call-dedupe");
255+
256+
processEvent(ctx, {
257+
id: "evt-1",
258+
dedupeKey: "stable-key-1",
259+
type: "call.speech",
260+
callId: "call-dedupe",
261+
providerCallId: "provider-dedupe",
262+
timestamp: now + 1,
263+
transcript: "hello",
264+
isFinal: true,
265+
});
266+
267+
processEvent(ctx, {
268+
id: "evt-2",
269+
dedupeKey: "stable-key-1",
270+
type: "call.speech",
271+
callId: "call-dedupe",
272+
providerCallId: "provider-dedupe",
273+
timestamp: now + 2,
274+
transcript: "hello",
275+
isFinal: true,
276+
});
277+
278+
const call = ctx.activeCalls.get("call-dedupe");
279+
expect(call?.transcript).toHaveLength(1);
280+
expect(Array.from(ctx.processedEventIds)).toEqual(["stable-key-1"]);
281+
});
237282
});

extensions/voice-call/src/manager/events.ts

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,11 @@ function createInboundCall(params: {
9292
}
9393

9494
export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
95-
if (ctx.processedEventIds.has(event.id)) {
95+
const dedupeKey = event.dedupeKey || event.id;
96+
if (ctx.processedEventIds.has(dedupeKey)) {
9697
return;
9798
}
98-
ctx.processedEventIds.add(event.id);
99+
ctx.processedEventIds.add(dedupeKey);
99100

100101
let call = findCall({
101102
activeCalls: ctx.activeCalls,
@@ -158,7 +159,7 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
158159
}
159160
}
160161

161-
call.processedEventIds.push(event.id);
162+
call.processedEventIds.push(dedupeKey);
162163

163164
switch (event.type) {
164165
case "call.initiated":
@@ -192,8 +193,20 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
192193

193194
case "call.speech":
194195
if (event.isFinal) {
196+
const hadWaiter = ctx.transcriptWaiters.has(call.callId);
197+
const resolved = resolveTranscriptWaiter(
198+
ctx,
199+
call.callId,
200+
event.transcript,
201+
event.turnToken,
202+
);
203+
if (hadWaiter && !resolved) {
204+
console.warn(
205+
`[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`,
206+
);
207+
break;
208+
}
195209
addTranscriptEntry(call, "user", event.transcript);
196-
resolveTranscriptWaiter(ctx, call.callId, event.transcript);
197210
}
198211
transitionState(call, "listening");
199212
break;

extensions/voice-call/src/manager/outbound.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ export async function continueCall(
291291
ctx.activeTurnCalls.add(callId);
292292

293293
const turnStartedAt = Date.now();
294+
const turnToken = provider.name === "twilio" ? crypto.randomUUID() : undefined;
294295

295296
try {
296297
await speak(ctx, callId, prompt);
@@ -299,9 +300,9 @@ export async function continueCall(
299300
persistCallRecord(ctx.storePath, call);
300301

301302
const listenStartedAt = Date.now();
302-
await provider.startListening({ callId, providerCallId });
303+
await provider.startListening({ callId, providerCallId, turnToken });
303304

304-
const transcript = await waitForFinalTranscript(ctx, callId);
305+
const transcript = await waitForFinalTranscript(ctx, callId, turnToken);
305306
const transcriptReceivedAt = Date.now();
306307

307308
// Best-effort: stop listening after final transcript.

extensions/voice-call/src/manager/timers.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,25 @@ export function resolveTranscriptWaiter(
7777
ctx: TranscriptWaiterContext,
7878
callId: CallId,
7979
transcript: string,
80-
): void {
80+
turnToken?: string,
81+
): boolean {
8182
const waiter = ctx.transcriptWaiters.get(callId);
8283
if (!waiter) {
83-
return;
84+
return false;
85+
}
86+
if (waiter.turnToken && waiter.turnToken !== turnToken) {
87+
return false;
8488
}
8589
clearTranscriptWaiter(ctx, callId);
8690
waiter.resolve(transcript);
91+
return true;
8792
}
8893

89-
export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promise<string> {
94+
export function waitForFinalTranscript(
95+
ctx: TimerContext,
96+
callId: CallId,
97+
turnToken?: string,
98+
): Promise<string> {
9099
if (ctx.transcriptWaiters.has(callId)) {
91100
return Promise.reject(new Error("Already waiting for transcript"));
92101
}
@@ -98,6 +107,6 @@ export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promi
98107
reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
99108
}, timeoutMs);
100109

101-
ctx.transcriptWaiters.set(callId, { resolve, reject, timeout });
110+
ctx.transcriptWaiters.set(callId, { resolve, reject, timeout, turnToken });
102111
});
103112
}

extensions/voice-call/src/providers/plivo.ts

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,29 @@ export interface PlivoProviderOptions {
3030
type PendingSpeak = { text: string; locale?: string };
3131
type PendingListen = { language?: string };
3232

33+
/**
 * Reads a single header value by name, matching the name case-insensitively.
 *
 * The lower-cased name is tried as an exact key first. If that key is absent
 * (or its value is `undefined`), the remaining keys are compared
 * case-insensitively so that header maps whose keys were not lower-cased
 * beforehand still resolve.
 *
 * @param headers - Header map; a value may be a string, a list of strings, or `undefined`.
 * @param name - Header name to look up (any casing).
 * @returns The header value, the first element when the value is a list, or
 *   `undefined` when the header is missing or the list is empty.
 */
function getHeader(
  headers: Record<string, string | string[] | undefined>,
  name: string,
): string | undefined {
  const wanted = name.toLowerCase();

  // Fast path: the key is already stored lower-cased.
  let value = headers[wanted];

  if (value === undefined) {
    // Fallback: tolerate keys stored with their original casing.
    for (const [key, candidate] of Object.entries(headers)) {
      if (candidate !== undefined && key.toLowerCase() === wanted) {
        value = candidate;
        break;
      }
    }
  }

  return Array.isArray(value) ? value[0] : value;
}
43+
44+
function createPlivoRequestDedupeKey(ctx: WebhookContext): string {
45+
const nonceV3 = getHeader(ctx.headers, "x-plivo-signature-v3-nonce");
46+
if (nonceV3) {
47+
return `plivo:v3:${nonceV3}`;
48+
}
49+
const nonceV2 = getHeader(ctx.headers, "x-plivo-signature-v2-nonce");
50+
if (nonceV2) {
51+
return `plivo:v2:${nonceV2}`;
52+
}
53+
return `plivo:fallback:${crypto.createHash("sha256").update(ctx.rawBody).digest("hex")}`;
54+
}
55+
3356
export class PlivoProvider implements VoiceCallProvider {
3457
readonly name = "plivo" as const;
3558

@@ -104,7 +127,7 @@ export class PlivoProvider implements VoiceCallProvider {
104127
console.warn(`[plivo] Webhook verification failed: ${result.reason}`);
105128
}
106129

107-
return { ok: result.ok, reason: result.reason };
130+
return { ok: result.ok, reason: result.reason, isReplay: result.isReplay };
108131
}
109132

110133
parseWebhookEvent(ctx: WebhookContext): ProviderWebhookParseResult {
@@ -173,7 +196,8 @@ export class PlivoProvider implements VoiceCallProvider {
173196

174197
// Normal events.
175198
const callIdFromQuery = this.getCallIdFromQuery(ctx);
176-
const event = this.normalizeEvent(parsed, callIdFromQuery);
199+
const dedupeKey = createPlivoRequestDedupeKey(ctx);
200+
const event = this.normalizeEvent(parsed, callIdFromQuery, dedupeKey);
177201

178202
return {
179203
events: event ? [event] : [],
@@ -186,7 +210,11 @@ export class PlivoProvider implements VoiceCallProvider {
186210
};
187211
}
188212

189-
private normalizeEvent(params: URLSearchParams, callIdOverride?: string): NormalizedEvent | null {
213+
private normalizeEvent(
214+
params: URLSearchParams,
215+
callIdOverride?: string,
216+
dedupeKey?: string,
217+
): NormalizedEvent | null {
190218
const callUuid = params.get("CallUUID") || "";
191219
const requestUuid = params.get("RequestUUID") || "";
192220

@@ -201,6 +229,7 @@ export class PlivoProvider implements VoiceCallProvider {
201229

202230
const baseEvent = {
203231
id: crypto.randomUUID(),
232+
dedupeKey,
204233
callId: callIdOverride || callUuid || requestUuid,
205234
providerCallId: callUuid || requestUuid || undefined,
206235
timestamp: Date.now(),

0 commit comments

Comments
 (0)