Skip to content

Commit eccee43

Browse files
authored
feat(web): wire live WebRTC voice overlay (#20)
1 parent 94036e1 commit eccee43

File tree

3 files changed

+154
-2
lines changed

3 files changed

+154
-2
lines changed

apps/web/components/VoiceOverlay.tsx

Lines changed: 77 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
"use client";
22

33
import { useEffect, useRef, useState, useCallback } from "react";
4-
import { X, Mic, MicOff, Waves } from "lucide-react";
4+
import { X, Mic, MicOff, Waves, Phone, PhoneOff } from "lucide-react";
55
import { cn } from "@/lib/utils";
66
import { getVoiceClient, type VoiceMode, type VoiceState } from "@/lib/voice";
77
import { getWSClient } from "@/lib/ws";
8+
import { getWebRTCVoiceSession, type WebRTCSessionState } from "@/lib/webrtc";
89

910
interface VoiceOverlayProps {
1011
open: boolean;
@@ -15,11 +16,15 @@ interface VoiceOverlayProps {
1516

1617
export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps) {
1718
const [voiceState, setVoiceState] = useState<VoiceState>("idle");
19+
const [rtcState, setRtcState] = useState<WebRTCSessionState>("idle");
1820
const [audioLevel, setAudioLevel] = useState(0);
1921
const [transcript, setTranscript] = useState("");
2022
const [voiceMode, setVoiceMode] = useState<VoiceMode>("push-to-talk");
2123
const [statusText, setStatusText] = useState("Tap the microphone to start");
2224
const voiceClientRef = useRef(getVoiceClient());
25+
const rtcSessionRef = useRef(getWebRTCVoiceSession());
26+
const remoteAudioRef = useRef<HTMLAudioElement | null>(null);
27+
const rtcTargetChannelId = process.env.NEXT_PUBLIC_VOICE_PEER_CHANNEL_ID;
2328

2429
// Subscribe to voice client events
2530
useEffect(() => {
@@ -64,6 +69,37 @@ export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps)
6469
};
6570
}, [open, onTranscript, voiceMode]);
6671

72+
// Bridge the shared WebRTC voice session into component state. Runs only
// while the overlay is open and a peer channel id is configured; the cleanup
// removes both subscriptions and detaches any remote stream this effect
// attached to the hidden <audio> element.
useEffect(() => {
  if (!open || !rtcTargetChannelId) return;

  const rtc = rtcSessionRef.current;
  // Subscribe the session to incoming WebSocket rtc messages.
  rtc.listen();

  // Tracks the element this effect attached a stream to, so the cleanup can
  // release it without reading the ref again at teardown time.
  let attachedAudio: HTMLAudioElement | null = null;

  const unsubState = rtc.onStateChange((state) => {
    setRtcState(state);

    switch (state) {
      case "requesting-media":
      case "negotiating":
        setStatusText("Connecting live call...");
        break;
      case "connected":
        setStatusText("Live call active");
        break;
      case "ended":
        setStatusText("Tap the microphone to start");
        break;
      case "error":
        // Keep the status line in sync if the session itself reports a
        // failure, instead of leaving a stale "Live call active" message.
        setStatusText("Live call failed. Try again.");
        break;
      default:
        // Other states (e.g. "idle") leave the current status text alone.
        break;
    }
  });

  const unsubRemote = rtc.onRemoteStream((stream) => {
    const audio = remoteAudioRef.current;
    if (!audio) return;
    audio.srcObject = stream;
    attachedAudio = audio;
    // Best-effort playback: a rejected play() is ignored on purpose, the
    // element is also rendered with autoPlay.
    void audio.play().catch(() => {});
  });

  return () => {
    unsubState();
    unsubRemote();
    // Drop the element's reference to the remote stream so it stops
    // rendering audio once nobody is listening for stream updates.
    if (attachedAudio) {
      attachedAudio.srcObject = null;
    }
  };
}, [open, rtcTargetChannelId]);
102+
67103
// Listen for voice.transcript and voice.audio.response messages from WS
68104
useEffect(() => {
69105
if (!open) return;
@@ -104,9 +140,11 @@ export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps)
104140
if (vc.isRecording) {
105141
vc.stopRecording();
106142
}
143+
rtcSessionRef.current.endCall(false);
107144
setTranscript("");
108145
setAudioLevel(0);
109146
setVoiceState("idle");
147+
setRtcState("idle");
110148
}
111149
}, [open]);
112150

@@ -125,9 +163,26 @@ export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps)
125163
if (vc.isRecording) {
126164
vc.stopRecording();
127165
}
166+
rtcSessionRef.current.endCall(false);
128167
onClose();
129168
}, [onClose]);
130169

170+
// Toggle handler for the live-call button: ends the call when one is active
// or still being set up, otherwise starts a new call to the configured peer
// channel. Does nothing when no peer channel id is configured.
const handleLiveCallClick = useCallback(() => {
  if (!rtcTargetChannelId) return;

  const session = rtcSessionRef.current;

  switch (rtcState) {
    case "connected":
    case "requesting-media":
    case "negotiating":
      // A call exists or is being negotiated: this click hangs up.
      session.endCall();
      return;
    default:
      break;
  }

  // Starting fresh: clear any transcript left from a previous interaction.
  setTranscript("");

  const reportFailure = () => {
    setStatusText("Live call failed. Try again.");
    setRtcState("error");
  };
  void session.startCall(rtcTargetChannelId).catch(reportFailure);
}, [rtcState, rtcTargetChannelId]);
185+
131186
// Handle Escape key
132187
useEffect(() => {
133188
if (!open) return;
@@ -145,6 +200,8 @@ export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps)
145200
const isRecording = voiceState === "recording";
146201
const isProcessing = voiceState === "processing";
147202
const isPlaying = voiceState === "playing";
203+
const isRtcConnecting = rtcState === "requesting-media" || rtcState === "negotiating";
204+
const isRtcConnected = rtcState === "connected";
148205
const isActive = isRecording || isProcessing || isPlaying;
149206

150207
// Generate visualization bars based on audio level
@@ -162,6 +219,7 @@ export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps)
162219

163220
return (
164221
<div className="fixed inset-0 z-50 flex flex-col items-center justify-center bg-dark-900/95 backdrop-blur-sm">
222+
<audio ref={remoteAudioRef} autoPlay playsInline className="hidden" />
165223
{/* Close button */}
166224
<button
167225
onClick={handleClose}
@@ -191,6 +249,24 @@ export function VoiceOverlay({ open, onClose, onTranscript }: VoiceOverlayProps)
191249
{voiceMode === "continuous" ? "Continuous" : "Push to Talk"}
192250
</button>
193251

252+
{/* Live call toggle: rendered only when a peer channel id is configured */}
{rtcTargetChannelId && (
  <button
    onClick={handleLiveCallClick}
    disabled={isRecording || isProcessing}
    className={cn(
      "absolute top-4 left-44 flex items-center gap-2 px-3 py-2 rounded-lg text-sm transition-colors",
      isRtcConnected
        ? "bg-green-600 text-white hover:bg-green-500"
        : "bg-dark-800 text-dark-300 hover:text-white",
      (isRecording || isProcessing) && "opacity-50 cursor-not-allowed",
    )}
    // The tooltip follows the action the click performs: while a call is
    // connected or connecting, clicking ends it rather than starting one.
    title={isRtcConnected || isRtcConnecting ? "End live voice call" : "Start live voice call"}
  >
    {isRtcConnected || isRtcConnecting ? <PhoneOff size={16} /> : <Phone size={16} />}
    {isRtcConnected ? "End Live Call" : isRtcConnecting ? "Connecting..." : "Live Call Beta"}
  </button>
)}
269+
194270
{/* Status text */}
195271
<p className="text-dark-300 text-sm font-medium mb-8 tracking-wide uppercase">
196272
{statusText}

apps/web/lib/webrtc.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,10 @@ export class WebRTCVoiceSession {
6868
this.ws = options?.wsClient ?? getWSClient();
6969
this.createPeerConnection =
7070
options?.peerConnectionFactory ??
71-
(() => new RTCPeerConnection({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] }));
71+
(() =>
72+
new RTCPeerConnection({
73+
iceServers: [{ urls: "stun:stun.l.google.com:19302" }],
74+
}) as unknown as RTCPeerConnectionLike);
7275
this.getUserMedia =
7376
options?.getUserMedia ??
7477
((constraints) => navigator.mediaDevices.getUserMedia(constraints));

tests/web/webrtc.test.ts

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,4 +215,77 @@ describe("WebRTCVoiceSession", () => {
215215
},
216216
});
217217
});
218+
219+
// listen() should register exactly one WebSocket message handler, and an
// rtc.offer delivered through that handler should be answered back to the
// channel that sent the offer.
it("subscribes to WebSocket rtc messages through listen()", async () => {
  type MessageHandler = (message: unknown) => void;

  const outbound: unknown[] = [];
  let registeredHandler: MessageHandler | null = null;

  const wsClient = {
    currentSessionId: "session-5",
    send: vi.fn((message: unknown) => {
      outbound.push(message);
    }),
    // Captures the handler the session registers; the returned function
    // mimics an unsubscribe by clearing it again.
    onMessage: vi.fn((handler: MessageHandler) => {
      registeredHandler = handler;
      return () => {
        registeredHandler = null;
      };
    }),
  };

  const session = new WebRTCVoiceSession({
    wsClient: wsClient as never,
    peerConnectionFactory: () => createPeerConnectionStub().peer as never,
    getUserMedia: vi.fn().mockResolvedValue(createStream(["track-5"])),
  });

  session.listen();

  const incomingOffer = {
    type: "rtc.offer",
    payload: {
      sourceChannelId: "voice-peer",
      description: {
        type: "offer",
        sdp: "offer-from-signal",
      },
    },
  };
  registeredHandler?.(incomingOffer);

  // Yield one macrotask so the session's asynchronous offer handling can finish.
  await new Promise((resolve) => setTimeout(resolve, 0));

  expect(wsClient.onMessage).toHaveBeenCalledTimes(1);
  expect(outbound[0]).toMatchObject({
    type: "rtc.answer",
    payload: {
      targetChannelId: "voice-peer",
    },
  });
});
261+
262+
// Teardown path: after starting a call, endCall() should send rtc.hangup to
// the peer, close the peer connection, stop the local track, and leave the
// session in the "ended" state.
it("sends rtc.hangup and stops local tracks when ending a call", async () => {
  type StoppableStream = { tracks: Array<{ stop: ReturnType<typeof vi.fn> }> };

  const outbound: unknown[] = [];
  const localStream = createStream(["track-6"]);
  const { peer } = createPeerConnectionStub();

  const wsClient = {
    currentSessionId: "session-6",
    send: vi.fn((message: unknown) => {
      outbound.push(message);
    }),
    onMessage: vi.fn(() => () => {}),
  };

  const session = new WebRTCVoiceSession({
    wsClient: wsClient as never,
    peerConnectionFactory: () => peer as never,
    getUserMedia: vi.fn().mockResolvedValue(localStream),
  });

  await session.startCall("mobile-peer");
  session.endCall();

  // The hangup must be the most recent message sent over the socket.
  const lastSent = outbound.at(-1);
  expect(lastSent).toMatchObject({
    type: "rtc.hangup",
    payload: {
      targetChannelId: "mobile-peer",
    },
  });
  expect(peer.close).toHaveBeenCalledTimes(1);

  const firstTrack = (localStream as unknown as StoppableStream).tracks[0];
  expect(firstTrack?.stop).toHaveBeenCalledTimes(1);
  expect(session.currentState).toBe("ended");
});
218291
});

0 commit comments

Comments
 (0)