@@ -48,10 +48,10 @@ The SDK supports API Key authentication with automatic environment variable load
4848import com.deepgram.DeepgramClient ;
4949
5050// Using environment variable (DEEPGRAM_API_KEY)
51- DeepgramClient client = DeepgramClient . builder(). build();
51+ DeepgramClient envClient = DeepgramClient . builder(). build();
5252
5353// Using API key directly
54- DeepgramClient client = DeepgramClient . builder()
54+ DeepgramClient explicitClient = DeepgramClient . builder()
5555 .apiKey("YOUR_DEEPGRAM_API_KEY")
5656 .build();
5757```
@@ -220,13 +220,20 @@ Stream audio for real-time speech-to-text.
220220
221221``` java
222222import com.deepgram.DeepgramClient ;
223+ import com.deepgram.resources.listen.v1.types.ListenV1CloseStream ;
224+ import com.deepgram.resources.listen.v1.types.ListenV1CloseStreamType ;
223225import com.deepgram.resources.listen.v1.websocket.V1WebSocketClient ;
224226import com.deepgram.resources.listen.v1.websocket.V1ConnectOptions ;
225227import com.deepgram.types.ListenV1Model ;
228+ import java.nio.file.Files ;
229+ import java.nio.file.Path ;
230+ import java.util.concurrent.TimeUnit ;
231+ import okio.ByteString ;
226232
227233DeepgramClient client = DeepgramClient . builder(). build();
234+ byte[] audioBytes = Files.readAllBytes(Path.of("audio.wav"));
228235
229- V1WebSocketClient ws = client. listen(). v1(). websocket ();
236+ V1WebSocketClient ws = client. listen(). v1(). v1WebSocket ();
230237
231238// Register event handlers
232239ws.onResults(results -> {
@@ -247,9 +254,14 @@ ws.onError(error -> {
247254// Connect with options (model is required)
248255ws. connect(V1ConnectOptions . builder()
249256 .model(ListenV1Model . NOVA3 )
250- .build());
257+ .build())
258+ .get(10 , TimeUnit . SECONDS );
251259
252- ws. send(audioBytes);
260+ ws. sendMedia(ByteString . of(audioBytes));
261+ ws. sendCloseStream(ListenV1CloseStream . builder()
262+ .type(ListenV1CloseStreamType . CLOSE_STREAM )
263+ .build())
264+ .get(5 , TimeUnit . SECONDS );
253265
254266// Close when done
255267ws. close();
@@ -261,15 +273,25 @@ Stream text for real-time audio generation.
261273
262274``` java
263275import com.deepgram.DeepgramClient ;
276+ import com.deepgram.resources.speak.v1.types.SpeakV1Close ;
277+ import com.deepgram.resources.speak.v1.types.SpeakV1CloseType ;
278+ import com.deepgram.resources.speak.v1.types.SpeakV1Flush ;
279+ import com.deepgram.resources.speak.v1.types.SpeakV1FlushType ;
280+ import com.deepgram.resources.speak.v1.types.SpeakV1Text ;
264281import com.deepgram.resources.speak.v1.websocket.V1WebSocketClient ;
282+ import java.io.ByteArrayOutputStream ;
283+ import java.nio.file.Files ;
284+ import java.nio.file.Path ;
285+ import java.util.concurrent.TimeUnit ;
265286
266287DeepgramClient client = DeepgramClient . builder(). build();
288+ ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream ();
267289
268- var ttsWs = client. speak(). v1(). websocket ();
290+ V1WebSocketClient ttsWs = client. speak(). v1(). v1WebSocket ();
269291
270292// Register event handlers
271- ttsWs. onAudioData (audioData - > {
272- // Process audio chunks as they arrive
293+ ttsWs.onSpeakV1Audio(audioData -> {
294+ audioBuffer.writeBytes(audioData.toByteArray());
273295});
274296
275297ttsWs. onMetadata(metadata - > {
@@ -281,8 +303,23 @@ ttsWs.onError(error -> {
281303});
282304
283305// Connect and send text
284- ttsWs. connect();
285- ttsWs. send(" Hello, this is streamed text-to-speech." );
306+ ttsWs. connect(). get(10 , TimeUnit . SECONDS );
307+ ttsWs. sendText(SpeakV1Text . builder()
308+ .text("Hello, this is streamed text-to-speech.")
309+ .build())
310+ .get(5 , TimeUnit . SECONDS );
311+ ttsWs. sendFlush(SpeakV1Flush . builder()
312+ .type(SpeakV1FlushType . FLUSH )
313+ .build())
314+ .get(5 , TimeUnit . SECONDS );
315+
316+ Thread . sleep(2000 );
317+ Files . write(Path . of(" output.wav" ), audioBuffer. toByteArray());
318+
319+ ttsWs. sendClose(SpeakV1Close . builder()
320+ .type(SpeakV1CloseType . CLOSE )
321+ .build())
322+ .get(5 , TimeUnit . SECONDS );
286323
287324// Close when done
288325ttsWs. close();
@@ -294,24 +331,58 @@ Connect to Deepgram's voice agent for real-time conversational AI.
294331
295332``` java
296333import com.deepgram.DeepgramClient ;
334+ import com.deepgram.resources.agent.v1.types.AgentV1InjectUserMessage ;
335+ import com.deepgram.resources.agent.v1.types.AgentV1Settings ;
336+ import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgent ;
337+ import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThink ;
338+ import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItem ;
339+ import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItemProvider ;
340+ import com.deepgram.resources.agent.v1.types.AgentV1SettingsAudio ;
297341import com.deepgram.resources.agent.v1.websocket.V1WebSocketClient ;
342+ import com.deepgram.types.OpenAiThinkProvider ;
343+ import java.util.List ;
344+ import java.util.Map ;
345+ import java.util.concurrent.TimeUnit ;
298346
299347DeepgramClient client = DeepgramClient . builder(). build();
300348
301- var agentWs = client. agent(). v1(). websocket ();
349+ V1WebSocketClient agentWs = client. agent(). v1(). v1WebSocket ();
302350
303351// Register event handlers
304352agentWs. onWelcome(welcome - > {
305353 System.out.println("Agent connected");
354+
355+ agentWs. sendSettings(AgentV1Settings . builder()
356+ .audio(AgentV1SettingsAudio . builder(). build())
357+ .agent(AgentV1SettingsAgent . builder()
358+ .think(AgentV1SettingsAgentThink . of(List . of(
359+ AgentV1SettingsAgentThinkOneItem . builder()
360+ .provider(AgentV1SettingsAgentThinkOneItemProvider . of(
361+ OpenAiThinkProvider.of(Map.of("model", "gpt-4o-mini"))))
362+ .prompt(" You are a helpful voice assistant. Keep responses brief." )
363+ .build())))
364+ .greeting(" Hello! How can I help you today?" )
365+ .build())
366+ .build());
367+ });
368+
369+ agentWs. onSettingsApplied(applied - > {
370+ agentWs. sendInjectUserMessage(AgentV1InjectUserMessage . builder()
371+ .content(" What is the capital of France?" )
372+ .build());
373+ });
374+
375+ agentWs.onConversationText(text -> {
376+ System.out.printf("[%s] %s%n", text.getRole(), text.getContent());
306377});
307378
308379agentWs. onError(error - > {
309380 System.err.println("Error: " + error.getMessage());
310381});
311382
312- // Connect and interact
313- agentWs. connect();
314- agentWs . send(audioBytes );
383+ // Connect and wait for the agent to respond
384+ agentWs. connect(). get( 10 , TimeUnit . SECONDS ) ;
385+ Thread . sleep( 5000 );
315386
316387// Close when done
317388agentWs. close();
@@ -330,13 +401,22 @@ Use the separate [`deepgram-sagemaker`](https://github.com/deepgram/deepgram-jav
330401``` groovy
331402dependencies {
332403 implementation 'com.deepgram:deepgram-java-sdk:0.2.1' // x-release-please-version
333- implementation 'com.deepgram:deepgram-sagemaker:0.1.0 '
404+ implementation 'com.deepgram:deepgram-sagemaker:0.1.2 '
334405}
335406```
336407
337408``` java
409+ import com.deepgram.DeepgramClient ;
338410import com.deepgram.sagemaker.SageMakerConfig ;
339411import com.deepgram.sagemaker.SageMakerTransportFactory ;
412+ import com.deepgram.resources.listen.v1.websocket.V1ConnectOptions ;
413+ import com.deepgram.types.ListenV1Model ;
414+ import java.nio.file.Files ;
415+ import java.nio.file.Path ;
416+ import java.util.concurrent.TimeUnit ;
417+ import okio.ByteString ;
418+
419+ byte[] audioBytes = Files.readAllBytes(Path.of("audio.wav"));
340420
341421var factory = new SageMakerTransportFactory (
342422 SageMakerConfig . builder()
@@ -351,10 +431,11 @@ DeepgramClient client = DeepgramClient.builder()
351431 .build();
352432
353433// Use the SDK exactly as normal — the transport is transparent
354- var ws = client. listen(). v1(). websocket ();
434+ var ws = client. listen(). v1(). v1WebSocket ();
355435ws.onResults(results -> { /* ... */ });
356- ws. connect(V1ConnectOptions . builder(). model(ListenV1Model . NOVA3 ). build());
357- ws. sendMedia(audioBytes);
436+ ws. connect(V1ConnectOptions . builder(). model(ListenV1Model . NOVA3 ). build())
437+ .get(10 , TimeUnit . SECONDS );
438+ ws. sendMedia(ByteString . of(audioBytes));
358439```
359440
360441See the [SageMaker example](examples/sagemaker/LiveStreamingSageMaker.java) for a complete walkthrough.
@@ -511,7 +592,6 @@ import com.deepgram.resources.listen.v1.media.types.MediaTranscribeResponse;
511592DeepgramApiHttpResponse<MediaTranscribeResponse > rawResponse =
512593 client. listen(). v1(). media(). withRawResponse(). transcribeUrl(request);
513594
514- int statusCode = rawResponse. statusCode();
515595var headers = rawResponse. headers();
516596MediaTranscribeResponse body = rawResponse. body();
517597```
@@ -524,13 +604,13 @@ The SDK provides comprehensive access to Deepgram's APIs:
524604``` java
525605client. listen(). v1(). media(). transcribeUrl(request) // Transcribe audio from URL
526606client. listen(). v1(). media(). transcribeFile(body) // Transcribe audio from file bytes
527- client. listen(). v1(). websocket() // Real-time streaming transcription
607+ client. listen(). v1(). v1WebSocket() // Real-time streaming transcription
528608```
529609
530610### Speak (Text-to-Speech)
531611``` java
532612client. speak(). v1(). audio(). generate(request) // Generate speech from text
533- client. speak(). v1(). websocket() // Real-time streaming TTS
613+ client. speak(). v1(). v1WebSocket() // Real-time streaming TTS
534614```
535615
536616### Read (Text Intelligence)
@@ -541,7 +621,7 @@ client.read().v1().text().analyze(request) // Analyze text content
541621### Agent (Voice Agent)
542622``` java
543623client. agent(). v1(). settings(). think(). models(). list() // List available agent models
544- client. agent(). v1(). websocket() // Real-time agent WebSocket
624+ client. agent(). v1(). v1WebSocket() // Real-time agent WebSocket
545625```
546626
547627### Manage (Project Management)
0 commit comments