Skip to content

Commit df02cdc

Browse files
authored
fix: align README and examples with v0.2.1 and validate them in CI (#34)
1 parent ba3c218 commit df02cdc

9 files changed

Lines changed: 493 additions & 137 deletions

File tree

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ jobs:
3333
restore-keys: |
3434
${{ runner.os }}-gradle-
3535
36-
- name: Compile
37-
run: ./gradlew compileJava
36+
- name: Compile SDK and examples
37+
run: ./gradlew compileJava compileExamples
3838

3939
test:
4040
name: Unit Tests (Java ${{ matrix.java-version }})

.github/workflows/tests-daily.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ jobs:
3636
restore-keys: |
3737
${{ runner.os }}-gradle-
3838
39-
- name: Compile
40-
run: ./gradlew compileJava
39+
- name: Compile SDK and examples
40+
run: ./gradlew compileJava compileExamples
4141

4242
unit-tests:
4343
name: Unit Tests

README.md

Lines changed: 102 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@ The SDK supports API Key authentication with automatic environment variable load
4848
import com.deepgram.DeepgramClient;
4949

5050
// Using environment variable (DEEPGRAM_API_KEY)
51-
DeepgramClient client = DeepgramClient.builder().build();
51+
DeepgramClient envClient = DeepgramClient.builder().build();
5252

5353
// Using API key directly
54-
DeepgramClient client = DeepgramClient.builder()
54+
DeepgramClient explicitClient = DeepgramClient.builder()
5555
.apiKey("YOUR_DEEPGRAM_API_KEY")
5656
.build();
5757
```
@@ -220,13 +220,20 @@ Stream audio for real-time speech-to-text.
220220

221221
```java
222222
import com.deepgram.DeepgramClient;
223+
import com.deepgram.resources.listen.v1.types.ListenV1CloseStream;
224+
import com.deepgram.resources.listen.v1.types.ListenV1CloseStreamType;
223225
import com.deepgram.resources.listen.v1.websocket.V1WebSocketClient;
224226
import com.deepgram.resources.listen.v1.websocket.V1ConnectOptions;
225227
import com.deepgram.types.ListenV1Model;
228+
import java.nio.file.Files;
229+
import java.nio.file.Path;
230+
import java.util.concurrent.TimeUnit;
231+
import okio.ByteString;
226232

227233
DeepgramClient client = DeepgramClient.builder().build();
234+
byte[] audioBytes = Files.readAllBytes(Path.of("audio.wav"));
228235

229-
V1WebSocketClient ws = client.listen().v1().websocket();
236+
V1WebSocketClient ws = client.listen().v1().v1WebSocket();
230237

231238
// Register event handlers
232239
ws.onResults(results -> {
@@ -247,9 +254,14 @@ ws.onError(error -> {
247254
// Connect with options (model is required)
248255
ws.connect(V1ConnectOptions.builder()
249256
.model(ListenV1Model.NOVA3)
250-
.build());
257+
.build())
258+
.get(10, TimeUnit.SECONDS);
251259

252-
ws.send(audioBytes);
260+
ws.sendMedia(ByteString.of(audioBytes));
261+
ws.sendCloseStream(ListenV1CloseStream.builder()
262+
.type(ListenV1CloseStreamType.CLOSE_STREAM)
263+
.build())
264+
.get(5, TimeUnit.SECONDS);
253265

254266
// Close when done
255267
ws.close();
@@ -261,15 +273,25 @@ Stream text for real-time audio generation.
261273

262274
```java
263275
import com.deepgram.DeepgramClient;
276+
import com.deepgram.resources.speak.v1.types.SpeakV1Close;
277+
import com.deepgram.resources.speak.v1.types.SpeakV1CloseType;
278+
import com.deepgram.resources.speak.v1.types.SpeakV1Flush;
279+
import com.deepgram.resources.speak.v1.types.SpeakV1FlushType;
280+
import com.deepgram.resources.speak.v1.types.SpeakV1Text;
264281
import com.deepgram.resources.speak.v1.websocket.V1WebSocketClient;
282+
import java.io.ByteArrayOutputStream;
283+
import java.nio.file.Files;
284+
import java.nio.file.Path;
285+
import java.util.concurrent.TimeUnit;
265286

266287
DeepgramClient client = DeepgramClient.builder().build();
288+
ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream();
267289

268-
var ttsWs = client.speak().v1().websocket();
290+
V1WebSocketClient ttsWs = client.speak().v1().v1WebSocket();
269291

270292
// Register event handlers
271-
ttsWs.onAudioData(audioData -> {
272-
// Process audio chunks as they arrive
293+
ttsWs.onSpeakV1Audio(audioData -> {
294+
audioBuffer.writeBytes(audioData.toByteArray());
273295
});
274296

275297
ttsWs.onMetadata(metadata -> {
@@ -281,8 +303,23 @@ ttsWs.onError(error -> {
281303
});
282304

283305
// Connect and send text
284-
ttsWs.connect();
285-
ttsWs.send("Hello, this is streamed text-to-speech.");
306+
ttsWs.connect().get(10, TimeUnit.SECONDS);
307+
ttsWs.sendText(SpeakV1Text.builder()
308+
.text("Hello, this is streamed text-to-speech.")
309+
.build())
310+
.get(5, TimeUnit.SECONDS);
311+
ttsWs.sendFlush(SpeakV1Flush.builder()
312+
.type(SpeakV1FlushType.FLUSH)
313+
.build())
314+
.get(5, TimeUnit.SECONDS);
315+
316+
Thread.sleep(2000);
317+
Files.write(Path.of("output.wav"), audioBuffer.toByteArray());
318+
319+
ttsWs.sendClose(SpeakV1Close.builder()
320+
.type(SpeakV1CloseType.CLOSE)
321+
.build())
322+
.get(5, TimeUnit.SECONDS);
286323

287324
// Close when done
288325
ttsWs.close();
@@ -294,24 +331,58 @@ Connect to Deepgram's voice agent for real-time conversational AI.
294331

295332
```java
296333
import com.deepgram.DeepgramClient;
334+
import com.deepgram.resources.agent.v1.types.AgentV1InjectUserMessage;
335+
import com.deepgram.resources.agent.v1.types.AgentV1Settings;
336+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgent;
337+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThink;
338+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItem;
339+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItemProvider;
340+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAudio;
297341
import com.deepgram.resources.agent.v1.websocket.V1WebSocketClient;
342+
import com.deepgram.types.OpenAiThinkProvider;
343+
import java.util.List;
344+
import java.util.Map;
345+
import java.util.concurrent.TimeUnit;
298346

299347
DeepgramClient client = DeepgramClient.builder().build();
300348

301-
var agentWs = client.agent().v1().websocket();
349+
V1WebSocketClient agentWs = client.agent().v1().v1WebSocket();
302350

303351
// Register event handlers
304352
agentWs.onWelcome(welcome -> {
305353
System.out.println("Agent connected");
354+
355+
agentWs.sendSettings(AgentV1Settings.builder()
356+
.audio(AgentV1SettingsAudio.builder().build())
357+
.agent(AgentV1SettingsAgent.builder()
358+
.think(AgentV1SettingsAgentThink.of(List.of(
359+
AgentV1SettingsAgentThinkOneItem.builder()
360+
.provider(AgentV1SettingsAgentThinkOneItemProvider.of(
361+
OpenAiThinkProvider.of(Map.of("model", "gpt-4o-mini"))))
362+
.prompt("You are a helpful voice assistant. Keep responses brief.")
363+
.build())))
364+
.greeting("Hello! How can I help you today?")
365+
.build())
366+
.build());
367+
});
368+
369+
agentWs.onSettingsApplied(applied -> {
370+
agentWs.sendInjectUserMessage(AgentV1InjectUserMessage.builder()
371+
.content("What is the capital of France?")
372+
.build());
373+
});
374+
375+
agentWs.onConversationText(text -> {
376+
System.out.printf("[%s] %s%n", text.getRole(), text.getContent());
306377
});
307378

308379
agentWs.onError(error -> {
309380
System.err.println("Error: " + error.getMessage());
310381
});
311382

312-
// Connect and interact
313-
agentWs.connect();
314-
agentWs.send(audioBytes);
383+
// Connect and wait for the agent to respond
384+
agentWs.connect().get(10, TimeUnit.SECONDS);
385+
Thread.sleep(5000);
315386

316387
// Close when done
317388
agentWs.close();
@@ -330,13 +401,22 @@ Use the separate [`deepgram-sagemaker`](https://github.com/deepgram/deepgram-jav
330401
```groovy
331402
dependencies {
332403
implementation 'com.deepgram:deepgram-java-sdk:0.2.1' // x-release-please-version
333-
implementation 'com.deepgram:deepgram-sagemaker:0.1.0'
404+
implementation 'com.deepgram:deepgram-sagemaker:0.1.2'
334405
}
335406
```
336407

337408
```java
409+
import com.deepgram.DeepgramClient;
338410
import com.deepgram.sagemaker.SageMakerConfig;
339411
import com.deepgram.sagemaker.SageMakerTransportFactory;
412+
import com.deepgram.resources.listen.v1.websocket.V1ConnectOptions;
413+
import com.deepgram.types.ListenV1Model;
414+
import java.nio.file.Files;
415+
import java.nio.file.Path;
416+
import java.util.concurrent.TimeUnit;
417+
import okio.ByteString;
418+
419+
byte[] audioBytes = Files.readAllBytes(Path.of("audio.wav"));
340420

341421
var factory = new SageMakerTransportFactory(
342422
SageMakerConfig.builder()
@@ -351,10 +431,11 @@ DeepgramClient client = DeepgramClient.builder()
351431
.build();
352432

353433
// Use the SDK exactly as normal — the transport is transparent
354-
var ws = client.listen().v1().websocket();
434+
var ws = client.listen().v1().v1WebSocket();
355435
ws.onResults(results -> { /* ... */ });
356-
ws.connect(V1ConnectOptions.builder().model(ListenV1Model.NOVA3).build());
357-
ws.sendMedia(audioBytes);
436+
ws.connect(V1ConnectOptions.builder().model(ListenV1Model.NOVA3).build())
437+
.get(10, TimeUnit.SECONDS);
438+
ws.sendMedia(ByteString.of(audioBytes));
358439
```
359440

360441
See the [SageMaker example](examples/sagemaker/LiveStreamingSageMaker.java) for a complete walkthrough.
@@ -511,7 +592,6 @@ import com.deepgram.resources.listen.v1.media.types.MediaTranscribeResponse;
511592
DeepgramApiHttpResponse<MediaTranscribeResponse> rawResponse =
512593
client.listen().v1().media().withRawResponse().transcribeUrl(request);
513594

514-
int statusCode = rawResponse.statusCode();
515595
var headers = rawResponse.headers();
516596
MediaTranscribeResponse body = rawResponse.body();
517597
```
@@ -524,13 +604,13 @@ The SDK provides comprehensive access to Deepgram's APIs:
524604
```java
525605
client.listen().v1().media().transcribeUrl(request) // Transcribe audio from URL
526606
client.listen().v1().media().transcribeFile(body) // Transcribe audio from file bytes
527-
client.listen().v1().websocket() // Real-time streaming transcription
607+
client.listen().v1().v1WebSocket() // Real-time streaming transcription
528608
```
529609

530610
### Speak (Text-to-Speech)
531611
```java
532612
client.speak().v1().audio().generate(request) // Generate speech from text
533-
client.speak().v1().websocket() // Real-time streaming TTS
613+
client.speak().v1().v1WebSocket() // Real-time streaming TTS
534614
```
535615

536616
### Read (Text Intelligence)
@@ -541,7 +621,7 @@ client.read().v1().text().analyze(request) // Analyze text content
541621
### Agent (Voice Agent)
542622
```java
543623
client.agent().v1().settings().think().models().list() // List available agent models
544-
client.agent().v1().websocket() // Real-time agent WebSocket
624+
client.agent().v1().v1WebSocket() // Real-time agent WebSocket
545625
```
546626

547627
### Manage (Project Management)

build.gradle

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ dependencies {
4343
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
4444
testImplementation 'com.squareup.okhttp3:mockwebserver:4.12.0'
4545
testImplementation 'org.assertj:assertj-core:3.25.3'
46+
testImplementation 'com.deepgram:deepgram-sagemaker:0.1.2'
47+
4648
}
4749

4850
test {
@@ -78,8 +80,7 @@ sourceSets {
7880
examples {
7981
java {
8082
srcDir 'examples'
81-
// Exclude examples with known issues (see ISSUES section below)
82-
exclude 'agent/**' // Blocked: missing AgentV1UpdateThink type
83+
// Exclude examples that still need API updates.
8384
exclude 'manage/ListModels.java' // Duplicate class name with agent/ListModels
8485
exclude 'manage/MemberPermissions.java' // getScopes() not in generated API
8586
exclude 'manage/UsageBreakdown.java' // getModels() return type mismatch
@@ -89,6 +90,11 @@ sourceSets {
8990
}
9091
}
9192

93+
dependencies {
94+
// Optional dependencies needed by the examples source set
95+
examplesImplementation 'com.deepgram:deepgram-sagemaker:0.1.2'
96+
}
97+
9298
// Compile all examples
9399
tasks.register('compileExamples') {
94100
dependsOn 'examplesClasses'
@@ -123,4 +129,3 @@ spotless {
123129
removeUnusedImports()
124130
}
125131
}
126-

examples/agent/CustomProviders.java

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
import com.deepgram.DeepgramClient;
22
import com.deepgram.resources.agent.v1.types.AgentV1Settings;
33
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgent;
4-
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentContext;
5-
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentContextSpeak;
6-
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentContextThink;
4+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeak;
5+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeakEndpoint;
6+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeakEndpointProvider;
7+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeakOneItemProviderDeepgramModel;
8+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThink;
9+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItem;
10+
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItemProvider;
711
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAudio;
12+
import com.deepgram.resources.agent.v1.types.Deepgram;
813
import com.deepgram.resources.agent.v1.websocket.V1WebSocketClient;
914
import com.deepgram.types.Anthropic;
10-
import com.deepgram.types.AnthropicThinkProviderModel;
11-
import com.deepgram.types.Deepgram;
12-
import com.deepgram.types.DeepgramSpeakProviderModel;
13-
import com.deepgram.types.SpeakSettingsV1;
14-
import com.deepgram.types.SpeakSettingsV1Provider;
15-
import com.deepgram.types.ThinkSettingsV1;
16-
import com.deepgram.types.ThinkSettingsV1Provider;
15+
import java.util.List;
16+
import java.util.Map;
1717
import java.util.concurrent.CompletableFuture;
1818
import java.util.concurrent.CountDownLatch;
1919
import java.util.concurrent.TimeUnit;
@@ -57,34 +57,30 @@ public static void main(String[] args) {
5757

5858
try {
5959
// Configure Anthropic as the think provider
60-
Anthropic anthropicProvider = Anthropic.builder()
61-
.model(AnthropicThinkProviderModel.CLAUDE_SONNET420250514)
62-
.build();
63-
64-
ThinkSettingsV1 thinkSettings = ThinkSettingsV1.builder()
65-
.provider(ThinkSettingsV1Provider.anthropic(anthropicProvider))
66-
.prompt("You are a helpful assistant. Keep responses concise.")
67-
.build();
60+
Anthropic anthropicProvider = Anthropic.of(Map.of("model", "claude-sonnet-4-20250514"));
6861

6962
// Configure Deepgram as the speak provider
7063
Deepgram deepgramSpeakProvider = Deepgram.builder()
71-
.model(DeepgramSpeakProviderModel.AURA2ASTERIA_EN)
72-
.build();
73-
74-
SpeakSettingsV1 speakSettings = SpeakSettingsV1.builder()
75-
.provider(SpeakSettingsV1Provider.deepgram(deepgramSpeakProvider))
64+
.model(AgentV1SettingsAgentSpeakOneItemProviderDeepgramModel.AURA2ASTERIA_EN)
7665
.build();
7766

78-
// Build agent settings with both providers
79-
AgentV1SettingsAgentContext agentContext = AgentV1SettingsAgentContext.builder()
80-
.think(AgentV1SettingsAgentContextThink.of(thinkSettings))
81-
.speak(AgentV1SettingsAgentContextSpeak.of(speakSettings))
67+
AgentV1SettingsAgentSpeak speakSettings =
68+
AgentV1SettingsAgentSpeak.of(AgentV1SettingsAgentSpeakEndpoint.builder()
69+
.provider(AgentV1SettingsAgentSpeakEndpointProvider.deepgram(deepgramSpeakProvider))
70+
.build());
71+
72+
AgentV1SettingsAgent agentConfig = AgentV1SettingsAgent.builder()
73+
.think(AgentV1SettingsAgentThink.of(List.of(AgentV1SettingsAgentThinkOneItem.builder()
74+
.provider(AgentV1SettingsAgentThinkOneItemProvider.of(anthropicProvider))
75+
.prompt("You are a helpful assistant. Keep responses concise.")
76+
.build())))
77+
.speak(speakSettings)
8278
.greeting("Hello! I'm powered by Anthropic Claude with Deepgram voices.")
8379
.build();
8480

8581
AgentV1Settings settings = AgentV1Settings.builder()
8682
.audio(AgentV1SettingsAudio.builder().build())
87-
.agent(AgentV1SettingsAgent.of(agentContext))
83+
.agent(agentConfig)
8884
.build();
8985

9086
wsClient.sendSettings(settings);

0 commit comments

Comments
 (0)