Skip to content

Commit 3a8493b

Browse files
committed
feat: migrate to agent v1
1 parent 76468f1 commit 3a8493b

6 files changed

Lines changed: 137 additions & 147 deletions

File tree

examples/node-agent-live/index.js

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,28 +17,36 @@ const agent = async () => {
1717
audio: {
1818
input: {
1919
encoding: "linear16",
20-
sampleRate: 44100,
20+
sample_rate: 24000,
2121
},
2222
output: {
2323
encoding: "linear16",
24-
sampleRate: 16000,
24+
sample_rate: 16000,
2525
container: "wav",
2626
},
2727
},
2828
agent: {
29+
language: "en",
2930
listen: {
30-
model: "nova-3",
31-
keyterms: ["spacewalk"],
32-
},
33-
speak: {
34-
model: "aura-asteria-en",
31+
provider: {
32+
type: "deepgram",
33+
model: "nova-3",
34+
},
3535
},
3636
think: {
3737
provider: {
3838
type: "open_ai",
39+
model: "gpt-4o-mini",
40+
},
41+
prompt: "You are a friendly AI assistant.",
42+
},
43+
speak: {
44+
provider: {
45+
type: "deepgram",
46+
model: "aura-2-thalia-en",
3947
},
40-
model: "gpt-4o-mini",
4148
},
49+
greeting: "Hello! How can I help you today?",
4250
},
4351
});
4452

src/DeepgramClient.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ export default class DeepgramClient extends AbstractClient {
9797
* @returns {AgentLiveClient} A new instance of the AgentLiveClient.
9898
* @beta
9999
*/
100-
public agent(endpoint: string = "/agent"): AgentLiveClient {
100+
public agent(endpoint: string = "/:version/agent/converse"): AgentLiveClient {
101101
return new AgentLiveClient(this.options, endpoint);
102102
}
103103

src/lib/enums/AgentEvents.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ export enum AgentEvents {
1111
Audio = "Audio",
1212
/**
1313
* Confirms the successful connection to the websocket.
14-
* { type: "Welcome", session_id: "String"}
14+
* { type: "Welcome", request_id: "String"}
1515
*/
1616
Welcome = "Welcome",
1717
/**
@@ -36,7 +36,7 @@ export enum AgentEvents {
3636
AgentThinking = "AgentThinking",
3737
/**
3838
* A request to call client-side functions.
39-
* { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
39+
* { type: "FunctionCallRequest", functions: { id: string; name: string; arguments: string; client_side: boolean}[] }
4040
*/
4141
FunctionCallRequest = "FunctionCallRequest",
4242
/**
@@ -57,14 +57,14 @@ export enum AgentEvents {
5757
/**
5858
* This event is only emitted when you send an `InjectAgentMessage` request while
5959
* the user is currently speaking or the server is processing user audio.
60-
* { type: "InjectionRefused" }
60+
* { type: "InjectionRefused", message: string }
6161
*/
6262
InjectionRefused = "InjectionRefused",
6363
/**
6464
* A successful response to the `UpdatePrompt` request.
65-
* { type: "InstructionsUpdated" }
65+
* { type: "PromptUpdated" }
6666
*/
67-
InstructionsUpdated = "InstructionsUpdated",
67+
PromptUpdated = "PromptUpdated",
6868
/**
6969
* A successful response to the `UpdateSpeak` request.
7070
* { type: "SpeakUpdated" }

src/lib/types/AgentLiveSchema.ts

Lines changed: 98 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,3 @@
1-
type AudioFormat =
2-
| {
3-
encoding: "linear16";
4-
container: "wav" | "none";
5-
sampleRate: 8000 | 16000 | 24000 | 32000 | 48000;
6-
}
7-
| {
8-
encoding: "mulaw";
9-
container: "wav" | "none";
10-
sampleRate: 8000 | 16000;
11-
}
12-
| {
13-
encoding: "alaw";
14-
container: "wav" | "none";
15-
sampleRate: 8000 | 16000;
16-
};
17-
181
type AudioEncoding =
192
| "linear16"
203
| "flac"
@@ -76,119 +59,129 @@ type SpeakModel =
7659
| "aura-zeus-en"
7760
| string;
7861

79-
interface ThinkModelFunction {
80-
name: string;
81-
description: string;
82-
url: string;
83-
headers: [
84-
{
85-
key: "authorization";
86-
value: string;
87-
}
88-
];
89-
method: "POST";
90-
parameters: {
91-
type: string;
92-
properties: Record<
93-
string,
94-
{
95-
type: string;
96-
description: string;
97-
}
98-
>;
99-
};
100-
}
101-
102-
type ThinkModel =
103-
| {
104-
provider: {
105-
type: "open_ai";
106-
};
107-
model: "gpt-4o-mini";
108-
instructions?: string;
109-
functions?: ThinkModelFunction[];
110-
}
111-
| {
112-
provider: {
113-
type: "anthropic";
114-
};
115-
model: "claude-3-haiku-20240307";
116-
instructions?: string;
117-
functions?: ThinkModelFunction[];
118-
}
119-
| {
120-
provider: {
121-
type: "groq";
122-
};
123-
model: "";
124-
instructions?: string;
125-
functions?: ThinkModelFunction[];
126-
}
127-
| {
128-
provider: {
129-
type: "custom";
130-
url: string;
131-
key: string;
132-
};
133-
model: string;
134-
instructions?: string;
135-
functions?: ThinkModelFunction[];
136-
};
137-
13862
/**
13963
* @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#settingsconfiguration
14064
*/
14165
interface AgentLiveSchema extends Record<string, unknown> {
66+
/**
67+
* Set to true to enable experimental features.
68+
* @default false
69+
*/
70+
experimental: boolean;
14271
audio: {
14372
input?: {
14473
/**
145-
* @default 1
74+
* @default "linear16"
14675
*/
147-
channels?: number;
14876
encoding: AudioEncoding;
14977
/**
150-
* @default false
78+
* @default 16000
15179
*/
152-
multichannel?: boolean;
153-
sampleRate: number;
80+
sample_rate: number;
15481
};
15582
/**
15683
* @see https://developers.deepgram.com/docs/tts-media-output-settings#audio-format-combinations
15784
*/
158-
output?: AudioFormat;
159-
};
160-
agent: {
161-
listen: {
85+
output?: {
86+
encoding?: string;
87+
sample_rate?: number;
88+
bitrate?: number;
16289
/**
163-
* @see https://developers.deepgram.com/docs/model
90+
* @default "none"
16491
*/
165-
model: ListenModel;
166-
/**
167-
* @see https://developers.deepgram.com/docs/keyterm
168-
*/
169-
keyterms?: string[];
92+
container?: string;
17093
};
171-
speak: {
94+
};
95+
agent: {
96+
language?: {
17297
/**
173-
* @see https://developers.deepgram.com/docs/tts-models
98+
* ISO 639-1 language code for agent language.
99+
* @default "en"
174100
*/
175-
model: SpeakModel;
101+
type: string;
102+
};
103+
listen?: {
104+
provider: {
105+
type: "deepgram";
106+
/**
107+
* @see https://developers.deepgram.com/docs/model
108+
*/
109+
model: ListenModel;
110+
/**
111+
* Only available for Nova 3.
112+
* @see https://developers.deepgram.com/docs/keyterm
113+
*/
114+
keyterms?: string[];
115+
};
116+
};
117+
speak?: {
118+
provider: {
119+
type: "deepgram" | "eleven_labs" | "cartesia" | "open_ai" | string;
120+
/**
121+
* Deepgram OR OpenAI model to use.
122+
*/
123+
model?: SpeakModel;
124+
/**
125+
* Eleven Labs OR Cartesia model to use.
126+
*/
127+
model_id?: string;
128+
/**
129+
* Cartesia voice configuration.
130+
*/
131+
voice?: {
132+
mode: string;
133+
id: string;
134+
};
135+
/**
136+
* Optional Cartesia language.
137+
*/
138+
language?: string;
139+
/**
140+
* Optional Eleven Labs language code.
141+
*/
142+
language_code?: string;
143+
};
144+
endpoint?: {
145+
url?: string;
146+
headers?: Record<string, string>;
147+
};
176148
};
177149
/**
178150
* @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#supported-llm-providers-and-models
179151
*/
180-
think: ThinkModel;
181-
};
182-
context?: {
183-
/**
184-
* LLM message history (e.g. to restore existing conversation if websocket disconnects)
185-
*/
186-
messages: { role: "user" | "assistant"; content: string }[];
187-
/**
188-
* Whether to replay the last message, if it is an assistant message.
189-
*/
190-
replay: boolean;
152+
think?: {
153+
provider: {
154+
type: "deepgram" | "open_ai" | "anthropic" | "x_ai" | "amazon_bedrock" | string;
155+
model: string;
156+
/**
157+
* 0-2 for OpenAI, 0-1 for Anthropic.
158+
*/
159+
temperature?: number;
160+
};
161+
/**
162+
* Optional ONLY if LLM provider is Deepgram.
163+
*/
164+
endpoint?: {
165+
url?: string;
166+
headers?: Record<string, string>;
167+
};
168+
functions?: {
169+
name?: string;
170+
description?: string;
171+
parameters?: Record<string, unknown>;
172+
endpoint?: {
173+
url?: string;
174+
method?: string;
175+
headers?: Record<string, string>;
176+
};
177+
}[];
178+
prompt?: string;
179+
};
191180
};
181+
/**
182+
* Optional message the agent will say at the start of the connection.
183+
*/
184+
greeting?: string;
192185
}
193186

194187
export type { AgentLiveSchema, SpeakModel };

src/lib/types/FunctionCallResponse.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@ export interface FunctionCallResponse {
55
/**
66
* This must be the ID that was received in the request.
77
*/
8-
function_call_id: string;
8+
id: string;
9+
/**
10+
* The name of the function being called.
11+
*/
12+
name: string;
913
/**
1014
* The result of the function call.
1115
*/
12-
output: string;
16+
content: string;
1317
}

0 commit comments

Comments
 (0)