|
1 | | -type AudioFormat = |
2 | | - | { |
3 | | - encoding: "linear16"; |
4 | | - container: "wav" | "none"; |
5 | | - sampleRate: 8000 | 16000 | 24000 | 32000 | 48000; |
6 | | - } |
7 | | - | { |
8 | | - encoding: "mulaw"; |
9 | | - container: "wav" | "none"; |
10 | | - sampleRate: 8000 | 16000; |
11 | | - } |
12 | | - | { |
13 | | - encoding: "alaw"; |
14 | | - container: "wav" | "none"; |
15 | | - sampleRate: 8000 | 16000; |
16 | | - }; |
17 | | - |
18 | 1 | type AudioEncoding = |
19 | 2 | | "linear16" |
20 | 3 | | "flac" |
@@ -76,119 +59,129 @@ type SpeakModel = |
76 | 59 | | "aura-zeus-en" |
77 | 60 | | string; |
78 | 61 |
|
79 | | -interface ThinkModelFunction { |
80 | | - name: string; |
81 | | - description: string; |
82 | | - url: string; |
83 | | - headers: [ |
84 | | - { |
85 | | - key: "authorization"; |
86 | | - value: string; |
87 | | - } |
88 | | - ]; |
89 | | - method: "POST"; |
90 | | - parameters: { |
91 | | - type: string; |
92 | | - properties: Record< |
93 | | - string, |
94 | | - { |
95 | | - type: string; |
96 | | - description: string; |
97 | | - } |
98 | | - >; |
99 | | - }; |
100 | | -} |
101 | | - |
102 | | -type ThinkModel = |
103 | | - | { |
104 | | - provider: { |
105 | | - type: "open_ai"; |
106 | | - }; |
107 | | - model: "gpt-4o-mini"; |
108 | | - instructions?: string; |
109 | | - functions?: ThinkModelFunction[]; |
110 | | - } |
111 | | - | { |
112 | | - provider: { |
113 | | - type: "anthropic"; |
114 | | - }; |
115 | | - model: "claude-3-haiku-20240307"; |
116 | | - instructions?: string; |
117 | | - functions?: ThinkModelFunction[]; |
118 | | - } |
119 | | - | { |
120 | | - provider: { |
121 | | - type: "groq"; |
122 | | - }; |
123 | | - model: ""; |
124 | | - instructions?: string; |
125 | | - functions?: ThinkModelFunction[]; |
126 | | - } |
127 | | - | { |
128 | | - provider: { |
129 | | - type: "custom"; |
130 | | - url: string; |
131 | | - key: string; |
132 | | - }; |
133 | | - model: string; |
134 | | - instructions?: string; |
135 | | - functions?: ThinkModelFunction[]; |
136 | | - }; |
137 | | - |
138 | 62 | /** |
139 | 63 | * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#settingsconfiguration |
140 | 64 | */ |
141 | 65 | interface AgentLiveSchema extends Record<string, unknown> { |
| 66 | + /** |
| 67 | + * Set to true to enable experimental features. |
| 68 | + * @default false |
| 69 | + */ |
| 70 | + experimental: boolean; |
142 | 71 | audio: { |
143 | 72 | input?: { |
144 | 73 | /** |
145 | | - * @default 1 |
| 74 | + * @default "linear16" |
146 | 75 | */ |
147 | | - channels?: number; |
148 | 76 | encoding: AudioEncoding; |
149 | 77 | /** |
150 | | - * @default false |
| 78 | + * @default 16000 |
151 | 79 | */ |
152 | | - multichannel?: boolean; |
153 | | - sampleRate: number; |
| 80 | + sample_rate: number; |
154 | 81 | }; |
155 | 82 | /** |
156 | 83 | * @see https://developers.deepgram.com/docs/tts-media-output-settings#audio-format-combinations |
157 | 84 | */ |
158 | | - output?: AudioFormat; |
159 | | - }; |
160 | | - agent: { |
161 | | - listen: { |
| 85 | + output?: { |
| 86 | + encoding?: string; |
| 87 | + sample_rate?: number; |
| 88 | + bitrate?: number; |
162 | 89 | /** |
163 | | - * @see https://developers.deepgram.com/docs/model |
| 90 | + * @default "none" |
164 | 91 | */ |
165 | | - model: ListenModel; |
166 | | - /** |
167 | | - * @see https://developers.deepgram.com/docs/keyterm |
168 | | - */ |
169 | | - keyterms?: string[]; |
| 92 | + container?: string; |
170 | 93 | }; |
171 | | - speak: { |
| 94 | + }; |
| 95 | + agent: { |
| 96 | + language?: { |
172 | 97 | /** |
173 | | - * @see https://developers.deepgram.com/docs/tts-models |
| 98 | + * ISO 639-1 language code for agent language. |
| 99 | + * @default "en" |
174 | 100 | */ |
175 | | - model: SpeakModel; |
| 101 | + type: string; |
| 102 | + }; |
| 103 | + listen?: { |
| 104 | + provider: { |
| 105 | + type: "deepgram"; |
| 106 | + /** |
| 107 | + * @see https://developers.deepgram.com/docs/model |
| 108 | + */ |
| 109 | + model: ListenModel; |
| 110 | + /** |
| 111 | + * Only available for Nova 3. |
| 112 | + * @see https://developers.deepgram.com/docs/keyterm |
| 113 | + */ |
| 114 | + keyterms?: string[]; |
| 115 | + }; |
| 116 | + }; |
| 117 | + speak?: { |
| 118 | + provider: { |
| 119 | + type: "deepgram" | "eleven_labs" | "cartesia" | "open_ai" | string; |
| 120 | + /** |
| 121 | + * Deepgram OR OpenAI model to use. |
| 122 | + */ |
| 123 | + model?: SpeakModel; |
| 124 | + /** |
| 125 | + * Eleven Labs OR Cartesia model to use. |
| 126 | + */ |
| 127 | + model_id?: string; |
| 128 | + /** |
| 129 | + * Cartesia voice configuration. |
| 130 | + */ |
| 131 | + voice?: { |
| 132 | + mode: string; |
| 133 | + id: string; |
| 134 | + }; |
| 135 | + /** |
| 136 | + * Optional Cartesia language. |
| 137 | + */ |
| 138 | + language?: string; |
| 139 | + /** |
 | 140 | + * Optional Eleven Labs language code. |
| 141 | + */ |
| 142 | + language_code?: string; |
| 143 | + }; |
| 144 | + endpoint?: { |
| 145 | + url?: string; |
| 146 | + headers?: Record<string, string>; |
| 147 | + }; |
176 | 148 | }; |
177 | 149 | /** |
178 | 150 | * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#supported-llm-providers-and-models |
179 | 151 | */ |
180 | | - think: ThinkModel; |
181 | | - }; |
182 | | - context?: { |
183 | | - /** |
184 | | - * LLM message history (e.g. to restore existing conversation if websocket disconnects) |
185 | | - */ |
186 | | - messages: { role: "user" | "assistant"; content: string }[]; |
187 | | - /** |
188 | | - * Whether to replay the last message, if it is an assistant message. |
189 | | - */ |
190 | | - replay: boolean; |
| 152 | + think?: { |
| 153 | + provider: { |
| 154 | + type: "deepgram" | "open_ai" | "anthropic" | "x_ai" | "amazon_bedrock" | string; |
| 155 | + model: string; |
| 156 | + /** |
| 157 | + * 0-2 for OpenAI, 0-1 for Anthropic. |
| 158 | + */ |
| 159 | + temperature?: number; |
| 160 | + }; |
| 161 | + /** |
| 162 | + * Optional ONLY if LLM provider is Deepgram. |
| 163 | + */ |
| 164 | + endpoint?: { |
| 165 | + url?: string; |
| 166 | + headers?: Record<string, string>; |
| 167 | + }; |
| 168 | + functions?: { |
| 169 | + name?: string; |
| 170 | + description?: string; |
| 171 | + parameters?: Record<string, unknown>; |
| 172 | + endpoint?: { |
| 173 | + url?: string; |
| 174 | + method?: string; |
| 175 | + headers?: Record<string, string>; |
| 176 | + }; |
| 177 | + }[]; |
| 178 | + prompt?: string; |
| 179 | + }; |
191 | 180 | }; |
| 181 | + /** |
| 182 | + * Optional message the agent will say at the start of the connection. |
| 183 | + */ |
| 184 | + greeting?: string; |
192 | 185 | } |
193 | 186 |
|
194 | 187 | export type { AgentLiveSchema, SpeakModel }; |
0 commit comments