feat: migrate to agent v1

naomi-lgbt · naomi-lgbt · commit 3a8493b2076e · 2025-05-05T21:13:38.000Z
diff --git a/examples/node-agent-live/index.js b/examples/node-agent-live/index.js
@@ -17,28 +17,36 @@ const agent = async () => {
       audio: {
         input: {
           encoding: "linear16",
-          sampleRate: 44100,
+          sample_rate: 24000,
         },
         output: {
           encoding: "linear16",
-          sampleRate: 16000,
+          sample_rate: 16000,
           container: "wav",
         },
       },
       agent: {
+        language: "en",
         listen: {
-          model: "nova-3",
-          keyterms: ["spacewalk"],
-        },
-        speak: {
-          model: "aura-asteria-en",
+          provider: {
+            type: "deepgram",
+            model: "nova-3",
+          },
         },
         think: {
           provider: {
             type: "open_ai",
+            model: "gpt-4o-mini",
+          },
+          prompt: "You are a friendly AI assistant.",
+        },
+        speak: {
+          provider: {
+            type: "deepgram",
+            model: "aura-2-thalia-en",
           },
-          model: "gpt-4o-mini",
         },
+        greeting: "Hello! How can I help you today?",
       },
     });
 
diff --git a/src/DeepgramClient.ts b/src/DeepgramClient.ts
@@ -97,7 +97,7 @@ export default class DeepgramClient extends AbstractClient {
    * @returns {AgentLiveClient} A new instance of the AgentLiveClient.
    * @beta
    */
-  public agent(endpoint: string = "/agent"): AgentLiveClient {
+  public agent(endpoint: string = "/:version/agent/converse"): AgentLiveClient {
     return new AgentLiveClient(this.options, endpoint);
   }
 
diff --git a/src/lib/enums/AgentEvents.ts b/src/lib/enums/AgentEvents.ts
@@ -11,7 +11,7 @@ export enum AgentEvents {
   Audio = "Audio",
   /**
    * Confirms the successful connection to the websocket.
-   * { type: "Welcome", session_id: "String"}
+   * { type: "Welcome", request_id: "String"}
    */
   Welcome = "Welcome",
   /**
@@ -36,7 +36,7 @@ export enum AgentEvents {
   AgentThinking = "AgentThinking",
   /**
    * A request to call client-side functions.
-   * { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
+   * { type: "FunctionCallRequest", functions: { id: string; name: string; arguments: string; client_side: boolean}[] }
    */
   FunctionCallRequest = "FunctionCallRequest",
   /**
@@ -57,14 +57,14 @@ export enum AgentEvents {
   /**
    * This event is only emitted when you send an `InjectAgentMessage` request while
    * the user is currently speaking or the server is processing user audio.
-   * { type: "InjectionRefused" }
+   * { type: "InjectionRefused", message: string }
    */
   InjectionRefused = "InjectionRefused",
   /**
-   * A successful response to the `UpdateInstructions` request.
+   * A successful response to the `UpdatePrompt` request.
-   * { type: "InstructionsUpdated" }
+   * { type: "PromptUpdated" }
    */
-  InstructionsUpdated = "InstructionsUpdated",
+  PromptUpdated = "PromptUpdated",
   /**
    * A successful response to the `UpdateSpeak` request.
    * { type: "SpeakUpdated" }
diff --git a/src/lib/types/AgentLiveSchema.ts b/src/lib/types/AgentLiveSchema.ts
@@ -1,20 +1,3 @@
-type AudioFormat =
-  | {
-      encoding: "linear16";
-      container: "wav" | "none";
-      sampleRate: 8000 | 16000 | 24000 | 32000 | 48000;
-    }
-  | {
-      encoding: "mulaw";
-      container: "wav" | "none";
-      sampleRate: 8000 | 16000;
-    }
-  | {
-      encoding: "alaw";
-      container: "wav" | "none";
-      sampleRate: 8000 | 16000;
-    };
-
 type AudioEncoding =
   | "linear16"
   | "flac"
@@ -76,119 +59,129 @@ type SpeakModel =
   | "aura-zeus-en"
   | string;
 
-interface ThinkModelFunction {
-  name: string;
-  description: string;
-  url: string;
-  headers: [
-    {
-      key: "authorization";
-      value: string;
-    }
-  ];
-  method: "POST";
-  parameters: {
-    type: string;
-    properties: Record<
-      string,
-      {
-        type: string;
-        description: string;
-      }
-    >;
-  };
-}
-
-type ThinkModel =
-  | {
-      provider: {
-        type: "open_ai";
-      };
-      model: "gpt-4o-mini";
-      instructions?: string;
-      functions?: ThinkModelFunction[];
-    }
-  | {
-      provider: {
-        type: "anthropic";
-      };
-      model: "claude-3-haiku-20240307";
-      instructions?: string;
-      functions?: ThinkModelFunction[];
-    }
-  | {
-      provider: {
-        type: "groq";
-      };
-      model: "";
-      instructions?: string;
-      functions?: ThinkModelFunction[];
-    }
-  | {
-      provider: {
-        type: "custom";
-        url: string;
-        key: string;
-      };
-      model: string;
-      instructions?: string;
-      functions?: ThinkModelFunction[];
-    };
-
 /**
  * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#settingsconfiguration
  */
 interface AgentLiveSchema extends Record<string, unknown> {
+  /**
+   * Optional. Set to true to enable experimental features.
+   * @default false
+   */
+  experimental?: boolean;
   audio: {
     input?: {
       /**
-       * @default 1
+       * @default "linear16"
        */
-      channels?: number;
       encoding: AudioEncoding;
       /**
-       * @default false
+       * @default 16000
        */
-      multichannel?: boolean;
-      sampleRate: number;
+      sample_rate: number;
     };
     /**
      * @see https://developers.deepgram.com/docs/tts-media-output-settings#audio-format-combinations
      */
-    output?: AudioFormat;
-  };
-  agent: {
-    listen: {
+    output?: {
+      encoding?: string;
+      sample_rate?: number;
+      bitrate?: number;
       /**
-       * @see https://developers.deepgram.com/docs/model
+       * @default "none"
        */
-      model: ListenModel;
-      /**
-       * @see https://developers.deepgram.com/docs/keyterm
-       */
-      keyterms?: string[];
+      container?: string;
     };
-    speak: {
+  };
+  agent: {
+    language?: {
       /**
-       * @see https://developers.deepgram.com/docs/tts-models
+       * ISO 639-1 language code for agent language.
+       * @default "en"
        */
-      model: SpeakModel;
+      type: string;
+    };
+    listen?: {
+      provider: {
+        type: "deepgram";
+        /**
+         * @see https://developers.deepgram.com/docs/model
+         */
+        model: ListenModel;
+        /**
+         * Only available for Nova 3.
+         * @see https://developers.deepgram.com/docs/keyterm
+         */
+        keyterms?: string[];
+      };
+    };
+    speak?: {
+      provider: {
+        type: "deepgram" | "eleven_labs" | "cartesia" | "open_ai" | string;
+        /**
+         * Deepgram OR OpenAI model to use.
+         */
+        model?: SpeakModel;
+        /**
+         * Eleven Labs OR Cartesia model to use.
+         */
+        model_id?: string;
+        /**
+         * Cartesia voice configuration.
+         */
+        voice?: {
+          mode: string;
+          id: string;
+        };
+        /**
+         * Optional Cartesia language.
+         */
+        language?: string;
+        /**
+         * Optional Eleven Labs language code.
+         */
+        language_code?: string;
+      };
+      endpoint?: {
+        url?: string;
+        headers?: Record<string, string>;
+      };
     };
     /**
      * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#supported-llm-providers-and-models
      */
-    think: ThinkModel;
-  };
-  context?: {
-    /**
-     * LLM message history (e.g. to restore existing conversation if websocket disconnects)
-     */
-    messages: { role: "user" | "assistant"; content: string }[];
-    /**
-     * Whether to replay the last message, if it is an assistant message.
-     */
-    replay: boolean;
+    think?: {
+      provider: {
+        type: "deepgram" | "open_ai" | "anthropic" | "x_ai" | "amazon_bedrock" | string;
+        model: string;
+        /**
+         * 0-2 for OpenAI, 0-1 for Anthropic.
+         */
+        temperature?: number;
+      };
+      /**
+       * Optional ONLY if LLM provider is Deepgram.
+       */
+      endpoint?: {
+        url?: string;
+        headers?: Record<string, string>;
+      };
+      functions?: {
+        name?: string;
+        description?: string;
+        parameters?: Record<string, unknown>;
+        endpoint?: {
+          url?: string;
+          method?: string;
+          headers?: Record<string, string>;
+        };
+      }[];
+      prompt?: string;
+    };
   };
+  /**
+   * Optional message the agent will say at the start of the connection.
+   */
+  greeting?: string;
 }
 
 export type { AgentLiveSchema, SpeakModel };
diff --git a/src/lib/types/FunctionCallResponse.ts b/src/lib/types/FunctionCallResponse.ts
@@ -5,9 +5,13 @@ export interface FunctionCallResponse {
   /**
    * This must be the ID that was received in the request.
    */
-  function_call_id: string;
+  id: string;
+  /**
+   * The name of the function being called.
+   */
+  name: string;
   /**
    * The result of the function call.
    */
-  output: string;
+  content: string;
 }
diff --git a/src/packages/AgentLiveClient.ts b/src/packages/AgentLiveClient.ts

Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,7 @@ export default class DeepgramClient extends AbstractClient {`
`97`	`97`	`* @returns {AgentLiveClient} A new instance of the AgentLiveClient.`
`98`	`98`	`* @beta`
`99`	`99`	`*/`
`100`		`- public agent(endpoint: string = "/agent"): AgentLiveClient {`
	`100`	`+ public agent(endpoint: string = "/:version/agent/converse"): AgentLiveClient {`
`101`	`101`	`return new AgentLiveClient(this.options, endpoint);`
`102`	`102`	`}`
`103`	`103`