# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

import builtins
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, TypeAlias

from .._models import BaseModel
from .shared.chat_completion_token_logprob import ChatCompletionTokenLogprob

| 10 | +__all__ = [ |
| 11 | + "ResponseCreateResponse", |
| 12 | + "Usage", |
| 13 | + "UsageInputTokensDetails", |
| 14 | + "UsageOutputTokensDetails", |
| 15 | + "Choice", |
| 16 | + "ChoiceMessage", |
| 17 | + "ChoiceMessageToolCall", |
| 18 | + "ChoiceMessageToolCallFunction", |
| 19 | + "ChoiceLogprobs", |
| 20 | + "Output", |
| 21 | + "OutputUnionMember0", |
| 22 | + "OutputUnionMember1", |
| 23 | + "OutputUnionMember2", |
| 24 | + "OutputUnionMember2Content", |
| 25 | + "Tool", |
| 26 | +] |
| 27 | + |
| 28 | + |
class UsageInputTokensDetails(BaseModel):
    """A detailed breakdown of the input tokens."""

    cached_tokens: int
    """The number of tokens that were retrieved from the cache.

    [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
    """


class UsageOutputTokensDetails(BaseModel):
    """A detailed breakdown of the output tokens."""

    reasoning_tokens: int
    """The number of reasoning tokens."""

    tool_output_tokens: int
    """The number of tool output tokens."""


class Usage(BaseModel):
    """
    Detailed token usage statistics for the request, including input/output token
    counts and detailed breakdowns.
    """

    input_tokens: int
    """The number of input tokens."""

    input_tokens_details: UsageInputTokensDetails
    """A detailed breakdown of the input tokens."""

    output_tokens: int
    """The number of output tokens."""

    output_tokens_details: UsageOutputTokensDetails
    """A detailed breakdown of the output tokens."""

    total_tokens: int
    """The total number of tokens used."""


class ChoiceMessageToolCallFunction(BaseModel):
    """The function that the model called."""

    arguments: str
    """
    The arguments to call the function with, as generated by the model in JSON
    format. Note that the model does not always generate valid JSON, and may
    hallucinate parameters not defined by your function schema. Validate the
    arguments in your code before calling your function.
    """

    name: str
    """The name of the function to call."""


class ChoiceMessageToolCall(BaseModel):
    """A tool call generated by the model as part of a choice's message."""

    id: str
    """The ID of the tool call."""

    function: ChoiceMessageToolCallFunction
    """The function that the model called."""

    type: Literal["function"]
    """The type of the tool. Currently, only `function` is supported."""


class ChoiceMessage(BaseModel):
    """The generated message response."""

    content: Optional[str] = None
    """The generated text content."""

    role: Optional[Literal["assistant"]] = None
    """The role of the message author, which is always `assistant`."""

    tool_calls: Optional[List[ChoiceMessageToolCall]] = None
    """The tool calls generated by the model, such as function calls."""


class ChoiceLogprobs(BaseModel):
    """Log probability information for the choice.

    Only present if logprobs was requested in the request.
    """

    content: Optional[List[ChatCompletionTokenLogprob]] = None
    """A list of message content tokens with log probability information."""


class Choice(BaseModel):
    """A single completion choice returned by the model."""

    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"]
    """The reason the model stopped generating tokens.

    This will be `stop` if the model hit a natural stop point or a provided stop
    sequence, `length` if the maximum number of tokens specified in the request was
    reached, `tool_calls` if the model called a tool, or `content_filter` if
    content was omitted due to a content filter.
    """

    index: int
    """The index of the choice in the list of choices."""

    message: ChoiceMessage
    """The generated message response."""

    logprobs: Optional[ChoiceLogprobs] = None
    """Log probability information for the choice.

    Only present if logprobs was requested in the request.
    """


class OutputUnionMember0(BaseModel):
    """An output item representing a function tool call (`type == "function_call"`)."""

    arguments: str
    """JSON string of function arguments"""

    call_id: str
    """The unique ID of the function tool call"""

    name: str
    """The name of the function to call"""

    type: Literal["function_call"]
    """The type of output item"""

    id: Optional[str] = None
    """The unique ID of the function tool call (same as call_id)"""

    encrypted_content: Optional[str] = None
    """Encrypted content (optional)"""

    status: Optional[str] = None
    """Status of the item (optional, can be null)"""


class OutputUnionMember1(BaseModel):
    """An output item containing plain text (`type == "text"`)."""

    text: str
    """The text content"""

    type: Literal["text"]
    """The type of output item"""


class OutputUnionMember2Content(BaseModel):
    """A single reasoning-text content part within a reasoning output item."""

    text: str
    """The reasoning text content"""

    type: Literal["reasoning_text"]
    """The type of content"""


class OutputUnionMember2(BaseModel):
    """An output item containing model reasoning (`type == "reasoning"`)."""

    id: str
    """The unique ID of the reasoning item"""

    content: List[OutputUnionMember2Content]
    """Array of reasoning content parts"""

    summary: List[object]
    """Summary of the reasoning (usually empty)"""

    type: Literal["reasoning"]
    """The type of output item"""

    encrypted_content: Optional[str] = None
    """Encrypted content (optional)"""

    status: Optional[str] = None
    """Status of the item (optional, can be null)"""


# A single response output item: a function call, a text item, or a reasoning
# item. Members are discriminated by each model's `type` literal.
Output: TypeAlias = Union[OutputUnionMember0, OutputUnionMember1, OutputUnionMember2]


class Tool(BaseModel):
    """Tool definition for Responses API (flat format).

    This format is used by VLLM's Responses API where name, description, and
    parameters are at the top level of the tool object.
    """

    type: Literal["function", "web_search", "web_search_2025_08_26"]
    """The type of the tool.

    Supported values are `function` (custom tools), `web_search`, and
    `web_search_2025_08_26` (built-in web search).
    """

    description: Optional[str] = None
    """
    A description of what the function does, used by the model to choose when and
    how to call the function.
    """

    name: Optional[str] = None
    """The name of the function to be called.

    Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length
    of 64.
    """

    parameters: Optional[Dict[str, object]] = None
    """The parameters the functions accepts, described as a JSON Schema object.

    See the [guide](/docs/guides/function-calling) for examples, and the
    [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
    documentation about the format.

    Omitting `parameters` defines a function with an empty parameter list.
    """


class ResponseCreateResponse(BaseModel):
    """
    Represents a text-to-text response returned by the model, based on the
    provided input. VLLM models only.
    """

    id: str
    """A unique identifier for the response."""

    created: int
    """The Unix timestamp (in seconds) of when the response was created."""

    model: str
    """The model used to generate the response."""

    object: Literal["response"]
    """The object type, which is always `response`."""

    usage: Usage
    """
    Detailed token usage statistics for the request, including input/output token
    counts and detailed breakdowns.
    """

    background: Optional[bool] = None
    """Whether the request was processed in the background"""

    choices: Optional[List[Choice]] = None
    """A list of response choices.

    Can be more than one if `n` is greater than 1. Optional - Responses API
    primarily uses the output array.
    """

    # `builtins.object` (not bare `object`) is used because the `object` field
    # above shadows the builtin within this class body.
    input_messages: Optional[List[builtins.object]] = None
    """Input messages (if applicable)"""

    max_output_tokens: Optional[int] = None
    """Maximum output tokens setting"""

    max_tool_calls: Optional[int] = None
    """Maximum tool calls setting"""

    output: Optional[List[Output]] = None
    """An array of content items generated by the model.

    This includes text content, function calls, reasoning items, and other output
    types. Use this field for Responses API compatibility.
    """

    output_messages: Optional[List[builtins.object]] = None
    """Output messages (if applicable)"""

    parallel_tool_calls: Optional[bool] = None
    """Whether parallel tool calls are enabled"""

    previous_response_id: Optional[str] = None
    """Previous response ID (for multi-turn conversations)"""

    prompt: Optional[str] = None
    """Prompt used for the response"""

    reasoning: Optional[str] = None
    """Reasoning content"""

    service_tier: Optional[str] = None
    """Service tier used"""

    status: Optional[str] = None
    """Status of the response"""

    temperature: Optional[float] = None
    """Temperature setting used for the response"""

    text: Optional[str] = None
    """Text content"""

    tool_choice: Optional[str] = None
    """Tool choice setting used for the response"""

    tools: Optional[List[Tool]] = None
    """Tools available for the response"""

    top_logprobs: Optional[int] = None
    """Top logprobs setting"""

    top_p: Optional[float] = None
    """Top-p setting used for the response"""

    truncation: Optional[str] = None
    """Truncation setting"""

    user: Optional[str] = None
    """User identifier"""