import { APIResource } from "../../core/resource.js";
import * as ClientSecretsAPI from "./client-secrets.js";
import * as RealtimeAPI from "./realtime.js";
import * as ResponsesAPI from "../responses/responses.js";
import { APIPromise } from "../../core/api-promise.js";
import { RequestOptions } from "../../internal/request-options.js";
export declare class ClientSecrets extends APIResource {
    /**
     * Create a Realtime client secret with an associated session configuration.
     *
     * @example
     * ```ts
     * const clientSecret =
     *   await client.realtime.clientSecrets.create();
     * ```
     */
    create(body: ClientSecretCreateParams, options?: RequestOptions): APIPromise<ClientSecretCreateResponse>;
}
/**
 * Ephemeral key returned by the API.
 */
export interface RealtimeSessionClientSecret {
    /**
     * Timestamp for when the token expires. Currently, all tokens expire after one
     * minute.
     */
    expires_at: number;
    /**
     * Ephemeral key usable in client environments to authenticate connections to the
     * Realtime API. Use this in client-side environments rather than a standard API
     * token, which should only be used server-side.
     */
    value: string;
}
/**
 * A new Realtime session configuration, with an ephemeral key. Default TTL for
 * keys is one minute.
 */
export interface RealtimeSessionCreateResponse {
    /**
     * Ephemeral key returned by the API.
     */
    client_secret: RealtimeSessionClientSecret;
    /**
     * The type of session to create. Always `realtime` for the Realtime API.
     */
    type: 'realtime';
    /**
     * Configuration for input and output audio.
     */
    audio?: RealtimeSessionCreateResponse.Audio;
    /**
     * Additional fields to include in server outputs.
     *
     * `item.input_audio_transcription.logprobs`: Include logprobs for input audio
     * transcription.
     */
    include?: Array<'item.input_audio_transcription.logprobs'>;
    /**
     * The default system instructions (i.e. system message) prepended to model calls.
     * This field allows the client to guide the model on desired responses. The model
     * can be instructed on response content and format (e.g. "be extremely succinct",
     * "act friendly", "here are examples of good responses") and on audio behavior
     * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
     * instructions are not guaranteed to be followed by the model, but they provide
     * guidance to the model on the desired behavior.
     *
     * Note that the server sets default instructions which will be used if this field
     * is not set and are visible in the `session.created` event at the start of the
     * session.
     */
    instructions?: string;
    /**
     * Maximum number of output tokens for a single assistant response, inclusive of
     * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
     * `inf` for the maximum available tokens for a given model. Defaults to `inf`.
     */
    max_output_tokens?: number | 'inf';
    /**
     * The Realtime model used for this session.
     */
    model?: (string & {}) | 'gpt-realtime' | 'gpt-realtime-2025-08-28' | 'gpt-4o-realtime-preview' | 'gpt-4o-realtime-preview-2024-10-01' | 'gpt-4o-realtime-preview-2024-12-17' | 'gpt-4o-realtime-preview-2025-06-03' | 'gpt-4o-mini-realtime-preview' | 'gpt-4o-mini-realtime-preview-2024-12-17' | 'gpt-realtime-mini' | 'gpt-realtime-mini-2025-10-06' | 'gpt-audio-mini' | 'gpt-audio-mini-2025-10-06';
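    /*
     * Illustrative sketch (not part of the generated declarations): when minting a
     * client secret, the session payload is a `RealtimeAPI.RealtimeSessionCreateRequest`
     * (see `ClientSecretCreateParams` below), which is assumed here to accept the same
     * `type`, `model`, and `instructions` fields documented on this response type.
     * Values are examples only.
     *
     * ```ts
     * const secret = await client.realtime.clientSecrets.create({
     *   session: {
     *     type: 'realtime',
     *     model: 'gpt-realtime',
     *     instructions: 'Answer briefly and speak at a natural pace.',
     *   },
     * });
     * ```
     */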
    /**
     * The set of modalities the model can respond with. It defaults to `["audio"]`,
     * indicating that the model will respond with audio plus a transcript. `["text"]`
     * can be used to make the model respond with text only. It is not possible to
     * request both `text` and `audio` at the same time.
     */
    output_modalities?: Array<'text' | 'audio'>;
    /**
     * Reference to a prompt template and its variables.
     * [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
     */
    prompt?: ResponsesAPI.ResponsePrompt | null;
    /**
     * How the model chooses tools. Provide one of the string modes or force a specific
     * function/MCP tool.
     */
    tool_choice?: ResponsesAPI.ToolChoiceOptions | ResponsesAPI.ToolChoiceFunction | ResponsesAPI.ToolChoiceMcp;
    /**
     * Tools available to the model.
     */
    tools?: Array<RealtimeAPI.RealtimeFunctionTool | RealtimeSessionCreateResponse.McpTool>;
    /**
     * Realtime API can write session traces to the
     * [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
     * tracing is enabled for a session, the configuration cannot be modified.
     *
     * `auto` will create a trace for the session with default values for the workflow
     * name, group id, and metadata.
     */
    tracing?: 'auto' | RealtimeSessionCreateResponse.TracingConfiguration | null;
    /**
     * When the number of tokens in a conversation exceeds the model's input token
     * limit, the conversation will be truncated, meaning messages (starting from the
     * oldest) will not be included in the model's context. A 32k context model with
     * 4,096 max output tokens can only include 28,224 tokens in the context before
     * truncation occurs. Clients can configure truncation behavior to truncate with a
     * lower max token limit, which is an effective way to control token usage and
     * cost. Truncation will reduce the number of cached tokens on the next turn
     * (busting the cache), since messages are dropped from the beginning of the
     * context. However, clients can also configure truncation to retain messages up to
     * a fraction of the maximum context size, which will reduce the need for future
     * truncations and thus improve the cache rate. Truncation can be disabled
     * entirely, which means the server will never truncate but would instead return an
     * error if the conversation exceeds the model's input token limit.
     */
    truncation?: RealtimeAPI.RealtimeTruncation;
}
export declare namespace RealtimeSessionCreateResponse {
    /**
     * Configuration for input and output audio.
     */
    interface Audio {
        input?: Audio.Input;
        output?: Audio.Output;
    }
    namespace Audio {
        interface Input {
            /**
             * The format of the input audio.
             */
            format?: RealtimeAPI.RealtimeAudioFormats;
            /**
             * Configuration for input audio noise reduction. This can be set to `null` to turn
             * off. Noise reduction filters audio added to the input audio buffer before it is
             * sent to VAD and the model. Filtering the audio can improve VAD and turn
             * detection accuracy (reducing false positives) and model performance by improving
             * perception of the input audio.
             */
            noise_reduction?: Input.NoiseReduction;
            /**
             * Configuration for input audio transcription, defaults to off and can be set to
             * `null` to turn off once on. Input audio transcription is not native to the
             * model, since the model consumes audio directly. Transcription runs
             * asynchronously through
             * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
             * and should be treated as guidance of input audio content rather than precisely
             * what the model heard. The client can optionally set the language and prompt for
             * transcription; these offer additional guidance to the transcription service.
             */
            transcription?: RealtimeAPI.AudioTranscription;
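            /*
             * Illustrative sketch (not generated): a matching input-audio block in the
             * session create request. The `noise_reduction` and `transcription` shapes are
             * assumed to mirror the fields documented above; the model name and language are
             * examples only.
             *
             * ```ts
             * const inputAudio = {
             *   noise_reduction: { type: 'near_field' },
             *   transcription: { model: 'gpt-4o-mini-transcribe', language: 'en' },
             * };
             * ```
             */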
            /**
             * Configuration for turn detection, either Server VAD or Semantic VAD. This can be
             * set to `null` to turn off, in which case the client must manually trigger model
             * response.
             *
             * Server VAD means that the model will detect the start and end of speech based on
             * audio volume and respond at the end of user speech.
             *
             * Semantic VAD is more advanced and uses a turn detection model (in conjunction
             * with VAD) to semantically estimate whether the user has finished speaking, then
             * dynamically sets a timeout based on this probability. For example, if user audio
             * trails off with "uhhm", the model will score a low probability of turn end and
             * wait longer for the user to continue speaking. This can be useful for more
             * natural conversations, but may have a higher latency.
             */
            turn_detection?: Input.ServerVad | Input.SemanticVad | null;
        }
        namespace Input {
            /**
             * Configuration for input audio noise reduction. This can be set to `null` to turn
             * off. Noise reduction filters audio added to the input audio buffer before it is
             * sent to VAD and the model. Filtering the audio can improve VAD and turn
             * detection accuracy (reducing false positives) and model performance by improving
             * perception of the input audio.
             */
            interface NoiseReduction {
                /**
                 * Type of noise reduction. `near_field` is for close-talking microphones such as
                 * headphones, `far_field` is for far-field microphones such as laptop or
                 * conference room microphones.
                 */
                type?: RealtimeAPI.NoiseReductionType;
            }
            /**
             * Server-side voice activity detection (VAD) which flips on when user speech is
             * detected and off after a period of silence.
             */
            interface ServerVad {
                /**
                 * Type of turn detection, `server_vad` to turn on simple Server VAD.
                 */
                type: 'server_vad';
                /**
                 * Whether or not to automatically generate a response when a VAD stop event
                 * occurs.
                 */
                create_response?: boolean;
                /**
                 * Optional timeout after which a model response will be triggered automatically.
                 * This is useful for situations in which a long pause from the user is unexpected,
                 * such as a phone call. The model will effectively prompt the user to continue the
                 * conversation based on the current context.
                 *
                 * The timeout value will be applied after the last model response's audio has
                 * finished playing, i.e. it's set to the `response.done` time plus audio playback
                 * duration.
                 *
                 * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
                 * Response) will be emitted when the timeout is reached. Idle timeout is currently
                 * only supported for `server_vad` mode.
                 */
                idle_timeout_ms?: number | null;
                /**
                 * Whether or not to automatically interrupt any ongoing response with output to
                 * the default conversation (i.e. `conversation` of `auto`) when a VAD start event
                 * occurs.
                 */
                interrupt_response?: boolean;
                /**
                 * Used only for `server_vad` mode. Amount of audio to include before the VAD
                 * detected speech (in milliseconds). Defaults to 300ms.
                 */
                prefix_padding_ms?: number;
                /**
                 * Used only for `server_vad` mode. Duration of silence to detect speech stop (in
                 * milliseconds). Defaults to 500ms. With shorter values the model will respond
                 * more quickly, but may jump in on short pauses from the user.
                 */
                silence_duration_ms?: number;
                /**
                 * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0);
                 * this defaults to 0.5. A higher threshold will require louder audio to activate
                 * the model, and thus might perform better in noisy environments.
                 */
                threshold?: number;
            }
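            /*
             * Illustrative sketch (not generated): a Server VAD `turn_detection` value built
             * from the fields documented above. All numbers are examples, not recommendations.
             *
             * ```ts
             * const turnDetection = {
             *   type: 'server_vad' as const,
             *   threshold: 0.6,
             *   prefix_padding_ms: 300,
             *   silence_duration_ms: 500,
             *   idle_timeout_ms: 6000,
             *   create_response: true,
             * };
             * ```
             */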
            /**
             * Server-side semantic turn detection which uses a model to determine when the
             * user has finished speaking.
             */
            interface SemanticVad {
                /**
                 * Type of turn detection, `semantic_vad` to turn on Semantic VAD.
                 */
                type: 'semantic_vad';
                /**
                 * Whether or not to automatically generate a response when a VAD stop event
                 * occurs.
                 */
                create_response?: boolean;
                /**
                 * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
                 * will wait longer for the user to continue speaking, `high` will respond more
                 * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
                 * and `high` have max timeouts of 8s, 4s, and 2s respectively.
                 */
                eagerness?: 'low' | 'medium' | 'high' | 'auto';
                /**
                 * Whether or not to automatically interrupt any ongoing response with output to
                 * the default conversation (i.e. `conversation` of `auto`) when a VAD start event
                 * occurs.
                 */
                interrupt_response?: boolean;
            }
        }
        interface Output {
            /**
             * The format of the output audio.
             */
            format?: RealtimeAPI.RealtimeAudioFormats;
            /**
             * The speed of the model's spoken response as a multiple of the original speed.
             * 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
             * This value can only be changed in between model turns, not while a response is
             * in progress.
             *
             * This parameter is a post-processing adjustment to the audio after it is
             * generated; it's also possible to prompt the model to speak faster or slower.
             */
            speed?: number;
            /**
             * The voice the model uses to respond. Voice cannot be changed during the session
             * once the model has responded with audio at least once. Current voice options are
             * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
             * and `cedar`. We recommend `marin` and `cedar` for best quality.
             */
            voice?: (string & {}) | 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse' | 'marin' | 'cedar';
        }
    }
    /**
     * Give the model access to additional tools via remote Model Context Protocol
     * (MCP) servers.
     * [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
     */
    interface McpTool {
        /**
         * A label for this MCP server, used to identify it in tool calls.
         */
        server_label: string;
        /**
         * The type of the MCP tool. Always `mcp`.
         */
        type: 'mcp';
        /**
         * List of allowed tool names or a filter object.
         */
        allowed_tools?: Array<string> | McpTool.McpToolFilter | null;
        /**
         * An OAuth access token that can be used with a remote MCP server, either with a
         * custom MCP server URL or a service connector. Your application must handle the
         * OAuth authorization flow and provide the token here.
         */
        authorization?: string;
        /**
         * Identifier for service connectors, like those available in ChatGPT. One of
         * `server_url` or `connector_id` must be provided. Learn more about service
         * connectors
         * [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
         *
         * Currently supported `connector_id` values are:
         *
         * - Dropbox: `connector_dropbox`
         * - Gmail: `connector_gmail`
         * - Google Calendar: `connector_googlecalendar`
         * - Google Drive: `connector_googledrive`
         * - Microsoft Teams: `connector_microsoftteams`
         * - Outlook Calendar: `connector_outlookcalendar`
         * - Outlook Email: `connector_outlookemail`
         * - SharePoint: `connector_sharepoint`
         */
        connector_id?: 'connector_dropbox' | 'connector_gmail' | 'connector_googlecalendar' | 'connector_googledrive' | 'connector_microsoftteams' | 'connector_outlookcalendar' | 'connector_outlookemail' | 'connector_sharepoint';
        /**
         * Optional HTTP headers to send to the MCP server. Use for authentication or other
         * purposes.
         */
        headers?: {
            [key: string]: string;
        } | null;
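        /*
         * Illustrative sketch (not generated): an MCP tool entry for the session `tools`
         * array using a service connector. The connector id comes from the list above;
         * the OAuth token and tool names are placeholders.
         *
         * ```ts
         * const driveTool = {
         *   type: 'mcp' as const,
         *   server_label: 'google-drive',
         *   connector_id: 'connector_googledrive' as const,
         *   authorization: '<oauth-access-token>',
         *   allowed_tools: { tool_names: ['search', 'fetch'] },
         * };
         * ```
         */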
        /**
         * Specify which of the MCP server's tools require approval.
         */
        require_approval?: McpTool.McpToolApprovalFilter | 'always' | 'never' | null;
        /**
         * Optional description of the MCP server, used to provide more context.
         */
        server_description?: string;
        /**
         * The URL for the MCP server. One of `server_url` or `connector_id` must be
         * provided.
         */
        server_url?: string;
    }
    namespace McpTool {
        /**
         * A filter object to specify which tools are allowed.
         */
        interface McpToolFilter {
            /**
             * Indicates whether or not a tool modifies data or is read-only. If an MCP server
             * is
             * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
             * it will match this filter.
             */
            read_only?: boolean;
            /**
             * List of allowed tool names.
             */
            tool_names?: Array<string>;
        }
        /**
         * Specify which of the MCP server's tools require approval. Can be `always`,
         * `never`, or a filter object associated with tools that require approval.
         */
        interface McpToolApprovalFilter {
            /**
             * A filter object to specify which tools are allowed.
             */
            always?: McpToolApprovalFilter.Always;
            /**
             * A filter object to specify which tools are allowed.
             */
            never?: McpToolApprovalFilter.Never;
        }
        namespace McpToolApprovalFilter {
            /**
             * A filter object to specify which tools are allowed.
             */
            interface Always {
                /**
                 * Indicates whether or not a tool modifies data or is read-only. If an MCP server
                 * is
                 * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
                 * it will match this filter.
                 */
                read_only?: boolean;
                /**
                 * List of allowed tool names.
                 */
                tool_names?: Array<string>;
            }
            /**
             * A filter object to specify which tools are allowed.
             */
            interface Never {
                /**
                 * Indicates whether or not a tool modifies data or is read-only. If an MCP server
                 * is
                 * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
                 * it will match this filter.
                 */
                read_only?: boolean;
                /**
                 * List of allowed tool names.
                 */
                tool_names?: Array<string>;
            }
        }
    }
    /**
     * Granular configuration for tracing.
     */
    interface TracingConfiguration {
        /**
         * The group id to attach to this trace to enable filtering and grouping in the
         * Traces Dashboard.
         */
        group_id?: string;
        /**
         * The arbitrary metadata to attach to this trace to enable filtering in the Traces
         * Dashboard.
         */
        metadata?: unknown;
        /**
         * The name of the workflow to attach to this trace. This is used to name the trace
         * in the Traces Dashboard.
         */
        workflow_name?: string;
    }
}
/**
 * A Realtime transcription session configuration object.
 */
export interface RealtimeTranscriptionSessionCreateResponse {
    /**
     * Unique identifier for the session that looks like `sess_1234567890abcdef`.
     */
    id: string;
    /**
     * The object type. Always `realtime.transcription_session`.
     */
    object: string;
    /**
     * The type of session. Always `transcription` for transcription sessions.
     */
    type: 'transcription';
    /**
     * Configuration for input audio for the session.
     */
    audio?: RealtimeTranscriptionSessionCreateResponse.Audio;
    /**
     * Expiration timestamp for the session, in seconds since epoch.
     */
    expires_at?: number;
    /**
     * Additional fields to include in server outputs.
     *
     * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
     *   transcription.
     */
    include?: Array<'item.input_audio_transcription.logprobs'>;
}
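/*
 * Illustrative sketch (not generated): minting a client secret for a transcription
 * session. The request type is `RealtimeAPI.RealtimeTranscriptionSessionCreateRequest`
 * (see `ClientSecretCreateParams` below); its fields are assumed here to mirror the
 * response shape documented above, and all values are examples only.
 *
 * ```ts
 * const transcriptionSecret = await client.realtime.clientSecrets.create({
 *   session: {
 *     type: 'transcription',
 *     audio: {
 *       input: {
 *         transcription: { model: 'gpt-4o-mini-transcribe', language: 'en' },
 *         turn_detection: { type: 'server_vad', silence_duration_ms: 500 },
 *       },
 *     },
 *   },
 * });
 * ```
 */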
export declare namespace RealtimeTranscriptionSessionCreateResponse {
    /**
     * Configuration for input audio for the session.
     */
    interface Audio {
        input?: Audio.Input;
    }
    namespace Audio {
        interface Input {
            /**
             * The PCM audio format. Only a 24kHz sample rate is supported.
             */
            format?: RealtimeAPI.RealtimeAudioFormats;
            /**
             * Configuration for input audio noise reduction.
             */
            noise_reduction?: Input.NoiseReduction;
            /**
             * Configuration of the transcription model.
             */
            transcription?: RealtimeAPI.AudioTranscription;
            /**
             * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
             * means that the model will detect the start and end of speech based on audio
             * volume and respond at the end of user speech.
             */
            turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection;
        }
        namespace Input {
            /**
             * Configuration for input audio noise reduction.
             */
            interface NoiseReduction {
                /**
                 * Type of noise reduction. `near_field` is for close-talking microphones such as
                 * headphones, `far_field` is for far-field microphones such as laptop or
                 * conference room microphones.
                 */
                type?: RealtimeAPI.NoiseReductionType;
            }
        }
    }
}
/**
 * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
 * means that the model will detect the start and end of speech based on audio
 * volume and respond at the end of user speech.
 */
export interface RealtimeTranscriptionSessionTurnDetection {
    /**
     * Amount of audio to include before the VAD detected speech (in milliseconds).
     * Defaults to 300ms.
     */
    prefix_padding_ms?: number;
    /**
     * Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
     * With shorter values the model will respond more quickly, but may jump in on
     * short pauses from the user.
     */
    silence_duration_ms?: number;
    /**
     * Activation threshold for VAD (0.0 to 1.0); this defaults to 0.5. A higher
     * threshold will require louder audio to activate the model, and thus might
     * perform better in noisy environments.
     */
    threshold?: number;
    /**
     * Type of turn detection, only `server_vad` is currently supported.
     */
    type?: string;
}
/**
 * Response from creating a session and client secret for the Realtime API.
 */
export interface ClientSecretCreateResponse {
    /**
     * Expiration timestamp for the client secret, in seconds since epoch.
     */
    expires_at: number;
    /**
     * The session configuration for either a realtime or transcription session.
     */
    session: RealtimeSessionCreateResponse | RealtimeTranscriptionSessionCreateResponse;
    /**
     * The generated client secret value.
     */
    value: string;
}
export interface ClientSecretCreateParams {
    /**
     * Configuration for the client secret expiration. Expiration refers to the time
     * after which a client secret will no longer be valid for creating sessions. The
     * session itself may continue after that time once started. A secret can be used
     * to create multiple sessions until it expires.
     */
    expires_after?: ClientSecretCreateParams.ExpiresAfter;
    /**
     * Session configuration to use for the client secret. Choose either a realtime
     * session or a transcription session.
     */
    session?: RealtimeAPI.RealtimeSessionCreateRequest | RealtimeAPI.RealtimeTranscriptionSessionCreateRequest;
}
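/*
 * Illustrative sketch (not generated): end-to-end use of this endpoint with the types
 * declared in this file. Values are examples only; `client` is an already-configured
 * OpenAI client instance.
 *
 * ```ts
 * const created = await client.realtime.clientSecrets.create({
 *   expires_after: { anchor: 'created_at', seconds: 600 },
 *   session: { type: 'realtime', model: 'gpt-realtime' },
 * });
 * created.value;      // ephemeral secret to hand to the client-side app
 * created.expires_at; // expiration, seconds since epoch
 * created.session;    // realtime or transcription session configuration
 * ```
 */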
export declare namespace ClientSecretCreateParams {
    /**
     * Configuration for the client secret expiration. Expiration refers to the time
     * after which a client secret will no longer be valid for creating sessions. The
     * session itself may continue after that time once started. A secret can be used
     * to create multiple sessions until it expires.
     */
    interface ExpiresAfter {
        /**
         * The anchor point for the client secret expiration, meaning that `seconds` will
         * be added to the `created_at` time of the client secret to produce an expiration
         * timestamp. Only `created_at` is currently supported.
         */
        anchor?: 'created_at';
        /**
         * The number of seconds from the anchor point to the expiration. Select a value
         * between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if
         * not specified.
         */
        seconds?: number;
    }
}
export declare namespace ClientSecrets {
    export {
        type RealtimeSessionClientSecret as RealtimeSessionClientSecret,
        type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse,
        type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse,
        type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection,
        type ClientSecretCreateResponse as ClientSecretCreateResponse,
        type ClientSecretCreateParams as ClientSecretCreateParams,
    };
}
//# sourceMappingURL=client-secrets.d.ts.map