# Source code for pipecat.processors.frameworks.rtvi.models

#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""RTVI protocol v2 message models.

Contains all RTVI protocol v2 message definitions and data structures.
Import this module under the ``RTVI`` alias to use as a namespace::

    import pipecat.processors.frameworks.rtvi.models as RTVI

    msg = RTVI.BotReady(id="1", data=RTVI.BotReadyData(version=RTVI.PROTOCOL_VERSION))
"""

from collections.abc import Mapping
from typing import (
    Any,
    Literal,
)

from pydantic import BaseModel, ConfigDict, Field

from pipecat.audio.dtmf.types import KeypadEntry
from pipecat.frames.frames import (
    AggregationType,
)
from pipecat.utils.deprecation import deprecated

# -- Constants --
# RTVI protocol version string defined by this module.
PROTOCOL_VERSION = "2.1.0"

# -- Version compatibility --
# Any 1.x client is deprecated but still supported with the old bot-output format.
LEGACY_SUPPORTED_MAJOR = 1

# Default value of the ``label`` field on the message models below.
MESSAGE_LABEL = "rtvi-ai"
# Literal type that restricts ``label`` to the same string as MESSAGE_LABEL.
MessageLiteral = Literal["rtvi-ai"]

# -- Base Message Structure --



[docs]
class Message(BaseModel):
    """Base RTVI message structure.

    Represents the standard format for RTVI protocol messages.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type string.
        id: Message identifier.
        data: Optional message payload as a string-keyed dictionary.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: str
    id: str
    data: dict[str, Any] | None = None



# -- Client -> Pipecat messages.



[docs]
class RawClientMessageData(BaseModel):
    """Data structure expected from client messages sent to the RTVI server.

    Parameters:
        t: Required string. Presumably the client message type (compare
            ``ClientMessage.type``) -- confirm against the message handler.
        d: Optional value of any type. Presumably the client message data
            (compare ``ClientMessage.data``) -- confirm against the handler.
    """

    t: str
    d: Any | None = None




[docs]
class ClientMessage(BaseModel):
    """Cleansed data structure for client messages for handling.

    Parameters:
        msg_id: Identifier of the client message.
        type: Client message type string.
        data: Optional payload of any type.
    """

    msg_id: str
    type: str
    data: Any | None = None




[docs]
class RawServerResponseData(BaseModel):
    """Data structure for server responses to client messages.

    Parameters:
        t: Required string. Presumably the type of the client message being
            answered -- confirm against the code that builds the response.
        d: Optional value of any type. Presumably the response data.
    """

    t: str
    d: Any | None = None




[docs]
class ServerResponse(BaseModel):
    """The RTVI-formatted message response from the server to the client.

    This message is used to respond to custom messages sent by the client.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"server-response"``.
        id: Message identifier.
        data: The response payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["server-response"] = "server-response"
    id: str
    data: RawServerResponseData




[docs]
class AboutClientData(BaseModel):
    """Data about the RTVI client.

    Contains information about the client, including which RTVI library it
    is using, what platform it is on and any additional details, if available.

    Parameters:
        library: Name of the RTVI library the client uses. The only
            required field.
        library_version: Optional version of that library.
        platform: Optional platform the client runs on.
        platform_version: Optional version of that platform.
        platform_details: Optional additional details, of any type.
    """

    library: str
    library_version: str | None = None
    platform: str | None = None
    platform_version: str | None = None
    platform_details: Any | None = None




[docs]
class ClientReadyData(BaseModel):
    """Data format of client ready messages.

    Contains the RTVI protocol version and client information.

    Parameters:
        version: RTVI protocol version string reported by the client.
        about: Information about the client.
    """

    version: str
    about: AboutClientData



# -- Pipecat -> Client errors



[docs]
class ErrorResponseData(BaseModel):
    """Data for an RTVI error response.

    Contains the error message to send back to the client.

    Parameters:
        error: The error message text.
    """

    error: str




[docs]
class ErrorResponse(BaseModel):
    """RTVI error response message.

    RTVI formatted error response message for relaying failed client requests.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"error-response"``.
        id: Message identifier.
        data: The error payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["error-response"] = "error-response"
    id: str
    data: ErrorResponseData




[docs]
class ErrorData(BaseModel):
    """Data for an RTVI error event.

    Contains error information including whether it's fatal.

    Parameters:
        error: The error message text.
        fatal: Whether the pipeline has stopped due to this error.
    """

    error: str
    fatal: bool  # Indicates the pipeline has stopped due to this error




[docs]
class Error(BaseModel):
    """RTVI error event message.

    RTVI formatted error message for relaying errors in the pipeline.
    Unlike ``ErrorResponse``, this model declares no ``id`` field.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"error"``.
        data: The error payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["error"] = "error"
    data: ErrorData



# -- Pipecat -> Client responses and messages.



[docs]
class BotReadyData(BaseModel):
    """Data for bot ready notification.

    Contains protocol version and initial configuration.

    Parameters:
        version: RTVI protocol version string.
        about: Optional string-keyed mapping of arbitrary values.
    """

    version: str
    about: Mapping[str, Any] | None = None




[docs]
class BotReady(BaseModel):
    """Message indicating bot is ready for interaction.

    Sent after bot initialization is complete.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-ready"``.
        id: Message identifier.
        data: The bot-ready payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-ready"] = "bot-ready"
    id: str
    data: BotReadyData




[docs]
@deprecated(
    "`LLMFunctionCallMessageData` is deprecated since 0.0.102 and will be removed in 2.0.0. "
    "Use `LLMFunctionCallInProgressMessageData` instead."
)
class LLMFunctionCallMessageData(BaseModel):
    """Data for LLM function call notification.

    Contains function call details including name, ID, and arguments.

    Parameters:
        function_name: Name of the function being called.
        tool_call_id: Identifier of the tool call.
        args: String-keyed mapping of the call arguments.

    .. deprecated:: 0.0.102
        Use :class:`LLMFunctionCallInProgressMessageData` instead. Will be removed in 2.0.0.
    """

    function_name: str
    tool_call_id: str
    args: Mapping[str, Any]




[docs]
@deprecated(
    "`LLMFunctionCallMessage` is deprecated since 0.0.102 and will be removed in 2.0.0. "
    "Use `LLMFunctionCallInProgressMessage` instead."
)
class LLMFunctionCallMessage(BaseModel):
    """Message notifying of an LLM function call.

    Sent when the LLM makes a function call.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"llm-function-call"``.
        data: The function call payload.

    .. deprecated:: 0.0.102
        Use :class:`LLMFunctionCallInProgressMessage` with the
        ``llm-function-call-in-progress`` event type instead. Will be removed in 2.0.0.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["llm-function-call"] = "llm-function-call"
    data: LLMFunctionCallMessageData




[docs]
class SendTextOptions(BaseModel):
    """Options for sending text input to the LLM.

    Contains options for how the pipeline should process the text input.

    Parameters:
        run_immediately: Defaults to ``True``. Presumably asks the pipeline
            to act on the text right away -- confirm against the send-text
            handler.
        audio_response: Defaults to ``True``. Presumably controls whether
            the bot's reply is spoken -- confirm against the send-text
            handler.
    """

    run_immediately: bool = True
    audio_response: bool = True




[docs]
class SendTextData(BaseModel):
    """Data format for sending text input to the LLM.

    Contains the text content to send and any options for how the pipeline should process it.

    Parameters:
        content: The text content to send.
        options: Optional processing options; ``None`` when not supplied.
    """

    content: str
    options: SendTextOptions | None = None




[docs]
class DTMFInputData(BaseModel):
    """Data format for DTMF keypresses sent from the client.

    Carries one or more keypad entries (``0``-``9``, ``*``, ``#``), delivered
    in order, so the bot's DTMF handling (e.g. a ``DTMFAggregator``) sees them
    exactly as a telephony transport would deliver rapid keypresses.

    Protocol 2.1.0 replaced the single ``button`` field with ``buttons``.

    Parameters:
        buttons: List of keypad entries. Validation requires at least one
            entry.
    """

    # min_length=1 rejects an empty list at validation time.
    buttons: list[KeypadEntry] = Field(min_length=1)




[docs]
class LLMFunctionCallStartMessageData(BaseModel):
    """Data for LLM function call start notification.

    Contains the function name being called. Fields may be omitted based on
    the configured function_call_report_level for security.

    Parameters:
        function_name: Name of the function being called; ``None`` when
            omitted.
    """

    function_name: str | None = None




[docs]
class LLMFunctionCallStartMessage(BaseModel):
    """Message notifying that an LLM function call has started.

    Sent when the LLM begins a function call.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"llm-function-call-started"``.
        data: The function call start payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["llm-function-call-started"] = "llm-function-call-started"
    data: LLMFunctionCallStartMessageData




[docs]
class LLMFunctionCallResultData(BaseModel):
    """Data for LLM function call result.

    Contains function call details and result.

    Parameters:
        function_name: Name of the function that was called.
        tool_call_id: Identifier of the tool call.
        arguments: Dictionary of the call arguments (key and value types
            are not constrained).
        result: The call result, either a dictionary or a string.
    """

    function_name: str
    tool_call_id: str
    arguments: dict
    result: dict | str




[docs]
class LLMFunctionCallInProgressMessageData(BaseModel):
    """Data for LLM function call in-progress notification.

    Contains function call details including name, ID, and arguments.
    Fields may be omitted based on the configured function_call_report_level for security.

    Parameters:
        tool_call_id: Identifier of the tool call. The only required field.
        function_name: Name of the function; ``None`` when omitted.
        arguments: String-keyed mapping of the call arguments; ``None``
            when omitted.
    """

    tool_call_id: str
    function_name: str | None = None
    arguments: Mapping[str, Any] | None = None




[docs]
class LLMFunctionCallInProgressMessage(BaseModel):
    """Message notifying that an LLM function call is in progress.

    Sent when the LLM function call execution begins.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to
            ``"llm-function-call-in-progress"``.
        data: The in-progress function call payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["llm-function-call-in-progress"] = "llm-function-call-in-progress"
    data: LLMFunctionCallInProgressMessageData




[docs]
class LLMFunctionCallStoppedMessageData(BaseModel):
    """Data for LLM function call stopped notification.

    Contains details about the function call that stopped, including
    whether it was cancelled or completed with a result.
    Fields may be omitted based on the configured function_call_report_level for security.

    Parameters:
        tool_call_id: Identifier of the tool call.
        cancelled: Whether the function call was cancelled.
        function_name: Name of the function; ``None`` when omitted.
        result: The call result, of any type; ``None`` when omitted.
    """

    tool_call_id: str
    cancelled: bool
    function_name: str | None = None
    result: Any | None = None




[docs]
class LLMFunctionCallStoppedMessage(BaseModel):
    """Message notifying that an LLM function call has stopped.

    Sent when a function call completes (with result) or is cancelled.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"llm-function-call-stopped"``.
        data: The stopped function call payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["llm-function-call-stopped"] = "llm-function-call-stopped"
    data: LLMFunctionCallStoppedMessageData




[docs]
class BotLLMStartedMessage(BaseModel):
    """Message indicating bot LLM processing has started.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-llm-started"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-llm-started"] = "bot-llm-started"




[docs]
class BotLLMStoppedMessage(BaseModel):
    """Message indicating bot LLM processing has stopped.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-llm-stopped"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-llm-stopped"] = "bot-llm-stopped"




[docs]
class BotTTSStartedMessage(BaseModel):
    """Message indicating bot TTS processing has started.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-tts-started"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-tts-started"] = "bot-tts-started"




[docs]
class BotTTSStoppedMessage(BaseModel):
    """Message indicating bot TTS processing has stopped.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-tts-stopped"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-tts-stopped"] = "bot-tts-stopped"




[docs]
class TextMessageData(BaseModel):
    """Data for text-based RTVI messages.

    Contains text content.

    Parameters:
        text: The text content.
    """

    text: str



# Allowed values for a segment's spoken status. ``None`` is part of the alias
# itself, so fields annotated with it are already optional.
SpokenStatus = Literal["new", "in-progress", "completed"] | None



[docs]
class SpokenProgressData(BaseModel):
    """Word-level TTS progress within a spoken segment.

    Both fields are required.

    Parameters:
        accumulated_text: Text already spoken in this segment, including the current word.
        remaining_text: Text not yet spoken in this segment.
    """

    accumulated_text: str
    remaining_text: str




[docs]
class BotOutputTransformResult(BaseModel):
    """Return type for bot output transform functions.

    Parameters:
        text: The transformed full text of the segment. Required.
        accumulated_text: Transformed spoken-so-far portion. Only populated
            when the transform is called from a progress context; defaults
            to ``None``.
        remaining_text: Transformed not-yet-spoken portion. Only populated
            when the transform is called from a progress context; defaults
            to ``None``.
    """

    text: str
    accumulated_text: str | None = None
    remaining_text: str | None = None




[docs]
class BotOutputMessageData(TextMessageData):
    """Data for bot output RTVI messages.

    Extends TextMessageData to include metadata about the output.

    This class supports both protocol v1 (1.4.x) and v2 (2.0.0+) clients. The
    observer populates different field subsets depending on the negotiated version;
    ``send_rtvi_message`` serialises with ``exclude_none=True`` so each client
    only sees the fields relevant to its version.

    Parameters:
        text: The output text (inherited from ``TextMessageData``).
        aggregated_by: What form the text is in (e.g., sentence, code, etc.).
        segment_id: ID of the source AggregatedTextFrame.
        spoken: **(v1 only)** Whether the text has been spoken by TTS.
        will_be_spoken: **(v2+)** Whether the text will be spoken by TTS.
        spoken_status: **(v2+)** Lifecycle status of the segment:
            ``"new"`` on first emit, ``"in-progress"`` during word playback,
            ``"completed"`` when the last word is spoken (or immediately for
            non-spoken segments).
        spoken_progress: **(v2+)** Accumulated / remaining text breakdown.
            Present when ``will_be_spoken`` is ``True``.
    """

    aggregated_by: AggregationType | str
    segment_id: int | None = None
    # v1 field (protocol 1.4.x)
    spoken: bool | None = None
    # v2 fields (protocol 2.0.0+)
    will_be_spoken: bool | None = None
    # ``SpokenStatus`` already includes ``None``, so no extra ``| None`` is needed.
    spoken_status: SpokenStatus = None
    spoken_progress: SpokenProgressData | None = None




[docs]
class BotOutputMessage(BaseModel):
    """Message containing bot output text.

    An event meant to holistically represent what the bot is outputting,
    along with metadata about the output and if it has been spoken.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-output"``.
        data: The bot output payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-output"] = "bot-output"
    data: BotOutputMessageData




[docs]
class BotTranscriptionMessage(BaseModel):
    """Message containing bot transcription text.

    Sent when the bot's speech is transcribed.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-transcription"``.
        data: The text payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-transcription"] = "bot-transcription"
    data: TextMessageData




[docs]
class BotLLMTextMessage(BaseModel):
    """Message containing bot LLM text output.

    Sent when the bot's LLM generates text.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-llm-text"``.
        data: The text payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-llm-text"] = "bot-llm-text"
    data: TextMessageData




[docs]
class BotTTSTextMessage(BaseModel):
    """Message containing bot TTS text output.

    Sent when text is being processed by TTS.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-tts-text"``.
        data: The text payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-tts-text"] = "bot-tts-text"
    data: TextMessageData




[docs]
class AudioMessageData(BaseModel):
    """Data for audio-based RTVI messages.

    Contains audio data and metadata.

    Parameters:
        audio: Audio payload carried as a string. The encoding is not
            specified here (presumably base64 -- confirm against the
            producer).
        sample_rate: Sample rate as an integer (presumably Hz -- confirm).
        num_channels: Number of audio channels.
    """

    audio: str
    sample_rate: int
    num_channels: int




[docs]
class BotTTSAudioMessage(BaseModel):
    """Message containing bot TTS audio output.

    Sent when the bot's TTS generates audio.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-tts-audio"``.
        data: The audio payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-tts-audio"] = "bot-tts-audio"
    data: AudioMessageData




[docs]
class UserTranscriptionMessageData(BaseModel):
    """Data for user transcription messages.

    Contains transcription text and metadata.

    Parameters:
        text: The transcribed text.
        user_id: Identifier of the user.
        timestamp: Timestamp carried as a string (format not specified
            here).
        final: Boolean flag; presumably ``True`` for a final transcription
            and ``False`` for an interim one -- confirm against the producer.
    """

    text: str
    user_id: str
    timestamp: str
    final: bool




[docs]
class UserTranscriptionMessage(BaseModel):
    """Message containing user transcription.

    Sent when user speech is transcribed.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-transcription"``.
        data: The transcription payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-transcription"] = "user-transcription"
    data: UserTranscriptionMessageData




[docs]
class UserLLMTextMessage(BaseModel):
    """Message containing user text input for LLM.

    Sent when user text is processed by the LLM.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-llm-text"``.
        data: The text payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-llm-text"] = "user-llm-text"
    data: TextMessageData




[docs]
class UserStartedSpeakingMessage(BaseModel):
    """Message indicating user has started speaking.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-started-speaking"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-started-speaking"] = "user-started-speaking"




[docs]
class UserStoppedSpeakingMessage(BaseModel):
    """Message indicating user has stopped speaking.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-stopped-speaking"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-stopped-speaking"] = "user-stopped-speaking"




[docs]
class VADUserStartedSpeakingMessage(BaseModel):
    """Message indicating VAD detected the user started speaking.

    Raw VAD signal, emitted independently of turn finalization (unlike
    ``user-started-speaking``, which a turn strategy may gate or defer).
    Carries no ``data`` payload.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"vad-user-started-speaking"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["vad-user-started-speaking"] = "vad-user-started-speaking"




[docs]
class VADUserStoppedSpeakingMessage(BaseModel):
    """Message indicating VAD detected the user stopped speaking.

    Raw VAD signal, emitted independently of turn finalization (unlike
    ``user-stopped-speaking``, which a turn strategy may gate or defer).
    Carries no ``data`` payload.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"vad-user-stopped-speaking"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["vad-user-stopped-speaking"] = "vad-user-stopped-speaking"




[docs]
class UserMuteStartedMessage(BaseModel):
    """Message indicating user has been muted.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-mute-started"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-mute-started"] = "user-mute-started"




[docs]
class UserMuteStoppedMessage(BaseModel):
    """Message indicating user has been unmuted.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-mute-stopped"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-mute-stopped"] = "user-mute-stopped"




[docs]
class BotStartedSpeakingMessage(BaseModel):
    """Message indicating bot has started speaking.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-started-speaking"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-started-speaking"] = "bot-started-speaking"




[docs]
class BotStoppedSpeakingMessage(BaseModel):
    """Message indicating bot has stopped speaking.

    Carries no ``data`` payload; only ``label`` and ``type`` are declared.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-stopped-speaking"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-stopped-speaking"] = "bot-stopped-speaking"




[docs]
class BotInterruptedMessage(BaseModel):
    """Message indicating the bot was interrupted and its in-flight output cut off.

    Fires for any pipeline interruption — a VAD-detected user barge-in or a
    programmatic interrupt (e.g. ``send-text`` with ``run_immediately``) — so a
    client can drop whatever the bot was mid-saying.
    Carries no ``data`` payload.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-interrupted"``.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-interrupted"] = "bot-interrupted"




[docs]
class MetricsMessage(BaseModel):
    """Message containing performance metrics.

    Sent to provide performance and usage metrics.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"metrics"``.
        data: String-keyed mapping of metric values; the schema is not
            constrained by this model.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["metrics"] = "metrics"
    data: Mapping[str, Any]




[docs]
class ServerMessage(BaseModel):
    """Generic server message.

    Used for custom server-to-client messages.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"server-message"``.
        data: Custom payload of any type.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["server-message"] = "server-message"
    data: Any




[docs]
class AudioLevelMessageData(BaseModel):
    """Data format for sending audio levels.

    Parameters:
        value: The audio level as a float (range not constrained by this
            model).
    """

    value: float




[docs]
class UserAudioLevelMessage(BaseModel):
    """Message indicating user audio level.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"user-audio-level"``.
        data: The audio level payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["user-audio-level"] = "user-audio-level"
    data: AudioLevelMessageData




[docs]
class BotAudioLevelMessage(BaseModel):
    """Message indicating bot audio level.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"bot-audio-level"``.
        data: The audio level payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["bot-audio-level"] = "bot-audio-level"
    data: AudioLevelMessageData




[docs]
class SystemLogMessage(BaseModel):
    """Message including a system log.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"system-log"``.
        data: The log text payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["system-log"] = "system-log"
    data: TextMessageData



# -- UI Worker Protocol ------------------------------------------------------
#
# A structured RTVI message vocabulary that lets server-side workers
# observe and drive a GUI app on the client side. The protocol covers
# five first-class RTVI message types:
#
#   ui-event              client-to-server event message
#   ui-command            server-to-client command message
#   ui-snapshot           client-to-server accessibility snapshot
#   ui-cancel-job-group   client-to-server cancellation request
#   ui-job-group          server-to-client job-group lifecycle envelope
#
# This section is data only (constants and payload models, no
# behavior). ``pipecat.workers.ui.UIWorker`` builds the higher-level
# abstractions on top, and single-LLM Pipecat apps can target the same
# wire format directly via custom tools that emit typed RTVI messages
# with these types. The matching client-side implementation lives in
# ``@pipecat-ai/client-js`` and ``@pipecat-ai/client-react``.

# The wire-format ``type`` strings (``"ui-event"``, ``"ui-command"``,
# ``"ui-snapshot"``, ``"ui-cancel-job-group"``, ``"ui-job-group"``) are pinned
# as ``Literal[...]`` field defaults on the corresponding ``*Message``
# pydantic class below, matching the convention used for every other
# RTVI message type in this module.

# Each ``ui-job-group`` envelope carries a ``kind`` field that the client's
# reducer dispatches on. The four kinds form the lifecycle of a
# user-facing job group:
#
#   group_started → job_update* → job_completed × N → group_completed
#
# where N is the number of workers in the group. The kind strings are
# pinned as ``Literal[...]`` defaults on the matching ``UIJob*Data``
# class below.


# -- UI envelope data classes --



[docs]
class UIEventData(BaseModel):
    """Inner ``data`` for a ``ui-event`` message.

    Parameters:
        event: App-defined event. Required.
        payload: App-defined payload, schemaless by design. Defaults to
            ``None``.
    """

    event: str
    payload: Any | None = None




[docs]
class UICommandData(BaseModel):
    """Inner ``data`` for a ``ui-command`` message.

    Parameters:
        command: App-defined command. Required.
        payload: App-defined payload (already a plain dict by the
            time it lands on the wire). The standard command payload models
            below produce the right shape via ``model_dump()``. Defaults to
            ``None``.
    """

    command: str
    payload: Any | None = None




[docs]
class A11yNode(BaseModel):
    """One node in the UI accessibility snapshot tree.

    Mirrors the client-side ``A11yNode`` wire shape. Extra fields are
    allowed so clients can add platform-specific or future metadata
    without breaking older servers.

    Only ``ref`` and ``role`` are required; every other field defaults to
    ``None``.

    Parameters:
        ref: Stable client-assigned element reference.
        role: ARIA-style role for the node.
        name: Optional accessible name.
        value: Optional current value for inputs/progress/etc.
        state: Optional short state tags (e.g. ``"focused"``,
            ``"disabled"``, ``"offscreen"``).
        level: Optional heading level.
        colcount: Optional column count for grid-like containers.
        rowcount: Optional row count for grid-like containers.
        children: Optional child nodes.
    """

    # Accept fields not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    ref: str
    role: str
    name: str | None = None
    value: str | None = None
    state: list[str] | None = None
    level: int | None = None
    colcount: int | None = None
    rowcount: int | None = None
    # Quoted forward reference: the node type is recursive.
    children: list["A11yNode"] | None = None




[docs]
class A11ySelection(BaseModel):
    """The user's current text selection in the UI snapshot.

    Extra fields are allowed for forward compatibility with client
    snapshot additions.

    Parameters:
        ref: Ref of the element that carries the selection.
        text: Selected text.
        start_offset: Optional selection start offset. Defaults to ``None``.
        end_offset: Optional selection end offset. Defaults to ``None``.
    """

    # Accept fields not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    ref: str
    text: str
    start_offset: int | None = None
    end_offset: int | None = None




[docs]
class A11ySnapshot(BaseModel):
    """Client accessibility snapshot sent in a ``ui-snapshot`` message.

    Mirrors the client-side ``A11ySnapshot`` wire shape. Extra fields
    are allowed so clients can add compatible metadata over time.

    Parameters:
        root: Root accessibility node.
        captured_at: Client-side epoch milliseconds when captured.
        selection: Optional current text selection. Defaults to ``None``.
    """

    # Accept fields not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    root: A11yNode
    captured_at: int
    selection: A11ySelection | None = None




[docs]
class UISnapshotData(BaseModel):
    """Inner ``data`` for a ``ui-snapshot`` message.

    The accessibility snapshot tree mirrors the client-side
    ``A11ySnapshot`` wire shape and is kept forward-compatible by
    allowing extra fields on the snapshot models.

    Parameters:
        tree: The serialized accessibility tree, validated as an
            ``A11ySnapshot``. Required.
    """

    tree: A11ySnapshot




[docs]
class UICancelJobGroupData(BaseModel):
    """Inner ``data`` for a ``ui-cancel-job-group`` message.

    Parameters:
        job_id: The job group id the client wants cancelled. Required.
        reason: Optional human-readable reason. Defaults to ``None``.
    """

    job_id: str
    reason: str | None = None




[docs]
class UIJobGroupStartedData(BaseModel):
    """``data`` for a ``ui-job-group`` envelope with kind ``group_started``.

    Parameters:
        kind: Always ``"group_started"``.
        job_id: Shared job-group identifier for the group. Required.
        workers: Names of the workers the work was dispatched to.
            Defaults to ``None``.
        label: Optional human-readable label for the group.
        cancellable: Whether the client may request cancellation.
            Defaults to ``True``.
        at: Epoch milliseconds when the group started. Defaults to ``0``.
    """

    kind: Literal["group_started"] = "group_started"
    job_id: str
    workers: list[str] | None = None
    label: str | None = None
    cancellable: bool = True
    at: int = 0




[docs]
class UIJobUpdateData(BaseModel):
    """``data`` for a ``ui-job-group`` envelope with kind ``job_update``.

    Parameters:
        kind: Always ``"job_update"``.
        job_id: The shared job-group identifier. Required.
        worker_name: The worker that produced the update. Required.
        data: The worker's update payload, forwarded verbatim.
            Defaults to ``None``.
        at: Epoch milliseconds when the update was emitted.
            Defaults to ``0``.
    """

    kind: Literal["job_update"] = "job_update"
    job_id: str
    worker_name: str
    data: Any | None = None
    at: int = 0




[docs]
class UIJobCompletedData(BaseModel):
    """``data`` for a ``ui-job-group`` envelope with kind ``job_completed``.

    Parameters:
        kind: Always ``"job_completed"``.
        job_id: The shared job-group identifier. Required.
        worker_name: The worker that produced the response. Required.
        status: Completion status string. Required; the set of valid
            values is not constrained by this model.
        response: The worker's response payload. Defaults to ``None``.
        at: Epoch milliseconds when the response was received.
            Defaults to ``0``.
    """

    kind: Literal["job_completed"] = "job_completed"
    job_id: str
    worker_name: str
    status: str
    response: Any | None = None
    at: int = 0




[docs]
class UIJobGroupCompletedData(BaseModel):
    """``data`` for a ``ui-job-group`` envelope with kind ``group_completed``.

    Parameters:
        kind: Always ``"group_completed"``.
        job_id: The shared job-group identifier. Required.
        at: Epoch milliseconds when the group completed.
            Defaults to ``0``.
    """

    kind: Literal["group_completed"] = "group_completed"
    job_id: str
    at: int = 0



#: Union over the four job-group lifecycle data shapes. Each member pins a
#: distinct ``kind`` literal, so ``kind`` identifies the shape. Note that this
#: alias is a plain union: no explicit pydantic discriminator is configured.
UIJobGroupData = (
    UIJobGroupStartedData | UIJobUpdateData | UIJobCompletedData | UIJobGroupCompletedData
)


# -- UI envelope message classes --



[docs]
class UIEventMessage(BaseModel):
    """RTVI ``ui-event`` message (client → server).

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"ui-event"``.
        id: Message identifier.
        data: The event payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["ui-event"] = "ui-event"
    id: str
    data: UIEventData




[docs]
class UICommandMessage(BaseModel):
    """RTVI ``ui-command`` message (server → client).

    This model declares no ``id`` field.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"ui-command"``.
        data: The command payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["ui-command"] = "ui-command"
    data: UICommandData




[docs]
class UISnapshotMessage(BaseModel):
    """RTVI ``ui-snapshot`` message (client → server).

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"ui-snapshot"``.
        id: Message identifier.
        data: The snapshot payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["ui-snapshot"] = "ui-snapshot"
    id: str
    data: UISnapshotData




[docs]
class UICancelJobGroupMessage(BaseModel):
    """RTVI ``ui-cancel-job-group`` message (client → server).

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"ui-cancel-job-group"``.
        id: Message identifier.
        data: The cancellation request payload.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["ui-cancel-job-group"] = "ui-cancel-job-group"
    id: str
    data: UICancelJobGroupData




[docs]
class UIJobGroupMessage(BaseModel):
    """RTVI ``ui-job-group`` message (server → client).

    The ``data`` field is one of the four job-group lifecycle data shapes,
    distinguished by their ``kind`` field. This model declares no ``id``
    field.

    Parameters:
        label: Protocol label; constrained to ``"rtvi-ai"``.
        type: Message type; constrained to ``"ui-job-group"``.
        data: One of the ``UIJobGroupData`` union members.
    """

    label: MessageLiteral = MESSAGE_LABEL
    type: Literal["ui-job-group"] = "ui-job-group"
    data: UIJobGroupData



# -- UI command payloads --
#
# These models describe commands that have matching default React
# handlers in ``@pipecat-ai/client-react``'s ``standardHandlers``.
# Apps can use them as-is, override the client handler to customize
# rendering, or ignore them entirely and define their own command
# names.
#
# Server-side helpers that send commands accept these models directly.
# ``BaseModel.model_dump()`` converts them to the plain-dict shape
# that travels over the wire.



[docs]
class Toast(BaseModel):
    """A transient notification surface shown on the client.

    Only ``title`` is required; every other field defaults to ``None``.

    Parameters:
        title: Required headline.
        subtitle: Optional second line beneath the title.
        description: Optional body text.
        image_url: Optional leading image.
        duration_ms: Optional dismiss timer. Client default applies
            when None.
    """

    title: str
    subtitle: str | None = None
    description: str | None = None
    image_url: str | None = None
    duration_ms: int | None = None




[docs]
class Navigate(BaseModel):
    """Client-side navigation to a named view.

    Parameters:
        view: App-defined view name (route, screen id, tab key, etc.).
            Required.
        params: Optional view-specific parameters as a dictionary (key and
            value types are not constrained). Defaults to ``None``.
    """

    view: str
    params: dict | None = None




[docs]
class ScrollTo(BaseModel):
    """Scroll a target element into view.

    The client resolves the target by ``ref`` first (a snapshot ref
    like ``"e42"`` assigned by the a11y walker), then falls back to
    ``target_id`` (``document.getElementById``). Supply whichever you
    have; ``ref`` is the normal choice when acting on a node from
    ``<ui_state>``.

    Parameters:
        ref: Snapshot ref from ``<ui_state>``.
        target_id: Element id registered on the client.
        behavior: Optional scroll behavior hint. Typical values:
            ``"smooth"`` or ``"instant"``. Clients may ignore.
    """

    # NOTE: every field defaults to None; the model itself does not require
    # that at least one of ``ref`` / ``target_id`` is set.
    ref: str | None = None
    target_id: str | None = None
    behavior: str | None = None




[docs]
class Highlight(BaseModel):
    """Briefly emphasize a target element (flash, glow, pulse).

    Parameters:
        ref: Snapshot ref from ``<ui_state>``.
        target_id: Element id registered on the client.
        duration_ms: Optional highlight duration. Client default
            applies when None.
    """

    # NOTE: every field defaults to None; the model itself does not require
    # that at least one of ``ref`` / ``target_id`` is set.
    ref: str | None = None
    target_id: str | None = None
    duration_ms: int | None = None




[docs]
class Focus(BaseModel):
    """Move input focus to a target element.

    Parameters:
        ref: Snapshot ref from ``<ui_state>``.
        target_id: Element id registered on the client.
    """

    # NOTE: both fields default to None; the model itself does not require
    # that at least one of them is set.
    ref: str | None = None
    target_id: str | None = None




[docs]
class Click(BaseModel):
    """Click an element on the client.

    Closes the form-fill loop for non-text inputs (checkboxes, radios)
    and exposes the rest of the action vocabulary (submit buttons,
    links, app-specific clickable nodes). The standard handler
    silently no-ops on ``disabled`` targets so the worker can't bypass
    UI affordances the user is meant to control.

    For native ``<select>``, prefer ``SetInputValue`` (clicking
    options doesn't reliably change the selection); for custom
    comboboxes (ARIA listbox + popup), apps wire their own command
    matching the library's interaction model.

    Parameters:
        ref: Snapshot ref from ``<ui_state>``.
        target_id: Element id registered on the client. Used as a
            fallback when ``ref`` is not set or has gone stale.
    """

    # NOTE: both fields default to None; the model itself does not require
    # that at least one of them is set.
    ref: str | None = None
    target_id: str | None = None




[docs]
class SetInputValue(BaseModel):
    """Write a value into a text input or textarea on the client.

    Use this for form-filling: the worker has decided what should go
    into a field (clarifying answer, tax form entry, etc.) and asks
    the client to populate it. With ``replace=True`` (the default),
    the existing value is overwritten; with ``replace=False`` the
    value is appended.

    The standard handler silently no-ops on ``disabled``, ``readonly``,
    and ``<input type="hidden">`` targets so the worker can't write
    into fields the user can't.

    Parameters:
        value: The text to write. Defaults to the empty string.
        ref: Snapshot ref from ``<ui_state>``. Typically the ref of
            an ``<input>`` or ``<textarea>``.
        target_id: Element id registered on the client. Used as a
            fallback when ``ref`` is not set or has gone stale.
        replace: When True (the default), overwrite the current
            value. When False, append to it.
    """

    # NOTE: no field is required; the model itself does not require that at
    # least one of ``ref`` / ``target_id`` is set.
    value: str = ""
    ref: str | None = None
    target_id: str | None = None
    replace: bool = True




[docs]
class SelectText(BaseModel):
    """Select text on the page so the user can see what the worker means.

    Mirror of the ``selection`` field surfaced in the snapshot. Use
    this to point the user's attention at a specific paragraph or
    range after the worker has decided what it's referring to.

    With ``start_offset`` and ``end_offset`` omitted, the entire
    target's text content is selected (``Range.selectNodeContents``
    for document elements; ``el.select()`` for ``<input>`` /
    ``<textarea>``).

    Parameters:
        ref: Snapshot ref from ``<ui_state>``. Typically the ref of
            a paragraph or input element.
        target_id: Element id registered on the client. Used as a
            fallback when ``ref`` is not set or has gone stale.
        start_offset: Character offset within the target's text
            where the selection should start. For ``<input>`` and
            ``<textarea>`` this is the value offset; for document
            elements it is computed against the concatenation of
            descendant text nodes in document order.
        end_offset: End character offset, exclusive. Same coordinate
            system as ``start_offset``.
    """

    # NOTE: every field defaults to None; the model itself does not validate
    # the offsets or require that ``ref`` / ``target_id`` is set.
    ref: str | None = None
    target_id: str | None = None
    start_offset: int | None = None
    end_offset: int | None = None