Source code for pipecat.services.inception.llm

#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Inception LLM service implementation using OpenAI-compatible interface."""

from dataclasses import dataclass, field
from typing import Literal

from loguru import logger

from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
from pipecat.services.openai.base_llm import BaseOpenAILLMService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN
from pipecat.services.settings import _NotGiven, is_given



[docs]
@dataclass
class InceptionLLMSettings(BaseOpenAILLMService.Settings):
    """Settings for InceptionLLMService.

    Extends the base OpenAI LLM settings with two Inception-specific options.
    Both fields default to the ``NOT_GIVEN`` sentinel at the dataclass level;
    ``InceptionLLMService`` only forwards a field to the API when it is both
    given and not ``None``.

    Parameters:
        reasoning_effort: Controls how much reasoning the model applies.
            One of "instant", "low", "medium", or "high". When unset (or
            ``None``), the parameter is omitted from the request and
            Inception's server-side default applies.
        realtime: When True, reduces time to first diffusion block (TTFT).
            ``InceptionLLMService`` sets this to True in its own defaults;
            the dataclass default itself is ``NOT_GIVEN``.
    """

    # A default_factory is used so every instance is initialised with the
    # shared NOT_GIVEN sentinel, which is distinct from an explicit None.
    reasoning_effort: Literal["instant", "low", "medium", "high"] | None | _NotGiven = field(
        default_factory=lambda: _NOT_GIVEN
    )
    # Tri-state flag: True/False are sent to the API, None/NOT_GIVEN are not.
    realtime: bool | None | _NotGiven = field(default_factory=lambda: _NOT_GIVEN)




[docs]
class InceptionLLMService(OpenAILLMService):
    """A service for interacting with Inception's API using the OpenAI-compatible interface.

    This service extends OpenAILLMService to connect to Inception's API endpoint while
    maintaining full compatibility with OpenAI's interface and functionality.
    Supports Mercury-2, Inception's diffusion-based reasoning model.

    On top of the inherited behavior it adds the Inception-specific
    ``reasoning_effort`` and ``realtime`` request options, configured through
    ``InceptionLLMSettings``.
    """

    # Inception doesn't support the "developer" message role.
    # NOTE(review): presumably consulted by the base class / adapter when
    # building messages -- the consumer is not visible in this file.
    supports_developer_role = False

    # Settings type for this service; the constructor instantiates it via
    # ``self.Settings(...)`` to build the service defaults.
    Settings = InceptionLLMSettings
    # Annotation only (no assignment here): declares that ``_settings`` holds
    # the Inception-specific settings type. The attribute is read in
    # build_chat_completion_params; presumably it is assigned by the base class.
    _settings: Settings


[docs]
    def __init__(
        self,
        *,
        api_key: str,
        base_url: str = "https://api.inceptionlabs.ai/v1",
        settings: Settings | None = None,
        **kwargs,
    ):
        """Set up the service against Inception's OpenAI-compatible endpoint.

        The service starts from its own defaults (model "mercury-2", no
        explicit reasoning effort, realtime enabled). Any caller-supplied
        ``settings`` are then applied on top of those defaults through
        ``apply_update`` before everything is handed to the parent class.

        Args:
            api_key: The API key for accessing Inception's API.
            base_url: The base URL for Inception API. Defaults to "https://api.inceptionlabs.ai/v1".
            settings: Runtime-updatable settings. When None, the service
                defaults are used unchanged.
            **kwargs: Additional keyword arguments passed to OpenAILLMService.
        """
        # Service-level defaults; reasoning_effort=None means "do not send it".
        effective_settings = self.Settings(model="mercury-2", reasoning_effort=None, realtime=True)

        # Overlay the caller's settings, if any, onto the defaults.
        if settings is not None:
            effective_settings.apply_update(settings)

        super().__init__(
            api_key=api_key,
            base_url=base_url,
            settings=effective_settings,
            **kwargs,
        )



[docs]
    def create_client(self, api_key=None, base_url=None, **kwargs):
        """Build the OpenAI-compatible client used to talk to Inception.

        Logs the target base URL at debug level and then delegates client
        construction to the parent implementation.

        Args:
            api_key: The API key for authentication. Forwarded unchanged to
                the parent implementation.
            base_url: The base URL for the API. Forwarded unchanged to the
                parent implementation.
            **kwargs: Additional keyword arguments for client configuration.

        Returns:
            Whatever client object the parent ``create_client`` returns,
            configured for Inception's API.
        """
        debug_message = f"Creating Inception client with api {base_url}"
        logger.debug(debug_message)

        client = super().create_client(api_key, base_url, **kwargs)
        return client



[docs]
    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
        """Build parameters for Inception chat completion request.

        Extends the base OpenAI parameters with Inception-specific options
        such as reasoning_effort and realtime. Each option is only added when
        its setting is both given and not None; otherwise it is left out so
        the server-side default applies.

        Args:
            params_from_context: Parameters, derived from the LLM context, to
                use for the chat completion. Contains messages, tools, and tool
                choice.

        Returns:
            Dictionary of parameters for the chat completion request. When
            ``realtime`` is set, it is placed under ``extra_body`` alongside
            any ``extra_body`` entries the base implementation already
            produced.
        """
        params = super().build_chat_completion_params(params_from_context)

        reasoning_effort = self._settings.reasoning_effort
        if is_given(reasoning_effort) and reasoning_effort is not None:
            params["reasoning_effort"] = reasoning_effort

        realtime = self._settings.realtime
        if is_given(realtime) and realtime is not None:
            # realtime is Inception-specific and unknown to the OpenAI SDK,
            # so it must be passed via extra_body to avoid validation errors.
            #
            # The base parameters may already carry an extra_body (e.g. from
            # caller-supplied extra settings). Merge into a *copy* of it so
            # those entries are preserved and the caller's dict is not
            # mutated; the explicit realtime setting wins on a key clash.
            inherited_body = params.get("extra_body")
            merged_body = dict(inherited_body) if isinstance(inherited_body, dict) else {}
            merged_body["realtime"] = realtime
            params["extra_body"] = merged_body

        return params