class AsyncMultiUnify

source code

properties


cache

source code

def cache(self) -> bool:

Get the default cache bool.

Returns:

The default cache bool.


clients

source code

def clients(self) -> Dict[str, _UniClient]:

Get the current dictionary of clients, with endpoint names as keys and Unify or AsyncUnify instances as values.

Returns:

The dictionary of clients.
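
For illustration, a minimal sketch of inspecting the per-endpoint sub-clients (assuming the class is exposed at the top level of the unify package; the endpoint names are hypothetical):

import unify

client = unify.AsyncMultiUnify(
    endpoints=["gpt-4o@openai", "claude-3-sonnet@anthropic"],  # hypothetical endpoint names
)
# One sub-client is held per endpoint, keyed by endpoint name.
for name, sub_client in client.clients.items():
    print(name, "->", type(sub_client).__name__)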


default_prompt

source code

def default_prompt(self) -> Prompt:

Get the default prompt, if set.

Returns:

The default prompt.


drop_params

source code

def drop_params(self) -> Optional[bool]:

Get the default drop_params bool, if set.

Returns:

The default drop_params bool.


endpoints

source code

def endpoints(self) -> Tuple[str, ...]:

Get the current tuple of endpoints.

Returns:

The tuple of endpoints.


extra_body

source code

def extra_body(self) -> Optional[Mapping[str, str]]:

Get the default extra body, if set.

Returns:

The default extra body.


extra_headers

source code

def extra_headers(self) -> Optional[Headers]:

Get the default extra headers, if set.

Returns:

The default extra headers.


extra_query

source code

def extra_query(self) -> Optional[Query]:

Get the default extra query, if set.

Returns:

The default extra query.


frequency_penalty

source code

def frequency_penalty(self) -> Optional[float]:

Get the default frequency penalty, if set.

Returns:

The default frequency penalty.


http_client

source code

def http_client(self) -> Union[httpx.AsyncClient, httpx.Client]:

Get the http client used under the hood.

Returns:

The http client used under the hood.


input_cost

source code

def input_cost(self) -> Dict[str, float]:

Get the input cost for each endpoint, with endpoint names as keys.

Returns:

The dictionary of input costs.


inter_token_latency

source code

def inter_token_latency(self) -> Dict[str, float]:

Get the inter-token latency for each endpoint, with endpoint names as keys.

Returns:

The dictionary of inter-token latencies.


log_query_body

source code

def log_query_body(self) -> Optional[bool]:

Get the default log query body bool, if set.

Returns:

The default log query body bool.


log_response_body

source code

def log_response_body(self) -> Optional[bool]:

Get the default log response body bool, if set.

Returns:

The default log response body bool.


logit_bias

source code

def logit_bias(self) -> Optional[Dict[str, int]]:

Get the default logit bias, if set.

Returns:

The default logit bias.


logprobs

source code

def logprobs(self) -> Optional[bool]:

Get the default logprobs, if set.

Returns:

The default logprobs.


max_completion_tokens

source code

def max_completion_tokens(self) -> Optional[int]:

Get the default max tokens, if set.

Returns:

The default max tokens.


messages

source code

def messages(
        self,
    ) -> Optional[
        Union[
            List[ChatCompletionMessageParam],
            Dict[str, List[ChatCompletionMessageParam]],
        ]
    ]:

Get the default messages, if set.

Returns:

The default messages.


n

source code

def n(self) -> Optional[int]:

Get the default n, if set.

Returns:

The default n value.


output_cost

source code

def output_cost(self) -> Dict[str, float]:

Get the output cost for each endpoint, with endpoint names as keys.

Returns:

The dictionary of output costs.


parallel_tool_calls

source code

def parallel_tool_calls(self) -> Optional[bool]:

Get the default parallel tool calls bool, if set.

Returns:

The default parallel tool calls bool.


presence_penalty

source code

def presence_penalty(self) -> Optional[float]:

Get the default presence penalty, if set.

Returns:

The default presence penalty.


region

source code

def region(self) -> Optional[str]:

Get the default region, if set.

Returns:

The default region.


response_format

source code

def response_format(self) -> Optional[ResponseFormat]:

Get the default response format, if set.

Returns:

The default response format.


return_full_completion

source code

def return_full_completion(self) -> bool:

Get the default return full completion bool.

Returns:

The default return full completion bool.


seed

source code

def seed(self) -> Optional[int]:

Get the default seed value, if set.

Returns:

The default seed value.


stop

source code

def stop(self) -> Union[Optional[str], List[str]]:

Get the default stop value, if set.

Returns:

The default stop value.


stream

source code

def stream(self) -> Optional[bool]:

Get the default stream bool, if set.

Returns:

The default stream bool.


stream_options

source code

def stream_options(self) -> Optional[ChatCompletionStreamOptionsParam]:

Get the default stream options, if set.

Returns:

The default stream options.


system_message

source code

def system_message(self) -> Optional[str]:

Get the default system message, if set.

Returns:

The default system message.


tags

source code

def tags(self) -> Optional[List[str]]:

Get the default tags, if set.

Returns:

The default tags.


temperature

source code

def temperature(self) -> Optional[float]:

Get the default temperature, if set.

Returns:

The default temperature.


time_to_first_token

source code

def time_to_first_token(self) -> Dict[str, float]:

Get the time to first token for each endpoint, with endpoint names as keys.

Returns:

The dictionary of times to first token.


tool_choice

source code

def tool_choice(self) -> Optional[ChatCompletionToolChoiceOptionParam]:

Get the default tool choice, if set.

Returns:

The default tool choice.


tools

source code

def tools(self) -> Optional[Iterable[ChatCompletionToolParam]]:

Get the default tools, if set.

Returns:

The default tools.


top_logprobs

source code

def top_logprobs(self) -> Optional[int]:

Get the default top logprobs, if set.

Returns:

The default top logprobs.


top_p

source code

def top_p(self) -> Optional[float]:

Get the default top p value, if set.

Returns:

The default top p value.


use_custom_keys

source code

def use_custom_keys(self) -> bool:

Get the default use custom keys bool, if set.

Returns:

The default use custom keys bool.

setters


set_cache

source code

def set_cache(self, value: bool) -> Self:

Set the default cache bool.

Arguments:

  • value - The default cache bool.

Returns:

This client, useful for chaining inplace calls.
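
Since every setter returns the client itself, defaults can be configured fluently. A minimal sketch (the endpoint name is hypothetical):

client = (
    unify.AsyncMultiUnify(endpoints="gpt-4o@openai")  # hypothetical endpoint name
    .set_cache(True)
    .set_temperature(0.2)
    .set_max_completion_tokens(512)
)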


set_default_prompt

source code

def set_default_prompt(self, value: Prompt) -> Self:

Set the default prompt.

Arguments:

  • value - The default prompt.

Returns:

This client, useful for chaining inplace calls.


set_drop_params

source code

def set_drop_params(self, value: bool) -> Self:

Set the default drop params bool.

Arguments:

  • value - The default drop params bool.

Returns:

This client, useful for chaining inplace calls.


set_extra_body

source code

def set_extra_body(self, value: Body) -> Self:

Set the default extra body.

Arguments:

  • value - The default extra body.

Returns:

This client, useful for chaining inplace calls.


set_extra_headers

source code

def set_extra_headers(self, value: Headers) -> Self:

Set the default extra headers.

Arguments:

  • value - The default extra headers.

Returns:

This client, useful for chaining inplace calls.


set_extra_query

source code

def set_extra_query(self, value: Query) -> Self:

Set the default extra query.

Arguments:

  • value - The default extra query.

Returns:

This client, useful for chaining inplace calls.


set_frequency_penalty

source code

def set_frequency_penalty(self, value: float) -> Self:

Set the default frequency penalty.

Arguments:

  • value - The default frequency penalty.

Returns:

This client, useful for chaining inplace calls.


set_log_query_body

source code

def set_log_query_body(self, value: bool) -> Self:

Set the default log query body bool.

Arguments:

  • value - The default log query body bool.

Returns:

This client, useful for chaining inplace calls.


set_log_response_body

source code

def set_log_response_body(self, value: bool) -> Self:

Set the default log response body bool.

Arguments:

  • value - The default log response body bool.

Returns:

This client, useful for chaining inplace calls.


set_logit_bias

source code

def set_logit_bias(self, value: Dict[str, int]) -> Self:

Set the default logit bias.

Arguments:

  • value - The default logit bias.

Returns:

This client, useful for chaining inplace calls.


set_logprobs

source code

def set_logprobs(self, value: bool) -> Self:

Set the default logprobs.

Arguments:

  • value - The default logprobs.

Returns:

This client, useful for chaining inplace calls.


set_max_completion_tokens

source code

def set_max_completion_tokens(self, value: int) -> Self:

Set the default max tokens.

Arguments:

  • value - The default max tokens.

Returns:

This client, useful for chaining inplace calls.


set_messages

source code

def set_messages(
        self,
        value: Union[
            List[ChatCompletionMessageParam],
            Dict[str, List[ChatCompletionMessageParam]],
        ],
    ) -> Self:

Set the default messages.

Arguments:

  • value - The default messages.

Returns:

This client, useful for chaining inplace calls.
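
Messages follow the OpenAI chat format. A sketch of setting a shared default, or per-endpoint defaults (per-endpoint keying follows from the Dict[str, List[ChatCompletionMessageParam]] signature; endpoint names hypothetical), reusing client from the earlier sketch:

# Shared default applied to every endpoint:
client.set_messages([{"role": "user", "content": "Hello!"}])

# Or per-endpoint defaults, keyed by endpoint name:
client.set_messages({
    "gpt-4o@openai": [{"role": "user", "content": "Hello!"}],  # hypothetical name
})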


set_n

source code

def set_n(self, value: int) -> Self:

Set the default n value.

Arguments:

  • value - The default n value.

Returns:

This client, useful for chaining inplace calls.


set_parallel_tool_calls

source code

def set_parallel_tool_calls(self, value: bool) -> Self:

Set the default parallel tool calls bool.

Arguments:

  • value - The default parallel tool calls bool.

Returns:

This client, useful for chaining inplace calls.


set_presence_penalty

source code

def set_presence_penalty(self, value: float) -> Self:

Set the default presence penalty.

Arguments:

  • value - The default presence penalty.

Returns:

This client, useful for chaining inplace calls.


set_region

source code

def set_region(self, value: str) -> Self:

Set the default region.

Arguments:

  • value - The default region.

Returns:

This client, useful for chaining inplace calls.


set_response_format

source code

def set_response_format(self, value: ResponseFormat) -> Self:

Set the default response format.

Arguments:

  • value - The default response format.

Returns:

This client, useful for chaining inplace calls.
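
ResponseFormat follows the OpenAI convention, so JSON mode can presumably be enabled with a plain dict:

client.set_response_format({"type": "json_object"})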


set_return_full_completion

source code

def set_return_full_completion(self, value: bool) -> Self:

Set the default return full completion bool.

Arguments:

  • value - The default return full completion bool.

Returns:

This client, useful for chaining inplace calls.


set_seed

source code

def set_seed(self, value: int) -> Self:

Set the default seed value.

Arguments:

  • value - The default seed value.

Returns:

This client, useful for chaining inplace calls.


set_stop

source code

def set_stop(self, value: Union[str, List[str]]) -> Self:

Set the default stop value.

Arguments:

  • value - The default stop value.

Returns:

This client, useful for chaining inplace calls.


set_stream

source code

def set_stream(self, value: bool) -> Self:

Set the default stream bool.

Arguments:

  • value - The default stream bool.

Returns:

This client, useful for chaining inplace calls.


set_stream_options

source code

def set_stream_options(self, value: ChatCompletionStreamOptionsParam) -> Self:

Set the default stream options.

Arguments:

  • value - The default stream options.

Returns:

This client, useful for chaining inplace calls.


set_system_message

source code

def set_system_message(self, value: str) -> Self:

Set the default system message.

Arguments:

  • value - The default system message.

Returns:

This client, useful for chaining inplace calls.


set_tags

source code

def set_tags(self, value: List[str]) -> Self:

Set the default tags.

Arguments:

  • value - The default tags.

Returns:

This client, useful for chaining inplace calls.


set_temperature

source code

def set_temperature(self, value: float) -> Self:

Set the default temperature.

Arguments:

  • value - The default temperature.

Returns:

This client, useful for chaining inplace calls.


set_tool_choice

source code

def set_tool_choice(self, value: ChatCompletionToolChoiceOptionParam) -> Self:

Set the default tool choice.

Arguments:

  • value - The default tool choice.

Returns:

This client, useful for chaining inplace calls.


set_tools

source code

def set_tools(self, value: Iterable[ChatCompletionToolParam]) -> Self:

Set the default tools.

Arguments:

  • value - The default tools.

Returns:

This client, useful for chaining inplace calls.
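
Tools use the OpenAI function-tool schema. A sketch with a hypothetical get_weather tool, chained with set_tool_choice:

client.set_tools([
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    },
]).set_tool_choice("auto")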


set_top_logprobs

source code

def set_top_logprobs(self, value: int) -> Self:

Set the default top logprobs.

Arguments:

  • value - The default top logprobs.

Returns:

This client, useful for chaining inplace calls.


set_top_p

source code

def set_top_p(self, value: float) -> Self:

Set the default top p value.

Arguments:

  • value - The default top p value.

Returns:

This client, useful for chaining inplace calls.


set_use_custom_keys

source code

def set_use_custom_keys(self, value: bool) -> Self:

Set the default use custom keys bool.

Arguments:

  • value - The default use custom keys bool.

Returns:

This client, useful for chaining inplace calls.

methods


add_endpoints

source code

def add_endpoints(
        self,
        endpoints: Union[List[str], str],
        ignore_duplicates: bool = True,
    ) -> Self:

Add extra endpoints to be queried for each call to generate.

Arguments:

  • endpoints - The extra endpoints to add.
  • ignore_duplicates - Whether or not to ignore duplicate endpoints passed.

Returns:

This client, useful for chaining inplace calls.
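
A short sketch, reusing client from the earlier sketch (the endpoint name is hypothetical):

client.add_endpoints("llama-3.1-405b@together-ai")  # hypothetical endpoint name
print(client.endpoints)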


generate

source code

def generate(
        self,
        arg0: Optional[Union[str, List[Union[str, Tuple[Any], Dict[str, Any]]]]] = None,
        /,
        system_message: Optional[str] = None,
        messages: Optional[
            Union[
                List[ChatCompletionMessageParam],
                Dict[str, List[ChatCompletionMessageParam]],
            ]
        ] = None,
        *,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[Dict[str, int]] = None,
        logprobs: Optional[bool] = None,
        top_logprobs: Optional[int] = None,
        max_completion_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ResponseFormat] = None,
        seed: Optional[int] = None,
        stop: Union[Optional[str], List[str]] = None,
        stream: Optional[bool] = None,
        stream_options: Optional[ChatCompletionStreamOptionsParam] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        tools: Optional[Iterable[ChatCompletionToolParam]] = None,
        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] = None,
        parallel_tool_calls: Optional[bool] = None,
        # platform arguments
        use_custom_keys: Optional[bool] = None,
        tags: Optional[List[str]] = None,
        drop_params: Optional[bool] = None,
        region: Optional[str] = None,
        log_query_body: Optional[bool] = None,
        log_response_body: Optional[bool] = None,
        # python client arguments
        return_full_completion: Optional[bool] = None,
        cache: Optional[bool] = None,
        # passthrough arguments
        extra_headers: Optional[Headers] = None,
        extra_query: Optional[Query] = None,
        **kwargs,
    ):

Generate a ChatCompletion response for each of the specified endpoints, from the provided query parameters.

Arguments:

  • arg0 - A string containing the user message, or a list containing the inputs.
  • system_message - An optional string containing the system message.
  • messages - A list of messages comprising the conversation so far, or a dictionary of such message lists keyed by endpoint name.
  • frequency_penalty - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far.
  • logit_bias - Modify the likelihood of specified tokens appearing in the completion.
  • logprobs - Whether to return log probabilities of the output tokens or not.
  • top_logprobs - An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
  • max_completion_tokens - The maximum number of tokens that can be generated in the chat completion.
  • n - How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices.
  • presence_penalty - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
  • response_format - An object specifying the format that the model must output.
  • seed - If specified, a best-effort attempt is made to sample deterministically.
  • stop - Up to 4 sequences where the API will stop generating further tokens.
  • stream - If True, generates content as a stream. If False, generates content as a single response.
  • stream_options - Options for streaming response. Only set this when stream is True.
  • temperature - What sampling temperature to use, between 0 and 2.
  • top_p - An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
  • tools - A list of tools the model may call. Currently, only functions are supported as a tool.
  • tool_choice - Controls which (if any) tool is called by the model.
  • parallel_tool_calls - Whether to enable parallel function calling during tool use.
  • use_custom_keys - Whether to use custom API keys or our unified API keys with the backend provider.
  • tags - Arbitrary number of tags to classify this API query as needed. Helpful for grouping queries across tasks and users.
  • drop_params - Whether or not to drop params unsupported by the provider being queried.
  • region - A string used to represent the region where the endpoint is accessed.
  • log_query_body - Whether to log the contents of the query json body.
  • log_response_body - Whether to log the contents of the response json body.
  • return_full_completion - If False, only return the message content; otherwise, return the full completion object.
  • cache - If True, then the arguments will be stored in a local cache file, and any future calls with identical arguments will read from the cache instead of querying the LLM.
  • extra_headers - Additional “passthrough” headers for the request, which are provider-specific and not part of the OpenAI standard.
  • extra_query - Additional “passthrough” query parameters for the request, which are provider-specific and not part of the OpenAI standard.
  • kwargs - Additional “passthrough” JSON properties for the body of the request, which are provider-specific and not part of the OpenAI standard.

Returns:

If stream is True, returns a generator yielding chunks of content. If stream is False, returns a single string response.

Raises:

  • UnifyError: If an error occurs during content generation.
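
A minimal usage sketch, reusing client from the earlier sketch. That generate is awaitable on this async client, and that a multi-endpoint client returns one response per endpoint keyed by endpoint name, are both assumptions here:

import asyncio

async def main():
    # Assumed: awaitable generate, returning per-endpoint responses keyed by name.
    responses = await client.generate("Summarize the halting problem in one sentence.")
    for endpoint, reply in responses.items():
        print(endpoint, ":", reply)

asyncio.run(main())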

get_credit_balance

source code

def get_credit_balance(self) -> Union[float, None]:

Get the remaining credits left on your account.

Returns:

The remaining credits on the account if successful, otherwise None.

Raises:

  • BadRequestError - If there was an HTTP error.
  • ValueError - If there was an error parsing the JSON response.
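
A short sketch, reusing client from the earlier sketch:

balance = client.get_credit_balance()
if balance is not None:
    print(f"Remaining credits: {balance}")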

remove_endpoints

source code

def remove_endpoints(
        self,
        endpoints: Union[List[str], str],
        ignore_missing: bool = True,
    ) -> Self:

Remove endpoints from the current list of endpoints queried on each call to generate.

Arguments:

  • endpoints - The endpoints to remove.
  • ignore_missing - Whether or not to ignore endpoints passed which are not currently present in the client.

Returns:

This client, useful for chaining inplace calls.


to_sync_client

source code

def to_sync_client(self):

Return a synchronous version of the client (MultiUnify instance), with the exact same configuration as this asynchronous (AsyncMultiUnify) client.

Returns:

A MultiUnify instance with the same configuration as this AsyncMultiUnify instance.
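
Useful when the same configuration is needed outside an event loop; a sketch (that the resulting client's calls block rather than await is presumed from the sync/async split):

sync_client = client.to_sync_client()
# sync_client is a MultiUnify with identical defaults; calls block instead of awaiting.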

dunder_methods


__init__

source code

def __init__(
        self,
        endpoints: Optional[Union[str, Iterable[str]]] = None,
        *,
        system_message: Optional[str] = None,
        messages: Optional[
            Union[
                List[ChatCompletionMessageParam],
                Dict[str, List[ChatCompletionMessageParam]],
            ]
        ] = None,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[Dict[str, int]] = None,
        logprobs: Optional[bool] = None,
        top_logprobs: Optional[int] = None,
        max_completion_tokens: Optional[int] = 1024,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ResponseFormat] = None,
        seed: Optional[int] = None,
        stop: Union[Optional[str], List[str]] = None,
        temperature: Optional[float] = 1.0,
        top_p: Optional[float] = None,
        tools: Optional[Iterable[ChatCompletionToolParam]] = None,
        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] = None,
        parallel_tool_calls: Optional[bool] = None,
        # platform arguments
        use_custom_keys: bool = False,
        tags: Optional[List[str]] = None,
        drop_params: Optional[bool] = True,
        region: Optional[str] = None,
        log_query_body: Optional[bool] = True,
        log_response_body: Optional[bool] = True,
        api_key: Optional[str] = None,
        # python client arguments
        return_full_completion: bool = False,
        cache: bool = False,
        # passthrough arguments
        extra_headers: Optional[Headers] = None,
        extra_query: Optional[Query] = None,
        **kwargs,
    ) -> None:

Initialize the Multi LLM Unify client.

Arguments:

  • endpoints - A single endpoint name or a list of endpoint names, with each name identifying the model and provider to query.
  • system_message - An optional string containing the system message.
  • messages - A list of messages comprising the conversation so far, or a dictionary of such message lists keyed by endpoint name. This will be used as the default for each request.
  • frequency_penalty - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far.
  • logit_bias - Modify the likelihood of specified tokens appearing in the completion.
  • logprobs - Whether to return log probabilities of the output tokens or not.
  • top_logprobs - An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
  • max_completion_tokens - The maximum number of tokens that can be generated in the chat completion.
  • n - How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices.
  • presence_penalty - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
  • response_format - An object specifying the format that the model must output.
  • seed - If specified, a best-effort attempt is made to sample deterministically.
  • stop - Up to 4 sequences where the API will stop generating further tokens.
  • temperature - What sampling temperature to use, between 0 and 2.
  • top_p - An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
  • tools - A list of tools the model may call. Currently, only functions are supported as a tool.
  • tool_choice - Controls which (if any) tool is called by the model.
  • parallel_tool_calls - Whether to enable parallel function calling during tool use.
  • use_custom_keys - Whether to use custom API keys or our unified API keys with the backend provider.
  • tags - Arbitrary number of tags to classify this API query as needed. Helpful for grouping queries across tasks and users.
  • drop_params - Whether or not to drop params unsupported by the provider being queried.
  • region - A string used to represent the region where the endpoint is accessed.
  • log_query_body - Whether to log the contents of the query json body.
  • log_response_body - Whether to log the contents of the response json body.
  • api_key - API key for accessing the Unify API.
  • return_full_completion - If False, only return the message content; otherwise, return the full completion object.
  • cache - If True, then the arguments will be stored in a local cache file, and any future calls with identical arguments will read from the cache instead of querying the LLM.
  • extra_headers - Additional “passthrough” headers for the request, which are provider-specific and not part of the OpenAI standard.
  • extra_query - Additional “passthrough” query parameters for the request, which are provider-specific and not part of the OpenAI standard.
  • kwargs - Additional “passthrough” JSON properties for the body of the request, which are provider-specific and not part of the OpenAI standard.

Raises:

  • UnifyError: If the API key is missing.
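
Putting it together, a minimal construction sketch. The endpoint names are hypothetical, and reading the key from the environment when api_key is omitted is an assumption:

client = unify.AsyncMultiUnify(
    endpoints=["gpt-4o@openai", "claude-3-sonnet@anthropic"],  # hypothetical names
    system_message="You are a terse assistant.",
    temperature=0.3,
    api_key="...",  # or omit, if the key is picked up from the environment (assumption)
)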

__repr__

source code

def __repr__(self):

__str__

source code

def __str__(self):