evals.models#

class AnthropicModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'claude-2.1', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 256, stop_sequences: List[str] = <factory>, extra_parameters: Dict[str, Any] = <factory>, max_content_size: Optional[int] = None)#

Bases: BaseModel

extra_parameters: Dict[str, Any]#

Any extra parameters to add to the request body (e.g., countPenalty for AI21 models).

invocation_parameters() Dict[str, Any]#
max_content_size: int | None = None#

If you’re using a fine-tuned model, set this to the maximum content size

max_tokens: int = 256#

The maximum number of tokens to generate in the completion.

model: str = 'claude-2.1'#

The model name to use.

stop_sequences: List[str]#

If the model encounters a stop sequence, it stops generating further tokens.

temperature: float = 0.0#

What sampling temperature to use.

top_k: int = 256#

The top-k sampling cutoff: only the k most likely tokens are considered at each step.

top_p: float = 1#

Total probability mass of tokens to consider at each step.
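
A minimal construction sketch using only the parameters documented above. The import path is assumed from the module documented here, and the commented-out call is illustrative only, since this reference does not show how a model instance is invoked.

    from phoenix.evals.models import AnthropicModel

    # Override only the documented defaults that matter for the use case.
    model = AnthropicModel(
        model="claude-2.1",
        temperature=0.0,
        max_tokens=256,
        top_k=256,
        stop_sequences=["\n\nHuman:"],
    )

    # Assumption: instances are callable on a prompt string (not shown in this reference).
    # answer = model("Is this product review positive or negative? ...")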

class BaseModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>)#

Bases: ABC

default_concurrency: int = 20#
reload_client() None#
verbose_generation_info() str#
class BedrockModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model_id: str = 'anthropic.claude-v2', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 256, stop_sequences: List[str] = <factory>, session: Any = None, client: Any = None, max_content_size: Optional[int] = None, extra_parameters: Dict[str, Any] = <factory>)#

Bases: BaseModel

client: Any = None#

The Bedrock session client. If unset, a new one is created with boto3.

extra_parameters: Dict[str, Any]#

Any extra parameters to add to the request body (e.g., countPenalty for AI21 models).

max_content_size: int | None = None#

If you’re using a fine-tuned model, set this to the maximum content size

max_tokens: int = 256#

The maximum number of tokens to generate in the completion.

model_id: str = 'anthropic.claude-v2'#

The model name to use.

session: Any = None#

A Bedrock session. If provided, a new Bedrock client will be created using this session.

stop_sequences: List[str]#

If the model encounters a stop sequence, it stops generating further tokens.

temperature: float = 0.0#

What sampling temperature to use.

top_k: int = 256#

The top-k sampling cutoff: only the k most likely tokens are considered at each step.

top_p: float = 1#

Total probability mass of tokens to consider at each step.
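
A sketch of passing an existing boto3 session, per the session attribute above; the region name is a placeholder and the import path is assumed.

    import boto3

    from phoenix.evals.models import BedrockModel

    # Per the `session` attribute, a new Bedrock client is created from this session.
    session = boto3.Session(region_name="us-east-1")  # placeholder region

    model = BedrockModel(
        model_id="anthropic.claude-v2",
        session=session,
        temperature=0.0,
        max_tokens=256,
    )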

class GeminiModel(default_concurrency: int = 5, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[ForwardRef('Credentials')] = None, model: str = 'gemini-pro', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 32, stop_sequences: List[str] = <factory>)#

Bases: BaseModel

credentials: Credentials | None = None#
default_concurrency: int = 5#
property generation_config: Dict[str, Any]#
location: str | None = None#

The default location to use when making API calls. If not set, the client's default location is used.

max_tokens: int = 256#

The maximum number of tokens to generate in the completion.

model: str = 'gemini-pro'#

The model name to use.

project: str | None = None#

The default project to use when making API calls.

reload_client() None#
stop_sequences: List[str]#

If the model encounters a stop sequence, it stops generating further tokens.

temperature: float = 0.0#

What sampling temperature to use.

top_k: int = 32#

The top-k sampling cutoff: only the k most likely tokens are considered at each step.

top_p: float = 1#

Total probability mass of tokens to consider at each step.
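
A construction sketch using the documented project, location, and credentials fields; the key file, project id, and location values are placeholders, and the import paths are assumptions.

    from google.oauth2 import service_account

    from phoenix.evals.models import GeminiModel

    # Load explicit service-account credentials instead of relying on ambient auth.
    credentials = service_account.Credentials.from_service_account_file("sa-key.json")

    model = GeminiModel(
        model="gemini-pro",
        project="my-gcp-project",
        location="us-central1",
        credentials=credentials,
        top_k=32,
    )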

class LiteLLMModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'gpt-3.5-turbo', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, num_retries: int = 0, request_timeout: int = 60, model_kwargs: Dict[str, Any] = <factory>, model_name: Optional[str] = None)#

Bases: BaseModel

max_tokens: int = 256#

The maximum number of tokens to generate in the completion.

model: str = 'gpt-3.5-turbo'#

The model name to use.

model_kwargs: Dict[str, Any]#

Model-specific parameters.

model_name: str | None = None#

Deprecated since version 3.0.0.

use model instead. This will be removed in a future release.

num_retries: int = 0#

Maximum number of retries if a RateLimitError, OpenAIError, or ServiceUnavailableError occurs.

request_timeout: int = 60#

Maximum number of seconds to wait when retrying.

temperature: float = 0.0#

What sampling temperature to use.

top_p: float = 1#

Total probability mass of tokens to consider at each step.
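
A sketch showing how the documented retry, timeout, and model_kwargs fields fit together; the model string and api_base value follow LiteLLM conventions and are placeholders, and the import path is assumed.

    from phoenix.evals.models import LiteLLMModel

    model = LiteLLMModel(
        model="ollama/llama3",        # LiteLLM routes by "<provider>/<model>" strings
        temperature=0.0,
        max_tokens=256,
        num_retries=2,                # retries on rate-limit or availability errors
        request_timeout=60,
        model_kwargs={"api_base": "http://localhost:11434"},  # passed through to LiteLLM
    )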

class MistralAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: ~phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'mistral-large-latest', temperature: float = 0, top_p: float | None = None, random_seed: int | None = None, response_format: ~typing.Dict[str, str] | None = None, safe_mode: bool = False, safe_prompt: bool = False)#

Bases: BaseModel

A model class for Mistral AI. Requires the mistralai package to be installed.

invocation_parameters() Dict[str, Any]#
model: str = 'mistral-large-latest'#
random_seed: int | None = None#
response_format: Dict[str, str] | None = None#
safe_mode: bool = False#
safe_prompt: bool = False#
temperature: float = 0#
top_p: float | None = None#
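
A construction sketch with the documented fields; API-key handling is not shown in this reference, so it is assumed the underlying mistralai client reads its key from the environment, and the import path is likewise assumed.

    from phoenix.evals.models import MistralAIModel

    model = MistralAIModel(
        model="mistral-large-latest",
        temperature=0.0,
        random_seed=42,       # fix the seed for reproducible sampling
        safe_prompt=True,     # enable the Mistral API's safety prompt
    )
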
class OpenAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, api_key: Optional[str] = None, organization: Optional[str] = None, base_url: Optional[str] = None, model: str = 'gpt-4', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0, n: int = 1, model_kwargs: Dict[str, Any] = <factory>, batch_size: int = 20, request_timeout: Union[float, Tuple[float, float], NoneType] = None, api_version: Optional[str] = None, azure_endpoint: Optional[str] = None, azure_deployment: Optional[str] = None, azure_ad_token: Optional[str] = None, azure_ad_token_provider: Optional[Callable[[], str]] = None, default_headers: Optional[Mapping[str, str]] = None, model_name: Optional[str] = None)#

Bases: BaseModel

api_key: str | None = None#

Your OpenAI API key. If not provided, it will be read from the OPENAI_API_KEY environment variable.

api_version: str | None = None#

The version of the Azure OpenAI API to use. See https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning

azure_ad_token: str | None = None#
azure_ad_token_provider: Callable[[], str] | None = None#
azure_deployment: str | None = None#
azure_endpoint: str | None = None#

The endpoint to use for azure openai. Available in the azure portal. https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource

base_url: str | None = None#

An optional base URL to use for the OpenAI API. If not provided, the OpenAI client's default base URL is used.

batch_size: int = 20#

Batch size to use when passing multiple documents to generate.

default_headers: Mapping[str, str] | None = None#

Default headers required by AzureOpenAI

frequency_penalty: float = 0#

Penalizes repeated tokens according to frequency.

property invocation_params: Dict[str, Any]#
max_tokens: int = 256#

The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximal context size.

model: str = 'gpt-4'#

Model name to use. In the case of Azure, this is the deployment name, such as gpt-35-instant.

model_kwargs: Dict[str, Any]#

Holds any model parameters valid for create call not explicitly specified.

model_name: str | None = None#

Deprecated since version 3.0.0.

use model instead. This will be removed in a future release.

n: int = 1#

How many completions to generate for each prompt.

organization: str | None = None#

The organization to use for the OpenAI API. If not provided, the OpenAI client's default organization is used.

presence_penalty: float = 0#

Penalizes repeated tokens.

property public_invocation_params: Dict[str, Any]#
reload_client() None#
request_timeout: float | Tuple[float, float] | None = None#

Timeout for requests to OpenAI completion API. Default is 600 seconds.

property supports_function_calling: bool#
temperature: float = 0.0#

What sampling temperature to use.

top_p: float = 1#

Total probability mass of tokens to consider at each step.

verbose_generation_info() str#
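
A sketch of the two documented configurations: standard OpenAI and Azure OpenAI. The Azure endpoint and API version values are placeholders, and the import path is assumed.

    from phoenix.evals.models import OpenAIModel

    # Standard OpenAI: the key is read from OPENAI_API_KEY when api_key is not passed.
    model = OpenAIModel(model="gpt-4", temperature=0.0, max_tokens=256)

    # Azure OpenAI: per the `model` attribute above, pass the deployment name.
    azure_model = OpenAIModel(
        model="gpt-35-turbo",
        azure_endpoint="https://my-resource.openai.azure.com/",
        api_version="2023-05-15",
    )
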
class VertexAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[ForwardRef('Credentials')] = None, model: str = 'text-bison', tuned_model: Optional[str] = None, temperature: float = 0.0, max_tokens: int = 256, top_p: float = 0.95, top_k: int = 40, model_name: Optional[str] = None, tuned_model_name: Optional[str] = None)#

Bases: BaseModel

credentials: Credentials | None = None#
property invocation_params: Dict[str, Any]#
property is_codey_model: bool#
location: str | None = None#

The default location to use when making API calls. If not set, the client's default location is used.

max_tokens: int = 256#

The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximal context size.

model: str = 'text-bison'#
model_name: str | None = None#

Deprecated since version 3.0.0.

use model instead. This will be removed in a future release.

project: str | None = None#

The default project to use when making API calls.

temperature: float = 0.0#

What sampling temperature to use.

top_k: int = 40#

How the model selects tokens for output: the next token is selected from among the k most probable tokens.

top_p: float = 0.95#

Tokens are selected from most probable to least until the sum of their probabilities equals the top_p value.

tuned_model: str | None = None#

The name of a tuned model. If provided, model is ignored.

tuned_model_name: str | None = None#

Deprecated since version 3.0.0.

use tuned_model instead. This will be removed in a future release.

verbose_generation_info() str#
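
A construction sketch using the documented fields; the project and location values are placeholders, ambient Google Cloud credentials are assumed when credentials is not passed, and the import path is assumed.

    from phoenix.evals.models import VertexAIModel

    # Per the docs above, `tuned_model` (if set) would take precedence over `model`.
    model = VertexAIModel(
        model="text-bison",
        project="my-gcp-project",
        location="us-central1",
        temperature=0.0,
        top_p=0.95,
        top_k=40,
    )
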
set_verbosity(model: BaseModel, verbose: bool = False) Generator[BaseModel, None, None]#
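
A usage sketch for set_verbosity: the Generator return type suggests it is used as a context manager, which is an assumption, as is the import path.

    from phoenix.evals.models import OpenAIModel, set_verbosity

    model = OpenAIModel(model="gpt-4")

    # Verbose generation info is emitted only while the block is active (assumed behavior).
    with set_verbosity(model, verbose=True) as verbose_model:
        pass  # run evaluations with verbose_model here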