evals.models package#
Submodules#
evals.models.anthropic module#
- class evals.models.anthropic.AnthropicModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'claude-2.1', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 256, stop_sequences: List[str] = <factory>, extra_parameters: Dict[str, Any] = <factory>, max_content_size: Optional[int] = None)#
Bases:
BaseModel
- extra_parameters: Dict[str, Any]#
Any extra parameters to add to the request body (e.g., countPenalty for AI21 models).
- invocation_parameters() Dict[str, Any] #
- max_content_size: int | None = None#
If you’re using a fine-tuned model, set this to the maximum content size
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model: str = 'claude-2.1'#
The model name to use.
- stop_sequences: List[str]#
If the model encounters a stop sequence, it stops generating further tokens.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 256#
The cutoff beyond which the model stops selecting tokens; only the top_k most likely tokens are considered at each step.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- evals.models.anthropic.anthropic_version(version_str: str) Tuple[int, ...] #
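For orientation, a minimal usage sketch (assuming the anthropic package is installed, ANTHROPIC_API_KEY is set in the environment, and the callable prompt interface inherited from BaseModel):

    from phoenix.evals.models import AnthropicModel

    # Defaults mirror the fields documented above.
    model = AnthropicModel(
        model="claude-2.1",
        temperature=0.0,
        max_tokens=256,
    )
    # Eval models are callable on a prompt string and return the completion text.
    print(model("In one word, what color is the sky?"))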
evals.models.base module#
- class evals.models.base.BaseModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>)#
Bases:
ABC
- default_concurrency: int = 20#
- reload_client() None #
- verbose_generation_info() str #
- evals.models.base.is_list_of(lst: Sequence[object], tp: T) bool #
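BaseModel is abstract, so its shared fields are exercised through a concrete subclass. A minimal sketch using OpenAIModel (assuming OPENAI_API_KEY is set in the environment):

    from phoenix.evals.models import OpenAIModel

    # default_concurrency bounds how many requests run in parallel during batched evals.
    model = OpenAIModel(model="gpt-4", default_concurrency=5)

    # Rebuild the underlying API client, e.g. after rotating credentials.
    model.reload_client()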
evals.models.bedrock module#
- class evals.models.bedrock.BedrockModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model_id: str = 'anthropic.claude-v2', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 256, stop_sequences: List[str] = <factory>, session: Any = None, client: Any = None, max_content_size: Optional[int] = None, extra_parameters: Dict[str, Any] = <factory>)#
Bases:
BaseModel
- client: Any = None#
The Bedrock session client. If unset, a new one is created with boto3.
- extra_parameters: Dict[str, Any]#
Any extra parameters to add to the request body (e.g., countPenalty for AI21 models).
- max_content_size: int | None = None#
If you’re using a fine-tuned model, set this to the maximum content size
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model_id: str = 'anthropic.claude-v2'#
The model name to use.
- session: Any = None#
A Bedrock session. If provided, a new Bedrock client will be created using this session.
- stop_sequences: List[str]#
If the model encounters a stop sequence, it stops generating further tokens.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 256#
The cutoff beyond which the model stops selecting tokens; only the top_k most likely tokens are considered at each step.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
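A minimal usage sketch (assuming boto3 is installed and AWS credentials are configured; passing a session is optional and mainly controls the region and credential profile):

    import boto3

    from phoenix.evals.models import BedrockModel

    # The region below is a placeholder; if session is omitted,
    # a new boto3 session is created automatically.
    session = boto3.Session(region_name="us-east-1")
    model = BedrockModel(model_id="anthropic.claude-v2", session=session)
    print(model("In one word, what color is the sky?"))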
evals.models.litellm module#
- class evals.models.litellm.LiteLLMModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'gpt-3.5-turbo', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, num_retries: int = 0, request_timeout: int = 60, model_kwargs: Dict[str, Any] = <factory>, model_name: Optional[str] = None)#
Bases:
BaseModel
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model: str = 'gpt-3.5-turbo'#
The model name to use.
- model_kwargs: Dict[str, Any]#
Model-specific parameters.
- model_name: str | None = None#
Deprecated since version 3.0.0.
use model instead. This will be removed in a future release.
- num_retries: int = 0#
Maximum number of retries if a RateLimitError, OpenAIError, or ServiceUnavailableError occurs.
- request_timeout: int = 60#
Maximum number of seconds to wait when retrying.
- temperature: float = 0.0#
What sampling temperature to use.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
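A minimal usage sketch (assuming the litellm package is installed and the API key for the chosen provider, here OPENAI_API_KEY, is set in the environment):

    from phoenix.evals.models import LiteLLMModel

    model = LiteLLMModel(
        model="gpt-3.5-turbo",
        num_retries=2,        # retry transient provider errors a couple of times
        request_timeout=60,   # seconds to wait per attempt
    )
    print(model("In one word, what color is the sky?"))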
evals.models.mistralai module#
- evals.models.mistralai.DEFAULT_MISTRAL_MODEL = 'mistral-large-latest'#
Use the latest large mistral model by default.
- class evals.models.mistralai.MistralAIModel(*args: Any, **kwargs: Any)#
Bases:
BaseModel
A model class for Mistral AI. Requires mistralai package to be installed.
- invocation_parameters() Dict[str, Any] #
- model: str = 'mistral-large-latest'#
- random_seed: int | None = None#
- response_format: Dict[str, str] | None = None#
- safe_mode: bool = False#
- safe_prompt: bool = False#
- temperature: float = 0#
- top_p: float | None = None#
- exception evals.models.mistralai.MistralRateLimitError#
Bases:
Exception
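A minimal usage sketch (assuming the mistralai package is installed and MISTRAL_API_KEY is set in the environment):

    from phoenix.evals.models import MistralAIModel

    model = MistralAIModel(model="mistral-large-latest", temperature=0)
    print(model("In one word, what color is the sky?"))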
evals.models.openai module#
- class evals.models.openai.AzureOptions(api_version: str, azure_endpoint: str, azure_deployment: str | None, azure_ad_token: str | None, azure_ad_token_provider: Callable[[], str] | None)#
Bases:
object
- api_version: str#
- azure_ad_token: str | None#
- azure_ad_token_provider: Callable[[], str] | None#
- azure_deployment: str | None#
- azure_endpoint: str#
- class evals.models.openai.OpenAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, api_key: Optional[str] = None, organization: Optional[str] = None, base_url: Optional[str] = None, model: str = 'gpt-4', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0, n: int = 1, model_kwargs: Dict[str, Any] = <factory>, batch_size: int = 20, request_timeout: Union[float, Tuple[float, float], NoneType] = None, api_version: Optional[str] = None, azure_endpoint: Optional[str] = None, azure_deployment: Optional[str] = None, azure_ad_token: Optional[str] = None, azure_ad_token_provider: Optional[Callable[[], str]] = None, default_headers: Optional[Mapping[str, str]] = None, model_name: Optional[str] = None)#
Bases:
BaseModel
- api_key: str | None = None#
Your OpenAI API key. If not provided, it will be read from the OPENAI_API_KEY environment variable.
- api_version: str | None = None#
The API version to use for Azure OpenAI. See https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning
- azure_ad_token: str | None = None#
- azure_ad_token_provider: Callable[[], str] | None = None#
- azure_deployment: str | None = None#
- azure_endpoint: str | None = None#
The endpoint to use for Azure OpenAI. Available in the Azure portal. https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource
- base_url: str | None = None#
An optional base URL to use for the OpenAI API. If not provided, the OpenAI client's default base URL is used.
- batch_size: int = 20#
Batch size to use when passing multiple documents to generate.
- default_headers: Mapping[str, str] | None = None#
Default headers required by AzureOpenAI
- frequency_penalty: float = 0#
Penalizes repeated tokens according to frequency.
- property invocation_params: Dict[str, Any]#
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximal context size.
- model: str = 'gpt-4'#
Model name to use. In the case of Azure OpenAI, this is the deployment name, such as gpt-35-instant.
- model_kwargs: Dict[str, Any]#
Holds any model parameters valid for the create call that are not explicitly specified.
- model_name: str | None = None#
Deprecated since version 3.0.0.
use model instead. This will be removed in a future release.
- n: int = 1#
How many completions to generate for each prompt.
- organization: str | None = None#
The organization to use for the OpenAI API. If not provided, the OpenAI client's configured organization is used.
- presence_penalty: float = 0#
Penalizes repeated tokens.
- property public_invocation_params: Dict[str, Any]#
- reload_client() None #
- request_timeout: float | Tuple[float, float] | None = None#
Timeout for requests to OpenAI completion API. Default is 600 seconds.
- property supports_function_calling: bool#
- temperature: float = 0.0#
What sampling temperature to use.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- verbose_generation_info() str #
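A minimal usage sketch covering both plain OpenAI and Azure OpenAI (the Azure endpoint, deployment name, API version, and key below are placeholders):

    from phoenix.evals.models import OpenAIModel

    # Plain OpenAI: the key is read from OPENAI_API_KEY when api_key is not given.
    model = OpenAIModel(model="gpt-4", temperature=0.0)
    print(model("In one word, what color is the sky?"))

    # Azure OpenAI: the model field holds the deployment name, and api_version plus
    # azure_endpoint are required. All values below are placeholders.
    azure_model = OpenAIModel(
        model="my-gpt-4-deployment",
        api_version="2023-05-15",
        azure_endpoint="https://my-resource.openai.azure.com/",
        api_key="<azure-api-key>",  # or use azure_ad_token / azure_ad_token_provider
    )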
evals.models.rate_limiters module#
- class evals.models.rate_limiters.AdaptiveTokenBucket(initial_per_second_request_rate: float, maximum_per_second_request_rate: float = 1000, minimum_per_second_request_rate: float = 0.1, enforcement_window_minutes: float = 1, rate_reduction_factor: float = 0.5, rate_increase_factor: float = 0.01, cooldown_seconds: float = 5)#
Bases:
object
An adaptive rate-limiter that adjusts the rate based on the number of rate limit errors.
This rate limiter does not need to know the exact rate limit. Instead, it starts with a high rate and reduces it whenever a rate limit error occurs. The rate is increased slowly over time if no further errors occur.
- Parameters:
initial_per_second_request_rate (float) – The initial allowed request rate.
maximum_per_second_request_rate (float) – The maximum allowed request rate.
minimum_per_second_request_rate (float) – The minimum allowed request rate.
enforcement_window_minutes (float) – The time window over which the rate limit is enforced.
rate_reduction_factor (float) – Multiplier used to reduce the rate limit after an error.
rate_increase_factor (float) – Exponential factor for increasing the rate limit over time when no further errors occur.
cooldown_seconds (float) – The minimum time before allowing the rate limit to decrease again.
- async async_wait_until_ready(max_wait_time: float = 10) None #
- available_requests() float #
- increase_rate() None #
- make_request_if_ready() None #
- max_tokens() float #
- on_rate_limit_error(request_start_time: float, verbose: bool = False) None #
- wait_until_ready(max_wait_time: float = 300) None #
- exception evals.models.rate_limiters.RateLimitError#
Bases:
PhoenixException
- class evals.models.rate_limiters.RateLimiter(rate_limit_error: Type[BaseException] | None = None, max_rate_limit_retries: int = 3, initial_per_second_request_rate: float = 1, maximum_per_second_request_rate: float = 50, enforcement_window_minutes: float = 1, rate_reduction_factor: float = 0.5, rate_increase_factor: float = 0.01, cooldown_seconds: float = 5, verbose: bool = False)#
Bases:
object
- alimit(fn: Callable[[ParameterSpec], Coroutine[Any, Any, GenericType]]) Callable[[ParameterSpec], Coroutine[Any, Any, GenericType]] #
- limit(fn: Callable[[ParameterSpec], GenericType]) Callable[[ParameterSpec], GenericType] #
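A minimal sketch of throttling a callable with RateLimiter.limit (the wrapped function below is an illustrative stand-in; Phoenix's model classes wire a rate limiter up internally):

    from phoenix.evals.models.rate_limiters import RateLimitError, RateLimiter

    limiter = RateLimiter(
        rate_limit_error=RateLimitError,     # exception type that triggers a rate reduction
        initial_per_second_request_rate=1,
        maximum_per_second_request_rate=50,
    )

    def call_api(prompt: str) -> str:
        # Illustrative stand-in for a real provider call.
        return f"echo: {prompt}"

    throttled_call = limiter.limit(call_api)  # returns a wrapper with the same signature
    print(throttled_call("hello"))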
evals.models.vertex module#
- class evals.models.vertex.GeminiModel(default_concurrency: int = 5, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[ForwardRef('Credentials')] = None, model: str = 'gemini-pro', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 32, stop_sequences: List[str] = <factory>)#
Bases:
BaseModel
- credentials: Credentials | None = None#
- default_concurrency: int = 5#
- property generation_config: Dict[str, Any]#
- location: str | None = None#
The default location to use when making API calls. If not provided, the default location configured in the environment is used.
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model: str = 'gemini-pro'#
The model name to use.
- project: str | None = None#
The default project to use when making API calls.
- reload_client() None #
- stop_sequences: List[str]#
If the model encounters a stop sequence, it stops generating further tokens.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 32#
The cutoff beyond which the model stops selecting tokens; only the top_k most likely tokens are considered at each step.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
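A minimal usage sketch (assuming the Vertex AI SDK is installed and application-default credentials are configured; the project and location below are placeholders):

    from phoenix.evals.models import GeminiModel

    model = GeminiModel(
        model="gemini-pro",
        project="my-gcp-project",   # placeholder project id
        location="us-central1",     # placeholder region
    )
    print(model("In one word, what color is the sky?"))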
evals.models.vertexai module#
- class evals.models.vertexai.VertexAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[ForwardRef('Credentials')] = None, model: str = 'text-bison', tuned_model: Optional[str] = None, temperature: float = 0.0, max_tokens: int = 256, top_p: float = 0.95, top_k: int = 40, model_name: Optional[str] = None, tuned_model_name: Optional[str] = None)#
Bases:
BaseModel
- credentials: Credentials | None = None#
- property invocation_params: Dict[str, Any]#
- property is_codey_model: bool#
- location: str | None = None#
The default location to use when making API calls. If not provided, the default location configured in the environment is used.
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximal context size.
- model: str = 'text-bison'#
- model_name: str | None = None#
Deprecated since version 3.0.0.
use model instead. This will be removed in a future release.
- project: str | None = None#
The default project to use when making API calls.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 40#
How the model selects tokens for output: the next token is selected from among the top_k most probable tokens.
- top_p: float = 0.95#
Tokens are selected from most probable to least until the sum of their probabilities equals the top_p value.
- tuned_model: str | None = None#
The name of a tuned model. If provided, model is ignored.
- tuned_model_name: str | None = None#
Deprecated since version 3.0.0.
use tuned_model instead. This will be removed in a future release.
- verbose_generation_info() str #
- evals.models.vertexai.is_codey_model(model_name: str) bool #
Returns True if the model name is a Codey model.
- Parameters:
model_name – The model name to check.
Returns: True if the model name is a Codey model.
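A minimal usage sketch (same Vertex AI prerequisites as above; the project and location are placeholders, and tuned_model, if set, takes precedence over model):

    from phoenix.evals.models import VertexAIModel

    model = VertexAIModel(
        model="text-bison",
        project="my-gcp-project",   # placeholder project id
        location="us-central1",     # placeholder region
    )
    print(model("In one word, what color is the sky?"))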
Module contents#
- class evals.models.AnthropicModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'claude-2.1', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 256, stop_sequences: List[str] = <factory>, extra_parameters: Dict[str, Any] = <factory>, max_content_size: Optional[int] = None)#
Bases:
BaseModel
- extra_parameters: Dict[str, Any]#
Any extra parameters to add to the request body (e.g., countPenalty for AI21 models).
- invocation_parameters() Dict[str, Any] #
- max_content_size: int | None = None#
If you’re using a fine-tuned model, set this to the maximum content size
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model: str = 'claude-2.1'#
The model name to use.
- stop_sequences: List[str]#
If the model encounters a stop sequence, it stops generating further tokens.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 256#
The cutoff beyond which the model stops selecting tokens; only the top_k most likely tokens are considered at each step.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- class evals.models.BaseModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>)#
Bases:
ABC
- default_concurrency: int = 20#
- reload_client() None #
- verbose_generation_info() str #
- class evals.models.BedrockModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model_id: str = 'anthropic.claude-v2', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 256, stop_sequences: List[str] = <factory>, session: Any = None, client: Any = None, max_content_size: Optional[int] = None, extra_parameters: Dict[str, Any] = <factory>)#
Bases:
BaseModel
- client: Any = None#
The Bedrock session client. If unset, a new one is created with boto3.
- extra_parameters: Dict[str, Any]#
Any extra parameters to add to the request body (e.g., countPenalty for AI21 models).
- max_content_size: int | None = None#
If you’re using a fine-tuned model, set this to the maximum content size
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model_id: str = 'anthropic.claude-v2'#
The model name to use.
- session: Any = None#
A Bedrock session. If provided, a new Bedrock client will be created using this session.
- stop_sequences: List[str]#
If the model encounters a stop sequence, it stops generating further tokens.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 256#
The cutoff beyond which the model stops selecting tokens; only the top_k most likely tokens are considered at each step.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- class evals.models.GeminiModel(default_concurrency: int = 5, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[ForwardRef('Credentials')] = None, model: str = 'gemini-pro', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, top_k: int = 32, stop_sequences: List[str] = <factory>)#
Bases:
BaseModel
- credentials: Credentials | None = None#
- default_concurrency: int = 5#
- property generation_config: Dict[str, Any]#
- location: str | None = None#
The default location to use when making API calls. If not provided, the default location configured in the environment is used.
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model: str = 'gemini-pro'#
The model name to use.
- project: str | None = None#
The default project to use when making API calls.
- reload_client() None #
- stop_sequences: List[str]#
If the model encounters a stop sequence, it stops generating further tokens.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 32#
The cutoff beyond which the model stops selecting tokens; only the top_k most likely tokens are considered at each step.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- class evals.models.LiteLLMModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'gpt-3.5-turbo', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, num_retries: int = 0, request_timeout: int = 60, model_kwargs: Dict[str, Any] = <factory>, model_name: Optional[str] = None)#
Bases:
BaseModel
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion.
- model: str = 'gpt-3.5-turbo'#
The model name to use.
- model_kwargs: Dict[str, Any]#
Model-specific parameters.
- model_name: str | None = None#
Deprecated since version 3.0.0.
use model instead. This will be removed in a future release.
- num_retries: int = 0#
Maximum number of retries if a RateLimitError, OpenAIError, or ServiceUnavailableError occurs.
- request_timeout: int = 60#
Maximum number of seconds to wait when retrying.
- temperature: float = 0.0#
What sampling temperature to use.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- class evals.models.MistralAIModel(*args: Any, **kwargs: Any)#
Bases:
BaseModel
A model class for Mistral AI. Requires mistralai package to be installed.
- invocation_parameters() Dict[str, Any] #
- model: str = 'mistral-large-latest'#
- random_seed: int | None = None#
- response_format: Dict[str, str] | None = None#
- safe_mode: bool = False#
- safe_prompt: bool = False#
- temperature: float = 0#
- top_p: float | None = None#
- class evals.models.OpenAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, api_key: Optional[str] = None, organization: Optional[str] = None, base_url: Optional[str] = None, model: str = 'gpt-4', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0, n: int = 1, model_kwargs: Dict[str, Any] = <factory>, batch_size: int = 20, request_timeout: Union[float, Tuple[float, float], NoneType] = None, api_version: Optional[str] = None, azure_endpoint: Optional[str] = None, azure_deployment: Optional[str] = None, azure_ad_token: Optional[str] = None, azure_ad_token_provider: Optional[Callable[[], str]] = None, default_headers: Optional[Mapping[str, str]] = None, model_name: Optional[str] = None)#
Bases:
BaseModel
- api_key: str | None = None#
Your OpenAI API key. If not provided, it will be read from the OPENAI_API_KEY environment variable.
- api_version: str | None = None#
The API version to use for Azure OpenAI. See https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning
- azure_ad_token: str | None = None#
- azure_ad_token_provider: Callable[[], str] | None = None#
- azure_deployment: str | None = None#
- azure_endpoint: str | None = None#
The endpoint to use for Azure OpenAI. Available in the Azure portal. https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource
- base_url: str | None = None#
An optional base URL to use for the OpenAI API. If not provided, the OpenAI client's default base URL is used.
- batch_size: int = 20#
Batch size to use when passing multiple documents to generate.
- default_headers: Mapping[str, str] | None = None#
Default headers required by AzureOpenAI
- frequency_penalty: float = 0#
Penalizes repeated tokens according to frequency.
- property invocation_params: Dict[str, Any]#
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximal context size.
- model: str = 'gpt-4'#
Model name to use. In the case of Azure OpenAI, this is the deployment name, such as gpt-35-instant.
- model_kwargs: Dict[str, Any]#
Holds any model parameters valid for the create call that are not explicitly specified.
- model_name: str | None = None#
Deprecated since version 3.0.0.
use model instead. This will be removed in a future release.
- n: int = 1#
How many completions to generate for each prompt.
- organization: str | None = None#
The organization to use for the OpenAI API. If not provided, the OpenAI client's configured organization is used.
- presence_penalty: float = 0#
Penalizes repeated tokens.
- property public_invocation_params: Dict[str, Any]#
- reload_client() None #
- request_timeout: float | Tuple[float, float] | None = None#
Timeout for requests to OpenAI completion API. Default is 600 seconds.
- property supports_function_calling: bool#
- temperature: float = 0.0#
What sampling temperature to use.
- top_p: float = 1#
Total probability mass of tokens to consider at each step.
- verbose_generation_info() str #
- class evals.models.VertexAIModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[ForwardRef('Credentials')] = None, model: str = 'text-bison', tuned_model: Optional[str] = None, temperature: float = 0.0, max_tokens: int = 256, top_p: float = 0.95, top_k: int = 40, model_name: Optional[str] = None, tuned_model_name: Optional[str] = None)#
Bases:
BaseModel
- credentials: Credentials | None = None#
- property invocation_params: Dict[str, Any]#
- property is_codey_model: bool#
- location: str | None = None#
The default location to use when making API calls. If not provided, the default location configured in the environment is used.
- max_tokens: int = 256#
The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximal context size.
- model: str = 'text-bison'#
- model_name: str | None = None#
Deprecated since version 3.0.0.
use model instead. This will be removed in a future release.
- project: str | None = None#
The default project to use when making API calls.
- temperature: float = 0.0#
What sampling temperature to use.
- top_k: int = 40#
How the model selects tokens for output: the next token is selected from among the top_k most probable tokens.
- top_p: float = 0.95#
Tokens are selected from most probable to least until the sum of their probabilities equals the top_p value.
- tuned_model: str | None = None#
The name of a tuned model. If provided, model is ignored.
- tuned_model_name: str | None = None#
Deprecated since version 3.0.0.
use tuned_model instead. This will be removed in a future release.
- verbose_generation_info() str #
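Since all of the model classes are re-exported at the package level, they can be used interchangeably wherever a BaseModel is expected. A minimal sketch (assuming the relevant provider credentials are configured in the environment):

    from phoenix.evals.models import AnthropicModel, BaseModel, OpenAIModel

    def grade(model: BaseModel, claim: str) -> str:
        # Any concrete model works here, since they share the callable BaseModel interface.
        return model(f"Reply with 'correct' or 'incorrect': {claim}")

    print(grade(OpenAIModel(model="gpt-4"), "2 + 2 = 4"))
    print(grade(AnthropicModel(model="claude-2.1"), "2 + 2 = 5"))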