evals.models.litellm#

class LiteLLMModel(default_concurrency: int = 20, _verbose: bool = False, _rate_limiter: phoenix.evals.models.rate_limiters.RateLimiter = <factory>, model: str = 'gpt-3.5-turbo', temperature: float = 0.0, max_tokens: int = 256, top_p: float = 1, num_retries: int = 0, request_timeout: int = 60, model_kwargs: Dict[str, Any] = <factory>, model_name: Optional[str] = None)#

Bases: BaseModel
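A minimal usage sketch, assuming provider credentials (e.g. OPENAI_API_KEY) are available in the environment; the model string below is illustrative:

```python
from phoenix.evals import LiteLLMModel

# Assumes the relevant provider API key is set in the environment.
model = LiteLLMModel(
    model="gpt-4o-mini",  # illustrative; any model string supported by LiteLLM
    temperature=0.0,
    max_tokens=256,
)

# Eval models are callable: pass a prompt string and receive the completion text.
print(model("Hello, world!"))
```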

max_tokens: int = 256#

The maximum number of tokens to generate in the completion.

model: str = 'gpt-3.5-turbo'#

The model name to use.

model_kwargs: Dict[str, Any]#

Model-specific parameters.
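For example, provider-specific options can be routed here; a hedged sketch that assumes entries in model_kwargs are forwarded to the underlying LiteLLM completion call (the model string and api_base endpoint below are illustrative):

```python
from phoenix.evals import LiteLLMModel

# Sketch: point LiteLLM at a locally hosted Ollama server.
# Assumes model_kwargs entries are passed through to the completion call.
model = LiteLLMModel(
    model="ollama/llama3",  # illustrative model string
    model_kwargs={"api_base": "http://localhost:11434"},  # illustrative endpoint
)
```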

model_name: str | None = None#

Deprecated since version 3.0.0.

Use model instead. This field will be removed in a future release.
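A migration sketch, using the field names documented on this page:

```python
from phoenix.evals import LiteLLMModel

# Deprecated (pre-3.0.0):
# model = LiteLLMModel(model_name="gpt-3.5-turbo")

# Preferred:
model = LiteLLMModel(model="gpt-3.5-turbo")
```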

num_retries: int = 0#

Maximum number of times to retry the request if a RateLimitError, OpenAIError, or ServiceUnavailableError occurs.

request_timeout: int = 60#

Maximum number of seconds to wait when retrying a request.
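A minimal sketch tuning num_retries and request_timeout together (the values are illustrative):

```python
from phoenix.evals import LiteLLMModel

# Retry transient provider errors up to 3 times, capping each wait at 30 seconds.
model = LiteLLMModel(
    model="gpt-3.5-turbo",
    num_retries=3,       # retries on RateLimitError, OpenAIError, ServiceUnavailableError
    request_timeout=30,  # seconds
)
```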

temperature: float = 0.0#

The sampling temperature to use.

top_p: float = 1#

Total probability mass of tokens to consider at each step.