experiments.evaluators
- class CoherenceEvaluator(*args: Any, **kwargs: Any)
Bases: LLMCriteriaEvaluator
- template = "Determine if the following text is coherent. In this context, 'coherent' means the text 'is coherent, well-structured, and logically sound'. First, explain step-by-step why you think the text is or is not coherent. Then provide a single word label; 'true' if the text is coherent or 'false' if the text is not coherent. Here is an example template for whether the text meets a criteria:\n\nCRITERIA: the text is 'coherent'\nTEXT: *the provided text to evaluate*\nEXPLANATION: *a step by step explanation of your reasoning for whether the text meets the criteria*\nLABEL: *true or false*\n\nFollow this template for the following example:\n\nCRITERIA: the text is 'coherent'\nTEXT: {text}\nEXPLANATION: "
- class ConcisenessEvaluator(*args: Any, **kwargs: Any)
Bases: LLMCriteriaEvaluator
- template = "Determine if the following text is concise. In this context, 'concise' means the text 'is just a few sentences and easy to follow'. First, explain step-by-step why you think the text is or is not concise. Then provide a single word label; 'true' if the text is concise or 'false' if the text is not concise. Here is an example template for whether the text meets a criteria:\n\nCRITERIA: the text is 'concise'\nTEXT: *the provided text to evaluate*\nEXPLANATION: *a step by step explanation of your reasoning for whether the text meets the criteria*\nLABEL: *true or false*\n\nFollow this template for the following example:\n\nCRITERIA: the text is 'concise'\nTEXT: {text}\nEXPLANATION: "
- class ContainsAllKeywords(*args: Any, **kwargs: Any)
Bases: CodeEvaluator
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
- class ContainsAnyKeyword(*args: Any, **kwargs: Any)
Bases: CodeEvaluator
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
- class ContainsKeyword(*args: Any, **kwargs: Any)
Bases: CodeEvaluator
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
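The three keyword checks are code evaluators over the task output: ContainsKeyword looks for one keyword, ContainsAnyKeyword passes if at least one of several appears, and ContainsAllKeywords requires all of them. A sketch, assuming keyword=/keywords= constructor parameters (only evaluate(output=...) is documented above).

    from phoenix.experiments.evaluators import (
        ContainsAllKeywords,
        ContainsAnyKeyword,
        ContainsKeyword,
    )

    output = "The capital of France is Paris."

    # Assumed constructor keywords: `keyword` for the single check,
    # `keywords` for the any/all variants.
    print(ContainsKeyword(keyword="Paris").evaluate(output=output))
    print(ContainsAnyKeyword(keywords=["Paris", "Lyon"]).evaluate(output=output))
    print(ContainsAllKeywords(keywords=["capital", "Paris"]).evaluate(output=output))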
- class HelpfulnessEvaluator(*args: Any, **kwargs: Any)
Bases: LLMCriteriaEvaluator
- template = "Determine if the following text is helpful. In this context, 'helpful' means the text 'provides useful information'. First, explain step-by-step why you think the text is or is not helpful. Then provide a single word label; 'true' if the text is helpful or 'false' if the text is not helpful. Here is an example template for whether the text meets a criteria:\n\nCRITERIA: the text is 'helpful'\nTEXT: *the provided text to evaluate*\nEXPLANATION: *a step by step explanation of your reasoning for whether the text meets the criteria*\nLABEL: *true or false*\n\nFollow this template for the following example:\n\nCRITERIA: the text is 'helpful'\nTEXT: {text}\nEXPLANATION: "
- class JSONParsable(*args: Any, **kwargs: Any)
Bases: CodeEvaluator
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
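JSONParsable checks whether the task output parses as JSON, which is useful for tasks that must return structured data. A sketch, assuming a no-argument constructor.

    from phoenix.experiments.evaluators import JSONParsable

    checker = JSONParsable()
    print(checker.evaluate(output='{"answer": 42}'))  # well-formed JSON
    print(checker.evaluate(output="answer: 42"))      # not parsable as JSON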
- class LLMCriteriaEvaluator(*args: Any, **kwargs: Any)
Bases: LLMEvaluator
- async async_evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
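LLMCriteriaEvaluator is the shared base of the criteria evaluators above and exposes both a blocking evaluate and an awaitable async_evaluate with the same keyword-only output parameter. The awaitable form makes it easy to score several outputs concurrently; a sketch, reusing the assumed model= constructor keyword.

    import asyncio

    from phoenix.evals import OpenAIModel  # assumed import path
    from phoenix.experiments.evaluators import HelpfulnessEvaluator

    async def score_all(outputs: list[str]) -> list:
        evaluator = HelpfulnessEvaluator(model=OpenAIModel(model="gpt-4o"))  # assumed constructor
        # async_evaluate mirrors evaluate, so the calls can run concurrently.
        return await asyncio.gather(*(evaluator.async_evaluate(output=o) for o in outputs))

    results = asyncio.run(score_all(["Restart the router, then rerun the speed test.", "No idea."]))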
- class MatchesRegex(*args: Any, **kwargs: Any)
Bases: CodeEvaluator
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, **_: Any) → EvaluationResult
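MatchesRegex scores the task output against a regular expression. A sketch, assuming the expression is passed to the constructor as pattern=.

    from phoenix.experiments.evaluators import MatchesRegex

    # Assumption: the regular expression is supplied as `pattern=`.
    has_phone_number = MatchesRegex(pattern=r"\d{3}-\d{3}-\d{4}")
    print(has_phone_number.evaluate(output="Call 555-123-4567 for support."))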
- class RelevanceEvaluator(*args: Any, **kwargs: Any)
Bases: LLMEvaluator
- async async_evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, metadata: Mapping[str, Dict[str, Any] | List[Any] | str | int | float | bool | None] = mappingproxy({}), input: Mapping[str, Dict[str, Any] | List[Any] | str | int | float | bool | None] = mappingproxy({}), **_: Any) → EvaluationResult
- evaluate(*, output: Dict[str, Any] | List[Any] | str | int | float | bool | None = None, metadata: Mapping[str, Dict[str, Any] | List[Any] | str | int | float | bool | None] = mappingproxy({}), input: Mapping[str, Dict[str, Any] | List[Any] | str | int | float | bool | None] = mappingproxy({}), **_: Any) → EvaluationResult
- template = "Determine if the following response is relevant to the query. In this context, 'relevance' means that the response directly addresses the core question or topic of the query. First, explain step-by-step why you think the text is or is not relevant. Then provide a single word label; 'true' if the text is relevant or 'false' if the text is not relevant. Here is an example template for your reponse:\n\nCRITERIA: the response is 'relevant' to the query\nQUERY: *text that contains a query*\nRESPONSE: *a response that may or may not be relevant to the query*\nEXPLANATION: *a step by step explanation of your reasoning for whether or not the response is relevant to the query*\nLABEL: *true or false*\n\nFollow this template for the following example:\n\nCRITERIA: the response is 'relevant' to the query\nQUERY: {reference}\nRESPONSE: {submission}\nEXPLANATION: "#
- create_evaluator(kind: str | AnnotatorKind = AnnotatorKind.CODE, name: str | None = None, scorer: Callable[[Any], EvaluationResult] | None = None) → Callable[[Callable[[...], Any]], Evaluator]
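create_evaluator is a decorator factory for turning a plain scoring function into an Evaluator without writing a class; per the signature, kind defaults to AnnotatorKind.CODE and name is optional. The sketch below assumes the wrapper coerces a bool return value into a pass/fail EvaluationResult.

    from phoenix.experiments.evaluators import create_evaluator

    @create_evaluator(name="no_apology")  # kind defaults to AnnotatorKind.CODE
    def no_apology(output: str) -> bool:
        # Assumption: a bool return value is wrapped into an EvaluationResult.
        return not output.lower().startswith("sorry")

Per the return annotation, the decorated object is an Evaluator, so it can be passed alongside the built-in evaluators wherever one is expected.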