inferences package#

Submodules#

inferences.errors module#

exception inferences.errors.DatasetError(errors: ValidationError | List[ValidationError])#

Bases: Exception

An error raised when the dataset is invalid or incomplete

exception inferences.errors.EmbeddingVectorSizeMismatch(embedding_feature_name: str, vector_column_name: str, vector_lengths: List[int])#

Bases: ValidationError

An error raised when there is an embedding feature with multiple different vector lengths

error_message() str#
exception inferences.errors.InvalidColumnType(error_msgs: Iterable[str])#

Bases: ValidationError

An error raised when the column type is invalid

error_message() str#
exception inferences.errors.InvalidEmbeddingReservedName(reserved_name: str, schema_field: str)#

Bases: ValidationError

An error raised when there is an embedding feature with a name, i.e. dictionary key, that is reserved

error_message() str#
exception inferences.errors.InvalidEmbeddingVectorDataType(embedding_feature_name: str, vector_column_type: str)#

Bases: ValidationError

An error raised when there is an embedding feature with a vector of an unsupported data type

error_message() str#
exception inferences.errors.InvalidEmbeddingVectorSize(embedding_feature_name: str, vector_column_name: str, vector_length: int)#

Bases: ValidationError

An error raised when there is an embedding feature with an invalid vector length

error_message() str#
exception inferences.errors.InvalidEmbeddingVectorValuesDataType(embedding_feature_name: str, vector_column_name: str, vector: Any)#

Bases: ValidationError

An error raised when there is an embedding feature with a vector that has values of an unsupported data type

error_message() str#
exception inferences.errors.InvalidSchemaError(invalid_props: Iterable[str])#

Bases: ValidationError

error_message() str#
exception inferences.errors.MissingColumns(cols: Iterable[str])#

Bases: ValidationError

An error raised when the dataset is missing columns specified in the schema

error_message() str#
exception inferences.errors.MissingEmbeddingFeatureColumnNames(embedding_feature_name: str)#

Bases: ValidationError

An error raised when trying to access an EmbeddingColumnNames config that is absent from the schema

error_message() str#
exception inferences.errors.MissingEmbeddingFeatureLinkToDataColumnName(embedding_feature_name: str)#

Bases: ValidationError

An error raised when trying to access an EmbeddingColumnNames.link_to_data_column_name absent from the schema

error_message() str#
exception inferences.errors.MissingEmbeddingFeatureRawDataColumnName(embedding_feature_name: str)#

Bases: ValidationError

An error raised when trying to access an EmbeddingColumnNames.raw_data_column_name that is absent from the schema

error_message() str#
exception inferences.errors.MissingEmbeddingFeatureVectorColumnName(embedding_feature_name: str)#

Bases: ValidationError

An error raised when trying to access an EmbeddingColumnNames.vector_column_name that is absent from the schema

error_message() str#
exception inferences.errors.MissingField(field: str)#

Bases: ValidationError

An error raised when trying to access a field that is absent from the Schema

error_message() str#
exception inferences.errors.MissingTimestampColumnName#

Bases: ValidationError

An error raised when trying to access a timestamp column that is absent from the schema.

error_message() str#
exception inferences.errors.MissingVectorColumn(col: str)#

Bases: ValidationError

An error raised when the dataset is missing an embedding vector column specified in the schema

error_message() str#
exception inferences.errors.SchemaError(errors: ValidationError | List[ValidationError])#

Bases: Exception

An error raised when the Schema is invalid or incomplete

exception inferences.errors.ValidationError#

Bases: Exception

abstract error_message() str#
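
Concrete errors implement the abstract error_message contract. A minimal sketch with a hypothetical subclass (the import path is assumed from this page's module layout):

>>> from phoenix.inferences.errors import ValidationError  # assumed import path
>>> class MissingPredictionColumn(ValidationError):
...     """Hypothetical error for a missing prediction column."""
...     def __init__(self, col: str) -> None:
...         self._col = col
...     def error_message(self) -> str:
...         return f"The dataframe is missing the prediction column: {self._col}"
...
>>> MissingPredictionColumn("prediction_label").error_message()
'The dataframe is missing the prediction column: prediction_label'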

inferences.fixtures module#

class inferences.fixtures.ExampleInferences(primary: Inferences, reference: Inferences | None = None, corpus: Inferences | None = None)#

Bases: object

A primary dataset with optional reference and corpus datasets.

corpus: Inferences | None = None#
primary: Inferences#
reference: Inferences | None = None#
class inferences.fixtures.Fixture(name: str, description: str, prefix: str, primary_file_name: str, primary_schema: phoenix.inferences.schema.Schema, reference_file_name: str | None = None, reference_schema: phoenix.inferences.schema.Schema | None = None, corpus_file_name: str | None = None, corpus_schema: phoenix.inferences.schema.Schema | None = None)#

Bases: object

corpus_file_name: str | None = None#
corpus_schema: Schema | None = None#
description: str#
name: str#
paths() Iterator[Tuple[InferencesRole, Path]]#
prefix: str#
primary_file_name: str#
primary_schema: Schema#
reference_file_name: str | None = None#
reference_schema: Schema | None = None#
class inferences.fixtures.GCSAssets(host, bucket, prefix)#

Bases: NamedTuple

bucket: str#

Alias for field number 1

host: str#

Alias for field number 0

metadata(path: Path) Metadata#
prefix: str#

Alias for field number 2

class inferences.fixtures.InferencesRole(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)#

Bases: Enum

CORPUS = 3#
PRIMARY = 1#
REFERENCE = 2#
class inferences.fixtures.Metadata(path, mediaLink, md5Hash)#

Bases: NamedTuple

md5Hash: str#

Alias for field number 2

mediaLink: str#

Alias for field number 1

path: str#

Alias for field number 0

save_artifact(location: Path) Path#
inferences.fixtures.get_inferences(fixture_name: str, no_internet: bool = False) Tuple[Inferences, Inferences | None, Inferences | None]#

Downloads primary and reference inferences for a fixture if they are not found locally.

inferences.fixtures.load_example(use_case: str) ExampleInferences#

Loads an example primary and reference dataset for a given use case.

Parameters:

use_case (str) –

Name of the Phoenix-supported use case. Valid values include:
  • "sentiment_classification_language_drift"

  • "image_classification"

  • "fashion_mnist"

  • "ner_token_drift"

  • "credit_card_fraud"

  • "click_through_rate"

Returns:

inferences – An ExampleInferences object containing the primary and optional reference inferences for the use case.

Return type:

ExampleInferences
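
For instance, the first use case above could be loaded as follows (a minimal sketch; the import path is assumed from this page's module layout):

>>> from phoenix.inferences.fixtures import load_example  # assumed import path
>>> example = load_example("sentiment_classification_language_drift")
>>> primary = example.primary      # Inferences
>>> reference = example.reference  # Inferences | None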

inferences.inferences module#

class inferences.inferences.Inferences(dataframe: DataFrame, schema: Schema | Any, name: str | None = None)#

Bases: object

A dataset to use for analysis in Phoenix. Used to construct a Phoenix session via px.launch_app.

Parameters:
  • dataframe (pandas.DataFrame) – The pandas dataframe containing the data to analyze

  • schema (phoenix.Schema) – The schema of the dataset. Maps dataframe columns to the appropriate model inference dimensions (features, predictions, actuals).

  • name (str, optional) – The name of the dataset. If not provided, a random name will be generated. The name is helpful for identifying the dataset in the application.

Returns:

inferences – The Inferences object that can be used in a Phoenix session

Return type:

Inferences

Examples

>>> primary_inferences = px.Inferences(
...     dataframe=production_dataframe, schema=schema, name="primary"
... )
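
The resulting Inferences object can then be passed to px.launch_app, as noted above (a sketch; production_dataframe and schema are assumed to be defined):

>>> session = px.launch_app(primary=primary_inferences)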
property dataframe: DataFrame#
classmethod from_name(name: str) Inferences#

Retrieves a dataset by name from the file system

classmethod from_open_inference(dataframe: DataFrame) Inferences#
property name: str#
property schema: Schema#
to_disc() None#

Writes the data and schema to disc.
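
A sketch of the implied persistence round-trip (assuming an Inferences object named "primary" was previously saved, as in the example above):

>>> primary_inferences.to_disc()
>>> restored = px.Inferences.from_name("primary")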

class inferences.inferences.OpenInferenceCategory(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)#

Bases: Enum

actual = 'actual'#
feature = 'feature'#
id = 'id'#
prediction = 'prediction'#
tag = 'tag'#
timestamp = 'timestamp'#
class inferences.inferences.OpenInferenceSpecifier(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)#

Bases: Enum

default = ''#
embedding = 'embedding'#
label = 'label'#
raw_data = 'raw_data'#
retrieved_document_ids = 'retrieved_document_ids'#
retrieved_document_scores = 'retrieved_document_scores'#
score = 'score'#

inferences.schema module#

class inferences.schema.EmbeddingColumnNames(vector_column_name: str, raw_data_column_name: str | None = None, link_to_data_column_name: str | None = None)#

Bases: Dict[str, Any]

A dataclass to hold the column names for an embedding feature. An embedding feature is a feature that is represented by a vector; the vector is a representation of unstructured data, such as text or an image.

link_to_data_column_name: str | None = None#
raw_data_column_name: str | None = None#
vector_column_name: str#
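
A minimal sketch of declaring an embedding feature (the column names are hypothetical; the import path is assumed from this page's module layout):

>>> from phoenix.inferences.schema import EmbeddingColumnNames, Schema  # assumed import path
>>> schema = Schema(
...     embedding_feature_column_names={
...         "product_image": EmbeddingColumnNames(
...             vector_column_name="image_vector",
...             raw_data_column_name="image_caption",
...             link_to_data_column_name="image_url",
...         )
...     }
... )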
class inferences.schema.RetrievalEmbeddingColumnNames(vector_column_name: str, raw_data_column_name: str | None = None, link_to_data_column_name: str | None = None, context_retrieval_ids_column_name: str | None = None, context_retrieval_scores_column_name: str | None = None)#

Bases: EmbeddingColumnNames

A relationship is a column that maps a prediction to another record.

Example

For example, in context retrieval from a vector store, a query is embedded and used to search for relevant records in the vector store. In this case you would add a column to the dataset that maps the query to the vector store records, e.g. [document_1, document_5, document_3].

A table view of the primary dataset could look like this:

| query | retrieved_document_ids | document_relevance_scores |
|-------|------------------------|---------------------------|
| …     | [doc_1, doc_5, doc_3]  | [0.4567, 0.3456, 0.2345]  |
| …     | [doc_1, doc_6, doc_2]  | [0.7890, 0.6789, 0.5678]  |
| …     | [doc_1, doc_6, doc_9]  | [0.9012, 0.8901, 0.0123]  |

The corresponding vector store dataset would look like this:

| id    | embedding_vector | document_text |
|-------|------------------|---------------|
| doc_1 | …                | lorem ipsum   |
| doc_2 | …                | lorem ipsum   |
| doc_3 | …                | lorem ipsum   |

To declare this relationship in the schema, you would configure the schema as follows:

>>> schema = Schema(
...     prompt_column_names=RetrievalEmbeddingColumnNames(
...         vector_column_name="query_embedding",  # hypothetical column holding the query vector; this argument is required
...         context_retrieval_ids_column_name="retrieved_document_ids",
...         context_retrieval_scores_column_name="document_relevance_scores",
...     )
... )
context_retrieval_ids_column_name: str | None = None#
context_retrieval_scores_column_name: str | None = None#
class inferences.schema.Schema(prediction_id_column_name: str | None = None, id_column_name: str | None = None, timestamp_column_name: str | None = None, feature_column_names: List[str] | None = None, tag_column_names: List[str] | None = None, prediction_label_column_name: str | None = None, prediction_score_column_name: str | None = None, actual_label_column_name: str | None = None, actual_score_column_name: str | None = None, prompt_column_names: EmbeddingColumnNames | RetrievalEmbeddingColumnNames | None = None, response_column_names: str | EmbeddingColumnNames | None = None, document_column_names: EmbeddingColumnNames | None = None, embedding_feature_column_names: Dict[str, EmbeddingColumnNames] | None = None, excluded_column_names: List[str] | None = None)#

Bases: object

actual_label_column_name: str | None = None#
actual_score_column_name: str | None = None#
asdict() Dict[str, str]#
document_column_names: EmbeddingColumnNames | None = None#
embedding_feature_column_names: Dict[str, EmbeddingColumnNames] | None = None#
excluded_column_names: List[str] | None = None#
feature_column_names: List[str] | None = None#
classmethod from_json(json_string: str) Schema#
id_column_name: str | None = None#
prediction_id_column_name: str | None = None#
prediction_label_column_name: str | None = None#
prediction_score_column_name: str | None = None#
prompt_column_names: EmbeddingColumnNames | RetrievalEmbeddingColumnNames | None = None#
replace(**changes: Any) Schema#
response_column_names: str | EmbeddingColumnNames | None = None#
tag_column_names: List[str] | None = None#
timestamp_column_name: str | None = None#
to_json() str#

Converts the schema to a JSON string.
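
A sketch of the JSON round-trip and replace, using a hypothetical column name (the import path is assumed from this page's module layout):

>>> from phoenix.inferences.schema import Schema  # assumed import path
>>> schema = Schema(prediction_label_column_name="predicted_label")
>>> restored = Schema.from_json(schema.to_json())
>>> updated = restored.replace(actual_label_column_name="label")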

inferences.validation module#

inferences.validation.validate_inferences_inputs(dataframe: DataFrame, schema: Schema) List[ValidationError]#
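A sketch of running validation directly (assuming production_dataframe and schema are defined as in the Inferences example above; the import path is assumed from this page's module layout):

>>> from phoenix.inferences.validation import validate_inferences_inputs  # assumed import path
>>> for error in validate_inferences_inputs(dataframe=production_dataframe, schema=schema):
...     print(error.error_message())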

Module contents#