inferences.schema#

class EmbeddingColumnNames(vector_column_name: str, raw_data_column_name: str | None = None, link_to_data_column_name: str | None = None)#

Bases: Dict[str, Any]

A dataclass to hold the column names for the embedding features. An embedding feature is a feature that is represented by a vector. The vector is a representation of unstructured data, such as text or an image.

raw_data_column_name: str | None = None#
vector_column_name: str#
class RetrievalEmbeddingColumnNames(vector_column_name: str, raw_data_column_name: str | None = None, link_to_data_column_name: str | None = None, context_retrieval_ids_column_name: str | None = None, context_retrieval_scores_column_name: str | None = None)#

Bases: EmbeddingColumnNames

A relationship is a column that maps a prediction to another record.

Example

For example, in context retrieval from a vector store, a query is embedded and used to search for relevant records in a vector store. In this case, you would add a column to the dataset that maps the query to the vector store records. E.g. [document_1, document_5, document_3]

A table view of the primary dataset could look like this:

| query | retrieved_document_ids | document_relevance_scores |
|-------|------------------------|---------------------------|
| …     | [doc_1, doc_5, doc_3]  | [0.4567, 0.3456, 0.2345]  |
| …     | [doc_1, doc_6, doc_2]  | [0.7890, 0.6789, 0.5678]  |
| …     | [doc_1, doc_6, doc_9]  | [0.9012, 0.8901, 0.0123]  |

The corresponding vector store dataset would look like this:

| id    | embedding_vector | document_text |
|-------|------------------|---------------|
| doc_1 | …                | lorem ipsum   |
| doc_2 | …                | lorem ipsum   |
| doc_3 | …                | lorem ipsum   |

To declare this relationship in the schema, you would configure the schema as follows:

>>> schema = Schema(
...     prompt_column_names=RetrievalEmbeddingColumnNames(
...         context_retrieval_ids_column_name="retrieved_document_ids",
...         context_retrieval_scores_column_name="document_relevance_scores",
...     )
... )
context_retrieval_ids_column_name: str | None = None#
context_retrieval_scores_column_name: str | None = None#
class Schema(prediction_id_column_name: str | None = None, id_column_name: str | None = None, timestamp_column_name: str | None = None, feature_column_names: List[str] | None = None, tag_column_names: List[str] | None = None, prediction_label_column_name: str | None = None, prediction_score_column_name: str | None = None, actual_label_column_name: str | None = None, actual_score_column_name: str | None = None, prompt_column_names: inferences.schema.EmbeddingColumnNames | inferences.schema.RetrievalEmbeddingColumnNames | NoneType = None, response_column_names: str | inferences.schema.EmbeddingColumnNames | NoneType = None, document_column_names: inferences.schema.EmbeddingColumnNames | None = None, embedding_feature_column_names: Optional[Dict[str, ForwardRef('EmbeddingColumnNames')]] = None, excluded_column_names: List[str] | None = None)#

Bases: object

actual_label_column_name: str | None = None#
actual_score_column_name: str | None = None#
asdict() Dict[str, str]#
document_column_names: EmbeddingColumnNames | None = None#
embedding_feature_column_names: Dict[str, EmbeddingColumnNames] | None = None#
excluded_column_names: List[str] | None = None#
feature_column_names: List[str] | None = None#
classmethod from_json(json_string: str) Schema#
id_column_name: str | None = None#
prediction_id_column_name: str | None = None#
prediction_label_column_name: str | None = None#
prediction_score_column_name: str | None = None#
prompt_column_names: EmbeddingColumnNames | RetrievalEmbeddingColumnNames | None = None#
replace(**changes: Any) Schema#
response_column_names: str | EmbeddingColumnNames | None = None#
tag_column_names: List[str] | None = None#
timestamp_column_name: str | None = None#
to_json() str#

Converts the schema to a JSON string.