# Source code for weaviate.collections.classes.internal

import datetime
import sys
import uuid as uuid_package
from dataclasses import dataclass, field
from typing import (
    Any,
    Dict,
    Generic,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
    Type,
    Union,
    cast,
)
from typing_extensions import deprecated

from typing_extensions import TypeAlias

if sys.version_info < (3, 9):
    from typing_extensions import Annotated, get_type_hints, get_origin, get_args
else:
    from typing import Annotated, get_type_hints, get_origin, get_args

from weaviate.collections.classes.generative import (
    _GenerativeConfigRuntime,
    _GenerativeConfigRuntimeOptions,
    _GroupedTask,
    _SinglePrompt,
    _to_text_array,
)
from weaviate.collections.classes.grpc import (
    QueryNested,
    _QueryReference,
    _QueryReferenceMultiTarget,
    GroupBy,
    MetadataQuery,
    METADATA,
    PROPERTIES,
    REFERENCES,
    Rerank,
)
from weaviate.collections.classes.types import (
    Properties,
    References,
    IReferences,
    TReferences,
    M,
    P,
    R,
    TProperties,
    WeaviateProperties,
    _WeaviateInput,
)
from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError
from weaviate.util import _to_beacons, _ServerVersion
from weaviate.types import INCLUDE_VECTOR, UUID, UUIDS

from weaviate.proto.v1 import search_get_pb2, generative_pb2


@dataclass
class MetadataReturn:
    """Optional metadata attached to an object that a query returned.

    Every field defaults to `None`.
    """

    creation_time: Optional[datetime.datetime] = None
    last_update_time: Optional[datetime.datetime] = None
    distance: Optional[float] = None
    certainty: Optional[float] = None
    score: Optional[float] = None
    explain_score: Optional[str] = None
    is_consistent: Optional[bool] = None
    rerank_score: Optional[float] = None

    def _is_empty(self) -> bool:
        """Return `True` only when every metadata field is `None`."""
        values = (
            self.creation_time,
            self.last_update_time,
            self.distance,
            self.certainty,
            self.score,
            self.explain_score,
            self.is_consistent,
            self.rerank_score,
        )
        # Identity check on purpose: falsy values such as 0.0, "" or False still count as set.
        return all(value is None for value in values)
@dataclass
class GroupByMetadataReturn:
    """Metadata carried by an object that a group-by query returned."""

    # The only metadata field on this class; `None` unless a value is supplied.
    distance: Optional[float] = None
@dataclass
class _Object(Generic[P, R, M]):
    """Generic base record shared by the object return types defined in this module.

    It is parameterised over the properties type `P`, the references type `R`
    and the metadata type `M`.
    """

    uuid: uuid_package.UUID
    metadata: M
    properties: P
    references: R
    # Maps a name to either a flat list of floats or a list of float lists.
    vector: Dict[str, Union[List[float], List[List[float]]]]
    collection: str
@dataclass
class Object(Generic[P, R], _Object[P, R, MetadataReturn]):
    """One Weaviate object as returned by a query in the `.query` namespace of a collection.

    Its `metadata` field is typed as `MetadataReturn`.
    """
@dataclass
class MetadataSingleObjectReturn:
    """Metadata carried by the object that the `fetch_object_by_id` query returns.

    All three fields are required; only `is_consistent` may be `None`.
    """

    creation_time: datetime.datetime
    last_update_time: datetime.datetime
    is_consistent: Optional[bool]
@dataclass
class ObjectSingleReturn(Generic[P, R], _Object[P, R, MetadataSingleObjectReturn]):
    """The one Weaviate object produced by the `fetch_object_by_id` query.

    Its `metadata` field is typed as `MetadataSingleObjectReturn`.
    """
@dataclass
class GroupByObject(Generic[P, R], _Object[P, R, GroupByMetadataReturn]):
    """One Weaviate object returned by a query that specified the `group_by` argument.

    Its `metadata` field is typed as `GroupByMetadataReturn`.
    """

    # The group this object is part of, stored as a string.
    belongs_to_group: str
# Union of the provider-specific metadata message types exposed by `generative_pb2`.
# It is used below as the type of the `metadata` field on `GenerativeSingle`
# and `GenerativeGrouped`.
GenerativeMetadata = Union[
    generative_pb2.GenerativeAnthropicMetadata,
    generative_pb2.GenerativeAnyscaleMetadata,
    generative_pb2.GenerativeAWSMetadata,
    generative_pb2.GenerativeCohereMetadata,
    generative_pb2.GenerativeDatabricksMetadata,
    generative_pb2.GenerativeDummyMetadata,
    generative_pb2.GenerativeFriendliAIMetadata,
    generative_pb2.GenerativeGoogleMetadata,
    generative_pb2.GenerativeMistralMetadata,
    generative_pb2.GenerativeNvidiaMetadata,
    generative_pb2.GenerativeOllamaMetadata,
    generative_pb2.GenerativeOpenAIMetadata,
]
@dataclass
class GenerativeSingle:
    """Generative output belonging to a single-prompt generative query.

    All three fields are required constructor arguments and each may be `None`.
    """

    debug: Optional[generative_pb2.GenerativeDebug]
    metadata: Optional[GenerativeMetadata]
    text: Optional[str]
@dataclass
class GenerativeGrouped:
    """Generative output belonging to a grouped-prompt generative query.

    Both fields are required constructor arguments and each may be `None`.
    """

    metadata: Optional[GenerativeMetadata]
    text: Optional[str]
class GenerativeObject(Generic[P, R], Object[P, R]):
    """One Weaviate object returned by a query in the `generate` namespace of a collection.

    On top of the fields inherited from `Object` it carries the `generative`
    payload and, through the deprecated `generated` property, the generated text.
    """

    __generated: Optional[str]
    generative: Optional[GenerativeSingle]

    # A hand-written __init__ is needed because of how dataclasses interact with the
    # `generated` property and the private `__generated` variable behind it.
    def __init__(
        self,
        generated: Optional[str],
        generative: Optional[GenerativeSingle],
        uuid: uuid_package.UUID,
        metadata: MetadataReturn,
        properties: P,
        references: R,
        vector: Dict[str, Union[List[float], List[List[float]]]],
        collection: str,
    ) -> None:
        # Populate the inherited `Object` fields first, then the generative extras.
        super().__init__(
            uuid=uuid,
            metadata=metadata,
            properties=properties,
            references=references,
            vector=vector,
            collection=collection,
        )
        self.generative = generative
        self.__generated = generated

    @property
    @deprecated(
        "The generated field is deprecated. Use generative.text instead.", category=None
    )  # todo: turn into a runtime warning in the future
    def generated(self) -> Optional[str]:
        """The single generated text of the object (deprecated in favour of `generative.text`)."""
        return self.__generated
class GenerativeReturn(Generic[P, R]):
    """Result container for a query in the `generate` namespace of a collection.

    It holds the returned `objects`, the grouped `generative` payload and, through
    the deprecated `generated` property, the grouped generated text.
    """

    __generated: Optional[str]
    objects: List[GenerativeObject[P, R]]
    generative: Optional[GenerativeGrouped]

    # A hand-written __init__ is needed because of how dataclasses interact with the
    # `generated` property and the private `__generated` variable behind it.
    def __init__(
        self,
        generated: Optional[str],
        objects: List[GenerativeObject[P, R]],
        generative: Optional[GenerativeGrouped],
    ) -> None:
        self.objects = objects
        self.generative = generative
        self.__generated = generated

    @property
    @deprecated(
        "The generated field is deprecated. Use generative.text instead.", category=None
    )  # todo: turn into a runtime warning in the future
    def generated(self) -> Optional[str]:
        """The grouped generated text of the objects (deprecated in favour of `generative.text`)."""
        return self.__generated
@dataclass
class Group(Generic[P, R]):
    """One group of objects produced by a group-by query.

    All fields are required constructor arguments; only `rerank_score` may be `None`.
    """

    name: str
    min_distance: float
    max_distance: float
    number_of_objects: int
    objects: List[GroupByObject[P, R]]
    rerank_score: Optional[float]
@dataclass
class GenerativeGroup(Generic[P, R], Group[P, R]):
    """One group of objects produced by a generative group-by query.

    Extends `Group` with a `generated` field.
    """

    # Generated text for this group; may be `None`.
    generated: Optional[str]
@dataclass
class GenerativeGroupByReturn(Generic[P, R]):
    """Result container for a `.generate` namespace query that specified the `group_by` argument."""

    objects: List[GroupByObject[P, R]]
    # Groups keyed by a string.
    groups: Dict[str, GenerativeGroup[P, R]]
    # Generated text for the whole result; may be `None`.
    generated: Optional[str]
@dataclass
class GroupByReturn(Generic[P, R]):
    """Result container for a `.query` namespace query that specified the `group_by` argument."""

    objects: List[GroupByObject[P, R]]
    # Groups keyed by a string.
    groups: Dict[str, Group[P, R]]
@dataclass
class QueryReturn(Generic[P, R]):
    """Result container for a query in the `.query` namespace of a collection."""

    # The objects the query returned.
    objects: List[Object[P, R]]
# Shape shared by the `aggregate`, `explore` and `get` fields of `_RawGQLReturn`:
# a dict from string keys to lists of arbitrary string-keyed dicts.
_GQLEntryReturnType: TypeAlias = Dict[str, List[Dict[str, Any]]]
@dataclass
class _RawGQLReturn:
    """Container for the sections of a raw GraphQL result.

    `aggregate`, `explore` and `get` share the `_GQLEntryReturnType` shape;
    `errors` may be `None`.
    """

    aggregate: _GQLEntryReturnType
    explore: _GQLEntryReturnType
    get: _GQLEntryReturnType
    errors: Optional[Dict[str, Any]]
class _Generative:
    """Holds the prompts of a generative query and converts them to their gRPC message.

    `single` and `grouped` are each a plain prompt string, a `_SinglePrompt` /
    `_GroupedTask` object, or `None`. `generative_provider` is optional.
    """

    single: Union[str, _SinglePrompt, None]
    grouped: Union[str, _GroupedTask, None]
    grouped_properties: Optional[List[str]]
    generative_provider: Optional[_GenerativeConfigRuntime]

    def __init__(
        self,
        single: Union[str, _SinglePrompt, None],
        grouped: Union[str, _GroupedTask, None],
        grouped_properties: Optional[List[str]],
        generative_provider: Optional[_GenerativeConfigRuntime] = None,
    ) -> None:
        self.generative_provider = generative_provider
        self.grouped_properties = grouped_properties
        self.grouped = grouped
        self.single = single

    def to_grpc(self, server_version: _ServerVersion) -> generative_pb2.GenerativeSearch:
        """Build the `GenerativeSearch` message appropriate for `server_version`.

        Servers lower than 1.27.14 get the flat prompt/task fields; all others get
        the structured `single` / `grouped` sub-messages.

        Raises:
            WeaviateUnsupportedFeatureError: if a generative provider is set while
                the server version is lower than 1.27.14.
        """
        if server_version.is_lower_than(1, 27, 14):
            return self.__to_legacy_grpc(server_version)
        return generative_pb2.GenerativeSearch(
            single=self.__single_to_grpc(),
            grouped=self.__grouped_to_grpc(),
        )

    def __to_legacy_grpc(self, server_version: _ServerVersion) -> generative_pb2.GenerativeSearch:
        """Build the message that uses the flat prompt/task fields."""
        if self.generative_provider is not None:
            raise WeaviateUnsupportedFeatureError("Dynamic RAG", str(server_version), "1.30.0")

        # Prompt objects are reduced to their prompt text; strings and None pass through.
        single_prompt: Optional[str] = (
            self.single.prompt if isinstance(self.single, _SinglePrompt) else self.single
        )

        grouped_task: Optional[str]
        if isinstance(self.grouped, _GroupedTask):
            grouped_task = self.grouped.prompt
            grouped_properties = self.grouped.non_blob_properties
        else:
            grouped_task = self.grouped
            grouped_properties = self.grouped_properties

        return generative_pb2.GenerativeSearch(
            single_response_prompt=single_prompt,
            grouped_response_task=grouped_task,
            grouped_properties=grouped_properties,
        )

    def __single_to_grpc(self) -> Optional[generative_pb2.GenerativeSearch.Single]:
        """Build the `Single` sub-message, or return `None` when there is no usable single prompt."""
        provider = self.generative_provider
        prompt = self.single
        if isinstance(prompt, str):
            return generative_pb2.GenerativeSearch.Single(
                prompt=prompt,
                queries=(
                    [provider._to_grpc(_GenerativeConfigRuntimeOptions())]
                    if provider is not None
                    else None
                ),
            )
        if isinstance(prompt, _SinglePrompt):
            return generative_pb2.GenerativeSearch.Single(
                prompt=prompt.prompt,
                # The runtime options are only built when a provider is configured.
                queries=(
                    [
                        provider._to_grpc(
                            _GenerativeConfigRuntimeOptions(
                                prompt.metadata,
                                prompt.images,
                                prompt.image_properties,
                            )
                        )
                    ]
                    if provider is not None
                    else None
                ),
                debug=prompt.debug,
            )
        return None

    def __grouped_to_grpc(self) -> Optional[generative_pb2.GenerativeSearch.Grouped]:
        """Build the `Grouped` sub-message, or return `None` when there is no usable grouped task."""
        provider = self.generative_provider
        task = self.grouped
        if isinstance(task, str):
            return generative_pb2.GenerativeSearch.Grouped(
                task=task,
                properties=(
                    _to_text_array(self.grouped_properties)
                    if self.grouped_properties is not None
                    else None
                ),
                queries=(
                    [provider._to_grpc(_GenerativeConfigRuntimeOptions())]
                    if provider is not None
                    else None
                ),
            )
        if isinstance(task, _GroupedTask):
            return generative_pb2.GenerativeSearch.Grouped(
                task=task.prompt,
                properties=_to_text_array(task.non_blob_properties),
                # The runtime options are only built when a provider is configured.
                queries=(
                    [
                        provider._to_grpc(
                            _GenerativeConfigRuntimeOptions(
                                task.metadata,
                                task.images,
                                task.image_properties,
                            )
                        )
                    ]
                    if provider is not None
                    else None
                ),
            )
        return None
class _GroupBy:
    """Internal representation of a group-by clause together with its gRPC conversion."""

    prop: str
    number_of_groups: int
    objects_per_group: int

    def __init__(self, prop: str, number_of_groups: int, objects_per_group: int) -> None:
        self.prop, self.number_of_groups, self.objects_per_group = (
            prop,
            number_of_groups,
            objects_per_group,
        )

    def to_grpc(self) -> search_get_pb2.GroupBy:
        """Build the `search_get_pb2.GroupBy` message; `prop` becomes a one-element `path`."""
        path = [self.prop]
        return search_get_pb2.GroupBy(
            path=path,
            number_of_groups=self.number_of_groups,
            objects_per_group=self.objects_per_group,
        )

    @classmethod
    def from_input(cls, group_by: Optional[GroupBy]) -> Optional["_GroupBy"]:
        """Create a `_GroupBy` from the user-facing `GroupBy`, or return `None` when it is falsy."""
        if not group_by:
            return None
        return cls(
            prop=group_by.prop,
            number_of_groups=group_by.number_of_groups,
            objects_per_group=group_by.objects_per_group,
        )
# Marker annotation for nested properties in a data model: the wrapped type tagged with
# the string "NESTED". `__is_nested` recognises a type hint as nested by this exact tag.
Nested = Annotated[P, "NESTED"]
def __is_nested(value: Any) -> bool:
    """Tell whether `value` is an `Annotated` hint with exactly one extra argument equal to "NESTED"."""
    if get_origin(value) is not Annotated:
        return False
    args = get_args(value)
    # args[0] is the wrapped type, args[1] the single piece of annotation metadata.
    return len(args) == 2 and cast(str, args[1]) == "NESTED"
def __create_nested_property_from_nested(name: str, value: Any) -> QueryNested:
    """Build a `QueryNested` called `name` from a nested type hint, recursing into nested fields.

    The first type argument of `value` is the wrapped data model; each of its type
    hints becomes either a further `QueryNested` (when it is itself nested) or the
    plain field name.
    """
    inner_type = get_args(value)[0]
    nested_properties = []
    for key, val in get_type_hints(inner_type, include_extras=True).items():
        if __is_nested(val):
            nested_properties.append(__create_nested_property_from_nested(key, val))
        else:
            nested_properties.append(key)
    return QueryNested(name=name, properties=nested_properties)
class _Reference:
    def __init__(
        self,
        target_collection: Optional[str],
        uuids: UUIDS,
    ):
        """You should not initialise this class directly. Use the `.to_multi()` class methods instead."""
        self.__uuids = uuids
        # A missing (or empty) target collection is stored as the empty string.
        self.__target_collection = target_collection or ""

    def _to_beacons(self) -> List[Dict[str, str]]:
        """Delegate to the module-level `_to_beacons` helper with the stored UUIDs and target collection."""
        return _to_beacons(self.__uuids, self.__target_collection)

    @property
    def is_one_to_many(self) -> bool:
        """Returns True if the reference is to a one-to-many references, i.e. points to more than one object."""
        uuids = self.__uuids
        # Only a list holding more than one element counts as one-to-many.
        return isinstance(uuids, list) and len(uuids) > 1
class ReferenceToMulti(_WeaviateInput):
    """Use this class when you want to insert a multi-target reference property."""

    target_collection: str
    uuids: UUIDS

    def _to_beacons(self) -> List[Dict[str, str]]:
        """Delegate to the module-level `_to_beacons` helper with this reference's UUIDs and target collection."""
        return _to_beacons(self.uuids, self.target_collection)

    @property
    def uuids_str(self) -> List[str]:
        """Returns the UUIDs as strings.

        A single UUID (given as `str` or `uuid.UUID`) yields a one-element list.
        Any other value is treated as a collection of UUIDs and converted element
        by element, so tuples and other non-list sequences are handled like lists.
        """
        # Check for the scalar forms rather than for `list`: testing `isinstance(..., list)`
        # would stringify a tuple of UUIDs as a whole instead of per element.
        if isinstance(self.uuids, (str, uuid_package.UUID)):
            return [str(self.uuids)]
        return [str(uid) for uid in self.uuids]
class _CrossReference(Generic[Properties, IReferences]):
    """Generic wrapper around the objects a cross reference points to."""

    def __init__(
        self,
        objects: Optional[List[Object[Properties, IReferences]]],
    ):
        self.__objects = objects

    @classmethod
    def _from(
        cls, objects: List[Object[Properties, IReferences]]
    ) -> "_CrossReference[Properties, IReferences]":
        """Alternate constructor that wraps the given list of objects."""
        return cls(objects)

    @property
    def objects(self) -> List[Object[Properties, IReferences]]:
        """Returns the objects of the cross reference."""
        stored = self.__objects
        # `None` and an empty list both come back as a fresh empty list.
        if stored:
            return stored
        return []
# Public, subscriptable alias of the internal `_CrossReference` generic.
CrossReference: TypeAlias = _CrossReference[Properties, IReferences]
"""Use this TypeAlias when you want to type hint a cross reference within a generic data model. If you want to define a reference property when creating your collection, use `ReferenceProperty` or `ReferencePropertyMultiTarget` instead. If you want to create a reference when inserting an object, supply the UUIDs directly or use `Reference.to_multi()` instead. Example: >>> import typing >>> import weaviate.classes as wvc >>> >>> class One(typing.TypedDict): ... prop: str >>> >>> class Two(typing.TypedDict): ... one: wvc.CrossReference[One] """

# Mapping from a string key to a cross reference whose properties are `WeaviateProperties`
# and whose own references are again `CrossReferences` (recursive via the forward reference).
CrossReferences = Mapping[str, _CrossReference[WeaviateProperties, "CrossReferences"]]

# A reference input limited to one UUID or one `ReferenceToMulti`.
SingleReferenceInput = Union[UUID, ReferenceToMulti]

# One UUID, a sequence of UUIDs, or a `ReferenceToMulti`.
ReferenceInput: TypeAlias = Union[UUID, Sequence[UUID], ReferenceToMulti]
"""This type alias is used when providing references as inputs within the `.data` namespace of a collection."""

# Mapping from a string key to a `ReferenceInput`.
ReferenceInputs: TypeAlias = Mapping[str, ReferenceInput]
"""This type alias is used when providing references as inputs within the `.data` namespace of a collection."""
@dataclass
class CrossReferenceAnnotation:
    """Options to attach, via `typing.Annotated`, to a generic cross reference property.

    The options describe what to retrieve from the cross referenced object when querying.

    Example:
        >>> import typing
        >>> import weaviate.classes as wvc
        >>>
        >>> class One(typing.TypedDict):
        ...     prop: str
        >>>
        >>> class Two(typing.TypedDict):
        ...     one: typing.Annotated[
        ...         wvc.CrossReference[One],
        ...         wvc.CrossReferenceAnnotation(include_vector=True)
        ...     ]
    """

    include_vector: bool = False
    metadata: Optional[MetadataQuery] = None
    target_collection: Optional[str] = None
def _extract_types_from_reference(
    type_: CrossReference[Properties, "References"], field: str
) -> Tuple[Type[Properties], Type["References"]]:
    """Return the type arguments of a `CrossReference[Properties, References]` hint.

    Args:
        type_: The type hint to inspect.
        field: Name of the field the hint belongs to; only used in the error message.

    Raises:
        WeaviateInvalidInputError: if the origin of `type_` is not `_CrossReference`.
    """
    # Guard clause: anything that is not a subscripted cross reference is rejected.
    if get_origin(type_) != _CrossReference:
        raise WeaviateInvalidInputError(
            f"Type: {type_} of field: {field} is not CrossReference[Properties, References]"
        )
    type_args = get_args(type_)
    return cast(Tuple[Type[Properties], Type[References]], type_args)
def _extract_types_from_annotated_reference(
    type_: Annotated[CrossReference[Properties, "References"], CrossReferenceAnnotation], field: str
) -> Tuple[Type[Properties], Type["References"]]:
    """Extract inner type from Annotated[CrossReference[Properties, References]].

    Args:
        type_: The `Annotated[...]` type hint to inspect.
        field: Name of the field the hint belongs to; only used in error messages.

    Returns:
        The type arguments of the wrapped cross reference, as returned by
        `_extract_types_from_reference`.

    Raises:
        WeaviateInvalidInputError: if `type_` is not an `Annotated` hint, or if the
            wrapped type is not a cross reference.
    """
    # Raise explicitly rather than `assert`: assertions are stripped when Python runs
    # with -O, which would let a non-annotated hint fall through to the indexing below.
    if get_origin(type_) is not Annotated:
        raise WeaviateInvalidInputError(f"field: {field} with type: {type_} must be annotated")
    # The first argument of Annotated[...] is the wrapped type; the rest is metadata.
    inner_type = cast(CrossReference[Properties, References], get_args(type_)[0])
    return _extract_types_from_reference(inner_type, field)
def __is_annotated_reference(value: Any) -> bool:
    """Tell whether `value` is an `Annotated` hint with one extra argument wrapping a `_CrossReference`."""
    if get_origin(value) is not Annotated:
        return False
    args = get_args(value)
    # args[0] is the wrapped type; it must itself be a subscripted `_CrossReference`.
    return len(args) == 2 and get_origin(args[0]) is _CrossReference
def _extract_properties_from_data_model(type_: Type[Properties]) -> PROPERTIES:
    """Turn the type hints of a properties data model into a list of query properties.

    Each hinted field contributes either a `QueryNested` (built recursively when the
    hint is marked as nested) or its plain field name.
    """
    hints = get_type_hints(type_, include_extras=True)
    extracted = []
    for key, value in hints.items():
        if __is_nested(value):
            extracted.append(__create_nested_property_from_nested(key, value))
        else:
            extracted.append(key)
    return extracted
def _extract_references_from_data_model(type_: Type["References"]) -> Optional[REFERENCES]:
    """Turn the type hints of a references data model into a list of query references.

    Annotated cross references go through `__create_link_to_from_annotated_reference`,
    every other hint through `__create_link_to_from_reference`.

    Returns:
        The list of references, or `None` when the data model has no type hints.
    """
    refs = []
    for key, value in get_type_hints(type_, include_extras=True).items():
        if __is_annotated_reference(value):
            refs.append(__create_link_to_from_annotated_reference(key, value))
        else:
            refs.append(__create_link_to_from_reference(key, value))
    if not refs:
        return None
    return refs
# Accepted forms of a "return properties" argument: a PROPERTIES value, a bool,
# or a data-model class (`Type[TProperties]`).
ReturnProperties: TypeAlias = Union[PROPERTIES, bool, Type[TProperties]]
# Accepted forms of a "return references" argument: one `_QueryReference`, a sequence
# of them, or a data-model class (`Type[TReferences]`).
ReturnReferences: TypeAlias = Union[
    Union[_QueryReference, Sequence[_QueryReference]], Type[TReferences]
]
@dataclass
class _QueryOptions:
    """Boolean flags describing which parts of a query result are included."""

    include_metadata: bool
    include_properties: bool
    include_references: bool
    include_vector: bool
    is_group_by: bool

    @classmethod
    def from_input(
        cls,
        return_metadata: Optional[METADATA],
        return_properties: Optional[ReturnProperties[Any]],
        include_vector: INCLUDE_VECTOR,
        collection_references: Optional[Type[Any]],
        query_references: Optional[ReturnReferences[Any]],
        rerank: Optional[Rerank] = None,
        group_by: Optional[GroupBy] = None,
    ) -> "_QueryOptions":
        """Derive the flags from the arguments of a query call.

        - metadata is included when `return_metadata` or `rerank` is given;
        - properties are included unless `return_properties` is an empty list;
        - references are included when either reference argument is given;
        - a non-bool `include_vector` counts as `True`;
        - `is_group_by` is set when `group_by` is given.
        """
        wants_metadata = return_metadata is not None or rerank is not None
        # Only an explicitly empty list switches properties off.
        wants_properties = (
            not isinstance(return_properties, list) or len(return_properties) != 0
        )
        wants_references = collection_references is not None or query_references is not None
        wants_vector = include_vector if isinstance(include_vector, bool) else True
        return cls(
            include_metadata=wants_metadata,
            include_properties=wants_properties,
            include_references=wants_references,
            include_vector=wants_vector,
            is_group_by=group_by is not None,
        )
# Return type of a single-object fetch: an `ObjectSingleReturn` in any of the supported
# properties/references combinations, or `None`.
QuerySingleReturn = Union[
    ObjectSingleReturn[Properties, References],
    ObjectSingleReturn[TProperties, TReferences],
    ObjectSingleReturn[Properties, CrossReferences],
    ObjectSingleReturn[Properties, TReferences],
    ObjectSingleReturn[TProperties, References],
    ObjectSingleReturn[TProperties, CrossReferences],
    None,
]
# `GenerativeGroupByReturn` in each supported properties/references combination.
GenerativeGroupByReturnType = Union[
    GenerativeGroupByReturn[Properties, References],
    GenerativeGroupByReturn[TProperties, TReferences],
    GenerativeGroupByReturn[Properties, CrossReferences],
    GenerativeGroupByReturn[Properties, TReferences],
    GenerativeGroupByReturn[TProperties, References],
    GenerativeGroupByReturn[TProperties, CrossReferences],
]
# `GenerativeReturn` in each supported properties/references combination.
GenerativeReturnType = Union[
    GenerativeReturn[Properties, References],
    GenerativeReturn[TProperties, TReferences],
    GenerativeReturn[Properties, CrossReferences],
    GenerativeReturn[Properties, TReferences],
    GenerativeReturn[TProperties, References],
    GenerativeReturn[TProperties, CrossReferences],
]
# The way in which generic type aliases work requires that all the generic arguments
# are listed first and in the order of their appearance in the typealias.
# GenerativeNearMediaReturn[Properties, References, TProperties, TReferences] is the intended use and so
# these four generics appear first. All others resolve afterwards correctly
GenerativeNearMediaReturnType = Union[
    GenerativeReturnType[Properties, References, TProperties, TReferences],
    GenerativeGroupByReturnType[Properties, References, TProperties, TReferences],
]
"""@Deprecated: Use `GenerativeSearchReturnType` instead."""
# Either the plain or the group-by generative return type.
GenerativeSearchReturnType = Union[
    GenerativeReturnType[Properties, References, TProperties, TReferences],
    GenerativeGroupByReturnType[Properties, References, TProperties, TReferences],
]
# `QueryReturn` in each supported properties/references combination.
QueryReturnType = Union[
    QueryReturn[Properties, References],
    QueryReturn[TProperties, TReferences],
    QueryReturn[Properties, CrossReferences],
    QueryReturn[Properties, TReferences],
    QueryReturn[TProperties, References],
    QueryReturn[TProperties, CrossReferences],
]
# `GroupByReturn` in each supported properties/references combination.
GroupByReturnType = Union[
    GroupByReturn[Properties, References],
    GroupByReturn[TProperties, TReferences],
    GroupByReturn[Properties, CrossReferences],
    GroupByReturn[Properties, TReferences],
    GroupByReturn[TProperties, References],
    GroupByReturn[TProperties, CrossReferences],
]
# Either the plain or the group-by query return type.
QuerySearchReturnType = Union[
    QueryReturnType[Properties, References, TProperties, TReferences],
    GroupByReturnType[Properties, References, TProperties, TReferences],
]
# Same union as `QuerySearchReturnType`, kept under its older name.
QueryNearMediaReturnType = Union[
    QueryReturnType[Properties, References, TProperties, TReferences],
    GroupByReturnType[Properties, References, TProperties, TReferences],
]
"""@Deprecated: Use `QuerySearchReturnType` instead."""