Source code for weaviate.collections.classes.grpc

from dataclasses import dataclass, replace
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from typing import (
    Any,
    Dict,
    Generic,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Type,
    Union,
    cast,
)

from pydantic import ConfigDict, Field
from typing_extensions import ClassVar, TypeAlias, TypeGuard, TypeVar

from weaviate.collections.classes.filters import FilterReturn
from weaviate.collections.classes.types import _WeaviateInput
from weaviate.exceptions import WeaviateInvalidInputError
from weaviate.proto.v1 import base_search_pb2
from weaviate.str_enum import BaseEnum
from weaviate.types import INCLUDE_VECTOR, NUMBER, UUID
from weaviate.util import _ServerVersion



[docs]
class HybridFusion(str, BaseEnum):
    """Define how the query's hybrid fusion operation should be performed."""

    # Members are plain strings (the class also derives from `str`).
    # NOTE(review): the values look like the fusion-type enum names of the wire protocol — confirm.
    RANKED = "FUSION_TYPE_RANKED"
    RELATIVE_SCORE = "FUSION_TYPE_RELATIVE_SCORE"




[docs]
class Move:
    """Define how the query's move operation should be performed."""

    def __init__(
        self,
        force: float,
        objects: Optional[Union[List[UUID], UUID]] = None,
        concepts: Optional[Union[List[str], str]] = None,
    ):
        """Store the move force together with its target objects and/or concepts.

        Args:
            force: The force of the move; stored unchanged on `self.force`.
            objects: One UUID or a list of UUIDs. Every entry is stored as `str(entry)`.
            concepts: One concept or a list of concepts. A single value is wrapped in a list.

        Raises:
            ValueError: If both `objects` and `concepts` are `None` or an empty list.
        """
        objects_missing = objects is None or (isinstance(objects, list) and len(objects) == 0)
        concepts_missing = concepts is None or (isinstance(concepts, list) and len(concepts) == 0)
        if objects_missing and concepts_missing:
            raise ValueError("Either objects or concepts need to be given")

        self.force = force

        # Normalise both inputs to "None or list" so readers never see a scalar.
        if objects is None:
            self.__objects = None
        elif isinstance(objects, list):
            self.__objects = [str(entry) for entry in objects]
        else:
            self.__objects = [str(objects)]

        if concepts is None:
            self.__concepts = None
        elif isinstance(concepts, list):
            self.__concepts = concepts
        else:
            self.__concepts = [concepts]

    @property
    def _objects_list(self) -> Optional[List[str]]:
        """The stringified object ids, or `None` if no objects were given."""
        return self.__objects

    @property
    def _concepts_list(self) -> Optional[List[str]]:
        """The concepts as a list, or `None` if no concepts were given."""
        return self.__concepts

[docs]
    def _to_gql_payload(self) -> dict:
        payload: dict = {"force": self.force}
        if self.__objects is not None:
            payload["objects"] = [{"id": obj} for obj in self.__objects]
        if self.__concepts is not None:
            payload["concepts"] = self.__concepts
        return payload





[docs]
class MetadataQuery(_WeaviateInput):
    """Define which metadata should be returned in the query's results.

    Every flag defaults to `False`; set a flag to `True` to request that piece of metadata.
    """

    # Object timestamps.
    creation_time: bool = Field(default=False)
    last_update_time: bool = Field(default=False)
    # Search-related values.
    distance: bool = Field(default=False)
    certainty: bool = Field(default=False)
    score: bool = Field(default=False)
    explain_score: bool = Field(default=False)
    is_consistent: bool = Field(default=False)
    # Query profiling has performance overhead; `full()` leaves it off, `full_with_profile()` turns it on.
    query_profile: bool = Field(default=False)


[docs]
    @classmethod
    def full(cls) -> "MetadataQuery":
        """Return a MetadataQuery with every field except `query_profile` set to True.

        NOTE: `query_profile` is left at its default because it adds performance overhead.
        Use `full_with_profile()` to include it.
        """
        enabled = dict.fromkeys(
            (
                "creation_time",
                "last_update_time",
                "distance",
                "certainty",
                "score",
                "explain_score",
                "is_consistent",
            ),
            True,
        )
        return cls(**enabled)



[docs]
    @classmethod
    def full_with_profile(cls) -> "MetadataQuery":
        """Return a MetadataQuery with all fields set to True, including query profiling.

        Query profiling adds per-shard execution timing breakdowns to the response
        but has performance overhead. Requires Weaviate >= 1.36.9.
        """
        enabled = dict.fromkeys(
            (
                "creation_time",
                "last_update_time",
                "distance",
                "certainty",
                "score",
                "explain_score",
                "is_consistent",
                "query_profile",
            ),
            True,
        )
        return cls(**enabled)





[docs]
@dataclass
class _MetadataQuery:
    """Internal metadata request, built from a public `MetadataQuery` by `from_public`."""

    # Set from `include_vector` when that is a bool (see `from_public`).
    vector: bool
    # Not set by `from_public`, so it keeps its default of True there.
    uuid: bool = True
    # Filled from `MetadataQuery.creation_time` / `last_update_time`.
    creation_time_unix: bool = False
    last_update_time_unix: bool = False
    # The remaining flags mirror the public fields of the same name.
    distance: bool = False
    certainty: bool = False
    score: bool = False
    explain_score: bool = False
    is_consistent: bool = False
    # Set from `include_vector` when that is a list (see `from_public`).
    vectors: Optional[List[str]] = None
    query_profile: bool = False


[docs]
    @classmethod
    def from_public(
        cls, public: Optional[MetadataQuery], include_vector: INCLUDE_VECTOR
    ) -> "_MetadataQuery":
        """Translate the public metadata request into its internal form.

        Args:
            public: The user-facing request, or `None` to keep every metadata flag at its default.
            include_vector: A bool is stored in `vector`, a list is stored in `vectors`;
                the other field falls back to `False` / `None` respectively.

        Returns:
            The populated `_MetadataQuery`.
        """
        vector_flag = include_vector if isinstance(include_vector, bool) else False
        vector_names = include_vector if isinstance(include_vector, list) else None

        if public is None:
            return cls(vector=vector_flag, vectors=vector_names)

        return cls(
            vector=vector_flag,
            vectors=vector_names,
            creation_time_unix=public.creation_time,
            last_update_time_unix=public.last_update_time,
            distance=public.distance,
            certainty=public.certainty,
            score=public.score,
            explain_score=public.explain_score,
            is_consistent=public.is_consistent,
            query_profile=public.query_profile,
        )




# Accepted forms of a metadata request: a list of field names (the literals match the
# field names of `MetadataQuery`) or a `MetadataQuery` instance.
METADATA = Union[
    List[
        Literal[
            "creation_time",
            "last_update_time",
            "distance",
            "certainty",
            "score",
            "explain_score",
            "is_consistent",
            "query_profile",
        ]
    ],
    MetadataQuery,
]



[docs]
class Generate(_WeaviateInput):
    """Define how the query's RAG capabilities should be performed."""

    # All three fields are optional and default to None.
    single_prompt: Optional[str] = Field(default=None)
    grouped_task: Optional[str] = Field(default=None)
    # NOTE(review): presumably the properties used for the grouped task — confirm against the caller.
    grouped_properties: Optional[List[str]] = Field(default=None)




[docs]
class GroupBy(_WeaviateInput):
    """Define how the query's group-by operation should be performed."""

    # All three fields are required; none has a default.
    prop: str
    objects_per_group: int
    number_of_groups: int




[docs]
class _Sort(_WeaviateInput):
    """A single sort criterion, as collected by `_Sorting`."""

    # Name to sort on; `_Sorting` also passes the special names `_id`,
    # `_creationTimeUnix` and `_lastUpdateTimeUnix` here.
    prop: str
    # Sort direction; ascending unless set to False.
    ascending: bool = Field(default=True)




[docs]
class _Sorting:
    def __init__(self) -> None:
        self.sorts: List[_Sort] = []


[docs]
    def by_property(self, name: str, ascending: bool = True) -> "_Sorting":
        """Append a sort on the object property `name` and return this chain."""
        criterion = _Sort(prop=name, ascending=ascending)
        self.sorts.append(criterion)
        return self



[docs]
    def by_id(self, ascending: bool = True) -> "_Sorting":
        """Append a sort on the object's ID (`_id`) and return this chain."""
        criterion = _Sort(prop="_id", ascending=ascending)
        self.sorts.append(criterion)
        return self



[docs]
    def by_creation_time(self, ascending: bool = True) -> "_Sorting":
        """Append a sort on the object's creation time (`_creationTimeUnix`) and return this chain."""
        criterion = _Sort(prop="_creationTimeUnix", ascending=ascending)
        self.sorts.append(criterion)
        return self



[docs]
    def by_update_time(self, ascending: bool = True) -> "_Sorting":
        """Append a sort on the object's last update time (`_lastUpdateTimeUnix`) and return this chain."""
        criterion = _Sort(prop="_lastUpdateTimeUnix", ascending=ascending)
        self.sorts.append(criterion)
        return self




# Public name for `_Sorting`; the static methods of `Sort` are annotated as returning it.
Sorting = _Sorting
"""The type returned by the `Sort` class to be used when defining programmatic sort chains."""



[docs]
class Sort:
    """Define how the query's sort operation should be performed using the available static methods."""

    def __init__(self) -> None:
        """Always raise: `Sort` only groups static factory methods."""
        reason = "Sort cannot be instantiated. Use the static methods to create a sorter."
        raise TypeError(reason)


[docs]
    @staticmethod
    def by_property(name: str, ascending: bool = True) -> Sorting:
        """Start a sort chain ordered by the object property `name`."""
        chain = _Sorting()
        return chain.by_property(name=name, ascending=ascending)



[docs]
    @staticmethod
    def by_id(ascending: bool = True) -> Sorting:
        """Start a sort chain ordered by the object's ID."""
        chain = _Sorting()
        return chain.by_id(ascending=ascending)



[docs]
    @staticmethod
    def by_creation_time(ascending: bool = True) -> Sorting:
        """Start a sort chain ordered by the object's creation time."""
        chain = _Sorting()
        return chain.by_creation_time(ascending=ascending)



[docs]
    @staticmethod
    def by_update_time(ascending: bool = True) -> Sorting:
        """Start a sort chain ordered by the object's last update time."""
        chain = _Sorting()
        return chain.by_update_time(ascending=ascending)





[docs]
class Rerank(_WeaviateInput):
    """Define how the query's rerank operation should be performed."""

    # Required: the property used for reranking.
    prop: str
    # Optional, defaults to None.
    query: Optional[str] = Field(default=None)




[docs]
@dataclass
class _TimeDecayFunction:
    """Internal payload of a time-decay boost condition; built by `Boost.time_decay`."""

    property: str  # noqa: A003
    # `Boost.time_decay` stores "now" or the output of `_decay_origin_to_str` here.
    origin: str
    # Duration strings as produced by `_decay_duration_to_str` (e.g. "7d").
    scale: str
    offset: Optional[str] = None
    curve: Optional["_BoostCurve"] = None
    # Filled from the `decay` argument of `Boost.time_decay`.
    decay_value: Optional[float] = None




[docs]
@dataclass
class _NumericDecayFunction:
    """Internal payload of a numeric-decay boost condition; built by `Boost.numeric_decay`."""

    property: str  # noqa: A003
    # `Boost.numeric_decay` converts origin, scale and offset with `float()` before storing them.
    origin: float
    scale: float
    offset: Optional[float] = None
    curve: Optional["_BoostCurve"] = None
    # Filled from the `decay` argument of `Boost.numeric_decay`.
    decay_value: Optional[float] = None




[docs]
@dataclass
class _PropertyValueFunction:
    """Internal payload of a property-value boost condition; built by `Boost.numeric_property`."""

    # Filled from the `name` argument of `Boost.numeric_property`.
    property: str  # noqa: A003
    modifier: Optional["_BoostModifier"] = None




[docs]
@dataclass
class _BoostCondition:
    """One condition of a boost.

    Each `Boost` factory method in this file sets exactly one of `filter`, `time_decay`,
    `numeric_decay` and `property_value`; the dataclass itself does not enforce that.
    """

    filter: Optional[FilterReturn] = None  # noqa: A003
    time_decay: Optional[_TimeDecayFunction] = None
    numeric_decay: Optional[_NumericDecayFunction] = None
    property_value: Optional[_PropertyValueFunction] = None
    # Per-condition weight; `Boost.blend` copies a sub-boost's weight here when this is unset.
    weight: Optional[float] = None




[docs]
@dataclass
class _Boost:
    """Internal representation of a boost, as returned by the `Boost` factory methods."""

    # A single condition for the simple factories; `Boost.blend` concatenates those of its inputs.
    conditions: List[_BoostCondition]
    # Both are passed through unchanged from the factory arguments of the same name.
    weight: Optional[float] = None
    depth: Optional[int] = None



# Return type of every `Boost` factory method; an alias of the internal `_Boost` dataclass.
BoostReturn: TypeAlias = _Boost



[docs]
def _decay_duration_to_str(val: Union[str, timedelta]) -> str:
    """Convert a decay duration (scale/offset) to the duration string format expected by the server, e.g. "7d"."""
    if isinstance(val, timedelta):
        total_seconds = val.total_seconds()
        if total_seconds >= 86400 and total_seconds % 86400 == 0:
            return f"{int(total_seconds // 86400)}d"
        if total_seconds >= 3600 and total_seconds % 3600 == 0:
            return f"{int(total_seconds // 3600)}h"
        if total_seconds >= 60 and total_seconds % 60 == 0:
            return f"{int(total_seconds // 60)}m"
        if total_seconds == int(total_seconds):
            return f"{int(total_seconds)}s"
        return f"{total_seconds}s"
    return val




[docs]
def _decay_origin_to_str(val: Union[str, datetime]) -> str:
    """Convert a decay origin to the RFC3339 string format expected by the server, or pass through "now"."""
    if isinstance(val, datetime):
        if val.tzinfo is None:
            val = val.replace(tzinfo=timezone.utc)
        return val.isoformat()
    return val




[docs]
class _BoostCurve(str, BaseEnum):
    """The decay curve used by a distance-based boost (`time_decay`, `numeric_decay`).

    Each curve scores 1 at the origin and falls to the `decay` value at `scale` distance.
    Exposed publicly as `Boost.Curve`.

    Attributes:
        EXPONENTIAL: Heavy-tailed decay that halves geometrically. The default if no curve is set.
        GAUSSIAN: Bell-shaped decay with a sharp falloff once past `scale`.
        LINEAR: Straight-line decay that reaches zero beyond `scale`.
    """

    # NOTE(review): the string values are presumably the identifiers the server expects — confirm.
    EXPONENTIAL = "exp"
    GAUSSIAN = "gauss"
    LINEAR = "linear"




[docs]
class _BoostModifier(str, BaseEnum):
    """The transform applied to a numeric property's value in `numeric_property` before normalization.

    Use a modifier to reduce the impact of large property values. If no modifier is
    set, the raw value is used. Exposed publicly as `Boost.Modifier`.

    Attributes:
        LOG1P: Apply `log(1 + value)` to strongly reduce the impact of large values.
        SQRT: Apply `sqrt(value)` to mildly reduce the impact of large values.
    """

    # NOTE(review): the string values are presumably the identifiers the server expects — confirm.
    LOG1P = "log1p"
    SQRT = "sqrt"




[docs]
class Boost:
    """Soft-rank search results: promote or demote objects without removing them from the result set.

    A boost is a query-time rescorer. The primary search (vector, hybrid, or BM25) fetches a pool
    of candidates, the boost re-scores them against its conditions, and the results are re-sorted.
    Unlike a filter, a boost never excludes objects: non-matching objects stay in the result set
    but rank lower.

    Build a boost with one of the static methods and pass it to a query or generate method via
    `boost=`:

    - `filter()`: promote or demote objects matching a filter condition.
    - `time_decay()`: rank by recency, decaying with distance from an origin date.
    - `numeric_decay()`: rank by closeness to a target numeric value.
    - `numeric_property()`: rank by a numeric property's raw value.
    - `blend()`: combine several of the above, each with its own weight.

    Available in Weaviate `v1.38` and later.
    """

    Curve = _BoostCurve
    Modifier = _BoostModifier

    def __init__(self) -> None:
        """Always raise: `Boost` only groups static factory methods."""
        reason = "Boost cannot be instantiated. Use the static methods to create a boost."
        raise TypeError(reason)


[docs]
    @staticmethod
    def filter(  # noqa: A003
        filter: FilterReturn,  # noqa: A002
        *,
        weight: Optional[float] = None,
        depth: Optional[int] = None,
    ) -> BoostReturn:
        """Promote or demote objects that match a filter condition.

        Matching objects score 1 and non-matching objects score 0, so this behaves like a soft
        `WHERE`: objects that do not match are demoted but remain in the result set.

        Args:
            filter: The filter condition, built the same way as for the `filters=` parameter.
                Only `Equal`, `NotEqual`, the comparison operators, and `And`/`Or`/`Not` are supported.
            weight: How much the boost influences the final score, in `[0, 1]`: the result is
                `(1 - weight)` of the primary score plus `weight` of the boost score. `0` is a no-op.
                If not set, the server default of `0.5` is used.
            depth: How many candidates the primary search fetches for the boost to re-score.
                Higher values let the boost reorder more results, at the cost of performance.
                If not set, the server default (`100`) is used.

        Returns:
            A boost holding a single filter condition.
        """
        condition = _BoostCondition(filter=filter)
        return _Boost(conditions=[condition], weight=weight, depth=depth)



[docs]
    @staticmethod
    def time_decay(
        property: str,  # noqa: A002
        *,
        origin: Optional[Union[str, datetime]] = None,
        scale: Union[str, timedelta],
        offset: Optional[Union[str, timedelta]] = None,
        curve: Optional[_BoostCurve] = None,
        decay: Optional[float] = None,
        weight: Optional[float] = None,
        depth: Optional[int] = None,
    ) -> BoostReturn:
        """Rank objects by recency: the score decays with distance from an origin date.

        An object exactly at the origin scores 1. As the property value moves away from the
        origin the score drops along the chosen `curve`. Use this to favour more recent (or
        near-a-date) objects.

        Args:
            property: The name of the `date` property to measure distance from.
            origin: The reference point. Use `"now"` for the current time or a `datetime` for a
                specific time. Defaults to `"now"`.
            scale: The distance from the origin at which the score equals `decay`. Use a `timedelta`
                (e.g. `timedelta(days=7)`) or a duration string such as `"7d"`, `"24h"`, `"30m"`.
            offset: Objects within this distance from the origin keep the full score of 1; decay
                starts beyond it. Accepts the same types as `scale`. If not set, no offset is applied.
            curve: The decay curve: `Boost.Curve.EXPONENTIAL`, `Boost.Curve.GAUSSIAN`, or
                `Boost.Curve.LINEAR`. If not set, the server default (`EXPONENTIAL`) is used.
            decay: The score at `scale` distance from the origin, in `(0, 1]`. If not set, the
                server default of `0.5` is used.
            weight: How much the boost influences the final score, in `[0, 1]`: the result is
                `(1 - weight)` of the primary score plus `weight` of the boost score. `0` is a no-op.
                If not set, the server default of `0.5` is used.
            depth: How many candidates the primary search fetches for the boost to re-score.
                Higher values let the boost reorder more results, at the cost of performance.
                If not set, the server default (`100`) is used.

        Returns:
            A boost holding a single time-decay condition.
        """
        # Normalise the user-facing values to the strings stored on the decay function.
        origin_text = "now" if origin is None else _decay_origin_to_str(origin)
        scale_text = _decay_duration_to_str(scale)
        offset_text = None if offset is None else _decay_duration_to_str(offset)

        decay_function = _TimeDecayFunction(
            property=property,
            origin=origin_text,
            scale=scale_text,
            offset=offset_text,
            curve=curve,
            decay_value=decay,
        )
        condition = _BoostCondition(time_decay=decay_function)
        return _Boost(conditions=[condition], weight=weight, depth=depth)



[docs]
    @staticmethod
    def numeric_decay(
        property: str,  # noqa: A002
        *,
        origin: float,
        scale: float,
        offset: Optional[float] = None,
        curve: Optional[_BoostCurve] = None,
        decay: Optional[float] = None,
        weight: Optional[float] = None,
        depth: Optional[int] = None,
    ) -> BoostReturn:
        """Rank objects by closeness to a target numeric value: the score decays with distance from it.

        Choose this when "closer to X is better" (e.g. prefer prices near $50, apartments near
        80 m2). Both an origin and a scale are required. For plain "higher is better" ranking
        without an origin, use `Boost.numeric_property()` instead.

        Args:
            property: The name of the numeric (`int`/`number`) property to measure distance from.
            origin: The target value; objects closest to it score highest.
            scale: The distance from the origin at which the score equals `decay`.
            offset: Objects within this distance from the origin keep the full score of 1; decay
                starts beyond it. If not set, no offset is applied.
            curve: The decay curve: `Boost.Curve.EXPONENTIAL`, `Boost.Curve.GAUSSIAN`, or
                `Boost.Curve.LINEAR`. If not set, the server default (`EXPONENTIAL`) is used.
            decay: The score at `scale` distance from the origin, in `(0, 1]`. If not set, the
                server default of `0.5` is used.
            weight: How much the boost influences the final score, in `[0, 1]`: the result is
                `(1 - weight)` of the primary score plus `weight` of the boost score. `0` is a no-op.
                If not set, the server default of `0.5` is used.
            depth: How many candidates the primary search fetches for the boost to re-score.
                Higher values let the boost reorder more results, at the cost of performance.
                If not set, the server default (`100`) is used.

        Returns:
            A boost holding a single numeric-decay condition.
        """
        # Coerce to float up front (ints are accepted); conversion order matches the signature.
        origin_value = float(origin)
        scale_value = float(scale)
        offset_value = None if offset is None else float(offset)

        decay_function = _NumericDecayFunction(
            property=property,
            origin=origin_value,
            scale=scale_value,
            offset=offset_value,
            curve=curve,
            decay_value=decay,
        )
        condition = _BoostCondition(numeric_decay=decay_function)
        return _Boost(conditions=[condition], weight=weight, depth=depth)



[docs]
    @staticmethod
    def numeric_property(
        name: str,
        *,
        modifier: Optional[_BoostModifier] = None,
        weight: Optional[float] = None,
        depth: Optional[int] = None,
    ) -> BoostReturn:
        """Rank objects by a numeric property's raw value: higher values rank higher.

        Choose this for simple proportional ranking (e.g. popularity count, review score) when
        neither an origin nor a scale is needed. For distance-based decay from a target value,
        use `Boost.numeric_decay()` instead.

        Only numeric (`int`/`number`) properties are supported. To rank by other property types,
        use `Boost.filter()`.

        Args:
            name: The name of the numeric property to use as a ranking signal.
            modifier: A transform applied to the value before normalization: `Boost.Modifier.LOG1P`
                or `Boost.Modifier.SQRT`, both of which dampen values that span many orders of
                magnitude. If not set, the raw value is used.
            weight: How much the boost influences the final score, in `[0, 1]`: the result is
                `(1 - weight)` of the primary score plus `weight` of the boost score. `0` is a no-op.
                If not set, the server default of `0.5` is used.
            depth: How many candidates the primary search fetches for the boost to re-score.
                Higher values let the boost reorder more results, at the cost of performance.
                If not set, the server default (`100`) is used.

        Returns:
            A boost holding a single property-value condition.
        """
        value_function = _PropertyValueFunction(property=name, modifier=modifier)
        condition = _BoostCondition(property_value=value_function)
        return _Boost(conditions=[condition], weight=weight, depth=depth)



[docs]
    @staticmethod
    def blend(
        boosts: Union[BoostReturn, Sequence[BoostReturn]],
        *,
        weight: Optional[float] = None,
        depth: Optional[int] = None,
    ) -> BoostReturn:
        """Combine several boosts into one, each weighted relative to the others.

        The `weight` of each input boost is turned into a per-condition weight that balances the
        conditions against each other (e.g. recency twice as important as popularity). A condition
        that already carries its own weight keeps it. A per-condition weight defaults to `1.0` and
        may be negative to actively demote matching objects. The `weight` argument of `blend()` is
        separate: it sets the overall strength of the combined boost. A boost may carry at most 20
        conditions in total.

        Args:
            boosts: One or more boosts created via `Boost.filter()`, `Boost.time_decay()`,
                `Boost.numeric_decay()`, or `Boost.numeric_property()`.
            weight: How much the combined boost influences the final score, in `[0, 1]`: the result
                is `(1 - weight)` of the primary score plus `weight` of the boost score. `0` is a
                no-op. If not set, the server default of `0.5` is used.
            depth: How many candidates the primary search fetches for the boost to re-score.
                Higher values let the boost reorder more results, at the cost of performance.
                If not set, the server default (`100`) is used.

        Returns:
            A single boost whose conditions are those of all inputs, in input order.

        Raises:
            WeaviateInvalidInputError: If no boosts are provided, or if any input boost has its own
                `depth` set (set `depth` here on `blend()` instead).
        """
        members: Sequence[BoostReturn] = [boosts] if isinstance(boosts, _Boost) else boosts
        if len(members) == 0:
            raise WeaviateInvalidInputError("Boost.blend() requires at least one boost.")
        # Validate every input before building anything.
        if any(member.depth is not None for member in members):
            raise WeaviateInvalidInputError(
                "Cannot set `depth` on sub-boosts passed to `blend()`. Use the top-level `depth` parameter instead."
            )

        merged: List[_BoostCondition] = []
        for member in members:
            inherited = member.weight
            # `replace` copies the condition, so the caller's boost objects are left untouched.
            merged.extend(
                replace(cond, weight=inherited)
                if cond.weight is None and inherited is not None
                else cond
                for cond in member.conditions
            )
        return _Boost(conditions=merged, weight=weight, depth=depth)





[docs]
@dataclass
class MMR:
    """Define MMR (Maximal Marginal Relevance) diversity selection.

    Instances are normally created through `Diversity.mmr(...)`.

    Attributes:
        limit: Optional number of candidates to consider for diversification.
        balance: Optional MMR lambda in [0.0, 1.0] — 1.0 is pure relevance, 0.0 is pure diversity.
    """

    # Both fields default to None; the range of `balance` is not validated in this class.
    limit: Optional[int] = None
    balance: Optional[float] = None




[docs]
class Diversity:
    """Use this factory class to apply diversity selection to search results via MMR."""

    def __init__(self) -> None:
        """Always raise: `Diversity` only groups static factory methods."""
        reason = "Diversity cannot be instantiated directly. Use Diversity.mmr(...)."
        raise TypeError(reason)


[docs]
    @staticmethod
    def mmr(limit: Optional[int] = None, balance: Optional[float] = None) -> MMR:
        """Create a Maximal Marginal Relevance diversity selection.

        Args:
            limit: Number of candidates to consider for diversification.
            balance: MMR lambda in [0.0, 1.0] — 1.0 pure relevance, 0.0 pure diversity.

        Returns:
            An `MMR` instance carrying both values unchanged.
        """
        selection = MMR(limit=limit, balance=balance)
        return selection





[docs]
@dataclass
class BM25OperatorOptions:
    """Base class of the BM25 operator options; subclasses assign the class-level `operator`."""

    # replace with ClassVar[base_search_pb2.SearchOperatorOptions.Operator] once python 3.10 is removed
    # Declared as a ClassVar, so it is not a dataclass field / constructor argument.
    operator: ClassVar[Any]




[docs]
@dataclass
class BM25OperatorOr(BM25OperatorOptions):
    """Define the 'Or' operator for keyword queries."""

    # Class-level constant shared by all instances (see the ClassVar on the base class).
    operator = base_search_pb2.SearchOperatorOptions.OPERATOR_OR
    # The only constructor argument; `BM25OperatorFactory.or_` passes its `minimum_match` here.
    minimum_should_match: int




[docs]
@dataclass
class BM25OperatorAnd(BM25OperatorOptions):
    """Define the 'And' operator for keyword queries."""

    # Class-level constant; this dataclass declares no fields of its own.
    operator = base_search_pb2.SearchOperatorOptions.OPERATOR_AND




[docs]
class BM25OperatorFactory:
    """Define how the BM25 query's token matching should be performed."""

    def __init__(self) -> None:
        """Always raise: this class only groups static factory methods."""
        reason = "BM25Operator cannot be instantiated. Use the static methods to create."
        raise TypeError(reason)


[docs]
    @staticmethod
    def or_(minimum_match: int) -> BM25OperatorOptions:
        """Use the 'Or' operator for keyword queries, where at least a minimum number of tokens must match.

        Note that the query is tokenized using the respective tokenization method of each property.

        Args:
            minimum_match: The minimum number of keyword tokens (excluding stopwords) that must match for an object to be considered a match.

        Returns:
            A `BM25OperatorOr` with `minimum_should_match` set to `minimum_match`.
        """
        options = BM25OperatorOr(minimum_should_match=minimum_match)
        return options



[docs]
    @staticmethod
    def and_() -> BM25OperatorOptions:
        """Use the 'And' operator for keyword queries, where all query tokens must match.

        Note that the query is tokenized using the respective tokenization method of each property.

        Returns:
            A `BM25OperatorAnd` instance.
        """
        options = BM25OperatorAnd()
        return options




OneDimensionalVectorType = Sequence[NUMBER]
"""Represents a one-dimensional vector, e.g. one produced by the `Configure.Vectors.text2vec_jinaai()` module"""
TwoDimensionalVectorType = Sequence[Sequence[NUMBER]]
"""Represents a two-dimensional vector, e.g. one produced by the `Configure.MultiVectors.text2vec_jinaai()` module"""

# Either vector shape.
PrimitiveVectorType = Union[OneDimensionalVectorType, TwoDimensionalVectorType]


# Constrained type variable: exactly one of the two vector shapes, used by `_ListOfVectorsQuery`.
V = TypeVar("V", OneDimensionalVectorType, TwoDimensionalVectorType)



[docs]
class _ListOfVectorsQuery(_WeaviateInput, Generic[V]):
    """Several query vectors plus a tag for their shape; built by `NearVector.list_of_vectors`."""

    # "1D" or "2D"; the static type-guard methods of this class compare against these literals.
    dimensionality: Literal["1D", "2D"]
    # The query vectors themselves.
    vectors: Sequence[V]


[docs]
    @staticmethod
    def is_one_dimensional(
        self_: "_ListOfVectorsQuery",
    ) -> TypeGuard["_ListOfVectorsQuery[OneDimensionalVectorType]"]:
        """Type guard: True when `self_` is tagged as holding one-dimensional vectors."""
        tag = self_.dimensionality
        return tag == "1D"



[docs]
    @staticmethod
    def is_two_dimensional(
        self_: "_ListOfVectorsQuery",
    ) -> TypeGuard["_ListOfVectorsQuery[TwoDimensionalVectorType]"]:
        """Type guard: True when `self_` is tagged as holding two-dimensional vectors."""
        tag = self_.dimensionality
        return tag == "2D"




# Public name for `_ListOfVectorsQuery`.
ListOfVectorsQuery = _ListOfVectorsQuery
"""Define a many-vectors query to be used within a near vector search, i.e. multiple vectors over a single-vector space."""


# Three accepted forms: a single 1D vector, a single 2D vector, or a mapping from a string
# key to a vector or to a `ListOfVectorsQuery`.
# NOTE(review): the mapping keys are presumably target vector names — confirm against the callers.
NearVectorInputType = Union[
    OneDimensionalVectorType,
    TwoDimensionalVectorType,
    Mapping[
        str,
        Union[
            OneDimensionalVectorType,
            TwoDimensionalVectorType,
            ListOfVectorsQuery[OneDimensionalVectorType],
            ListOfVectorsQuery[TwoDimensionalVectorType],
        ],
    ],
]
"""Define the input types that can be used in a near vector search"""



[docs]
class NearVector:
    """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.

    Use the static method `list_of_vectors(...)` to build the query object.
    """


[docs]
    @staticmethod
    def list_of_vectors(*vectors: V) -> _ListOfVectorsQuery[V]:
        """Define a many-vectors query to be used within a near vector search, i.e. multiple vectors over a single-vector space.

        The dimensionality is detected from the first element of the first vector only:
        if `len()` works on it the vectors are treated as 2D, otherwise as 1D.

        Args:
            vectors: The query vectors, all of the same shape.

        Raises:
            WeaviateInvalidInputError: If no vector is given or the first vector is empty.
        """
        if len(vectors) == 0 or len(vectors[0]) == 0:
            raise WeaviateInvalidInputError(f"At least one vector must be given, got: {vectors}")

        dimensionality: Literal["1D", "2D"]
        try:
            # Numbers have no len(); nested sequences do.
            len(cast(Sequence[TwoDimensionalVectorType], vectors)[0][0])
        except TypeError:
            dimensionality = "1D"
        else:
            dimensionality = "2D"
        return _ListOfVectorsQuery[V](dimensionality=dimensionality, vectors=vectors)





[docs]
class _HybridNearBase(_WeaviateInput):
    """Shared fields of the near-* sub-searches used in hybrid queries."""

    # Arbitrary (non-pydantic) field types are allowed and unknown fields are rejected.
    model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")

    # Both thresholds are optional and default to None.
    distance: Optional[float] = None
    certainty: Optional[float] = None




[docs]
class _HybridNearText(_HybridNearBase):
    """Near-text sub-search of a hybrid query."""

    # One query text or several.
    text: Union[str, List[str]]
    # Optional `Move` instances; `Move` is a plain class, not a pydantic model.
    move_to: Optional[Move] = None
    move_away: Optional[Move] = None




[docs]
class _HybridNearVector:  # can't be a Pydantic model because of validation issues parsing numpy, pd, pl arrays/series
    """Near-vector sub-search of a hybrid query, kept as a plain (unvalidated) container."""

    vector: NearVectorInputType
    distance: Optional[float]
    certainty: Optional[float]

    def __init__(
        self,
        *,
        vector: NearVectorInputType,
        distance: Optional[float] = None,
        certainty: Optional[float] = None,
    ) -> None:
        """Store the keyword-only arguments unchanged on the instance."""
        self.vector, self.distance, self.certainty = vector, distance, certainty



# Accepted vector inputs of a hybrid query: raw near-vector input or one of the two sub-search objects.
HybridVectorType = Union[NearVectorInputType, _HybridNearText, _HybridNearVector]



[docs]
class _MultiTargetVectorJoinEnum(BaseEnum):
    """Define how multi target vector searches should be combined."""

    # Values come from `auto()`; `_MultiTargetVectorJoin.to_grpc_target_vector` maps each
    # member to a `base_search_pb2.COMBINATION_METHOD_TYPE_*` constant.
    SUM = auto()
    AVERAGE = auto()
    MINIMUM = auto()
    RELATIVE_SCORE = auto()
    MANUAL_WEIGHTS = auto()




[docs]
@dataclass
class _MultiTargetVectorJoin:
    """A multi-target-vector combination, as created by the `TargetVectors` static methods."""

    combination: _MultiTargetVectorJoinEnum
    target_vectors: List[str]
    # Per-target weight, or a list of weights for one target. When set,
    # `to_grpc_target_vector` derives the target list from these keys instead of `target_vectors`.
    weights: Optional[Dict[str, Union[float, List[float]]]] = None


[docs]
    def to_grpc_target_vector(self, version: _ServerVersion) -> base_search_pb2.Targets:
        """Convert this join into a `base_search_pb2.Targets` message.

        Without weights the stored `target_vectors` are sent as they are. With weights, one
        `WeightsForTarget` entry is emitted per weight and the target name is repeated once
        per entry, so a target with a list of weights appears several times.

        Args:
            version: The server version; not read by this method.

        Returns:
            The populated `Targets` message.
        """
        join = self.combination
        if join == _MultiTargetVectorJoinEnum.SUM:
            method = base_search_pb2.COMBINATION_METHOD_TYPE_SUM
        elif join == _MultiTargetVectorJoinEnum.AVERAGE:
            method = base_search_pb2.COMBINATION_METHOD_TYPE_AVERAGE
        elif join == _MultiTargetVectorJoinEnum.MANUAL_WEIGHTS:
            method = base_search_pb2.COMBINATION_METHOD_TYPE_MANUAL
        elif join == _MultiTargetVectorJoinEnum.RELATIVE_SCORE:
            method = base_search_pb2.COMBINATION_METHOD_TYPE_RELATIVE_SCORE
        else:
            assert join == _MultiTargetVectorJoinEnum.MINIMUM
            method = base_search_pb2.COMBINATION_METHOD_TYPE_MIN

        names: List[str] = self.target_vectors
        per_target: List[base_search_pb2.WeightsForTarget] = []
        if self.weights is not None:
            # Rebuild the name list so it lines up one-to-one with the weight entries.
            names = []
            for name, value in self.weights.items():
                for single in value if isinstance(value, list) else [value]:
                    per_target.append(base_search_pb2.WeightsForTarget(target=name, weight=single))
                    names.append(name)

        return base_search_pb2.Targets(
            target_vectors=names,
            weights_for_targets=per_target,
            combination=method,
        )




# A target vector specification: one name, a list of names, or a join built via `TargetVectors`.
TargetVectorJoinType = Union[str, List[str], _MultiTargetVectorJoin]



[docs]
class TargetVectors:
    """Define how the distances from different target vectors should be combined using the available methods.

    Each static method (`sum`, `average`, `minimum`, `manual_weights`, `relative_score`) returns a
    `_MultiTargetVectorJoin` describing one combination strategy.
    """


[docs]
    @staticmethod
    def sum(target_vectors: List[str]) -> _MultiTargetVectorJoin:  # noqa: A003
        """Build a join that adds up the distances of the given target vectors.

        Args:
            target_vectors: The target vectors whose distances are summed.

        Returns:
            A `_MultiTargetVectorJoin` using the `SUM` combination and no weights.
        """
        strategy = _MultiTargetVectorJoinEnum.SUM
        return _MultiTargetVectorJoin(combination=strategy, target_vectors=target_vectors)



[docs]
    @staticmethod
    def average(target_vectors: List[str]) -> _MultiTargetVectorJoin:
        """Build a join that averages the distances of the given target vectors.

        Args:
            target_vectors: The target vectors whose distances are averaged.

        Returns:
            A `_MultiTargetVectorJoin` using the `AVERAGE` combination and no weights.
        """
        # Positional order follows the dataclass fields: combination, then target_vectors.
        return _MultiTargetVectorJoin(_MultiTargetVectorJoinEnum.AVERAGE, target_vectors)



[docs]
    @staticmethod
    def minimum(target_vectors: List[str]) -> _MultiTargetVectorJoin:
        """Build a join that keeps the smallest distance among the given target vectors.

        Args:
            target_vectors: The target vectors whose distances are compared.

        Returns:
            A `_MultiTargetVectorJoin` using the `MINIMUM` combination and no weights.
        """
        minimum_join = _MultiTargetVectorJoin(
            target_vectors=target_vectors,
            combination=_MultiTargetVectorJoinEnum.MINIMUM,
        )
        return minimum_join



[docs]
    @staticmethod
    def manual_weights(weights: Dict[str, Union[float, List[float]]]) -> _MultiTargetVectorJoin:
        """Build a join that sums the distances of the target vectors using manual weights.

        Args:
            weights: The weight, or list of weights, to use for each target vector. The
                dictionary keys are used as the target vectors of the join.

        Returns:
            A `_MultiTargetVectorJoin` using the `MANUAL_WEIGHTS` combination.
        """
        # Iterating a dict yields its keys, i.e. the weighted target vectors.
        weighted_targets = list(weights)
        return _MultiTargetVectorJoin(
            combination=_MultiTargetVectorJoinEnum.MANUAL_WEIGHTS,
            target_vectors=weighted_targets,
            weights=weights,
        )



[docs]
    @staticmethod
    def relative_score(weights: Dict[str, Union[float, List[float]]]) -> _MultiTargetVectorJoin:
        """Build a join that combines the distances of the target vectors using score fusion.

        Args:
            weights: The weight, or list of weights, to use for each target vector. The
                dictionary keys are used as the target vectors of the join.

        Returns:
            A `_MultiTargetVectorJoin` using the `RELATIVE_SCORE` combination.
        """
        strategy = _MultiTargetVectorJoinEnum.RELATIVE_SCORE
        # The keys of the mapping double as the list of target vectors.
        return _MultiTargetVectorJoin(
            combination=strategy, target_vectors=list(weights), weights=weights
        )





[docs]
class HybridVector:
    """Use this factory class to define the appropriate classes needed when defining near text and near vector sub-searches in hybrid queries.

    `near_text` returns a `_HybridNearText` and `near_vector` returns a `_HybridNearVector`.
    """


[docs]
    @staticmethod
    def near_text(
        query: Union[str, List[str]],
        *,
        certainty: Optional[float] = None,
        distance: Optional[float] = None,
        move_to: Optional[Move] = None,
        move_away: Optional[Move] = None,
    ) -> _HybridNearText:
        """Create the near text sub-search of a hybrid query.

        Args:
            query: The text to search for, given as one string or as a list of strings.
            certainty: The minimum similarity score to return. The server's default certainty
                applies when this is not specified.
            distance: The maximum distance to search. The server's default distance applies when
                this is not specified.
            move_to: The concepts that the search should be moved towards in the vector space.
            move_away: The concepts that the search should be moved away from in the vector space.

        Returns:
            A `_HybridNearText` object to be used in the `vector` parameter of the `query.hybrid` and
            `generate.hybrid` search methods.
        """
        # The public `query` argument is stored under the `text` field of the returned object.
        near_text_search = _HybridNearText(
            text=query,
            distance=distance,
            certainty=certainty,
            move_to=move_to,
            move_away=move_away,
        )
        return near_text_search



[docs]
    @staticmethod
    def near_vector(
        vector: NearVectorInputType,
        *,
        certainty: Optional[float] = None,
        distance: Optional[float] = None,
    ) -> _HybridNearVector:
        """Create the near vector sub-search of a hybrid query.

        Args:
            vector: The vector input to search with, in any form accepted by `NearVectorInputType`.
            certainty: The minimum similarity score to return. The server's default certainty
                applies when this is not specified.
            distance: The maximum distance to search. The server's default distance applies when
                this is not specified.

        Returns:
            A `_HybridNearVector` object to be used in the `vector` parameter of the `query.hybrid`
            and `generate.hybrid` search methods.
        """
        near_vector_search = _HybridNearVector(
            vector=vector,
            distance=distance,
            certainty=certainty,
        )
        return near_vector_search





[docs]
class _QueryReference(_WeaviateInput):
    # Shared field set of the query-time reference classes; `QueryReference` and
    # `_QueryReferenceMultiTarget` both derive from it.

    # Required; presumably the name of the cross-reference property to follow (confirm with callers).
    link_on: str
    # Passed, together with `return_metadata`, to `_MetadataQuery.from_public`.
    include_vector: INCLUDE_VECTOR = Field(default=False)
    # Public metadata request; converted to its internal form by `_return_metadata`.
    return_metadata: Optional[MetadataQuery] = Field(default=None)
    # Forward reference to `PROPERTIES` (defined later in this module); a bool or None also fits.
    return_properties: Union["PROPERTIES", bool, None] = Field(default=None)
    # Forward reference to `REFERENCES`, which is itself built from the reference classes.
    return_references: Optional["REFERENCES"] = Field(default=None)

    def __hash__(self) -> int:
        """Hash on the string form so that instances can be stored in sets."""
        rendered = str(self)
        return hash(rendered)

    @property
    def _return_metadata(self) -> _MetadataQuery:
        """Return the internal metadata query built from `return_metadata` and `include_vector`."""
        requested_metadata = self.return_metadata
        wants_vector = self.include_vector
        return _MetadataQuery.from_public(requested_metadata, wants_vector)




[docs]
class _QueryReferenceMultiTarget(_QueryReference):
    # Adds a required target collection to the fields inherited from `_QueryReference`.
    # This class is exposed publicly as `QueryReference.MultiTarget`.
    target_collection: str




[docs]
class QueryReference(_QueryReference):
    """Define a query-time reference to a single-target property when querying through cross-references."""

    # Class-level handle on the multi-target variant, reachable as `QueryReference.MultiTarget`.
    MultiTarget: ClassVar[Type[_QueryReferenceMultiTarget]] = _QueryReferenceMultiTarget
    """Define a query-time reference to a multi-target property when querying through cross-references."""




[docs]
class QueryNested(_WeaviateInput):
    """Define the query-time return properties of a nested property."""

    # Name identifying the nested property.
    name: str
    # What to return from inside it. This is a forward reference to `PROPERTIES`, which can
    # again contain `QueryNested` entries.
    properties: "PROPERTIES"

    def __hash__(self) -> int:
        """Hash on the string form so that instances can be stored in sets."""
        rendered = str(self)
        return hash(rendered)



# A single query-time reference, either the base form or the multi-target form.
REFERENCE = Union[_QueryReference, _QueryReferenceMultiTarget]
# One reference or a sequence of them. `_QueryReference.return_references` names this alias
# through a string forward reference.
REFERENCES = Union[Sequence[REFERENCE], REFERENCE]

# A returned property is either given as a string or described by a `QueryNested`.
PROPERTY = Union[str, QueryNested]
# One property or a sequence of them. `_QueryReference.return_properties` and
# `QueryNested.properties` name this alias through string forward references.
PROPERTIES = Union[Sequence[PROPERTY], PROPERTY]

# Same members as `PROPERTIES`, except that the collection form is a `List` rather than a
# `Sequence`.
NestedProperties = Union[List[Union[str, QueryNested]], str, QueryNested]



[docs]
class NearMediaType(str, Enum):
    """The different types of media that can be used in a `near_media` query to leverage the `multi2vec-*` modules.

    All are available when using `multi2vec-bind` but only `IMAGE` is available when using `multi2vec-clip`.
    """

    # Each value is the lowercase media name. Because the enum also subclasses `str`, a member
    # compares equal to that plain string (e.g. `NearMediaType.IMAGE == "image"`).
    AUDIO = "audio"
    DEPTH = "depth"
    IMAGE = "image"
    IMU = "imu"
    THERMAL = "thermal"
    VIDEO = "video"