Source code for weaviate.collections.classes.config_vector_index

from abc import abstractmethod
from enum import Enum
from typing import Any, Dict, Optional, overload

from pydantic import Field
from typing_extensions import deprecated

from weaviate.collections.classes.config_base import (
    _ConfigCreateModel,
    _ConfigUpdateModel,
    _QuantizerConfigCreate,
    _QuantizerConfigUpdate,
)
from weaviate.collections.classes.config_vectorizers import VectorDistances
from weaviate.str_enum import BaseEnum
from weaviate.warnings import _Warnings



[docs]
class VectorFilterStrategy(str, Enum):
    """Set the strategy when doing a filtered HNSW search.

    Members mix in `str`, so each one compares equal to its raw string value.

    Attributes:
        SWEEPING: Do normal ANN search and skip nodes. Value: `"sweeping"`.
        ACORN: Multi-hop search to find new candidates matching the filter. Value: `"acorn"`.
    """

    SWEEPING = "sweeping"
    ACORN = "acorn"




[docs]
class VectorIndexType(str, Enum):
    """The available vector index types in Weaviate.

    Members mix in `str`, so each one compares equal to its raw string value.

    Attributes:
        HNSW: Hierarchical Navigable Small World (HNSW) index.
        FLAT: Flat index.
        DYNAMIC: Dynamic index; its create-time config carries a `threshold`
            together with nested `hnsw` and `flat` settings.
    """

    HNSW = "hnsw"
    FLAT = "flat"
    DYNAMIC = "dynamic"




[docs]
class _MultiVectorConfigCreateBase(_ConfigCreateModel):
    """Common base of the multi-vector create-time config models in this module."""

    # Defaults to True when the caller does not supply a value.
    enabled: bool = Field(default=True)




[docs]
class _MultiVectorEncodingConfigCreate(_MultiVectorConfigCreateBase):
    """Abstract base for multi-vector encoding configurations.

    Concrete encodings implement `encoding_name()`; the returned string is used
    by `_VectorIndexConfigCreate._to_dict` as the key under which the encoding's
    own settings are nested inside the `"multivector"` entry.
    """

    # Re-declared with the same default as the parent class.
    enabled: bool = Field(default=True)

    @staticmethod
    @abstractmethod
    def encoding_name() -> str:
        """Return the key identifying this encoding in the serialized config."""
        ...





[docs]
class _MuveraConfigCreate(_MultiVectorEncodingConfigCreate):
    """Create-time settings for the `muvera` multi-vector encoding.

    The three fields declare no default here, so callers pass each one
    explicitly; `None` is an accepted value for all of them.
    """

    ksim: Optional[int]
    dprojections: Optional[int]
    repetitions: Optional[int]

    @staticmethod
    def encoding_name() -> str:
        """Return `"muvera"`, the key used for this encoding when serializing."""
        return "muvera"





[docs]
class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase):
    """Create-time multi-vector settings attached to a vector index config."""

    # Excluded from the model's own serialization (`exclude=True`);
    # `_VectorIndexConfigCreate._to_dict` writes it out under the encoding's name instead.
    encoding: Optional[_MultiVectorEncodingConfigCreate] = Field(exclude=True)
    # Plain string; the `multi_vector()` factory in this module passes
    # `MultiVectorAggregation.<member>.value` here.
    aggregation: Optional[str]




[docs]
class _VectorIndexConfigCreate(_ConfigCreateModel):
    """Abstract base for create-time vector index configurations.

    Subclasses add their index-specific fields and implement
    `vector_index_type()`. `_to_dict` extends the parent's dictionary with the
    quantizer, the distance metric and the multi-vector encoding.
    """

    distance: Optional[VectorDistances]
    multivector: Optional[_MultiVectorConfigCreate]
    # Excluded from the automatic dump; `_to_dict` re-adds it under the
    # quantizer's own name (e.g. "pq", "bq").
    quantizer: Optional[_QuantizerConfigCreate] = Field(exclude=True)

    @staticmethod
    @abstractmethod
    def vector_index_type() -> VectorIndexType:
        """Return the index type this configuration describes."""
        ...

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize this configuration to a plain dictionary.

        Starts from the parent class's `_to_dict()` result and then:

        * stores the quantizer's dictionary under `quantizer.quantizer_name()`,
        * writes `distance` as the string form of the enum value,
        * nests the multi-vector encoding's dictionary under
          `encoding.encoding_name()` inside the `"multivector"` entry.

        Returns:
            The dictionary representation of this configuration.
        """
        ret_dict = super()._to_dict()
        if self.quantizer is not None:
            ret_dict[self.quantizer.quantizer_name()] = self.quantizer._to_dict()
        if self.distance is not None:
            ret_dict["distance"] = str(self.distance.value)
        if self.multivector is not None and self.multivector.encoding is not None:
            # NOTE(review): relies on the parent dump already containing a
            # "multivector" dict whenever `multivector` is set - confirm in
            # `_ConfigCreateModel._to_dict`.
            ret_dict["multivector"][self.multivector.encoding.encoding_name()] = (
                self.multivector.encoding._to_dict()
            )

        return ret_dict





[docs]
class _VectorIndexConfigUpdate(_ConfigUpdateModel):
    """Abstract base for update-time vector index configurations."""

    # Excluded from the model's automatic serialization (`exclude=True`).
    # NOTE(review): how the quantizer is merged into the existing schema is
    # decided outside this class - see `_ConfigUpdateModel`.
    quantizer: Optional[_QuantizerConfigUpdate] = Field(exclude=True)

    @staticmethod
    @abstractmethod
    def vector_index_type() -> VectorIndexType:
        """Return the index type this update applies to."""
        ...





[docs]
class _VectorIndexConfigSkipCreate(_VectorIndexConfigCreate):
    """Create-time configuration carrying `skip=True`.

    Produced by `_VectorIndex.none()`, whose docstring describes it as the
    configuration for not indexing vectors.
    """

    skip: bool = True

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.HNSW`.

        NOTE(review): HNSW is reported even though indexing is skipped;
        presumably a placeholder type - confirm against the server contract.
        """
        return VectorIndexType.HNSW





[docs]
class _VectorIndexConfigHNSWCreate(_VectorIndexConfigCreate):
    """Create-time configuration of an HNSW vector index.

    Field names are camelCase; `_VectorIndex.hnsw()` maps its snake_case
    arguments onto them. No field declares a default, and `None` is accepted
    for each.
    """

    cleanupIntervalSeconds: Optional[int]
    dynamicEfMin: Optional[int]
    dynamicEfMax: Optional[int]
    dynamicEfFactor: Optional[int]
    efConstruction: Optional[int]
    ef: Optional[int]
    filterStrategy: Optional[VectorFilterStrategy]
    flatSearchCutoff: Optional[int]
    maxConnections: Optional[int]
    vectorCacheMaxObjects: Optional[int]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.HNSW`."""
        return VectorIndexType.HNSW





[docs]
class _VectorIndexConfigFlatCreate(_VectorIndexConfigCreate):
    """Create-time configuration of a flat vector index."""

    vectorCacheMaxObjects: Optional[int]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.FLAT`."""
        return VectorIndexType.FLAT





[docs]
class _VectorIndexConfigHNSWUpdate(_VectorIndexConfigUpdate):
    """Update-time configuration of an HNSW vector index.

    Declares a subset of the create-time HNSW fields: `cleanupIntervalSeconds`,
    `efConstruction` and `maxConnections` are absent from this model.
    """

    dynamicEfMin: Optional[int]
    dynamicEfMax: Optional[int]
    dynamicEfFactor: Optional[int]
    ef: Optional[int]
    filterStrategy: Optional[VectorFilterStrategy]
    flatSearchCutoff: Optional[int]
    vectorCacheMaxObjects: Optional[int]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.HNSW`."""
        return VectorIndexType.HNSW





[docs]
class _VectorIndexConfigFlatUpdate(_VectorIndexConfigUpdate):
    """Update-time configuration of a flat vector index."""

    vectorCacheMaxObjects: Optional[int]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.FLAT`."""
        return VectorIndexType.FLAT





[docs]
class _VectorIndexConfigDynamicCreate(_VectorIndexConfigCreate):
    """Create-time configuration of a dynamic vector index.

    Carries a `threshold` plus optional nested HNSW and flat configurations.
    """

    threshold: Optional[int]
    hnsw: Optional[_VectorIndexConfigHNSWCreate]
    flat: Optional[_VectorIndexConfigFlatCreate]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.DYNAMIC`."""
        return VectorIndexType.DYNAMIC

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize this configuration to a plain dictionary.

        Starts from the parent's `_to_dict()` result, then overwrites the
        `"hnsw"` and `"flat"` entries with the nested configs' own `_to_dict()`
        output (so their quantizer/distance handling is applied) and sets
        `"threshold"` when it is not None.

        Returns:
            The dictionary representation of this configuration.
        """
        ret_dict = super()._to_dict()
        if self.hnsw is not None:
            ret_dict["hnsw"] = self.hnsw._to_dict()
        if self.flat is not None:
            ret_dict["flat"] = self.flat._to_dict()
        if self.threshold is not None:
            ret_dict["threshold"] = self.threshold

        return ret_dict





[docs]
class _VectorIndexConfigDynamicUpdate(_VectorIndexConfigUpdate):
    """Update-time configuration of a dynamic vector index.

    Mirrors the create-time model: a `threshold` plus optional nested HNSW and
    flat update configurations.
    """

    threshold: Optional[int]
    hnsw: Optional[_VectorIndexConfigHNSWUpdate]
    flat: Optional[_VectorIndexConfigFlatUpdate]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        """Return `VectorIndexType.DYNAMIC`."""
        return VectorIndexType.DYNAMIC





[docs]
class PQEncoderType(str, BaseEnum):
    """Type of the PQ encoder.

    Attributes:
        KMEANS: K-means encoder. Value: `"kmeans"`.
        TILE: Tile encoder. Value: `"tile"`.
    """

    KMEANS = "kmeans"
    TILE = "tile"




[docs]
class PQEncoderDistribution(str, BaseEnum):
    """Distribution of the PQ encoder.

    Attributes:
        LOG_NORMAL: Log-normal distribution. Value: `"log-normal"`.
        NORMAL: Normal distribution. Value: `"normal"`.
    """

    LOG_NORMAL = "log-normal"
    NORMAL = "normal"




[docs]
class MultiVectorAggregation(str, BaseEnum):
    """Aggregation type to use for multivector indices.

    Attributes:
        MAX_SIM: Maximum similarity. Value: `"maxSim"`.
    """

    MAX_SIM = "maxSim"




[docs]
class _PQEncoderConfigCreate(_ConfigCreateModel):
    """Create-time settings of the PQ encoder."""

    # Named `type_` in Python; the serialization alias emits it as "type"
    # when the model is dumped by alias.
    type_: Optional[PQEncoderType] = Field(serialization_alias="type")
    distribution: Optional[PQEncoderDistribution]




[docs]
class _PQEncoderConfigUpdate(_ConfigUpdateModel):
    """Update-time settings of the PQ encoder."""

    type_: Optional[PQEncoderType]
    distribution: Optional[PQEncoderDistribution]

    def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Write the non-None encoder settings into an existing schema dict.

        Must be done manually since Pydantic does not work well with type and type_.
        Errors shadowing type occur if we want to use type as a field name.

        Args:
            schema: The existing encoder schema. It is modified in place.

        Returns:
            The same `schema` object, with `"type"` and/or `"distribution"`
            replaced by the string values of the fields that are set.
        """
        if self.type_ is not None:
            schema["type"] = str(self.type_.value)
        if self.distribution is not None:
            schema["distribution"] = str(self.distribution.value)
        return schema





[docs]
class _PQConfigCreate(_QuantizerConfigCreate):
    """Create-time product quantization (PQ) configuration."""

    # The only field with a default; `_VectorIndexQuantizer.pq()` never sets it.
    bitCompression: Optional[bool] = Field(default=None)
    centroids: Optional[int]
    # Unlike the other fields, the encoder config itself is not Optional.
    encoder: _PQEncoderConfigCreate
    segments: Optional[int]
    trainingLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"pq"`, the key this quantizer is serialized under."""
        return "pq"





[docs]
class _BQConfigCreate(_QuantizerConfigCreate):
    """Create-time binary quantization (BQ) configuration."""

    cache: Optional[bool]
    rescoreLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"bq"`, the key this quantizer is serialized under."""
        return "bq"





[docs]
class _SQConfigCreate(_QuantizerConfigCreate):
    """Create-time scalar quantization (SQ) configuration."""

    cache: Optional[bool]
    rescoreLimit: Optional[int]
    trainingLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"sq"`, the key this quantizer is serialized under."""
        return "sq"





[docs]
class _RQConfigCreate(_QuantizerConfigCreate):
    """Create-time rotational quantization (RQ) configuration."""

    bits: Optional[int]
    rescoreLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"rq"`, the key this quantizer is serialized under."""
        return "rq"





[docs]
class _PQConfigUpdate(_QuantizerConfigUpdate):
    """Update-time product quantization (PQ) configuration."""

    # The only field with a default value.
    bitCompression: Optional[bool] = Field(default=None)
    centroids: Optional[int]
    enabled: Optional[bool]
    segments: Optional[int]
    trainingLimit: Optional[int]
    encoder: Optional[_PQEncoderConfigUpdate]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"pq"`, the key identifying this quantizer."""
        return "pq"





[docs]
class _BQConfigUpdate(_QuantizerConfigUpdate):
    """Update-time binary quantization (BQ) configuration."""

    enabled: Optional[bool]
    rescoreLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"bq"`, the key identifying this quantizer."""
        return "bq"





[docs]
class _RQConfigUpdate(_QuantizerConfigUpdate):
    """Update-time rotational quantization (RQ) configuration."""

    enabled: Optional[bool]
    rescoreLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"rq"`, the key identifying this quantizer."""
        return "rq"





[docs]
class _SQConfigUpdate(_QuantizerConfigUpdate):
    """Update-time scalar quantization (SQ) configuration."""

    enabled: Optional[bool]
    rescoreLimit: Optional[int]
    trainingLimit: Optional[int]

    @staticmethod
    def quantizer_name() -> str:
        """Return `"sq"`, the key identifying this quantizer."""
        return "sq"





[docs]
class _VectorIndexMultivectorEncoding:
    """Factory namespace for multi-vector encoding configurations."""

    @staticmethod
    def muvera(
        ksim: Optional[int] = None,
        dprojections: Optional[int] = None,
        repetitions: Optional[int] = None,
    ) -> _MultiVectorEncodingConfigCreate:
        """Create a `_MuveraConfigCreate` object with `enabled=True`.

        Args:
            ksim: Forwarded unchanged to the `ksim` field.
            dprojections: Forwarded unchanged to the `dprojections` field.
            repetitions: Forwarded unchanged to the `repetitions` field.

        Returns:
            The muvera encoding configuration.
        """
        return _MuveraConfigCreate(
            enabled=True,
            ksim=ksim,
            dprojections=dprojections,
            repetitions=repetitions,
        )





[docs]
class _VectorIndexMultiVector:
    """Factory namespace for multi-vector index settings.

    `Encoding` exposes the encoding factories (e.g. `Encoding.muvera()`).
    """

    Encoding = _VectorIndexMultivectorEncoding

    # `@overload` is kept outermost and `@deprecated` innermost, matching the
    # `hnsw` overloads in this module and the ordering shown in PEP 702, so the
    # deprecation is attached to the overload signature itself.
    @overload
    @staticmethod
    @deprecated(
        'Using the "encoding" argument is deprecated. Instead, specify it at the top-level when creating your `vector_config`'
    )
    def multi_vector(
        encoding: _MultiVectorEncodingConfigCreate,
        aggregation: Optional[MultiVectorAggregation] = None,
    ) -> _MultiVectorConfigCreate: ...

    @overload
    @staticmethod
    def multi_vector(
        encoding: Optional[_MultiVectorEncodingConfigCreate] = None,
        aggregation: Optional[MultiVectorAggregation] = None,
    ) -> _MultiVectorConfigCreate: ...

    @staticmethod
    def multi_vector(
        encoding: Optional[_MultiVectorEncodingConfigCreate] = None,
        aggregation: Optional[MultiVectorAggregation] = None,
    ) -> _MultiVectorConfigCreate:
        """Create a `_MultiVectorConfigCreate` object.

        Args:
            encoding: Deprecated. When not None, a warning is emitted via
                `_Warnings.encoding_in_multi_vector_config()`; the value is
                still stored on the returned config.
            aggregation: Aggregation to use; its string value is stored, or
                None when omitted.

        Returns:
            The multi-vector configuration.
        """
        if encoding is not None:
            _Warnings.encoding_in_multi_vector_config()
        return _MultiVectorConfigCreate(
            encoding=encoding,
            aggregation=aggregation.value if aggregation is not None else None,
        )





[docs]
class _VectorIndexQuantizer:
    """Factory namespace for create-time quantizer configurations.

    Each method maps its snake_case arguments onto the camelCase fields of the
    matching `_*ConfigCreate` model.
    """

    @staticmethod
    def pq(
        bit_compression: Optional[bool] = None,
        centroids: Optional[int] = None,
        encoder_distribution: Optional[PQEncoderDistribution] = None,
        encoder_type: Optional[PQEncoderType] = None,
        segments: Optional[int] = None,
        training_limit: Optional[int] = None,
    ) -> _PQConfigCreate:
        """Create a `_PQConfigCreate` object to be used when defining the product quantization (PQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        # `bit_compression` only triggers a warning; it is not forwarded, so
        # the model's `bitCompression` keeps its default of None.
        if bit_compression is not None:
            _Warnings.bit_compression_in_pq_config()
        return _PQConfigCreate(
            centroids=centroids,
            segments=segments,
            trainingLimit=training_limit,
            encoder=_PQEncoderConfigCreate(type_=encoder_type, distribution=encoder_distribution),
        )

    @staticmethod
    def bq(
        cache: Optional[bool] = None,
        rescore_limit: Optional[int] = None,
    ) -> _BQConfigCreate:
        """Create a `_BQConfigCreate` object to be used when defining the binary quantization (BQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#binary-quantization) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        return _BQConfigCreate(
            cache=cache,
            rescoreLimit=rescore_limit,
        )

    @staticmethod
    def sq(
        cache: Optional[bool] = None,
        rescore_limit: Optional[int] = None,
        training_limit: Optional[int] = None,
    ) -> _SQConfigCreate:
        """Create a `_SQConfigCreate` object to be used when defining the scalar quantization (SQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#binary-quantization) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        return _SQConfigCreate(
            cache=cache,
            rescoreLimit=rescore_limit,
            trainingLimit=training_limit,
        )

    @staticmethod
    def rq(
        bits: Optional[int] = None,
        rescore_limit: Optional[int] = None,
    ) -> _RQConfigCreate:
        """Create a `_RQConfigCreate` object to be used when defining the Rotational quantization (RQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        return _RQConfigCreate(
            bits=bits,
            rescoreLimit=rescore_limit,
        )





[docs]
class _VectorIndex:
    """Factory namespace for create-time vector index configurations.

    `MultiVector` and `Quantizer` expose the helper factories that build the
    values accepted by the `multi_vector` and `quantizer` arguments below.
    """

    MultiVector = _VectorIndexMultiVector
    Quantizer = _VectorIndexQuantizer

    @staticmethod
    def none() -> _VectorIndexConfigSkipCreate:
        """Create a `_VectorIndexConfigSkipCreate` object to be used when configuring Weaviate to not index your vectors.

        Use this method when defining the `vector_index_config` argument in `collections.create()`.
        """
        return _VectorIndexConfigSkipCreate(
            distance=None,
            quantizer=None,
            multivector=None,
        )

    # Overload selected when `multi_vector` is passed (keyword-only here);
    # it is the one marked as deprecated.
    @overload
    @staticmethod
    @deprecated(
        'Using the "multi_vector" argument is deprecated. Instead, specify it at the top-level in `multi_vector_index_config` when creating your `vector_config` with `MultiVectors.module()`'
    )
    def hnsw(
        cleanup_interval_seconds: Optional[int] = None,
        distance_metric: Optional[VectorDistances] = None,
        dynamic_ef_factor: Optional[int] = None,
        dynamic_ef_max: Optional[int] = None,
        dynamic_ef_min: Optional[int] = None,
        ef: Optional[int] = None,
        ef_construction: Optional[int] = None,
        filter_strategy: Optional[VectorFilterStrategy] = None,
        flat_search_cutoff: Optional[int] = None,
        max_connections: Optional[int] = None,
        vector_cache_max_objects: Optional[int] = None,
        *,
        quantizer: Optional[_QuantizerConfigCreate] = None,
        multi_vector: _MultiVectorConfigCreate,
    ) -> _VectorIndexConfigHNSWCreate: ...

    @overload
    @staticmethod
    def hnsw(
        cleanup_interval_seconds: Optional[int] = None,
        distance_metric: Optional[VectorDistances] = None,
        dynamic_ef_factor: Optional[int] = None,
        dynamic_ef_max: Optional[int] = None,
        dynamic_ef_min: Optional[int] = None,
        ef: Optional[int] = None,
        ef_construction: Optional[int] = None,
        filter_strategy: Optional[VectorFilterStrategy] = None,
        flat_search_cutoff: Optional[int] = None,
        max_connections: Optional[int] = None,
        vector_cache_max_objects: Optional[int] = None,
        quantizer: Optional[_QuantizerConfigCreate] = None,
        multi_vector: Optional[_MultiVectorConfigCreate] = None,
    ) -> _VectorIndexConfigHNSWCreate: ...

    @staticmethod
    def hnsw(
        cleanup_interval_seconds: Optional[int] = None,
        distance_metric: Optional[VectorDistances] = None,
        dynamic_ef_factor: Optional[int] = None,
        dynamic_ef_max: Optional[int] = None,
        dynamic_ef_min: Optional[int] = None,
        ef: Optional[int] = None,
        ef_construction: Optional[int] = None,
        filter_strategy: Optional[VectorFilterStrategy] = None,
        flat_search_cutoff: Optional[int] = None,
        max_connections: Optional[int] = None,
        vector_cache_max_objects: Optional[int] = None,
        quantizer: Optional[_QuantizerConfigCreate] = None,
        multi_vector: Optional[_MultiVectorConfigCreate] = None,
    ) -> _VectorIndexConfigHNSWCreate:
        """Create a `_VectorIndexConfigHNSWCreate` object to be used when defining the HNSW vector index configuration of Weaviate.

        Use this method when defining the `vector_index_config` argument in `collections.create()`.

        Passing `multi_vector` is deprecated: a warning is emitted, and the value is still stored on the returned config.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        if multi_vector is not None:
            _Warnings.multi_vector_in_hnsw_config()
        # Snake_case arguments map one-to-one onto the model's camelCase fields;
        # `distance_metric` and `multi_vector` become `distance` and `multivector`.
        return _VectorIndexConfigHNSWCreate(
            cleanupIntervalSeconds=cleanup_interval_seconds,
            distance=distance_metric,
            dynamicEfMin=dynamic_ef_min,
            dynamicEfMax=dynamic_ef_max,
            dynamicEfFactor=dynamic_ef_factor,
            efConstruction=ef_construction,
            ef=ef,
            filterStrategy=filter_strategy,
            flatSearchCutoff=flat_search_cutoff,
            maxConnections=max_connections,
            vectorCacheMaxObjects=vector_cache_max_objects,
            quantizer=quantizer,
            multivector=multi_vector,
        )

    @staticmethod
    def flat(
        distance_metric: Optional[VectorDistances] = None,
        vector_cache_max_objects: Optional[int] = None,
        quantizer: Optional[_QuantizerConfigCreate] = None,
    ) -> _VectorIndexConfigFlatCreate:
        """Create a `_VectorIndexConfigFlatCreate` object to be used when defining the FLAT vector index configuration of Weaviate.

        Use this method when defining the `vector_index_config` argument in `collections.create()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        # This factory takes no multi-vector argument, so `multivector` is always None.
        return _VectorIndexConfigFlatCreate(
            distance=distance_metric,
            vectorCacheMaxObjects=vector_cache_max_objects,
            quantizer=quantizer,
            multivector=None,
        )

    @staticmethod
    def dynamic(
        distance_metric: Optional[VectorDistances] = None,
        threshold: Optional[int] = None,
        hnsw: Optional[_VectorIndexConfigHNSWCreate] = None,
        flat: Optional[_VectorIndexConfigFlatCreate] = None,
    ) -> _VectorIndexConfigDynamicCreate:
        """Create a `_VectorIndexConfigDynamicCreate` object to be used when defining the DYNAMIC vector index configuration of Weaviate.

        Use this method when defining the `vector_index_config` argument in `collections.create()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        # The top-level quantizer and multivector are always None here; any
        # quantizer is carried by the nested `hnsw` / `flat` configs.
        return _VectorIndexConfigDynamicCreate(
            distance=distance_metric,
            threshold=threshold,
            hnsw=hnsw,
            flat=flat,
            quantizer=None,
            multivector=None,
        )