Source code for weaviate.collections.classes.config

from dataclasses import dataclass
from typing import (
    Any,
    ClassVar,
    Dict,
    List,
    Literal,
    Optional,
    Sequence,
    Type,
    TypeVar,
    Union,
    cast,
)

from deprecation import deprecated as docstring_deprecated
from pydantic import AnyHttpUrl, Field, ValidationInfo, field_validator
from typing_extensions import TypeAlias
from typing_extensions import deprecated as typing_deprecated

from weaviate.collections.classes.config_base import (
    _ConfigBase,
    _ConfigCreateModel,
    _ConfigUpdateModel,
    _EnumLikeStr,
    _QuantizerConfigUpdate,
)
from weaviate.collections.classes.config_named_vectors import (
    _NamedVectorConfigCreate,
    _NamedVectorConfigUpdate,
    _NamedVectors,
    _NamedVectorsUpdate,
)
from weaviate.collections.classes.config_vector_index import (
    PQEncoderDistribution,
    PQEncoderType,
    VectorFilterStrategy,
    _BQConfigUpdate,
    _PQConfigUpdate,
    _PQEncoderConfigUpdate,
    _RQConfigUpdate,
    _SQConfigUpdate,
    _VectorIndex,
    _VectorIndexConfigCreate,
    _VectorIndexConfigDynamicUpdate,
    _VectorIndexConfigFlatUpdate,
    _VectorIndexConfigHNSWUpdate,
    _VectorIndexConfigUpdate,
)
from weaviate.collections.classes.config_vector_index import (
    VectorIndexType as VectorIndexTypeAlias,
)
from weaviate.collections.classes.config_vectorizers import (
    CohereModel,
    _Vectorizer,
    _VectorizerConfigCreate,
)
from weaviate.collections.classes.config_vectorizers import (
    VectorDistances as VectorDistancesAlias,
)
from weaviate.collections.classes.config_vectorizers import (
    Vectorizers as VectorizersAlias,
)
from weaviate.collections.classes.config_vectors import (
    _MultiVectors,
    _VectorConfigCreate,
    _VectorConfigUpdate,
    _Vectors,
    _VectorsUpdate,
)
from weaviate.exceptions import WeaviateInvalidInputError
from weaviate.str_enum import BaseEnum
from weaviate.util import _capitalize_first_letter
from weaviate.warnings import _Warnings

# BC (backward compatibility) for direct imports: re-export the enums that are
# imported above under `...Alias` names, so they stay importable from this module.
Vectorizers: TypeAlias = VectorizersAlias
VectorIndexType: TypeAlias = VectorIndexTypeAlias
VectorDistances: TypeAlias = VectorDistancesAlias

# Typed choices for the `service` argument of `_Generative.aws` (which also accepts any `str`).
AWSService: TypeAlias = Literal[
    "bedrock",
    "sagemaker",
]


class ConsistencyLevel(str, BaseEnum):
    """The consistency levels when writing to Weaviate with replication enabled.

    Attributes:
        ALL: Wait for confirmation of write success from all, `N`, replicas.
        ONE: Wait for confirmation of write success from only one replica.
        QUORUM: Wait for confirmation of write success from a quorum: `N/2+1`, of replicas.
    """

    ALL = "ALL"
    ONE = "ONE"
    QUORUM = "QUORUM"
class DataType(str, BaseEnum):
    """The available primitive data types in Weaviate.

    Attributes:
        TEXT: Text data type.
        TEXT_ARRAY: Text array data type.
        INT: Integer data type.
        INT_ARRAY: Integer array data type.
        BOOL: Boolean data type.
        BOOL_ARRAY: Boolean array data type.
        NUMBER: Number data type.
        NUMBER_ARRAY: Number array data type.
        DATE: Date data type.
        DATE_ARRAY: Date array data type.
        UUID: UUID data type.
        UUID_ARRAY: UUID array data type.
        GEO_COORDINATES: Geo coordinates data type.
        BLOB: Blob data type.
        PHONE_NUMBER: Phone number data type.
        OBJECT: Object data type.
        OBJECT_ARRAY: Object array data type.
    """

    # Every `*_ARRAY` member is the scalar member's value with a `[]` suffix.
    TEXT = "text"
    TEXT_ARRAY = "text[]"
    INT = "int"
    INT_ARRAY = "int[]"
    BOOL = "boolean"
    BOOL_ARRAY = "boolean[]"
    NUMBER = "number"
    NUMBER_ARRAY = "number[]"
    DATE = "date"
    DATE_ARRAY = "date[]"
    UUID = "uuid"
    UUID_ARRAY = "uuid[]"
    GEO_COORDINATES = "geoCoordinates"
    BLOB = "blob"
    PHONE_NUMBER = "phoneNumber"
    OBJECT = "object"
    OBJECT_ARRAY = "object[]"
class Tokenization(str, BaseEnum):
    """The available inverted index tokenization methods for text properties in Weaviate.

    Attributes:
        WORD: Tokenize by word.
        WHITESPACE: Tokenize by whitespace.
        LOWERCASE: Tokenize by lowercase.
        FIELD: Tokenize by field.
        GSE: Tokenize using GSE (for Chinese and Japanese).
        TRIGRAM: Tokenize into trigrams.
        KAGOME_JA: Tokenize using the 'Kagome' tokenizer (for Japanese).
        KAGOME_KR: Tokenize using the 'Kagome' tokenizer and a Korean MeCab dictionary (for Korean).
    """

    WORD = "word"
    WHITESPACE = "whitespace"
    LOWERCASE = "lowercase"
    FIELD = "field"
    GSE = "gse"
    TRIGRAM = "trigram"
    KAGOME_JA = "kagome_ja"
    KAGOME_KR = "kagome_kr"
class GenerativeSearches(str, BaseEnum):
    """The available generative search modules in Weaviate.

    These modules generate text from text-based inputs.
    See the [docs](https://weaviate.io/developers/weaviate/modules/reader-generator-modules) for more details.

    Attributes:
        AWS: Weaviate module backed by AWS Bedrock generative models.
        ANTHROPIC: Weaviate module backed by Anthropic generative models.
        ANYSCALE: Weaviate module backed by Anyscale generative models.
        COHERE: Weaviate module backed by Cohere generative models.
        DATABRICKS: Weaviate module backed by Databricks generative models.
        DUMMY: The `generative-dummy` module (presumably intended for testing; confirm
            against the server documentation).
        FRIENDLIAI: Weaviate module backed by FriendliAI generative models.
        MISTRAL: Weaviate module backed by Mistral generative models.
        NVIDIA: Weaviate module backed by NVIDIA generative models.
        OLLAMA: Weaviate module backed by generative models deployed on Ollama infrastructure.
        OPENAI: Weaviate module backed by OpenAI and Azure-OpenAI generative models.
        PALM: Weaviate module backed by PaLM generative models.
        XAI: The `generative-xai` module.
    """

    AWS = "generative-aws"
    ANTHROPIC = "generative-anthropic"
    ANYSCALE = "generative-anyscale"
    COHERE = "generative-cohere"
    DATABRICKS = "generative-databricks"
    DUMMY = "generative-dummy"
    FRIENDLIAI = "generative-friendliai"
    MISTRAL = "generative-mistral"
    NVIDIA = "generative-nvidia"
    OLLAMA = "generative-ollama"
    OPENAI = "generative-openai"
    PALM = "generative-palm"  # rename to google once all versions support it
    XAI = "generative-xai"
class Rerankers(str, BaseEnum):
    """The available reranker modules in Weaviate.

    These modules rerank the results of a search query.
    See the [docs](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules#re-ranking) for more details.

    Attributes:
        NONE: No reranker.
        COHERE: Weaviate module backed by Cohere reranking models.
        TRANSFORMERS: Weaviate module backed by Transformers reranking models.
        VOYAGEAI: Weaviate module backed by VoyageAI reranking models.
        JINAAI: Weaviate module backed by JinaAI reranking models.
        NVIDIA: Weaviate module backed by NVIDIA reranking models.
    """

    NONE = "none"
    COHERE = "reranker-cohere"
    TRANSFORMERS = "reranker-transformers"
    VOYAGEAI = "reranker-voyageai"
    JINAAI = "reranker-jinaai"
    NVIDIA = "reranker-nvidia"
class StopwordsPreset(str, BaseEnum):
    """Preset stopwords to use in the `Stopwords` class.

    Attributes:
        NONE: No stopwords.
        EN: English stopwords.
    """

    NONE = "none"
    EN = "en"
class ReplicationDeletionStrategy(str, BaseEnum):
    """How object deletions in multi node environments should be resolved.

    Attributes:
        DELETE_ON_CONFLICT: Once an object has been deleted on one node it will be deleted
            on all nodes in case of conflicts.
        NO_AUTOMATED_RESOLUTION: No deletion resolution.
        TIME_BASED_RESOLUTION: Sends `TimeBasedResolution`; presumably conflicts are resolved
            by comparing timestamps (confirm against the Weaviate replication documentation).
    """

    DELETE_ON_CONFLICT = "DeleteOnConflict"
    NO_AUTOMATED_RESOLUTION = "NoAutomatedResolution"
    TIME_BASED_RESOLUTION = "TimeBasedResolution"
class _ShardingConfigCreate(_ConfigCreateModel):
    """Sharding fields of a create-time configuration model."""

    # The three counts are declared without a default value.
    virtualPerPhysical: Optional[int]
    desiredCount: Optional[int]
    desiredVirtualCount: Optional[int]
    # The remaining fields carry fixed defaults.
    key: str = "_id"
    strategy: str = "hash"
    function: str = "murmur3"
class _ReplicationConfigCreate(_ConfigCreateModel):
    """Replication fields of a create-time configuration model."""

    factor: Optional[int]
    asyncEnabled: Optional[bool]
    deletionStrategy: Optional[ReplicationDeletionStrategy]
class _ReplicationConfigUpdate(_ConfigUpdateModel):
    """Replication fields of an update-time configuration model.

    Declares the same three fields as `_ReplicationConfigCreate`.
    """

    factor: Optional[int]
    asyncEnabled: Optional[bool]
    deletionStrategy: Optional[ReplicationDeletionStrategy]
class _BM25ConfigCreate(_ConfigCreateModel):
    """BM25 parameters of a create-time configuration model; both are non-optional floats."""

    b: float
    k1: float
class _BM25ConfigUpdate(_ConfigUpdateModel):
    """BM25 parameters of an update-time configuration model; each may be `None`."""

    b: Optional[float]
    k1: Optional[float]
class _StopwordsCreate(_ConfigCreateModel):
    """Stopword fields of a create-time configuration model."""

    preset: Optional[StopwordsPreset]
    additions: Optional[List[str]]
    removals: Optional[List[str]]
class _StopwordsUpdate(_ConfigUpdateModel):
    """Stopword fields of an update-time configuration model.

    Declares the same three fields as `_StopwordsCreate`.
    """

    preset: Optional[StopwordsPreset]
    additions: Optional[List[str]]
    removals: Optional[List[str]]
class _InvertedIndexConfigCreate(_ConfigCreateModel):
    """Inverted-index fields of a create-time configuration model."""

    bm25: Optional[_BM25ConfigCreate]
    cleanupIntervalSeconds: Optional[int]
    indexTimestamps: Optional[bool]
    indexPropertyLength: Optional[bool]
    indexNullState: Optional[bool]
    # Unlike the other fields, `stopwords` is not annotated as Optional.
    stopwords: _StopwordsCreate
class _InvertedIndexConfigUpdate(_ConfigUpdateModel):
    """Inverted-index fields of an update-time configuration model.

    Only `bm25`, `cleanupIntervalSeconds` and `stopwords` are declared here; the `index*`
    flags of `_InvertedIndexConfigCreate` have no counterpart in this model.
    """

    bm25: Optional[_BM25ConfigUpdate]
    cleanupIntervalSeconds: Optional[int]
    stopwords: Optional[_StopwordsUpdate]
class _MultiTenancyConfigCreate(_ConfigCreateModel):
    """Multi-tenancy fields of a create-time configuration model."""

    enabled: bool
    autoTenantCreation: Optional[bool]
    autoTenantActivation: Optional[bool]
class _MultiTenancyConfigUpdate(_ConfigUpdateModel):
    """Multi-tenancy fields of an update-time configuration model.

    There is no `enabled` field here, in contrast to `_MultiTenancyConfigCreate`.
    """

    autoTenantCreation: Optional[bool]
    autoTenantActivation: Optional[bool]
class _GenerativeProvider(_ConfigCreateModel):
    """Common base of the generative module configuration models in this file."""

    # Module identifier: a known `GenerativeSearches` member or a free-form `_EnumLikeStr`.
    generative: Union[GenerativeSearches, _EnumLikeStr]
class _GenerativeAnyscale(_GenerativeProvider):
    """Configuration model built by `_Generative.anyscale`."""

    # Defaults to the ANYSCALE module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.ANYSCALE, frozen=True, exclude=True
    )
    baseURL: Optional[str]
    temperature: Optional[float]
    model: Optional[str]
class _GenerativeCustom(_GenerativeProvider):
    """Configuration model for a generative module that has no dedicated class here."""

    module_config: Optional[Dict[str, Any]]

    def _to_dict(self) -> Dict[str, Any]:
        """Return `module_config` itself, or an empty dict when it is `None`."""
        return {} if self.module_config is None else self.module_config
class _GenerativeDatabricks(_GenerativeProvider):
    """Configuration model built by `_Generative.databricks`."""

    # Defaults to the DATABRICKS module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.DATABRICKS, frozen=True, exclude=True
    )
    # `endpoint` is the only non-optional setting of this model.
    endpoint: str
    maxTokens: Optional[int]
    temperature: Optional[float]
    topK: Optional[int]
    topP: Optional[float]
class _GenerativeMistral(_GenerativeProvider):
    """Configuration model built by `_Generative.mistral`."""

    # Defaults to the MISTRAL module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.MISTRAL, frozen=True, exclude=True
    )
    temperature: Optional[float]
    model: Optional[str]
    maxTokens: Optional[int]
    baseURL: Optional[str]
class _GenerativeNvidia(_GenerativeProvider):
    """Configuration model built by `_Generative.nvidia`."""

    # Defaults to the NVIDIA module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.NVIDIA, frozen=True, exclude=True
    )
    temperature: Optional[float]
    model: Optional[str]
    maxTokens: Optional[int]
    baseURL: Optional[str]
class _GenerativeXai(_GenerativeProvider):
    """Configuration model built by `_Generative.xai`."""

    # Defaults to the XAI module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.XAI, frozen=True, exclude=True
    )
    temperature: Optional[float]
    model: Optional[str]
    maxTokens: Optional[int]
    baseURL: Optional[str]
class _GenerativeFriendliai(_GenerativeProvider):
    """Configuration model built by `_Generative.friendliai`."""

    # Defaults to the FRIENDLIAI module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.FRIENDLIAI, frozen=True, exclude=True
    )
    temperature: Optional[float]
    model: Optional[str]
    maxTokens: Optional[int]
    baseURL: Optional[str]
class _GenerativeOllama(_GenerativeProvider):
    """Configuration model built by `_Generative.ollama`."""

    # Defaults to the OLLAMA module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.OLLAMA, frozen=True, exclude=True
    )
    model: Optional[str]
    apiEndpoint: Optional[str]
class _GenerativeOpenAIConfigBase(_GenerativeProvider):
    """Fields shared by the OpenAI and Azure-OpenAI configuration models."""

    # Defaults to the OPENAI module; frozen after construction and excluded from
    # pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.OPENAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    frequencyPenaltyProperty: Optional[float]
    presencePenaltyProperty: Optional[float]
    maxTokensProperty: Optional[int]
    temperatureProperty: Optional[float]
    topPProperty: Optional[float]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict, with `baseURL` stored as a plain string when it is set."""
        serialized = super()._to_dict()
        url = self.baseURL
        if url is None:
            return serialized
        # Replace whatever the parent produced with the URL's string form.
        serialized["baseURL"] = url.unicode_string()
        return serialized
class _GenerativeOpenAIConfig(_GenerativeOpenAIConfigBase):
    """OpenAI configuration model built by `_Generative.openai`; adds `model` to the base fields."""

    model: Optional[str]
class _GenerativeAzureOpenAIConfig(_GenerativeOpenAIConfigBase):
    """Azure-OpenAI configuration model built by `_Generative.azure_openai`.

    Adds two non-optional string fields to the base fields.
    """

    resourceName: str
    deploymentId: str
class _GenerativeCohereConfig(_GenerativeProvider):
    """Configuration model built by `_Generative.cohere`."""

    # Defaults to the COHERE module; frozen after construction and excluded from
    # pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.COHERE, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    kProperty: Optional[int]
    model: Optional[str]
    maxTokensProperty: Optional[int]
    returnLikelihoodsProperty: Optional[str]
    stopSequencesProperty: Optional[List[str]]
    temperatureProperty: Optional[float]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict, with `baseURL` stored as a plain string when it is set."""
        serialized = super()._to_dict()
        url = self.baseURL
        if url is None:
            return serialized
        # Replace whatever the parent produced with the URL's string form.
        serialized["baseURL"] = url.unicode_string()
        return serialized
class _GenerativeGoogleConfig(_GenerativeProvider):
    """Configuration model built by both `_Generative.google` and `_Generative.palm`."""

    # Still defaults to the PALM member (`generative-palm`); frozen after construction
    # and excluded from pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.PALM, frozen=True, exclude=True
    )
    apiEndpoint: Optional[str]
    maxOutputTokens: Optional[int]
    modelId: Optional[str]
    # `projectId` is the only non-optional setting of this model.
    projectId: str
    temperature: Optional[float]
    topK: Optional[int]
    topP: Optional[float]
class _GenerativeAWSConfig(_GenerativeProvider):
    """Configuration model built by `_Generative.aws`."""

    # Defaults to the AWS module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.AWS, frozen=True, exclude=True
    )
    region: str
    # Plain `str` here, although `_Generative.aws` hints it as `Union[AWSService, str]`.
    service: str
    model: Optional[str]
    endpoint: Optional[str]
class _GenerativeAnthropicConfig(_GenerativeProvider):
    """Configuration model built by `_Generative.anthropic`."""

    # Defaults to the ANTHROPIC module; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    generative: Union[GenerativeSearches, _EnumLikeStr] = Field(
        default=GenerativeSearches.ANTHROPIC, frozen=True, exclude=True
    )
    model: Optional[str]
    maxTokens: Optional[int]
    stopSequences: Optional[List[str]]
    temperature: Optional[float]
    topK: Optional[int]
    topP: Optional[float]
class _RerankerProvider(_ConfigCreateModel):
    """Common base of the reranker module configuration models in this file."""

    # Module identifier: a known `Rerankers` member or a free-form `_EnumLikeStr`.
    reranker: Union[Rerankers, _EnumLikeStr]
# Model names offered as typed choices for the Cohere reranker; the fields and
# arguments that use this alias also accept any other `str`.
RerankerCohereModel = Literal["rerank-english-v2.0", "rerank-multilingual-v2.0"]
class _RerankerCohereConfig(_RerankerProvider):
    """Configuration model built by `_Reranker.cohere`."""

    # Defaults to the COHERE reranker; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    reranker: Union[Rerankers, _EnumLikeStr] = Field(
        default=Rerankers.COHERE, frozen=True, exclude=True
    )
    model: Optional[Union[RerankerCohereModel, str]] = Field(default=None)
class _RerankerCustomConfig(_RerankerProvider):
    """Configuration model for a reranker module that has no dedicated class here."""

    module_config: Optional[Dict[str, Any]]

    def _to_dict(self) -> Dict[str, Any]:
        """Return `module_config` itself, or an empty dict when it is `None`."""
        return {} if self.module_config is None else self.module_config
class _RerankerTransformersConfig(_RerankerProvider):
    """Configuration model built by `_Reranker.transformers`; it declares no settings of its own."""

    # Defaults to the TRANSFORMERS reranker; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    reranker: Union[Rerankers, _EnumLikeStr] = Field(
        default=Rerankers.TRANSFORMERS, frozen=True, exclude=True
    )
# Model names offered as typed choices for the JinaAI reranker; the fields and
# arguments that use this alias also accept any other `str`.
RerankerJinaAIModel = Literal[
    "jina-reranker-v2-base-multilingual",
    "jina-reranker-v1-base-en",
    "jina-reranker-v1-turbo-en",
    "jina-reranker-v1-tiny-en",
    "jina-colbert-v1-en",
]
class _RerankerJinaAIConfig(_RerankerProvider):
    """Configuration model built by `_Reranker.jinaai`."""

    # Defaults to the JINAAI reranker; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    reranker: Union[Rerankers, _EnumLikeStr] = Field(
        default=Rerankers.JINAAI, frozen=True, exclude=True
    )
    model: Optional[Union[RerankerJinaAIModel, str]] = Field(default=None)
# Model names offered as typed choices for the VoyageAI reranker; the fields and
# arguments that use this alias also accept any other `str`.
RerankerVoyageAIModel = Literal["rerank-2", "rerank-2-lite", "rerank-lite-1", "rerank-1"]
class _RerankerVoyageAIConfig(_RerankerProvider):
    """Configuration model built by `_Reranker.voyageai`."""

    # Defaults to the VOYAGEAI reranker; `frozen=True` forbids reassignment and
    # `exclude=True` keeps the field out of pydantic's serialized output.
    reranker: Union[Rerankers, _EnumLikeStr] = Field(
        default=Rerankers.VOYAGEAI, frozen=True, exclude=True
    )
    model: Optional[Union[RerankerVoyageAIModel, str]] = Field(default=None)
class _RerankerNvidiaConfig(_RerankerProvider):
    """Configuration model built by `_Reranker.nvidia`."""

    # Defaults to the NVIDIA reranker; frozen after construction and excluded from
    # pydantic's serialized output.
    reranker: Union[Rerankers, _EnumLikeStr] = Field(
        default=Rerankers.NVIDIA, frozen=True, exclude=True
    )
    model: Optional[str] = Field(default=None)
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict, with `baseURL` stored as a plain string when it is set."""
        serialized = super()._to_dict()
        url = self.baseURL
        if url is None:
            return serialized
        # Replace whatever the parent produced with the URL's string form.
        serialized["baseURL"] = url.unicode_string()
        return serialized
class _Generative:
    """Use this factory class to create the correct object for the `generative_config` argument in the `collections.create()` method.

    Each staticmethod provides options specific to the named generative search module in the function's name. Under-the-hood data validation steps
    will ensure that any mis-specifications will be caught before the request is sent to Weaviate.

    Unless an argument is marked REQUIRED, leaving it as `None` uses the server-defined default.
    """

    @staticmethod
    def anyscale(
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        base_url: Optional[str] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeAnyscale` object for use when generating using the `generative-anyscale` module.

        Args:
            model: The model to use.
            temperature: The temperature to use.
            base_url: The base URL where the API request should go.
        """
        return _GenerativeAnyscale(model=model, temperature=temperature, baseURL=base_url)

    @staticmethod
    def custom(
        module_name: str,
        module_config: Optional[Dict[str, Any]] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeCustom` object for use when generating using a custom specification.

        Args:
            module_name: The name of the module to use, REQUIRED.
            module_config: The configuration to use for the module.
        """
        return _GenerativeCustom(generative=_EnumLikeStr(module_name), module_config=module_config)

    @staticmethod
    def databricks(
        *,
        endpoint: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeDatabricks` object for use when performing AI generation using the `generative-databricks` module.

        Args:
            endpoint: The URL where the API request should go, REQUIRED (keyword-only, no default).
            max_tokens: The maximum number of tokens to generate.
            temperature: The temperature to use.
            top_k: The top K value to use.
            top_p: The top P value to use.
        """
        return _GenerativeDatabricks(
            endpoint=endpoint,
            maxTokens=max_tokens,
            temperature=temperature,
            topK=top_k,
            topP=top_p,
        )

    @staticmethod
    def friendliai(
        *,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeFriendliai` object for use when performing AI generation using the `generative-friendliai` module.

        Args:
            base_url: The base URL where the API request should go.
            model: The model to use.
            temperature: The temperature to use.
            max_tokens: The maximum number of tokens to generate.
        """
        return _GenerativeFriendliai(
            model=model, temperature=temperature, maxTokens=max_tokens, baseURL=base_url
        )

    @staticmethod
    def mistral(
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        base_url: Optional[str] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeMistral` object for use when performing AI generation using the `generative-mistral` module.

        Args:
            model: The model to use.
            temperature: The temperature to use.
            max_tokens: The maximum number of tokens to generate.
            base_url: The base URL where the API request should go.
        """
        return _GenerativeMistral(
            model=model, temperature=temperature, maxTokens=max_tokens, baseURL=base_url
        )

    @staticmethod
    def nvidia(
        *,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeNvidia` object for use when performing AI generation using the `generative-nvidia` module.

        Args:
            base_url: The base URL where the API request should go.
            model: The model to use.
            temperature: The temperature to use.
            max_tokens: The maximum number of tokens to generate.
        """
        return _GenerativeNvidia(
            model=model, temperature=temperature, maxTokens=max_tokens, baseURL=base_url
        )

    @staticmethod
    def xai(
        *,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeXai` object for use when performing AI generation using the `generative-xai` module.

        Args:
            base_url: The base URL where the API request should go.
            model: The model to use.
            temperature: The temperature to use.
            max_tokens: The maximum number of tokens to generate.
        """
        return _GenerativeXai(
            model=model, temperature=temperature, maxTokens=max_tokens, baseURL=base_url
        )

    @staticmethod
    def ollama(
        *,
        api_endpoint: Optional[str] = None,
        model: Optional[str] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeOllama` object for use when performing AI generation using the `generative-ollama` module.

        Args:
            api_endpoint: The API endpoint to use.
                Docker users may need to specify an alias, such as `http://host.docker.internal:11434` so that the container can access the host machine.
            model: The model to use.
        """
        return _GenerativeOllama(model=model, apiEndpoint=api_endpoint)

    @staticmethod
    def openai(
        model: Optional[str] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        base_url: Optional[AnyHttpUrl] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeOpenAIConfig` object for use when performing AI generation using the `generative-openai` module.

        See the [documentation](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-openai)
        for detailed usage.

        Args:
            model: The model to use.
            frequency_penalty: The frequency penalty to use.
            max_tokens: The maximum number of tokens to generate.
            presence_penalty: The presence penalty to use.
            temperature: The temperature to use.
            top_p: The top P to use.
            base_url: The base URL where the API request should go.
        """
        return _GenerativeOpenAIConfig(
            baseURL=base_url,
            frequencyPenaltyProperty=frequency_penalty,
            maxTokensProperty=max_tokens,
            model=model,
            presencePenaltyProperty=presence_penalty,
            temperatureProperty=temperature,
            topPProperty=top_p,
        )

    @staticmethod
    def azure_openai(
        resource_name: str,
        deployment_id: str,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        base_url: Optional[AnyHttpUrl] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeAzureOpenAIConfig` object for use when performing AI generation using the `generative-openai` module.

        See the [documentation](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-openai)
        for detailed usage.

        Args:
            resource_name: The name of the Azure OpenAI resource to use, REQUIRED.
            deployment_id: The Azure OpenAI deployment ID to use, REQUIRED.
            frequency_penalty: The frequency penalty to use.
            max_tokens: The maximum number of tokens to generate.
            presence_penalty: The presence penalty to use.
            temperature: The temperature to use.
            top_p: The top P to use.
            base_url: The base URL where the API request should go.
        """
        return _GenerativeAzureOpenAIConfig(
            baseURL=base_url,
            deploymentId=deployment_id,
            frequencyPenaltyProperty=frequency_penalty,
            maxTokensProperty=max_tokens,
            presencePenaltyProperty=presence_penalty,
            resourceName=resource_name,
            temperatureProperty=temperature,
            topPProperty=top_p,
        )

    @staticmethod
    def cohere(
        model: Optional[Union[CohereModel, str]] = None,
        k: Optional[int] = None,
        max_tokens: Optional[int] = None,
        return_likelihoods: Optional[str] = None,
        stop_sequences: Optional[List[str]] = None,
        temperature: Optional[float] = None,
        base_url: Optional[AnyHttpUrl] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeCohereConfig` object for use when performing AI generation using the `generative-cohere` module.

        See the [documentation](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-cohere)
        for detailed usage.

        Args:
            model: The model to use.
            k: The number of sequences to generate.
            max_tokens: The maximum number of tokens to generate.
            return_likelihoods: Whether to return the likelihoods.
            stop_sequences: The stop sequences to use.
            temperature: The temperature to use.
            base_url: The base URL where the API request should go.
        """
        return _GenerativeCohereConfig(
            baseURL=base_url,
            kProperty=k,
            maxTokensProperty=max_tokens,
            model=model,
            returnLikelihoodsProperty=return_likelihoods,
            stopSequencesProperty=stop_sequences,
            temperatureProperty=temperature,
        )

    @staticmethod
    @docstring_deprecated(
        deprecated_in="4.9.0",
        details="""
This method is deprecated and will be removed in Q2 '25. Please use :meth:`~weaviate.collections.classes.config._Generative.google` instead.
""",
    )
    @typing_deprecated(
        "This method is deprecated and will be removed in Q2 '25. Please use `google` instead."
    )
    def palm(
        project_id: str,
        api_endpoint: Optional[str] = None,
        max_output_tokens: Optional[int] = None,
        model_id: Optional[str] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeGoogleConfig` object for use when performing AI generation using the `generative-palm` module.

        Deprecated alias of `google`: it emits the palm-to-google warning and then builds
        the same configuration object from the same arguments.

        See the [documentation](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-palm)
        for detailed usage.

        Args:
            project_id: The PalM project ID to use, REQUIRED.
            api_endpoint: The API endpoint to use without a leading scheme such as `http://`.
            max_output_tokens: The maximum number of tokens to generate.
            model_id: The model ID to use.
            temperature: The temperature to use.
            top_k: The top K to use.
            top_p: The top P to use.
        """
        _Warnings.palm_to_google_gen()
        return _GenerativeGoogleConfig(
            apiEndpoint=api_endpoint,
            maxOutputTokens=max_output_tokens,
            modelId=model_id,
            projectId=project_id,
            temperature=temperature,
            topK=top_k,
            topP=top_p,
        )

    @staticmethod
    def google(
        project_id: str,
        api_endpoint: Optional[str] = None,
        max_output_tokens: Optional[int] = None,
        model_id: Optional[str] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeGoogleConfig` object for use when performing AI generation using the `generative-google` module.

        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/generative)
        for detailed usage.

        Args:
            project_id: The PalM project ID to use, REQUIRED.
            api_endpoint: The API endpoint to use without a leading scheme such as `http://`.
            max_output_tokens: The maximum number of tokens to generate.
            model_id: The model ID to use.
            temperature: The temperature to use.
            top_k: The top K to use.
            top_p: The top P to use.
        """
        return _GenerativeGoogleConfig(
            apiEndpoint=api_endpoint,
            maxOutputTokens=max_output_tokens,
            modelId=model_id,
            projectId=project_id,
            temperature=temperature,
            topK=top_k,
            topP=top_p,
        )

    @staticmethod
    def aws(
        model: Optional[str] = None,
        region: str = "",  # cant have a non-default value after a default value, but we cant change the order for BC
        endpoint: Optional[str] = None,
        service: Union[AWSService, str] = "bedrock",
    ) -> _GenerativeProvider:
        """Create a `_GenerativeAWSConfig` object for use when performing AI generation using the `generative-aws` module.

        See the [documentation](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-aws)
        for detailed usage.

        Args:
            model: The model to use, REQUIRED for service "bedrock".
            region: The AWS region to run the model from, REQUIRED. The empty-string default
                exists only to keep the historical argument order; this method does not
                reject it.
            endpoint: The endpoint to use, REQUIRED for service "sagemaker".
            service: The AWS service to use, options are "bedrock" and "sagemaker".
        """
        return _GenerativeAWSConfig(
            model=model,
            region=region,
            service=service,
            endpoint=endpoint,
        )

    @staticmethod
    def anthropic(
        model: Optional[str] = None,
        max_tokens: Optional[int] = None,
        stop_sequences: Optional[List[str]] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
    ) -> _GenerativeProvider:
        """Create a `_GenerativeAnthropicConfig` object for use when performing AI generation using the `generative-anthropic` module.

        Args:
            model: The model to use.
            max_tokens: The maximum number of tokens to generate.
            stop_sequences: The stop sequences to use.
            temperature: The temperature to use.
            top_k: The top K to use.
            top_p: The top P to use.
        """
        return _GenerativeAnthropicConfig(
            model=model,
            maxTokens=max_tokens,
            stopSequences=stop_sequences,
            temperature=temperature,
            topK=top_k,
            topP=top_p,
        )
class _Reranker:
    """Factory for the objects accepted by the `reranker_config` argument of `collections.create()`.

    Each static method is named after a reranker and exposes the options specific to it.
    Under-the-hood data validation steps will ensure that any mis-specifications will be caught
    before the request is sent to Weaviate.
    """

    @staticmethod
    def transformers() -> _RerankerProvider:
        """Build a `_RerankerTransformersConfig` for reranking with the `reranker-transformers` module.

        See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/reranker-transformers)
        for detailed usage.
        """
        transformers_config = _RerankerTransformersConfig(reranker=Rerankers.TRANSFORMERS)
        return transformers_config

    @staticmethod
    def custom(
        module_name: str, module_config: Optional[Dict[str, Any]] = None
    ) -> _RerankerProvider:
        """Build a `_RerankerCustomConfig` for reranking with a custom module.

        Args:
            module_name: The name of the module to use, REQUIRED.
            module_config: The configuration to use for the module. Defaults to `None`, which uses the server-defined default.
        """
        # The free-form module name is wrapped in `_EnumLikeStr` before being stored as the reranker.
        reranker_name = _EnumLikeStr(module_name)
        return _RerankerCustomConfig(reranker=reranker_name, module_config=module_config)

    @staticmethod
    def cohere(
        model: Optional[Union[RerankerCohereModel, str]] = None,
    ) -> _RerankerProvider:
        """Build a `_RerankerCohereConfig` for reranking with the `reranker-cohere` module.

        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/reranker)
        for detailed usage.

        Args:
            model: The model to use. Defaults to `None`, which uses the server-defined default
        """
        cohere_config = _RerankerCohereConfig(model=model)
        return cohere_config

    @staticmethod
    def jinaai(
        model: Optional[Union[RerankerJinaAIModel, str]] = None,
    ) -> _RerankerProvider:
        """Build a `_RerankerJinaAIConfig` for reranking with the `reranker-jinaai` module.

        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/reranker)
        for detailed usage.

        Args:
            model: The model to use. Defaults to `None`, which uses the server-defined default
        """
        jinaai_config = _RerankerJinaAIConfig(model=model)
        return jinaai_config

    @staticmethod
    def voyageai(
        model: Optional[Union[RerankerVoyageAIModel, str]] = None,
    ) -> _RerankerProvider:
        """Build a `_RerankerVoyageAIConfig` for reranking with the `reranker-voyageai` module.

        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/reranker)
        for detailed usage.

        Args:
            model: The model to use. Defaults to `None`, which uses the server-defined default
        """
        voyageai_config = _RerankerVoyageAIConfig(model=model)
        return voyageai_config

    @staticmethod
    def nvidia(
        model: Optional[str] = None,
        base_url: Optional[AnyHttpUrl] = None,
    ) -> _RerankerProvider:
        """Build a `_RerankerNvidiaConfig` for reranking with the `reranker-nvidia` module.

        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/reranker)
        for detailed usage.

        Args:
            model: The model to use. Defaults to `None`, which uses the server-defined default
            base_url: The base URL to send the reranker requests to. Defaults to `None`, which uses the server-defined default.
        """
        # `base_url` is forwarded under the `baseURL` field name of the config model.
        nvidia_config = _RerankerNvidiaConfig(model=model, baseURL=base_url)
        return nvidia_config
class _CollectionConfigCreateBase(_ConfigCreateModel):
    """Collection-level settings that `_to_dict` flattens into a single schema dictionary.

    Fields are declared under their camelCase schema names and can be populated through the
    snake_case aliases given in each `Field`.
    """

    description: Optional[str] = Field(default=None)
    invertedIndexConfig: Optional[_InvertedIndexConfigCreate] = Field(
        default=None, alias="inverted_index_config"
    )
    multiTenancyConfig: Optional[_MultiTenancyConfigCreate] = Field(
        default=None, alias="multi_tenancy_config"
    )
    replicationConfig: Optional[_ReplicationConfigCreate] = Field(
        default=None, alias="replication_config"
    )
    shardingConfig: Optional[_ShardingConfigCreate] = Field(default=None, alias="sharding_config")
    vectorIndexConfig: Optional[_VectorIndexConfigCreate] = Field(
        default=None, alias="vector_index_config"
    )
    moduleConfig: _VectorizerConfigCreate = Field(
        default=_Vectorizer.none(), alias="vectorizer_config"
    )
    generativeSearch: Optional[_GenerativeProvider] = Field(default=None, alias="generative_config")
    rerankerConfig: Optional[_RerankerProvider] = Field(default=None, alias="reranker_config")

    def _to_dict(self) -> Dict[str, Any]:
        """Serialise every set field into the dictionary form of the collection config.

        Generative, reranker and (non-`NONE`) vectorizer settings are collected under
        `"moduleConfig"`, keyed by module name. `"vectorIndexType"` is always written as the
        plain enum value, defaulting to HNSW when no vector index config is given.
        """
        ret_dict: Dict[str, Any] = {}

        for cls_field in type(self).model_fields:
            val = getattr(self, cls_field)
            if cls_field in ["name", "model", "properties", "references"] or val is None:
                continue
            elif isinstance(val, (bool, float, str, int)):
                ret_dict[cls_field] = str(val)
            elif isinstance(val, _GenerativeProvider):
                self.__add_to_module_config(ret_dict, val.generative.value, val._to_dict())
            elif isinstance(val, _RerankerProvider):
                self.__add_to_module_config(ret_dict, val.reranker.value, val._to_dict())
            elif isinstance(val, _VectorizerConfigCreate):
                ret_dict["vectorizer"] = val.vectorizer.value
                if val.vectorizer != Vectorizers.NONE:
                    self.__add_to_module_config(ret_dict, val.vectorizer.value, val._to_dict())
            elif isinstance(val, _VectorIndexConfigCreate):
                # Store the plain value, like the default branch below, rather than the
                # enum member itself.
                ret_dict["vectorIndexType"] = val.vector_index_type().value
                ret_dict[cls_field] = val._to_dict()
            else:
                assert isinstance(val, _ConfigCreateModel)
                ret_dict[cls_field] = val._to_dict()

        if self.vectorIndexConfig is None:
            ret_dict["vectorIndexType"] = VectorIndexType.HNSW.value
        return ret_dict

    @staticmethod
    def __add_to_module_config(
        return_dict: Dict[str, Any], addition_key: str, addition_val: Dict[str, Any]
    ) -> None:
        """Store `addition_val` under `return_dict["moduleConfig"][addition_key]`, creating the mapping if needed."""
        if "moduleConfig" not in return_dict:
            return_dict["moduleConfig"] = {addition_key: addition_val}
        else:
            return_dict["moduleConfig"][addition_key] = addition_val
class _CollectionConfigUpdate(_ConfigUpdateModel):
    """Set of changes to apply to an existing collection schema via `merge_with_existing`.

    Every field is optional; only fields that are not `None` are merged into the schema.
    `vectorConfig` cannot be combined with `vectorizerConfig` or `vectorIndexConfig`.
    """

    description: Optional[str] = Field(default=None)
    property_descriptions: Optional[Dict[str, str]] = Field(default=None)
    invertedIndexConfig: Optional[_InvertedIndexConfigUpdate] = Field(
        default=None, alias="inverted_index_config"
    )
    replicationConfig: Optional[_ReplicationConfigUpdate] = Field(
        default=None, alias="replication_config"
    )
    vectorIndexConfig: Optional[_VectorIndexConfigUpdate] = Field(
        default=None, alias="vector_index_config"
    )
    vectorizerConfig: Optional[Union[_VectorIndexConfigUpdate, List[_NamedVectorConfigUpdate]]] = (
        Field(default=None, alias="vectorizer_config")
    )
    vectorConfig: Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]] = Field(
        default=None, alias="vector_config"
    )
    multiTenancyConfig: Optional[_MultiTenancyConfigUpdate] = Field(
        default=None, alias="multi_tenancy_config"
    )
    generativeConfig: Optional[_GenerativeProvider] = Field(default=None, alias="generative_config")
    rerankerConfig: Optional[_RerankerProvider] = Field(default=None, alias="reranker_config")

    @field_validator("vectorConfig", mode="before")
    @classmethod
    def mutual_exclusivity(
        cls,
        v: Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]],
        info: ValidationInfo,
    ) -> Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]]:
        """Reject `vectorConfig` when `vectorizerConfig` or `vectorIndexConfig` is also set.

        Raises:
            ValueError: If `vectorConfig` is given together with either of the other two fields.
        """
        if v is None:
            return v
        # `info.data` only contains fields that validated successfully, so a field that
        # already failed is absent; `.get` avoids a raw KeyError escaping the validator.
        if info.data.get("vectorizerConfig") is not None:
            raise ValueError(
                "Cannot specify vectorizerConfig when also specifying vectorConfig. Please use one or the other."
            )
        if info.data.get("vectorIndexConfig") is not None:
            raise ValueError(
                "Cannot specify vectorIndexConfig when also specifying vectorConfig. Please use one or the other."
            )
        return v

    def __check_quantizers(
        self,
        quantizer: Optional[_QuantizerConfigUpdate],
        vector_index_config: dict,
    ) -> None:
        """Refuse an update whose quantizer differs from one already enabled on the index.

        Args:
            quantizer: The quantizer update being applied, if any.
            vector_index_config: The existing vector index config taken from the schema.

        Raises:
            WeaviateInvalidInputError: If a quantizer of another kind is enabled in
                `vector_index_config`.
        """
        quantizer_keys = (
            (_PQConfigUpdate, "pq"),
            (_BQConfigUpdate, "bq"),
            (_SQConfigUpdate, "sq"),
            (_RQConfigUpdate, "rq"),
        )
        all_keys = tuple(key for _, key in quantizer_keys)

        for quantizer_type, own_key in quantizer_keys:
            if not isinstance(quantizer, quantizer_type):
                continue
            for other_key in all_keys:
                if other_key == own_key:
                    continue
                # Every key, including "pq", is read with a default so that an index
                # config lacking that entry counts as "not enabled" instead of raising KeyError.
                if vector_index_config.get(other_key, {"enabled": False})["enabled"]:
                    raise WeaviateInvalidInputError(
                        f"Cannot update vector index config {vector_index_config} to change its quantizer. To do this, you must recreate the collection."
                    )
        return None

    def __merge_named_vector_index(
        self,
        schema: Dict[str, Any],
        vc: Union[_NamedVectorConfigUpdate, _VectorConfigUpdate],
    ) -> None:
        """Merge one named vector's index update into `schema["vectorConfig"]` in place.

        Raises:
            WeaviateInvalidInputError: If `vc.name` is not an existing named vector, or if the
                update would change the quantizer.
        """
        if vc.name not in schema["vectorConfig"]:
            raise WeaviateInvalidInputError(
                f"Vector config with name {vc.name} does not exist in the existing vector config"
            )
        existing = schema["vectorConfig"][vc.name]
        self.__check_quantizers(vc.vectorIndexConfig.quantizer, existing["vectorIndexConfig"])
        existing["vectorIndexConfig"] = vc.vectorIndexConfig.merge_with_existing(
            existing["vectorIndexConfig"]
        )
        existing["vectorIndexType"] = vc.vectorIndexConfig.vector_index_type()

    def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Apply every field that is set on this update to `schema` and return it.

        `schema` is modified in place. Setting a generative (or reranker) config first removes
        every existing `moduleConfig` entry whose key contains "generative" (or "reranker").

        Args:
            schema: The existing collection schema as a dictionary.

        Returns:
            The same `schema` object with the updates merged in.

        Raises:
            WeaviateInvalidInputError: If a property description targets an unknown property,
                a named vector does not exist, or an update would change a quantizer.
        """
        if self.description is not None:
            schema["description"] = self.description
        if self.property_descriptions is not None:
            if (p := schema["properties"]) is None:
                raise WeaviateInvalidInputError(
                    "Cannot update property descriptions without existing properties in the schema"
                )
            props = {prop["name"]: prop for prop in p}
            for prop_name, prop_desc in self.property_descriptions.items():
                if prop_name not in props:
                    raise WeaviateInvalidInputError(
                        f"Property {prop_name} does not exist in the existing properties"
                    )
                # `props` maps to the same dict objects held by the schema, so this edits the schema.
                props[prop_name]["description"] = prop_desc
        if self.invertedIndexConfig is not None:
            schema["invertedIndexConfig"] = self.invertedIndexConfig.merge_with_existing(
                schema["invertedIndexConfig"]
            )
        if self.replicationConfig is not None:
            schema["replicationConfig"] = self.replicationConfig.merge_with_existing(
                schema["replicationConfig"]
            )
        if self.multiTenancyConfig is not None:
            schema["multiTenancyConfig"] = self.multiTenancyConfig.merge_with_existing(
                schema["multiTenancyConfig"]
            )
        if self.vectorIndexConfig is not None:
            self.__check_quantizers(self.vectorIndexConfig.quantizer, schema["vectorIndexConfig"])
            schema["vectorIndexConfig"] = self.vectorIndexConfig.merge_with_existing(
                schema["vectorIndexConfig"]
            )
        if self.generativeConfig is not None:
            # clear any existing generative config
            if "moduleConfig" in schema:
                schema["moduleConfig"] = {
                    k: v for k, v in schema["moduleConfig"].items() if "generative" not in k
                }
            self.__add_to_module_config(
                schema,
                self.generativeConfig.generative.value,
                self.generativeConfig._to_dict(),
            )
        if self.rerankerConfig is not None:
            # clear any existing reranker config
            if "moduleConfig" in schema:
                schema["moduleConfig"] = {
                    k: v for k, v in schema["moduleConfig"].items() if "reranker" not in k
                }
            self.__add_to_module_config(
                schema,
                self.rerankerConfig.reranker.value,
                self.rerankerConfig._to_dict(),
            )
        if self.vectorizerConfig is not None:
            if isinstance(self.vectorizerConfig, _VectorIndexConfigUpdate):
                self.__check_quantizers(
                    self.vectorizerConfig.quantizer, schema["vectorIndexConfig"]
                )
                schema["vectorIndexConfig"] = self.vectorizerConfig.merge_with_existing(
                    schema["vectorIndexConfig"]
                )
            else:
                for vc in self.vectorizerConfig:
                    self.__merge_named_vector_index(schema, vc)
        if self.vectorConfig is not None:
            vcs = (
                [self.vectorConfig]
                if isinstance(self.vectorConfig, _VectorConfigUpdate)
                else self.vectorConfig
            )
            for vc in vcs:
                self.__merge_named_vector_index(schema, vc)
        return schema

    @staticmethod
    def __add_to_module_config(
        return_dict: Dict[str, Any], addition_key: str, addition_val: Dict[str, Any]
    ) -> None:
        """Store `addition_val` under `return_dict["moduleConfig"][addition_key]`, creating the mapping if needed."""
        if "moduleConfig" not in return_dict:
            return_dict["moduleConfig"] = {addition_key: addition_val}
        else:
            return_dict["moduleConfig"][addition_key] = addition_val
@dataclass
class _BM25Config(_ConfigBase):
    """Hold the `b` and `k1` values of a BM25 configuration.

    Used as the type of the `bm25` field of `_InvertedIndexConfig`.
    """

    b: float
    k1: float


# Public alias of the private dataclass.
BM25Config = _BM25Config
@dataclass
class _StopwordsConfig(_ConfigBase):
    """Hold a stopword preset plus optional lists of added and removed words.

    Used as the type of the `stopwords` field of `_InvertedIndexConfig`.
    """

    preset: StopwordsPreset
    additions: Optional[List[str]]
    removals: Optional[List[str]]


# Public alias of the private dataclass.
StopwordsConfig = _StopwordsConfig
@dataclass
class _InvertedIndexConfig(_ConfigBase):
    """Hold the inverted index settings of a collection.

    Used as the type of the `inverted_index_config` field of `_CollectionConfig`.
    """

    bm25: BM25Config
    cleanup_interval_seconds: int
    index_null_state: bool
    index_property_length: bool
    index_timestamps: bool
    stopwords: StopwordsConfig


# Public alias of the private dataclass.
InvertedIndexConfig = _InvertedIndexConfig
@dataclass
class _MultiTenancyConfig(_ConfigBase):
    """Hold the multi-tenancy flags of a collection.

    Used as the type of the `multi_tenancy_config` field of `_CollectionConfig`.
    """

    enabled: bool
    auto_tenant_creation: bool
    auto_tenant_activation: bool


# Public alias of the private dataclass.
MultiTenancyConfig = _MultiTenancyConfig
@dataclass
class _PropertyVectorizerConfig:
    """Hold the per-property vectorizer flags.

    `_Property.to_dict` writes these out as the `"skip"` and `"vectorizePropertyName"`
    entries of a property's module config.
    """

    skip: bool
    vectorize_property_name: bool


# Public alias of the private dataclass.
PropertyVectorizerConfig = _PropertyVectorizerConfig
@dataclass
class _NestedProperty(_ConfigBase):
    """Describe a property nested inside another property; may itself hold nested properties."""

    data_type: DataType
    description: Optional[str]
    index_filterable: bool
    index_searchable: bool
    name: str
    nested_properties: Optional[List["NestedProperty"]]
    tokenization: Optional[Tokenization]

    def to_dict(self) -> Dict[str, Any]:
        """Return the base dictionary with `dataType` as a one-element list and children serialised recursively.

        `nestedProperties` is only written when at least one nested property exists.
        """
        serialized = super().to_dict()
        serialized["dataType"] = [str(self.data_type.value)]
        children = self.nested_properties
        if children:
            serialized["nestedProperties"] = [child.to_dict() for child in children]
        return serialized


# Public alias of the private dataclass.
NestedProperty = _NestedProperty
@dataclass
class _PropertyBase(_ConfigBase):
    """Hold the name and optional description shared by `_Property` and `_ReferenceProperty`."""

    name: str
    description: Optional[str]

    def to_dict(self) -> Dict[str, Any]:
        """Return `{"name": ...}`, plus `"description"` when one is set."""
        if self.description is None:
            return {"name": self.name}
        return {"name": self.name, "description": self.description}
@dataclass
class _Property(_PropertyBase):
    """Describe a data property of a collection, including its index and vectorizer settings."""

    data_type: DataType
    index_filterable: bool
    index_range_filters: bool
    index_searchable: bool
    nested_properties: Optional[List[NestedProperty]]
    tokenization: Optional[Tokenization]
    vectorizer_config: Optional[PropertyVectorizerConfig]
    vectorizer: Optional[str]
    vectorizer_configs: Optional[Dict[str, PropertyVectorizerConfig]]

    def to_dict(self) -> Dict[str, Any]:
        """Return the property as a dictionary with camelCase keys.

        `moduleConfig` is built from `vectorizer` / `vectorizer_config`, and is replaced
        entirely by `vectorizer_configs` when that mapping is set. It is only written when
        it ends up non-empty.
        """

        def as_module_entry(cfg: PropertyVectorizerConfig) -> Dict[str, Any]:
            return {"skip": cfg.skip, "vectorizePropertyName": cfg.vectorize_property_name}

        out = super().to_dict()
        out.update(
            {
                "dataType": [self.data_type.value],
                "indexFilterable": self.index_filterable,
                "indexSearchable": self.index_searchable,
                "indexRangeFilters": self.index_range_filters,
                "tokenization": self.tokenization.value if self.tokenization else None,
            }
        )

        nested = self.nested_properties
        if nested:
            out["nestedProperties"] = [child.to_dict() for child in nested]

        module_config: Dict[str, Any] = {}
        if self.vectorizer is not None:
            module_config[self.vectorizer] = {}
        if self.vectorizer_config is not None:
            assert self.vectorizer is not None
            module_config[self.vectorizer] = as_module_entry(self.vectorizer_config)
        if self.vectorizer_configs is not None:
            # Per-vectorizer settings take over from the single-vectorizer entry built above.
            module_config = {
                name: as_module_entry(cfg) for name, cfg in self.vectorizer_configs.items()
            }

        if module_config:
            out["moduleConfig"] = module_config
        return out


# Public alias of the private dataclass.
PropertyConfig = _Property
@dataclass
class _ReferenceProperty(_PropertyBase):
    """Describe a cross-reference property by the collections it targets."""

    target_collections: List[str]

    def to_dict(self) -> Dict[str, Any]:
        """Return the base dictionary with the target collections stored under `dataType`."""
        return {**super().to_dict(), "dataType": self.target_collections}


# Public alias of the private dataclass.
ReferencePropertyConfig = _ReferenceProperty
@dataclass
class _ReplicationConfig(_ConfigBase):
    """Hold the replication settings of a collection.

    Used as the type of the `replication_config` field of `_CollectionConfig`.
    """

    factor: int
    async_enabled: bool
    deletion_strategy: ReplicationDeletionStrategy


# Public alias of the private dataclass.
ReplicationConfig = _ReplicationConfig
@dataclass
class _ShardingConfig(_ConfigBase):
    """Hold the sharding settings of a collection.

    Used as the type of the (optional) `sharding_config` field of `_CollectionConfig`.
    """

    virtual_per_physical: int
    desired_count: int
    actual_count: int
    desired_virtual_count: int
    actual_virtual_count: int
    key: str
    strategy: str
    function: str


# Public alias of the private dataclass.
ShardingConfig = _ShardingConfig
@dataclass
class _PQEncoderConfig(_ConfigBase):
    """Hold the encoder type and distribution of a PQ configuration."""

    type_: PQEncoderType
    distribution: PQEncoderDistribution

    def to_dict(self) -> Dict[str, Any]:
        """Return the base dictionary with the `type` and `distribution` entries converted via `str()`."""
        ret_dict = super().to_dict()
        # Each key is popped and re-inserted, in this order, with its stringified value.
        for key in ("type", "distribution"):
            ret_dict[key] = str(ret_dict.pop(key))
        return ret_dict


# Public alias of the private dataclass.
PQEncoderConfig = _PQEncoderConfig
@dataclass
class _PQConfig(_ConfigBase):
    """Hold the settings of a PQ quantizer.

    `_VectorIndexConfig.to_dict` writes an instance of this class under the `"pq"` key.
    """

    internal_bit_compression: bool
    segments: int
    centroids: int
    training_limit: int
    encoder: PQEncoderConfig

    @property
    def bit_compression(self) -> bool:
        """Return `internal_bit_compression` after calling `_Warnings.bit_compression_in_pq_config()`."""
        # NOTE(review): presumably a deprecation warning for this accessor — confirm in `_Warnings`.
        _Warnings.bit_compression_in_pq_config()
        return self.internal_bit_compression


# Public alias of the private dataclass.
PQConfig = _PQConfig
@dataclass
class _BQConfig(_ConfigBase):
    """Hold the settings of a BQ quantizer.

    `_VectorIndexConfig.to_dict` writes an instance of this class under the `"bq"` key.
    """

    cache: Optional[bool]
    rescore_limit: int
@dataclass
class _SQConfig(_ConfigBase):
    """Hold the settings of an SQ quantizer.

    `_VectorIndexConfig.to_dict` writes an instance of this class under the `"sq"` key.
    """

    cache: Optional[bool]
    rescore_limit: int
    training_limit: int
@dataclass
class _RQConfig(_ConfigBase):
    """Hold the settings of an RQ quantizer.

    `_VectorIndexConfig.to_dict` writes an instance of this class under the `"rq"` key.
    """

    bits: Optional[int]
    rescore_limit: int
# Public aliases of the private quantizer dataclasses.
BQConfig = _BQConfig
SQConfig = _SQConfig
RQConfig = _RQConfig
@dataclass
class _MuveraConfig(_ConfigBase):
    """Hold the Muvera settings; every field is optional.

    Used as the type of the `encoding` field of `_MultiVectorConfig`.
    """

    enabled: Optional[bool]
    ksim: Optional[int]
    dprojections: Optional[int]
    repetitions: Optional[int]


# Public alias of the private dataclass.
MuveraConfig = _MuveraConfig
@dataclass
class _MultiVectorConfig(_ConfigBase):
    """Hold the multi-vector settings of a vector index: an optional encoding and an aggregation name.

    Used as the type of the `multi_vector` field of `_VectorIndexConfig`.
    """

    encoding: Optional[_MuveraConfig]
    aggregation: str


# Public alias of the private dataclass (note the shorter public name).
MultiVector = _MultiVectorConfig
@dataclass
class _VectorIndexConfig(_ConfigBase):
    """Hold the multi-vector and quantizer settings shared by the HNSW and flat index configs."""

    multi_vector: Optional[_MultiVectorConfig]
    quantizer: Optional[Union[PQConfig, BQConfig, SQConfig, RQConfig]]

    def to_dict(self) -> Dict[str, Any]:
        """Return the base dictionary with the quantizer moved under its own key.

        When `quantizer` is a PQ, BQ, SQ or RQ config, the `"quantizer"` entry is replaced by
        a `"pq"`, `"bq"`, `"sq"` or `"rq"` entry holding the same values plus `"enabled": True`.
        """
        out = super().to_dict()
        key_by_quantizer = (
            (_PQConfig, "pq"),
            (_BQConfig, "bq"),
            (_SQConfig, "sq"),
            (_RQConfig, "rq"),
        )
        for quantizer_cls, key in key_by_quantizer:
            if isinstance(self.quantizer, quantizer_cls):
                out[key] = {**out.pop("quantizer"), "enabled": True}
                break
        return out
@dataclass
class _VectorIndexConfigHNSW(_VectorIndexConfig):
    """Hold the settings of an HNSW vector index, on top of the fields of `_VectorIndexConfig`."""

    cleanup_interval_seconds: int
    distance_metric: VectorDistances
    dynamic_ef_min: int
    dynamic_ef_max: int
    dynamic_ef_factor: int
    ef: int
    ef_construction: int
    filter_strategy: VectorFilterStrategy
    flat_search_cutoff: int
    max_connections: int
    skip: bool
    vector_cache_max_objects: int

    @staticmethod
    def vector_index_type() -> str:
        """Return the string value of `VectorIndexType.HNSW`."""
        return VectorIndexType.HNSW.value


# Public alias of the private dataclass.
VectorIndexConfigHNSW = _VectorIndexConfigHNSW
@dataclass
class _VectorIndexConfigFlat(_VectorIndexConfig):
    """Hold the settings of a flat vector index, on top of the fields of `_VectorIndexConfig`."""

    distance_metric: VectorDistances
    vector_cache_max_objects: int

    @staticmethod
    def vector_index_type() -> str:
        """Return the string value of `VectorIndexType.FLAT`."""
        return VectorIndexType.FLAT.value


# Public alias of the private dataclass.
VectorIndexConfigFlat = _VectorIndexConfigFlat
@dataclass
class _VectorIndexConfigDynamic(_ConfigBase):
    """Hold the settings of a dynamic vector index: optional HNSW and flat configs plus a threshold.

    Unlike the HNSW and flat configs, this class derives from `_ConfigBase` directly and
    therefore has no `quantizer` or `multi_vector` field of its own.
    """

    distance_metric: VectorDistances
    hnsw: Optional[VectorIndexConfigHNSW]
    flat: Optional[VectorIndexConfigFlat]
    threshold: Optional[int]

    @staticmethod
    def vector_index_type() -> str:
        """Return the string value of `VectorIndexType.DYNAMIC`."""
        return VectorIndexType.DYNAMIC.value


# Public alias of the private dataclass.
VectorIndexConfigDynamic = _VectorIndexConfigDynamic
@dataclass
class _GenerativeConfig(_ConfigBase):
    """Hold the generative module name and its settings.

    `_CollectionConfig.to_dict` stores `model` under `moduleConfig`, keyed by `generative`.
    """

    generative: Union[GenerativeSearches, str]
    model: Dict[str, Any]


# Public alias of the private dataclass.
GenerativeConfig = _GenerativeConfig
@dataclass
class _VectorizerConfig(_ConfigBase):
    """Hold the vectorizer module name, its settings, and the vectorize-collection-name flag.

    `_CollectionConfig.to_dict` stores `model` under `moduleConfig`, keyed by `vectorizer`,
    and adds the flag to that entry as `"vectorizeClassName"`.
    """

    vectorizer: Union[Vectorizers, str]
    model: Dict[str, Any]
    vectorize_collection_name: bool


# Public alias of the private dataclass.
VectorizerConfig = _VectorizerConfig
@dataclass
class _RerankerConfig(_ConfigBase):
    """Hold the reranker module name and its settings.

    `_CollectionConfig.to_dict` stores `model` under `moduleConfig`, keyed by `reranker`.
    """

    model: Dict[str, Any]
    reranker: Union[Rerankers, str]


# Public alias of the private dataclass.
RerankerConfig = _RerankerConfig
@dataclass
class _NamedVectorizerConfig(_ConfigBase):
    """Hold the vectorizer of one named vector: module name, settings and optional source properties."""

    vectorizer: Union[Vectorizers, str]
    model: Dict[str, Any]
    source_properties: Optional[List[str]]

    def to_dict(self) -> Dict[str, Any]:
        """Return the base dictionary with a `sourceProperties` entry, if present, renamed to `properties`."""
        ret_dict = super().to_dict()
        try:
            ret_dict["properties"] = ret_dict.pop("sourceProperties")
        except KeyError:
            # No source properties in the serialised form; nothing to rename.
            pass
        return ret_dict
@dataclass
class _NamedVectorConfig(_ConfigBase):
    """Hold the configuration of one named vector: its vectorizer and its vector index config."""

    vectorizer: _NamedVectorizerConfig
    vector_index_config: Union[
        VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic
    ]

    def to_dict(self) -> Dict:
        """Return the base dictionary plus a `vectorIndexType` entry taken from the index config."""
        return {
            **super().to_dict(),
            "vectorIndexType": self.vector_index_config.vector_index_type(),
        }


# Public alias of the private dataclass.
NamedVectorConfig = _NamedVectorConfig
@dataclass
class _CollectionConfig(_ConfigBase):
    """Hold the full configuration of a collection."""

    name: str
    description: Optional[str]
    generative_config: Optional[GenerativeConfig]
    inverted_index_config: InvertedIndexConfig
    multi_tenancy_config: MultiTenancyConfig
    properties: List[PropertyConfig]
    references: List[ReferencePropertyConfig]
    replication_config: ReplicationConfig
    reranker_config: Optional[RerankerConfig]
    sharding_config: Optional[ShardingConfig]
    vector_index_config: Union[
        VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, None
    ]
    vector_index_type: Optional[VectorIndexType]
    vectorizer_config: Optional[VectorizerConfig]
    vectorizer: Optional[Union[Vectorizers, str]]
    vector_config: Optional[Dict[str, _NamedVectorConfig]]

    def to_dict(self) -> dict:
        """Return the configuration reshaped into a schema-style dictionary.

        Starting from the base dictionary:

        * `name` is renamed to `class`;
        * the generative, vectorizer and reranker sections are folded into `moduleConfig`,
          keyed by module name;
        * each named vector's vectorizer is rewritten as `{module_name: settings}`, and the
          top-level `vectorIndexType` / `vectorIndexConfig` entries are dropped when named
          vectors are present;
        * `properties` holds the data properties followed by the references, and the
          separate `references` entry is removed.
        """
        out = super().to_dict()
        out["class"] = out.pop("name")

        module_config: Dict[str, Any] = {}
        out["moduleConfig"] = module_config
        sections = (
            ("generativeConfig", "generative"),
            ("vectorizerConfig", "vectorizer"),
            ("rerankerConfig", "reranker"),
        )
        for section_key, module_key in sections:
            if section_key not in out:
                continue
            section = out.pop(section_key)
            module_name = section[module_key]
            module_config[module_name] = section.get("model", {})
            vectorize_collection_name = section.get("vectorizeCollectionName", None)
            if vectorize_collection_name is not None:
                module_config[module_name]["vectorizeClassName"] = vectorize_collection_name

        if "vectorConfig" in out:
            for vector_entry in out["vectorConfig"].values():
                vectorizer_entry = vector_entry["vectorizer"]
                extra_values = vectorizer_entry.pop("model", {})
                vectorizer_name = vectorizer_entry.pop("vectorizer")
                vector_entry["vectorizer"] = {
                    vectorizer_name: {**extra_values, **vectorizer_entry}
                }

            # With named vectors present, drop the single-vector defaults; either key may
            # already be missing, in which case nothing happens.
            for single_vector_key in ("vectorIndexType", "vectorIndexConfig"):
                out.pop(single_vector_key, None)

        out["properties"] = [
            prop.to_dict() for prop in (*self.properties, *self.references)
        ]
        out.pop("references")
        return out


# Public alias of the private dataclass.
CollectionConfig = _CollectionConfig
@dataclass
class _CollectionConfigSimple(_ConfigBase):
    """Hold a reduced collection configuration.

    Every field here also exists on `_CollectionConfig`; the index, tenancy, replication and
    sharding settings are left out.
    """

    name: str
    description: Optional[str]
    generative_config: Optional[GenerativeConfig]
    properties: List[PropertyConfig]
    references: List[ReferencePropertyConfig]
    reranker_config: Optional[RerankerConfig]
    vectorizer_config: Optional[VectorizerConfig]
    vectorizer: Optional[Union[Vectorizers, str]]
    vector_config: Optional[Dict[str, _NamedVectorConfig]]


# Public alias of the private dataclass.
CollectionConfigSimple = _CollectionConfigSimple

# Allowed values for the `status` field of `_ShardStatus`.
ShardTypes = Literal["READONLY", "READY", "INDEXING"]
@dataclass
class _ShardStatus:
    """Hold the name, status and vector queue size of a shard."""

    name: str
    status: ShardTypes  # one of the `ShardTypes` literals
    vector_queue_size: int


# Public alias of the private dataclass.
ShardStatus = _ShardStatus
class Property(_ConfigCreateModel):
    """This class defines the structure of a data property that a collection can have within Weaviate.

    Attributes:
        name: The name of the property, REQUIRED.
        data_type: The data type of the property, REQUIRED.
        description: A description of the property.
        index_filterable: Whether the property should be filterable in the inverted index.
        index_range_filters: Whether the property should support range filters in the inverted index.
        index_searchable: Whether the property should be searchable in the inverted index.
        nested_properties: Nested properties for data types `OBJECT` and `OBJECT_ARRAY`.
        skip_vectorization: Whether to skip vectorization of the property. Defaults to `False`.
        tokenization: The tokenization method to use for the inverted index. Defaults to `None`.
        vectorize_property_name: Whether to vectorize the property name. Defaults to `True`.
    """

    name: str
    dataType: DataType = Field(default=..., alias="data_type")
    description: Optional[str] = Field(default=None)
    indexFilterable: Optional[bool] = Field(default=None, alias="index_filterable")
    indexSearchable: Optional[bool] = Field(default=None, alias="index_searchable")
    indexRangeFilters: Optional[bool] = Field(default=None, alias="index_range_filters")
    nestedProperties: Optional[Union["Property", List["Property"]]] = Field(
        default=None, alias="nested_properties"
    )
    skip_vectorization: bool = Field(default=False)
    tokenization: Optional[Tokenization] = Field(default=None)
    vectorize_property_name: bool = Field(default=True)

    @field_validator("name")
    def _check_name(cls, v: str) -> str:
        """Reject the reserved property names `id` and `vector`."""
        reserved_names = ["id", "vector"]
        if v in reserved_names:
            raise ValueError(f"Property name '{v}' is reserved and cannot be used")
        return v

    def _to_dict(
        self, vectorizers: Optional[Sequence[Union[Vectorizers, _EnumLikeStr]]] = None
    ) -> Dict[str, Any]:
        """Serialise the property, adding a `moduleConfig` entry per active vectorizer.

        Args:
            vectorizers: Vectorizers to emit `skip` / `vectorizePropertyName` settings for.
                `None` entries and `Vectorizers.NONE` are ignored.

        Returns:
            The property as a dictionary with `dataType` wrapped in a list, without the
            `skip_vectorization` and `vectorize_property_name` keys, and with any nested
            properties serialised as a list under `nestedProperties`.
        """
        ret_dict = super()._to_dict()
        ret_dict["dataType"] = [ret_dict["dataType"]]

        for vectorizer in vectorizers if vectorizers is not None else ():
            if vectorizer is not None and vectorizer != Vectorizers.NONE:
                ret_dict.setdefault("moduleConfig", {})[vectorizer.value] = {
                    "skip": self.skip_vectorization,
                    "vectorizePropertyName": self.vectorize_property_name,
                }

        # These two settings only ever appear inside `moduleConfig`, never at the top level.
        for internal_key in ("skip_vectorization", "vectorize_property_name"):
            del ret_dict[internal_key]

        nested = self.nestedProperties
        if nested is not None:
            nested_list = nested if isinstance(nested, list) else [nested]
            ret_dict["nestedProperties"] = [prop._to_dict() for prop in nested_list]
        return ret_dict
class _ReferencePropertyBase(_ConfigCreateModel):
    """Base of the cross-reference property classes; holds and validates the property name."""

    name: str

    @field_validator("name")
    def check_name(cls, v: str) -> str:
        """Reject the reserved property names `id` and `vector`."""
        reserved_names = ["id", "vector"]
        if v in reserved_names:
            raise ValueError(f"Property name '{v}' is reserved and cannot be used")
        return v
class _ReferencePropertyMultiTarget(_ReferencePropertyBase):
    """This class defines properties that are cross references to multiple target collections.

    Use this class when you want to create a cross-reference in the collection's config that is capable
    of having cross-references to multiple other collections at once.

    Attributes:
        name: The name of the property, REQUIRED.
        target_collections: The names of the target collections, REQUIRED.
        description: A description of the property.
    """

    target_collections: List[str]
    description: Optional[str] = Field(default=None)

    def _to_dict(self) -> Dict[str, Any]:
        """Serialise the reference, replacing `target_collections` with a capitalised `dataType` list."""
        ret_dict = super()._to_dict()
        capitalized_targets = [
            _capitalize_first_letter(target) for target in self.target_collections
        ]
        ret_dict["dataType"] = capitalized_targets
        ret_dict.pop("target_collections")
        return ret_dict
class ReferenceProperty(_ReferencePropertyBase):
    """This class defines properties that are cross references to a single target collection.

    Use this class when you want to create a cross-reference in the collection's config that is capable
    of having only cross-references to a single other collection.

    Attributes:
        name: The name of the property, REQUIRED.
        target_collection: The name of the target collection, REQUIRED.
        description: A description of the property.
    """

    target_collection: str
    description: Optional[str] = Field(default=None)

    # Exposes the multi-target variant as `ReferenceProperty.MultiTarget`.
    MultiTarget: ClassVar[Type[_ReferencePropertyMultiTarget]] = _ReferencePropertyMultiTarget

    def _to_dict(self) -> Dict[str, Any]:
        """Serialise the reference, replacing `target_collection` with a one-element capitalised `dataType` list."""
        ret_dict = super()._to_dict()
        capitalized_target = _capitalize_first_letter(self.target_collection)
        ret_dict["dataType"] = [capitalized_target]
        ret_dict.pop("target_collection")
        return ret_dict
# Any of the property classes: a data property, a single-target reference or a
# multi-target reference.
PropertyType = Union[Property, ReferenceProperty, _ReferencePropertyMultiTarget]

# Type variable bound to `_CollectionConfigCreate`; the bound is given as a string, so it
# is a forward reference.
T = TypeVar("T", bound="_CollectionConfigCreate")
[docs] class _CollectionConfigCreate(_ConfigCreateModel): name: str properties: Optional[Sequence[Property]] = Field(default=None) references: Optional[List[_ReferencePropertyBase]] = Field(default=None) description: Optional[str] = Field(default=None) invertedIndexConfig: Optional[_InvertedIndexConfigCreate] = Field( default=None, alias="inverted_index_config" ) multiTenancyConfig: Optional[_MultiTenancyConfigCreate] = Field( default=None, alias="multi_tenancy_config" ) replicationConfig: Optional[_ReplicationConfigCreate] = Field( default=None, alias="replication_config" ) shardingConfig: Optional[_ShardingConfigCreate] = Field(default=None, alias="sharding_config") vectorIndexConfig: Optional[_VectorIndexConfigCreate] = Field( default=None, alias="vector_index_config" ) vectorizerConfig: Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate], None] = Field( default=None, alias="vectorizer_config" ) vectorConfig: Union[_VectorConfigCreate, List[_VectorConfigCreate], None] = Field( default=None, alias="vector_config", validate_default=True, ) generativeSearch: Optional[_GenerativeProvider] = Field(default=None, alias="generative_config") rerankerConfig: Optional[_RerankerProvider] = Field(default=None, alias="reranker_config")
[docs] def model_post_init(self, __context: Any) -> None: self.name = _capitalize_first_letter(self.name)
@field_validator("vectorizerConfig", "vectorConfig", mode="after") @classmethod def validate_vector_names( cls, v: Union[_VectorizerConfigCreate, _NamedVectorConfigCreate, List[_NamedVectorConfigCreate]], info: ValidationInfo, ) -> Union[_VectorizerConfigCreate, _NamedVectorConfigCreate, List[_NamedVectorConfigCreate]]: if isinstance(v, list): names = [vc.name for vc in v] if len(names) != len(set(names)): dups = {name for name in names if names.count(name) > 1} raise ValueError(f"Vector config names must be unique. Found duplicates: {dups}") return v @field_validator("vectorConfig", mode="after") @classmethod def inject_vector_config_none( cls, v: Union[_VectorConfigCreate, List[_VectorConfigCreate], None], info: ValidationInfo, ) -> Union[_VectorConfigCreate, List[_VectorConfigCreate], None]: if ( v is None and info.data["vectorizerConfig"] is None and info.data["vectorIndexConfig"] is None ): return _VectorConfigCreate( name="default", vectorizer=_VectorizerConfigCreate(vectorizer=Vectorizers.NONE) ) return v @staticmethod def __add_to_module_config( return_dict: Dict[str, Any], addition_key: str, addition_val: Dict[str, Any] ) -> None: if "moduleConfig" not in return_dict: return_dict["moduleConfig"] = {addition_key: addition_val} else: return_dict["moduleConfig"][addition_key] = addition_val
[docs] def _to_dict(self) -> Dict[str, Any]: ret_dict: Dict[str, Any] = {} for cls_field in type(self).model_fields: val = getattr(self, cls_field) if cls_field in ["name", "model", "properties", "references"] or val is None: continue elif isinstance(val, (bool, float, str, int)): ret_dict[cls_field] = str(val) elif isinstance(val, _GenerativeProvider): self.__add_to_module_config(ret_dict, val.generative.value, val._to_dict()) elif isinstance(val, _RerankerProvider): self.__add_to_module_config(ret_dict, val.reranker.value, val._to_dict()) elif isinstance(val, _VectorizerConfigCreate): ret_dict["vectorizer"] = val.vectorizer.value if val.vectorizer != Vectorizers.NONE: self.__add_to_module_config(ret_dict, val.vectorizer.value, val._to_dict()) elif isinstance(val, _VectorIndexConfigCreate): ret_dict["vectorIndexType"] = val.vector_index_type().value ret_dict[cls_field] = val._to_dict() elif isinstance(val, _VectorConfigCreate): ret_dict["vectorConfig"] = {val.name or "default": val._to_dict()} elif ( isinstance(val, list) and len(val) > 0 and all(isinstance(item, _NamedVectorConfigCreate) for item in val) ): val = cast(List[_NamedVectorConfigCreate], val) ret_dict["vectorConfig"] = {item.name: item._to_dict() for item in val} elif ( isinstance(val, list) and len(val) > 0 and all(isinstance(item, _VectorConfigCreate) for item in val) ): val = cast(List[_VectorConfigCreate], val) ret_dict["vectorConfig"] = {} for item in val: if item.name is None: raise WeaviateInvalidInputError( "Vector config name must be set when specifying multiple vectors" ) ret_dict["vectorConfig"][item.name] = item._to_dict() else: assert isinstance(val, _ConfigCreateModel) ret_dict[cls_field] = val._to_dict() if self.vectorIndexConfig is None and "vectorConfig" not in ret_dict: ret_dict["vectorIndexType"] = VectorIndexType.HNSW ret_dict["class"] = self.name self.__add_props(self.properties, ret_dict) self.__add_props(self.references, ret_dict) return ret_dict
def __add_props(
    self,
    props: Optional[
        Union[
            Sequence[Union[Property, _ReferencePropertyBase]],
            List[_ReferencePropertyBase],
        ]
    ],
    ret_dict: Dict[str, Any],
) -> None:
    """Append the dictionary form of `props` to `ret_dict["properties"]`.

    `Property` entries are serialised with the vectorizers taken from
    `self.vectorizerConfig`; every other entry is serialised without arguments.
    `ret_dict` is modified in place. Nothing happens when `props` is `None`.

    Args:
        props: The properties or references to add, or `None`.
        ret_dict: The dictionary being assembled.
    """
    if props is None:
        return

    # The vectorizer list does not depend on the individual property, so it is
    # computed once instead of once per property.
    # NOTE(review): the same list object is now handed to every
    # `Property._to_dict` call - assumes that method does not mutate it.
    vectorizer_config = self.vectorizerConfig
    if isinstance(vectorizer_config, _VectorizerConfigCreate):
        vectorizers = [vectorizer_config.vectorizer]
    elif vectorizer_config is None:
        vectorizers = None
    else:
        vectorizers = [conf.vectorizer.vectorizer for conf in vectorizer_config]

    existing_props = ret_dict.get("properties", [])
    existing_props.extend(
        [
            prop._to_dict(vectorizers) if isinstance(prop, Property) else prop._to_dict()
            for prop in props
        ]
    )
    ret_dict["properties"] = existing_props
class Configure:
    """Factory for the configuration objects accepted by `collections.create()`.

    E.g., `.multi_tenancy()` will return a `MultiTenancyConfigCreate` object to be used in the
    `multi_tenancy_config` argument.

    Each static method provides options specific to the configuration type named by the method.
    Under-the-hood data validation steps will ensure that any mis-specifications are caught
    before the request is sent to Weaviate.
    """

    Generative = _Generative
    Reranker = _Reranker
    Vectorizer = _Vectorizer
    VectorIndex = _VectorIndex
    NamedVectors = _NamedVectors
    Vectors = _Vectors
    MultiVectors = _MultiVectors

    @staticmethod
    def inverted_index(
        bm25_b: Optional[float] = None,
        bm25_k1: Optional[float] = None,
        cleanup_interval_seconds: Optional[int] = None,
        index_timestamps: Optional[bool] = None,
        index_property_length: Optional[bool] = None,
        index_null_state: Optional[bool] = None,
        stopwords_preset: Optional[StopwordsPreset] = None,
        stopwords_additions: Optional[List[str]] = None,
        stopwords_removals: Optional[List[str]] = None,
    ) -> _InvertedIndexConfigCreate:
        """Create an `InvertedIndexConfigCreate` object describing the keyword search configuration.

        `bm25_b` and `bm25_k1` must either both be given or both be omitted.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details!

        Raises:
            ValueError: If only one of `bm25_b` and `bm25_k1` is specified.
        """  # noqa: D417 (missing argument descriptions in the docstring)
        b_missing = bm25_b is None
        k1_missing = bm25_k1 is None
        if b_missing != k1_missing:
            raise ValueError("bm25_b and bm25_k1 must be specified together")

        # Past the check above both values are present or both are absent.
        bm25_config = None if b_missing else _BM25ConfigCreate(b=bm25_b, k1=bm25_k1)
        stopwords_config = _StopwordsCreate(
            preset=stopwords_preset,
            additions=stopwords_additions,
            removals=stopwords_removals,
        )
        return _InvertedIndexConfigCreate(
            bm25=bm25_config,
            cleanupIntervalSeconds=cleanup_interval_seconds,
            indexTimestamps=index_timestamps,
            indexPropertyLength=index_property_length,
            indexNullState=index_null_state,
            stopwords=stopwords_config,
        )

    @staticmethod
    def multi_tenancy(
        enabled: bool = True,
        auto_tenant_creation: Optional[bool] = None,
        auto_tenant_activation: Optional[bool] = None,
    ) -> _MultiTenancyConfigCreate:
        """Create a `MultiTenancyConfigCreate` object describing the multi-tenancy configuration.

        Args:
            enabled: Whether multi-tenancy is enabled. Defaults to `True`.
            auto_tenant_creation: Automatically create nonexistent tenants during object creation.
                Defaults to `None`, which uses the server-defined default.
            auto_tenant_activation: Automatically turn tenants implicitly HOT when they are accessed.
                Defaults to `None`, which uses the server-defined default.
        """
        config = _MultiTenancyConfigCreate(
            enabled=enabled,
            autoTenantCreation=auto_tenant_creation,
            autoTenantActivation=auto_tenant_activation,
        )
        return config

    @staticmethod
    def replication(
        factor: Optional[int] = None,
        async_enabled: Optional[bool] = None,
        deletion_strategy: Optional[ReplicationDeletionStrategy] = None,
    ) -> _ReplicationConfigCreate:
        """Create a `ReplicationConfigCreate` object describing the replication configuration.

        NOTE: `async_enabled` is only available with WeaviateDB `>=v1.26.0`

        Args:
            factor: The replication factor.
            async_enabled: Enable async replication.
            deletion_strategy: How conflicts between different nodes about deleted objects are resolved.
        """
        config = _ReplicationConfigCreate(
            factor=factor,
            asyncEnabled=async_enabled,
            deletionStrategy=deletion_strategy,
        )
        return config

    @staticmethod
    def sharding(
        virtual_per_physical: Optional[int] = None,
        desired_count: Optional[int] = None,
        actual_count: Optional[int] = None,
        desired_virtual_count: Optional[int] = None,
        actual_virtual_count: Optional[int] = None,
    ) -> _ShardingConfigCreate:
        """Create a `ShardingConfigCreate` object describing the sharding configuration.

        NOTE: You can only use one of Sharding or Replication, not both.

        See [the docs](https://weaviate.io/developers/weaviate/concepts/replication-architecture#replication-vs-sharding) for more details.

        Args:
            virtual_per_physical: The number of virtual shards per physical shard.
            desired_count: The desired number of physical shards.
            actual_count: The actual number of physical shards. This is a read-only field so has no effect.
                It is kept for backwards compatibility but will be removed in a future release.
            desired_virtual_count: The desired number of virtual shards.
            actual_virtual_count: The actual number of virtual shards. This is a read-only field so has no effect.
                It is kept for backwards compatibility but will be removed in a future release.
        """
        # The two read-only arguments are accepted but never forwarded; a
        # deprecation warning is emitted for each one that was supplied.
        read_only_args = (
            ("actual_count", actual_count),
            ("actual_virtual_count", actual_virtual_count),
        )
        for arg_name, arg_value in read_only_args:
            if arg_value is not None:
                _Warnings.sharding_actual_count_is_deprecated(arg_name)

        return _ShardingConfigCreate(
            virtualPerPhysical=virtual_per_physical,
            desiredCount=desired_count,
            desiredVirtualCount=desired_virtual_count,
        )
class _VectorIndexQuantizerUpdate:
    """Factory for the quantizer update objects used inside a vector index update."""

    @staticmethod
    def pq(
        bit_compression: Optional[bool] = None,
        centroids: Optional[int] = None,
        encoder_distribution: Optional[PQEncoderDistribution] = None,
        encoder_type: Optional[PQEncoderType] = None,
        segments: Optional[int] = None,
        training_limit: Optional[int] = None,
        enabled: bool = True,
    ) -> _PQConfigUpdate:
        """Create a `_PQConfigUpdate` object to be used when updating the product quantization (PQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration in `collection.update()`.

        `bit_compression` is not forwarded; supplying it only triggers a warning.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        if bit_compression is not None:
            _Warnings.bit_compression_in_pq_config()

        # An encoder section is only produced when one of its settings was given.
        encoder = None
        if encoder_type is not None or encoder_distribution is not None:
            encoder = _PQEncoderConfigUpdate(type_=encoder_type, distribution=encoder_distribution)

        return _PQConfigUpdate(
            enabled=enabled,
            centroids=centroids,
            segments=segments,
            trainingLimit=training_limit,
            encoder=encoder,
        )

    @staticmethod
    def bq(rescore_limit: Optional[int] = None, enabled: bool = True) -> _BQConfigUpdate:
        """Create a `_BQConfigUpdate` object to be used when updating the binary quantization (BQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        config = _BQConfigUpdate(rescoreLimit=rescore_limit, enabled=enabled)
        return config

    @staticmethod
    def sq(
        rescore_limit: Optional[int] = None,
        training_limit: Optional[int] = None,
        enabled: bool = True,
    ) -> _SQConfigUpdate:
        """Create a `_SQConfigUpdate` object to be used when updating the scalar quantization (SQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        config = _SQConfigUpdate(
            enabled=enabled,
            rescoreLimit=rescore_limit,
            trainingLimit=training_limit,
        )
        return config

    @staticmethod
    def rq(
        rescore_limit: Optional[int] = None,
        enabled: bool = True,
    ) -> _RQConfigUpdate:
        """Create a `_RQConfigUpdate` object to be used when updating the Rotational quantization (RQ) configuration of Weaviate.

        Use this method when defining the `quantizer` argument in the `vector_index` configuration in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        config = _RQConfigUpdate(enabled=enabled, rescoreLimit=rescore_limit)
        return config
class _VectorIndexUpdate:
    """Factory for the vector index update objects (HNSW, flat and dynamic)."""

    Quantizer = _VectorIndexQuantizerUpdate

    @staticmethod
    def hnsw(
        dynamic_ef_factor: Optional[int] = None,
        dynamic_ef_min: Optional[int] = None,
        dynamic_ef_max: Optional[int] = None,
        ef: Optional[int] = None,
        flat_search_cutoff: Optional[int] = None,
        filter_strategy: Optional[VectorFilterStrategy] = None,
        vector_cache_max_objects: Optional[int] = None,
        quantizer: Optional[
            Union[_PQConfigUpdate, _BQConfigUpdate, _SQConfigUpdate, _RQConfigUpdate]
        ] = None,
    ) -> _VectorIndexConfigHNSWUpdate:
        """Create an `_VectorIndexConfigHNSWUpdate` object to update the configuration of the HNSW vector index.

        Use this method when defining the `vectorizer_config` argument in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        # Map the snake_case arguments onto the field names of the update model.
        settings = {
            "dynamicEfMin": dynamic_ef_min,
            "dynamicEfMax": dynamic_ef_max,
            "dynamicEfFactor": dynamic_ef_factor,
            "ef": ef,
            "filterStrategy": filter_strategy,
            "flatSearchCutoff": flat_search_cutoff,
            "vectorCacheMaxObjects": vector_cache_max_objects,
            "quantizer": quantizer,
        }
        return _VectorIndexConfigHNSWUpdate(**settings)

    @staticmethod
    def flat(
        vector_cache_max_objects: Optional[int] = None,
        quantizer: Optional[_BQConfigUpdate] = None,
    ) -> _VectorIndexConfigFlatUpdate:
        """Create an `_VectorIndexConfigFlatUpdate` object to update the configuration of the FLAT vector index.

        Use this method when defining the `vectorizer_config` argument in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        config = _VectorIndexConfigFlatUpdate(
            vectorCacheMaxObjects=vector_cache_max_objects,
            quantizer=quantizer,
        )
        return config

    @staticmethod
    def dynamic(
        *,
        threshold: Optional[int] = None,
        hnsw: Optional[_VectorIndexConfigHNSWUpdate] = None,
        flat: Optional[_VectorIndexConfigFlatUpdate] = None,
        quantizer: Optional[_BQConfigUpdate] = None,
    ) -> _VectorIndexConfigDynamicUpdate:
        """Create an `_VectorIndexConfigDynamicUpdate` object to update the configuration of the Dynamic vector index.

        Use this method when defining the `vectorizer_config` argument in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        config = _VectorIndexConfigDynamicUpdate(
            threshold=threshold,
            hnsw=hnsw,
            flat=flat,
            quantizer=quantizer,
        )
        return config
class Reconfigure:
    """Factory for the `xxxConfig` objects accepted by `collection.update()`.

    Each staticmethod provides options specific to the configuration type named by the method.
    Under-the-hood data validation steps will ensure that any mis-specifications are caught
    before the request is sent to Weaviate. Only those configurations that are mutable are
    available in this class. If you wish to update the configuration of an immutable aspect of
    your collection then you will have to delete the collection and re-create it with the new
    configuration.
    """

    NamedVectors = _NamedVectorsUpdate
    Vectors = _VectorsUpdate
    VectorIndex = _VectorIndexUpdate
    Generative = _Generative  # config is the same for create and update
    Reranker = _Reranker  # config is the same for create and update

    @staticmethod
    def inverted_index(
        bm25_b: Optional[float] = None,
        bm25_k1: Optional[float] = None,
        cleanup_interval_seconds: Optional[int] = None,
        stopwords_additions: Optional[List[str]] = None,
        stopwords_preset: Optional[StopwordsPreset] = None,
        stopwords_removals: Optional[List[str]] = None,
    ) -> _InvertedIndexConfigUpdate:
        """Create an `InvertedIndexConfigUpdate` object.

        Use this method when defining the `inverted_index_config` argument in `collection.update()`.

        Args:
            See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view!
        """  # noqa: D417 (missing argument descriptions in the docstring)
        # Both nested sections are always built, even when all their values are None.
        bm25_update = _BM25ConfigUpdate(b=bm25_b, k1=bm25_k1)
        stopwords_update = _StopwordsUpdate(
            preset=stopwords_preset,
            additions=stopwords_additions,
            removals=stopwords_removals,
        )
        return _InvertedIndexConfigUpdate(
            bm25=bm25_update,
            cleanupIntervalSeconds=cleanup_interval_seconds,
            stopwords=stopwords_update,
        )

    @staticmethod
    def replication(
        factor: Optional[int] = None,
        async_enabled: Optional[bool] = None,
        deletion_strategy: Optional[ReplicationDeletionStrategy] = None,
    ) -> _ReplicationConfigUpdate:
        """Create a `ReplicationConfigUpdate` object.

        Use this method when defining the `replication_config` argument in `collection.update()`.

        Args:
            factor: The replication factor.
            async_enabled: Enable async replication.
            deletion_strategy: How conflicts between different nodes about deleted objects are resolved.
        """
        config = _ReplicationConfigUpdate(
            factor=factor,
            asyncEnabled=async_enabled,
            deletionStrategy=deletion_strategy,
        )
        return config

    @staticmethod
    def multi_tenancy(
        auto_tenant_creation: Optional[bool] = None,
        auto_tenant_activation: Optional[bool] = None,
    ) -> _MultiTenancyConfigUpdate:
        """Create a `MultiTenancyConfigUpdate` object.

        Use this method when defining the `multi_tenancy` argument in `collection.update()`.

        Args:
            auto_tenant_creation: When set, implicitly creates nonexistent tenants during object creation
            auto_tenant_activation: Automatically turn tenants implicitly HOT when they are accessed.
                Defaults to `None`, which uses the server-defined default.
        """
        config = _MultiTenancyConfigUpdate(
            autoTenantCreation=auto_tenant_creation,
            autoTenantActivation=auto_tenant_activation,
        )
        return config