Source code for weaviate.collections.classes.config_vectorizers

import warnings
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union, cast

from deprecation import deprecated as docstring_deprecated
from pydantic import AnyHttpUrl, BaseModel, Field, field_validator
from typing_extensions import TypeAlias
from typing_extensions import deprecated as typing_deprecated

from weaviate.collections.classes.config_base import _ConfigCreateModel, _EnumLikeStr

from ...warnings import _Warnings

# Literal aliases naming known models / options. They act as type hints for the
# factory methods; where a signature is `Union[<alias>, str]` any other string
# is accepted too.

# Text-embedding model names hinted for `text2vec_cohere(model=...)`.
# See https://docs.cohere.com/docs/cohere-embed for reference
CohereModel: TypeAlias = Literal[
    "embed-v4.0",
    "embed-multilingual-v2.0",
    "embed-multilingual-v3.0",
    "embed-multilingual-light-v3.0",
    "small",
    "medium",
    "large",
    "multilingual-22-12",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-english-v3.0",
    "embed-english-light-v3.0",
]
# Model names hinted for `multi2vec_cohere(model=...)`.
CohereMultimodalModel: TypeAlias = Literal[
    "embed-v4.0",
    "embed-multilingual-v3.0",
    "embed-multilingual-light-v3.0",
    "embed-english-v3.0",
    "embed-english-light-v3.0",
]
# Allowed values of the `truncate` field on the Cohere config models.
CohereTruncation: TypeAlias = Literal["NONE", "START", "END", "LEFT", "RIGHT"]
# NOTE(review): not referenced in the visible part of this module; presumably
# hints the `model` argument of the OpenAI factory method - confirm.
OpenAIModel: TypeAlias = Literal[
    "text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"
]
# NOTE(review): not referenced in the visible part of this module; presumably
# hints the `model` argument of the Jina AI text factory method - confirm.
JinaModel: TypeAlias = Literal[
    "jina-embeddings-v2-base-en",
    "jina-embeddings-v2-small-en",
    "jina-embeddings-v2-base-zh",
    "jina-embeddings-v2-base-es",
    "jina-embeddings-v2-base-code",
    "jina-embeddings-v3",
    "jina-embeddings-v4",
]
# NOTE(review): presumably the Jina AI multimodal counterpart of `JinaModel` - confirm.
JinaMultimodalModel: TypeAlias = Literal["jina-clip-v1", "jina-clip-v2", "jina-embeddings-v4"]
# NOTE(review): not referenced in the visible part of this module; presumably
# hints the `model` argument of the Voyage AI text factory method - confirm.
VoyageModel: TypeAlias = Literal[
    "voyage-4",
    "voyage-4-lite",
    "voyage-4-large",
    "voyage-3.5",
    "voyage-3.5-lite",
    "voyage-3-large",
    "voyage-3",
    "voyage-3-lite",
    "voyage-context-3",
    "voyage-large-2",
    "voyage-code-2",
    "voyage-2",
    "voyage-law-2",
    "voyage-large-2-instruct",
    "voyage-finance-2",
    "voyage-multilingual-2",
]
# Model names hinted for `multi2vec_voyageai(model=...)`.
VoyageMultimodalModel: TypeAlias = Literal[
    "voyage-multimodal-3",
    "voyage-multimodal-3.5",
]
# Model names hinted for `text2vec_aws(model=...)`.
AWSModel: TypeAlias = Literal[
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]
# Service names hinted for `text2vec_aws(service=...)`.
AWSService: TypeAlias = Literal[
    "bedrock",
    "sagemaker",
]
# NOTE(review): not referenced in the visible part of this module; presumably
# hints the `model` argument of the Weaviate-hosted factory methods - confirm.
WeaviateModel: TypeAlias = Literal[
    "Snowflake/snowflake-arctic-embed-l-v2.0", "Snowflake/snowflake-arctic-embed-m-v1.5"
]
# NOTE(review): presumably the multimodal counterpart of `WeaviateModel` - confirm.
WeaviateMultimodalModel: TypeAlias = Literal["ModernVBERT/colmodernvbert"]


class Vectorizers(str, Enum):
    """The available vectorization modules in Weaviate.

    These modules encode binary data into lists of floats called vectors.
    See the [docs](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules) for more details.

    Each member's value is the module name string, e.g. `TEXT2VEC_AWS` is `"text2vec-aws"`.

    Attributes:
        NONE: No vectorizer.
        TEXT2COLBERT_JINAAI: The `text2colbert-jinaai` module.
        TEXT2VEC_AWS: Weaviate module backed by AWS text-based embedding models.
        TEXT2VEC_COHERE: Weaviate module backed by Cohere text-based embedding models.
        TEXT2VEC_CONTEXTIONARY: Weaviate module backed by Contextionary text-based embedding models.
        TEXT2VEC_DATABRICKS: The `text2vec-databricks` module.
        TEXT2VEC_GPT4ALL: Weaviate module backed by GPT-4-All text-based embedding models.
        TEXT2VEC_HUGGINGFACE: Weaviate module backed by HuggingFace text-based embedding models.
        TEXT2VEC_MISTRAL: The `text2vec-mistral` module.
        TEXT2VEC_MORPH: The `text2vec-morph` module.
        TEXT2VEC_MODEL2VEC: The `text2vec-model2vec` module.
        TEXT2VEC_NVIDIA: Weaviate module backed by NVIDIA text-based embedding models.
        TEXT2VEC_OLLAMA: The `text2vec-ollama` module.
        TEXT2VEC_OPENAI: Weaviate module backed by OpenAI and Azure-OpenAI text-based embedding models.
        TEXT2VEC_PALM: Weaviate module backed by PaLM text-based embedding models.
        TEXT2VEC_TRANSFORMERS: Weaviate module backed by Transformers text-based embedding models.
        TEXT2VEC_JINAAI: Weaviate module backed by Jina AI text-based embedding models.
        TEXT2VEC_VOYAGEAI: Weaviate module backed by Voyage AI text-based embedding models.
        TEXT2VEC_WEAVIATE: Weaviate module backed by Weaviate's self-hosted text-based embedding models.
        IMG2VEC_NEURAL: Weaviate module backed by a ResNet-50 neural network for images.
        MULTI2VEC_AWS: The `multi2vec-aws` module.
        MULTI2VEC_CLIP: Weaviate module backed by a Sentence-BERT CLIP model for images and text.
        MULTI2VEC_COHERE: The `multi2vec-cohere` module.
        MULTI2VEC_JINAAI: The `multi2vec-jinaai` module.
        MULTI2MULTI_JINAAI: The `multi2multivec-jinaai` module.
        MULTI2MULTI_WEAVIATE: The `multi2multivec-weaviate` module.
        MULTI2VEC_BIND: Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.
        MULTI2VEC_PALM: Weaviate module backed by a palm model for images and text.
        MULTI2VEC_VOYAGEAI: Weaviate module backed by Voyage AI multimodal embedding models.
        MULTI2VEC_NVIDIA: Weaviate module backed by NVIDIA multimodal embedding models.
        REF2VEC_CENTROID: Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.
    """

    NONE = "none"
    TEXT2COLBERT_JINAAI = "text2colbert-jinaai"
    TEXT2VEC_AWS = "text2vec-aws"
    TEXT2VEC_COHERE = "text2vec-cohere"
    TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary"
    TEXT2VEC_DATABRICKS = "text2vec-databricks"
    TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
    TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
    TEXT2VEC_MISTRAL = "text2vec-mistral"
    TEXT2VEC_MORPH = "text2vec-morph"
    TEXT2VEC_MODEL2VEC = "text2vec-model2vec"
    TEXT2VEC_NVIDIA = "text2vec-nvidia"
    TEXT2VEC_OLLAMA = "text2vec-ollama"
    TEXT2VEC_OPENAI = "text2vec-openai"
    TEXT2VEC_PALM = "text2vec-palm"  # change to google once 1.27 is the lowest supported version
    TEXT2VEC_TRANSFORMERS = "text2vec-transformers"
    TEXT2VEC_JINAAI = "text2vec-jinaai"
    TEXT2VEC_VOYAGEAI = "text2vec-voyageai"
    TEXT2VEC_WEAVIATE = "text2vec-weaviate"
    IMG2VEC_NEURAL = "img2vec-neural"
    MULTI2VEC_AWS = "multi2vec-aws"
    MULTI2VEC_CLIP = "multi2vec-clip"
    MULTI2VEC_COHERE = "multi2vec-cohere"
    MULTI2VEC_JINAAI = "multi2vec-jinaai"
    MULTI2MULTI_JINAAI = "multi2multivec-jinaai"
    MULTI2MULTI_WEAVIATE = "multi2multivec-weaviate"
    MULTI2VEC_BIND = "multi2vec-bind"
    MULTI2VEC_PALM = "multi2vec-palm"  # change to google once 1.27 is the lowest supported version
    MULTI2VEC_VOYAGEAI = "multi2vec-voyageai"
    MULTI2VEC_NVIDIA = "multi2vec-nvidia"
    REF2VEC_CENTROID = "ref2vec-centroid"
class VectorDistances(str, Enum):
    """Vector similarity distance metric to be used in the `VectorIndexConfig` class.

    To ensure optimal search results, we recommend reviewing whether your model provider advises a
    specific distance metric and following their advice.

    Each member is also a `str`; its value is the metric name, e.g. `L2_SQUARED` is `"l2-squared"`.

    Attributes:
        COSINE: Cosine distance: [reference](https://en.wikipedia.org/wiki/Cosine_similarity)
        DOT: Dot distance: [reference](https://en.wikipedia.org/wiki/Dot_product)
        L2_SQUARED: L2 squared distance: [reference](https://en.wikipedia.org/wiki/Euclidean_distance)
        HAMMING: Hamming distance: [reference](https://en.wikipedia.org/wiki/Hamming_distance)
        MANHATTAN: Manhattan distance: [reference](https://en.wikipedia.org/wiki/Taxicab_geometry)
    """

    COSINE = "cosine"
    DOT = "dot"
    L2_SQUARED = "l2-squared"
    HAMMING = "hamming"
    MANHATTAN = "manhattan"
class _VectorizerConfigCreate(_ConfigCreateModel):
    # Base model for every vectorizer configuration in this module.
    # `vectorizer` is required here (`default=...`); subclasses pin it to their
    # module. `exclude=True` keeps it out of pydantic's serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(default=..., exclude=True)
class _Text2ColbertJinaAIConfig(_VectorizerConfigCreate):
    # Settings model for the `text2colbert-jinaai` module.
    # `vectorizer` is pinned to the module, immutable (`frozen=True`) and left
    # out of the serialized output (`exclude=True`).
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2COLBERT_JINAAI, frozen=True, exclude=True
    )
    # All remaining fields have no default, so they must be passed explicitly
    # (the Optional ones may be passed as None).
    vectorizeClassName: bool
    model: Optional[str]
    dimensions: Optional[int]
class _Text2VecContextionaryConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-contextionary` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_CONTEXTIONARY, frozen=True, exclude=True
    )
    # Required; populated from `vectorize_collection_name` by the factory method.
    vectorizeClassName: bool
class _Text2VecModel2VecConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-model2vec` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MODEL2VEC, frozen=True, exclude=True
    )
    # Both fields are required keyword arguments; `inferenceUrl` may be None.
    vectorizeClassName: bool
    inferenceUrl: Optional[str]
class _VectorizerCustomConfig(_VectorizerConfigCreate):
    # Vectorizer config whose module settings are a caller-supplied dictionary.
    module_config: Optional[Dict[str, Any]]

    def _to_dict(self) -> Dict[str, Any]:
        """Return `module_config` unchanged, or an empty dict when it is `None`."""
        config = self.module_config
        return {} if config is None else config
class _Text2VecAWSConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-aws` module. `vectorizer` is pinned to
    # the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_AWS,
        frozen=True,
        exclude=True,
    )
    model: Optional[str]
    endpoint: Optional[str]
    region: str
    service: str
    targetModel: Optional[str]
    targetVariant: Optional[str]
    vectorizeClassName: bool

    @field_validator("region")
    def _check_name(cls, r: str) -> str:
        """Pass a non-empty `region` through; reject the empty string.

        The factory method defaults `region` to `""`, so this is where a
        missing region is detected.

        Raises:
            ValueError: If `r` is the empty string.
        """
        if r != "":
            return r
        raise ValueError("region is a required argument and must be given")
class _Text2VecAzureOpenAIConfig(_VectorizerConfigCreate):
    # Azure flavour of the `text2vec-openai` module settings. `vectorizer` is
    # pinned to TEXT2VEC_OPENAI, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_OPENAI,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    resourceName: str
    deploymentId: str
    vectorizeClassName: bool
    dimensions: Optional[int]
    model: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, stringify `baseURL`, and flag the config as Azure."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is not None:
            # Replace the URL object with its plain string form.
            payload["baseURL"] = url.unicode_string()
        payload["isAzure"] = True
        return payload
class _Text2VecHuggingFaceConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-huggingface` module. `vectorizer` is
    # pinned to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_HUGGINGFACE,
        frozen=True,
        exclude=True,
    )
    model: Optional[str]
    passageModel: Optional[str]
    queryModel: Optional[str]
    endpointURL: Optional[AnyHttpUrl]
    waitForModel: Optional[bool]
    useGPU: Optional[bool]
    useCache: Optional[bool]
    vectorizeClassName: bool

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, nesting the option flags under `"options"`.

        Each of `waitForModel`, `useGPU` and `useCache` that is set is moved
        from the top level into an `"options"` sub-dict (added only when at
        least one is set). `endpointURL`, when set, is written as a string.
        """
        payload = super()._to_dict()
        nested = {}
        for flag in ("waitForModel", "useGPU", "useCache"):
            if getattr(self, flag) is not None:
                nested[flag] = payload.pop(flag)
        if nested:
            payload["options"] = nested
        endpoint = self.endpointURL
        if endpoint is not None:
            payload["endpointURL"] = endpoint.unicode_string()
        return payload
class _Text2VecMistralConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-mistral` module. `vectorizer` is pinned
    # to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MISTRAL,
        frozen=True,
        exclude=True,
    )
    model: Optional[str]
    vectorizeClassName: bool
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Text2VecMorphConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-morph` module. `vectorizer` is pinned
    # to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MORPH,
        frozen=True,
        exclude=True,
    )
    model: Optional[str]
    vectorizeClassName: bool
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Text2VecDatabricksConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-databricks` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_DATABRICKS, frozen=True, exclude=True
    )
    # `endpoint` is the only mandatory string; `instruction` may be None.
    endpoint: str
    instruction: Optional[str]
    vectorizeClassName: bool
# Allowed values for the `type_` field of `_Text2VecOpenAIConfig`.
OpenAIType = Literal["text", "code"]
class _Text2VecOpenAIConfig(_VectorizerConfigCreate):
    # Settings model for the (non-Azure) `text2vec-openai` module. `vectorizer`
    # is pinned to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_OPENAI,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    dimensions: Optional[int]
    model: Optional[str]
    modelVersion: Optional[str]
    type_: Optional[OpenAIType]
    vectorizeClassName: bool

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent and adapt the result for the OpenAI module.

        `type_` is renamed to `"type"` when set, `baseURL` is written as a
        plain string when set, and `"isAzure"` is always `False`.
        """
        payload = super()._to_dict()
        if self.type_ is not None:
            # The attribute carries a trailing underscore; the emitted key does not.
            payload["type"] = payload.pop("type_")
        url = self.baseURL
        if url is not None:
            payload["baseURL"] = url.unicode_string()
        payload["isAzure"] = False
        return payload
class _Text2VecCohereConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-cohere` module. `vectorizer` is pinned
    # to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_COHERE,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    dimensions: Optional[int]
    truncate: Optional[CohereTruncation]
    vectorizeClassName: bool

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Text2VecGoogleConfig(_VectorizerConfigCreate):
    # Settings model for the Google text vectorizer. Note that `vectorizer` is
    # still pinned to TEXT2VEC_PALM (see the note on that enum member), is
    # immutable and is excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_PALM, frozen=True, exclude=True
    )
    # No field has a default: all must be passed, Optional ones may be None.
    projectId: Optional[str]
    apiEndpoint: Optional[str]
    dimensions: Optional[int]
    modelId: Optional[str]
    vectorizeClassName: bool
    titleProperty: Optional[str]
    taskType: Optional[str]
class _Text2VecTransformersConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-transformers` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_TRANSFORMERS, frozen=True, exclude=True
    )
    # Only these two pooling strategy names pass validation.
    poolingStrategy: Literal["masked_mean", "cls"]
    vectorizeClassName: bool
    inferenceUrl: Optional[str]
    passageInferenceUrl: Optional[str]
    queryInferenceUrl: Optional[str]
    # The only field here with a default; may be omitted by callers.
    dimensions: Optional[int] = None
class _Text2VecGPT4AllConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-gpt4all` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_GPT4ALL, frozen=True, exclude=True
    )
    # Required boolean; the only module-specific setting.
    vectorizeClassName: bool
class _Text2VecJinaConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-jinaai` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_JINAAI, frozen=True, exclude=True
    )
    # `baseURL` is a plain string here (not `AnyHttpUrl`), so this class needs
    # no custom `_to_dict`.
    baseURL: Optional[str]
    dimensions: Optional[int]
    model: Optional[str]
    vectorizeClassName: bool
class _Text2VecVoyageConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-voyageai` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_VOYAGEAI, frozen=True, exclude=True
    )
    dimensions: Optional[int]
    model: Optional[str]
    # Plain string (not `AnyHttpUrl`), so no custom `_to_dict` is defined.
    baseURL: Optional[str]
    # A boolean here, unlike the Cohere configs where `truncate` is a Literal.
    truncate: Optional[bool]
    vectorizeClassName: bool
class _Text2VecNvidiaConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-nvidia` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_NVIDIA, frozen=True, exclude=True
    )
    model: Optional[str]
    # Plain string (not `AnyHttpUrl`), so no custom `_to_dict` is defined.
    baseURL: Optional[str]
    truncate: Optional[bool]
    vectorizeClassName: bool
class _Text2VecWeaviateConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-weaviate` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_WEAVIATE, frozen=True, exclude=True
    )
    model: Optional[str]
    # Plain string (not `AnyHttpUrl`), so no custom `_to_dict` is defined.
    baseURL: Optional[str]
    vectorizeClassName: bool
    dimensions: Optional[int]
class _Text2VecOllamaConfig(_VectorizerConfigCreate):
    # Settings model for the `text2vec-ollama` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_OLLAMA, frozen=True, exclude=True
    )
    # All required keyword arguments; the Optional ones may be None.
    model: Optional[str]
    apiEndpoint: Optional[str]
    vectorizeClassName: bool
class _Img2VecNeuralConfig(_VectorizerConfigCreate):
    # Settings model for the `img2vec-neural` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.IMG2VEC_NEURAL, frozen=True, exclude=True
    )
    # Required list of property names; plain strings, not `Multi2VecField`.
    imageFields: List[str]
class Multi2VecField(BaseModel):
    """Use this class when defining the fields to use in the `Multi2VecClip` and `Multi2VecBind` vectorizers.

    A field is a property `name` plus an optional `weight`.
    """

    name: str
    # Optional; excluded from this model's own serialized output
    # (`exclude=True`). `_Multi2VecBase._to_dict` reads it directly to build
    # the separate "weights" entry.
    weight: Optional[float] = Field(default=None, exclude=True)
class _Multi2VecBase(_VectorizerConfigCreate):
    # Shared base for the multi-modal configs: holds the image/text field
    # lists and flattens every `*Fields` attribute when serializing.
    imageFields: Optional[List[Multi2VecField]]
    textFields: Optional[List[Multi2VecField]]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, flattening `*Fields` into names plus weights.

        For every model field whose name contains `"Fields"` and whose value
        is not `None`, the entry is replaced by the list of field names; the
        non-`None` weights of that list are collected under
        `"weights"[<field name>]`. The `"weights"` key is removed again when
        no weights were collected.
        """
        payload = super()._to_dict()
        # Insert the placeholder first so the key order of the result is the
        # same as if it had been written before the per-field entries.
        collected: Dict[str, Any] = {}
        payload["weights"] = collected
        for attr in type(self).model_fields:
            entries = getattr(self, attr)
            if entries is None or "Fields" not in attr:
                continue
            entries = cast(List[Multi2VecField], entries)
            payload[attr] = [entry.name for entry in entries]
            weighted = [entry.weight for entry in entries if entry.weight is not None]
            if weighted:
                collected[attr] = weighted
        if not collected:
            del payload["weights"]
        return payload
class _Multi2VecCohereConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-cohere` module. `vectorizer` is pinned
    # to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_COHERE,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    dimensions: Optional[int]
    truncate: Optional[CohereTruncation]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Multi2VecJinaConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-jinaai` module. `vectorizer` is pinned
    # to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_JINAAI,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    dimensions: Optional[int]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Multi2VecAWSConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-aws` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_AWS, frozen=True, exclude=True
    )
    # Unlike `_Text2VecAWSConfig`, `region` is Optional here and has no validator.
    region: Optional[str]
    model: Optional[str]
    dimensions: Optional[int]
class _Multi2MultiVecJinaConfig(_Multi2VecBase):
    # Settings model for the `multi2multivec-jinaai` module. `vectorizer` is
    # pinned to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2MULTI_JINAAI,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Multi2MultiVecWeaviateConfig(_Multi2VecBase):
    # Settings model for the `multi2multivec-weaviate` module. `vectorizer` is
    # pinned to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2MULTI_WEAVIATE,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Multi2VecClipConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-clip` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output. Image/text field lists come from `_Multi2VecBase`.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_CLIP, frozen=True, exclude=True
    )
    # Required keyword argument; may be None.
    inferenceUrl: Optional[str]
class _Multi2VecGoogleConfig(_Multi2VecBase, _VectorizerConfigCreate):
    # Settings model for the Google multi-modal vectorizer. `vectorizer` is
    # still pinned to MULTI2VEC_PALM (see the note on that enum member), is
    # immutable and is excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_PALM, frozen=True, exclude=True
    )
    # Name contains "Fields", so `_Multi2VecBase._to_dict` flattens it like
    # the image/text field lists.
    videoFields: Optional[List[Multi2VecField]]
    # The only non-Optional setting of this model.
    projectId: str
    location: Optional[str]
    modelId: Optional[str]
    dimensions: Optional[int]
    videoIntervalSeconds: Optional[int]
class _Multi2VecBindConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-bind` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_BIND, frozen=True, exclude=True
    )
    # Every name below contains "Fields", so `_Multi2VecBase._to_dict`
    # flattens each one into names plus weights.
    audioFields: Optional[List[Multi2VecField]]
    depthFields: Optional[List[Multi2VecField]]
    IMUFields: Optional[List[Multi2VecField]]
    thermalFields: Optional[List[Multi2VecField]]
    videoFields: Optional[List[Multi2VecField]]
class _Multi2VecVoyageaiConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-voyageai` module. `vectorizer` is
    # pinned to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_VOYAGEAI,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    truncation: Optional[bool]
    dimensions: Optional[int]
    videoFields: Optional[List[Multi2VecField]]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Multi2VecNvidiaConfig(_Multi2VecBase):
    # Settings model for the `multi2vec-nvidia` module. `vectorizer` is pinned
    # to the module, immutable and excluded from the serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_NVIDIA,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    truncation: Optional[bool]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the parent, writing `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
class _Ref2VecCentroidConfig(_VectorizerConfigCreate):
    # Settings model for the `ref2vec-centroid` module.
    # `vectorizer` is pinned to the module, immutable and excluded from the
    # serialized output.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.REF2VEC_CENTROID, frozen=True, exclude=True
    )
    referenceProperties: List[str]
    # "mean" is the only value that passes validation.
    method: Literal["mean"]
def _map_multi2vec_fields(
    fields: Optional[Union[List[str], List[Multi2VecField]]],
) -> Optional[List[Multi2VecField]]:
    """Normalize a list of field specs into `Multi2VecField` objects.

    Args:
        fields: Property names and/or ready-made `Multi2VecField` objects, or `None`.

    Returns:
        `None` when `fields` is `None`; otherwise a new list in which every
        string has been wrapped as `Multi2VecField(name=...)` and every other
        item is passed through unchanged.
    """
    if fields is None:
        return None
    normalized = []
    for item in fields:
        if isinstance(item, str):
            item = Multi2VecField(name=item)
        normalized.append(item)
    return normalized
[docs] class _Vectorizer: """Use this factory class to create the correct object for the `vectorizer_config` argument in the `collections.create()` method. Each staticmethod provides options specific to the named vectorizer in the function's name. Under-the-hood data validation steps will ensure that any mis-specifications will be caught before the request is sent to Weaviate. """
@staticmethod
def none() -> _VectorizerConfigCreate:
    """Create a `_VectorizerConfigCreate` object with the vectorizer set to `Vectorizers.NONE`."""
    config = _VectorizerConfigCreate(vectorizer=Vectorizers.NONE)
    return config
@staticmethod
def img2vec_neural(
    image_fields: List[str],
) -> _VectorizerConfigCreate:
    """Create a `_Img2VecNeuralConfig` object for use when vectorizing using the `img2vec-neural` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/img2vec-neural)
    for detailed usage.

    Args:
        image_fields: The image fields to use. This is a required field and must match the property fields
            of the collection that are defined as `DataType.BLOB`.

    Raises:
        pydantic.ValidationError: If `image_fields` is not a `list`.
    """
    config = _Img2VecNeuralConfig(imageFields=image_fields)
    return config
@staticmethod
def multi2vec_clip(
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    interference_url: Optional[str] = None,
    inference_url: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecClipConfig` object for use when vectorizing using the `multi2vec-clip` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal)
    for detailed usage.

    Args:
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        interference_url: Deprecated misspelling of `inference_url`. When given on its own, a
            `DeprecationWarning` is emitted and the value is used as the inference url.
        inference_url: The inference url to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        vectorize_collection_name: Accepted for backwards compatibility; it is not forwarded to the
            created config.

    Raises:
        ValueError: If both `interference_url` and `inference_url` are provided.
        pydantic.ValidationError: If `image_fields` or `text_fields` are not `None` or a `list`.
    """
    if interference_url is not None:
        if inference_url is not None:
            raise ValueError(
                "You have provided `interference_url` as well as `inference_url`. Please only provide `inference_url`, as `interference_url` is deprecated."
            )
        warnings.warn(
            message="""This parameter is deprecated and will be removed in a future release. Please use `inference_url` instead.""",
            category=DeprecationWarning,
            # Attribute the warning to the caller's line rather than this module.
            stacklevel=2,
        )
        # Honour the deprecated spelling instead of silently dropping the value.
        inference_url = interference_url
    return _Multi2VecClipConfig(
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
        inferenceUrl=inference_url,
    )
@staticmethod
def multi2vec_bind(
    audio_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    depth_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    imu_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    thermal_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecBindConfig` object for use when vectorizing using the `multi2vec-bind` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/imagebind/embeddings-multimodal)
    for detailed usage.

    Args:
        audio_fields: The audio fields to use in vectorization.
        depth_fields: The depth fields to use in vectorization.
        image_fields: The image fields to use in vectorization.
        imu_fields: The IMU fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        thermal_fields: The thermal fields to use in vectorization.
        video_fields: The video fields to use in vectorization.
        vectorize_collection_name: Accepted by the signature but not forwarded to the created config.

    Raises:
        pydantic.ValidationError: If any of the `*_fields` are not `None` or a `list`.
    """
    # Normalize in the same order the config receives its keyword arguments.
    audio = _map_multi2vec_fields(audio_fields)
    depth = _map_multi2vec_fields(depth_fields)
    image = _map_multi2vec_fields(image_fields)
    imu = _map_multi2vec_fields(imu_fields)
    text = _map_multi2vec_fields(text_fields)
    thermal = _map_multi2vec_fields(thermal_fields)
    video = _map_multi2vec_fields(video_fields)
    return _Multi2VecBindConfig(
        audioFields=audio,
        depthFields=depth,
        imageFields=image,
        IMUFields=imu,
        textFields=text,
        thermalFields=thermal,
        videoFields=video,
    )
@staticmethod
def ref2vec_centroid(
    reference_properties: List[str],
    method: Literal["mean"] = "mean",
) -> _VectorizerConfigCreate:
    """Create a `_Ref2VecCentroidConfig` object for use when vectorizing using the `ref2vec-centroid` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/ref2vec-centroid)
    for detailed usage.

    Args:
        reference_properties: The reference properties to use in vectorization, REQUIRED.
        method: The method to use in vectorization. Defaults to `mean`.

    Raises:
        pydantic.ValidationError: If `reference_properties` is not a `list`.
    """
    config = _Ref2VecCentroidConfig(
        referenceProperties=reference_properties,
        method=method,
    )
    return config
@staticmethod
def text2vec_aws(
    model: Optional[Union[AWSModel, str]] = None,
    # `region` is logically required, but it follows a defaulted parameter and
    # the order cannot change for backwards compatibility; the empty default
    # is rejected by the config model's validator.
    region: str = "",
    endpoint: Optional[str] = None,
    service: Union[AWSService, str] = "bedrock",
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecAWSConfig` object for use when vectorizing using the `text2vec-aws` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings)
    for detailed usage.

    Args:
        model: The model to use, REQUIRED for service "bedrock".
        region: The AWS region to run the model from, REQUIRED.
        endpoint: The endpoint to use, REQUIRED for service "sagemaker".
        service: The AWS service to use, options are "bedrock" and "sagemaker".
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `region` is left as the empty string.
    """
    # `targetModel` / `targetVariant` are not exposed by this factory method.
    config = _Text2VecAWSConfig(
        model=model,
        region=region,
        vectorizeClassName=vectorize_collection_name,
        service=service,
        endpoint=endpoint,
        targetModel=None,
        targetVariant=None,
    )
    return config
@staticmethod
def text2vec_azure_openai(
    resource_name: str,
    deployment_id: str,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    dimensions: Optional[int] = None,
    model: Optional[str] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecAzureOpenAIConfig` object for use when vectorizing with Azure OpenAI.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai-azure/embeddings)
    for detailed usage.

    Args:
        resource_name: The resource name to use, REQUIRED.
        deployment_id: The deployment ID to use, REQUIRED.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        dimensions: The dimensionality of the vectors. Defaults to `None`, which uses the server-defined default.
        model: The model to use. Defaults to `None`.

    Raises:
        pydantic.ValidationError: If `resource_name` or `deployment_id` are not `str`.
    """
    config = _Text2VecAzureOpenAIConfig(
        baseURL=base_url,
        dimensions=dimensions,
        resourceName=resource_name,
        deploymentId=deployment_id,
        vectorizeClassName=vectorize_collection_name,
        model=model,
    )
    return config
@staticmethod
def text2vec_contextionary(
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecContextionaryConfig` object for use when vectorizing using the `text2vec-contextionary` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-contextionary)
    for detailed usage.

    Args:
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `vectorize_collection_name` is not a `bool`.
    """
    config = _Text2VecContextionaryConfig(vectorizeClassName=vectorize_collection_name)
    return config
@staticmethod
def custom(
    module_name: str, module_config: Optional[Dict[str, Any]] = None
) -> _VectorizerConfigCreate:
    """Create a `_VectorizerCustomConfig` object for use when vectorizing using a custom specification.

    Args:
        module_name: The name of the module to use, REQUIRED.
        module_config: The configuration to use for the module. Defaults to `None`, which uses the server-defined default.
    """
    # Wrap the free-form name so it is accepted where a `Vectorizers` member is expected.
    vectorizer = _EnumLikeStr(module_name)
    return _VectorizerCustomConfig(vectorizer=vectorizer, module_config=module_config)
@staticmethod
def text2vec_cohere(
    model: Optional[Union[CohereModel, str]] = None,
    truncate: Optional[CohereTruncation] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecCohereConfig` object for use when vectorizing using the `text2vec-cohere` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If `truncate` is not a valid value from the `CohereTruncation` type.
    """
    # `dimensions` is not exposed by this factory method.
    config = _Text2VecCohereConfig(
        baseURL=base_url,
        model=model,
        dimensions=None,
        truncate=truncate,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
@staticmethod
def multi2vec_cohere(
    *,
    model: Optional[Union[CohereMultimodalModel, str]] = None,
    truncate: Optional[CohereTruncation] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecCohereConfig` object for use when vectorizing using the `multi2vec-cohere` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Accepted by the signature but not forwarded to the created config.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If `truncate` is not a valid value from the `CohereTruncation` type.
    """
    images = _map_multi2vec_fields(image_fields)
    texts = _map_multi2vec_fields(text_fields)
    # `dimensions` is not exposed by this factory method.
    return _Multi2VecCohereConfig(
        baseURL=base_url,
        model=model,
        dimensions=None,
        truncate=truncate,
        imageFields=images,
        textFields=texts,
    )
@staticmethod
def multi2vec_voyageai(
    *,
    model: Optional[Union[VoyageMultimodalModel, str]] = None,
    truncation: Optional[bool] = None,
    # Deprecated no-op; defaulted so callers are not forced to pass it
    # (same as `multi2vec_nvidia`).
    output_encoding: Optional[str] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncation: Whether to truncate inputs. Defaults to `None`, which uses the server-defined default.
        output_encoding: Deprecated, has no effect. Defaults to `None`.
        vectorize_collection_name: Deprecated, has no effect.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If `image_fields` or `text_fields` are not `None` or a `list`.
    """
    # `dimensions` and `videoFields` are not exposed by this factory method.
    return _Multi2VecVoyageaiConfig(
        baseURL=base_url,
        model=model,
        truncation=truncation,
        dimensions=None,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
        videoFields=None,
    )
@staticmethod
def multi2vec_nvidia(
    *,
    model: Optional[str] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Build a `_Multi2VecNvidiaConfig` for vectorizing with the `multi2vec-nvidia` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncation: The truncation strategy to use. Defaults to `None`, which uses the
            server-defined default.
        output_encoding: Deprecated, has no effect.
        vectorize_collection_name: Deprecated, has no effect.
        base_url: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected by the config model.
    """
    mapped_image_fields = _map_multi2vec_fields(image_fields)
    mapped_text_fields = _map_multi2vec_fields(text_fields)
    return _Multi2VecNvidiaConfig(
        baseURL=base_url,
        model=model,
        truncation=truncation,
        imageFields=mapped_image_fields,
        textFields=mapped_text_fields,
    )
@staticmethod
def text2vec_databricks(
    *,
    endpoint: str,
    instruction: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecDatabricksConfig` for vectorizing with the `text2vec-databricks` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/databricks/embeddings)
    for detailed usage.

    Args:
        endpoint: The endpoint to use.
        instruction: The instruction strategy to use. Defaults to `None`, which uses the
            server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected by the config model.
    """
    config = _Text2VecDatabricksConfig(
        endpoint=endpoint,
        instruction=instruction,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
@staticmethod
def text2vec_gpt4all(
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecGPT4AllConfig` for vectorizing with the `text2vec-gpt4all` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/gpt4all/embeddings)
    for detailed usage.

    Args:
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `vectorize_collection_name` is not a `bool`.
    """
    config = _Text2VecGPT4AllConfig(vectorizeClassName=vectorize_collection_name)
    return config
@staticmethod
def text2vec_huggingface(
    model: Optional[str] = None,
    passage_model: Optional[str] = None,
    query_model: Optional[str] = None,
    endpoint_url: Optional[AnyHttpUrl] = None,
    wait_for_model: Optional[bool] = None,
    use_gpu: Optional[bool] = None,
    use_cache: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecHuggingFaceConfig` for vectorizing with the `text2vec-huggingface` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        passage_model: The passage model to use. Defaults to `None`, which uses the
            server-defined default.
        query_model: The query model to use. Defaults to `None`, which uses the
            server-defined default.
        endpoint_url: The endpoint URL to use. Defaults to `None`, which uses the
            server-defined default.
        wait_for_model: Whether to wait for the model to be loaded. Defaults to `None`,
            which uses the server-defined default.
        use_gpu: Whether to use the GPU. Defaults to `None`, which uses the server-defined default.
        use_cache: Whether to use the cache. Defaults to `None`, which uses the
            server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the arguments passed to the function are invalid.
            It is important to note that some of these variables are mutually exclusive.
            See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings#vectorizer-parameters)
            for more details.
    """
    config = _Text2VecHuggingFaceConfig(
        model=model,
        passageModel=passage_model,
        queryModel=query_model,
        endpointURL=endpoint_url,
        waitForModel=wait_for_model,
        useGPU=use_gpu,
        useCache=use_cache,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
@staticmethod
def text2vec_mistral(
    *,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecMistralConfig` for vectorizing with the `text2vec-mistral` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/mistral/embeddings)
    for detailed usage.

    Args:
        base_url: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecMistralConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
@staticmethod
def text2vec_ollama(
    *,
    api_endpoint: Optional[str] = None,
    model: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecOllamaConfig` for vectorizing with the `text2vec-ollama` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/ollama/embeddings)
    for detailed usage.

    Args:
        api_endpoint: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default. Docker users may need to specify an
            alias, such as `http://host.docker.internal:11434` so that the container can
            access the host machine.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecOllamaConfig(
        apiEndpoint=api_endpoint,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
@staticmethod
def text2vec_openai(
    model: Optional[Union[OpenAIModel, str]] = None,
    model_version: Optional[str] = None,
    type_: Optional[OpenAIType] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecOpenAIConfig` for vectorizing with the `text2vec-openai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        model_version: The model version to use. Defaults to `None`, which uses the
            server-defined default.
        type_: The type of model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        dimensions: Number of dimensions. Applicable to v3 OpenAI models only. Defaults to
            `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If `type_` is not a valid value from the `OpenAIType` type.
    """
    config = _Text2VecOpenAIConfig(
        baseURL=base_url,
        model=model,
        modelVersion=model_version,
        type_=type_,
        vectorizeClassName=vectorize_collection_name,
        dimensions=dimensions,
    )
    return config
@staticmethod
@docstring_deprecated(
    deprecated_in="4.9.0",
    details=""" This method is deprecated and will be removed in Q2 '25. Please use :meth:`~weaviate.collections.classes.config._Vectorizer.text2vec_google` instead. """,
)
@typing_deprecated(
    "This method is deprecated and will be removed in Q2 '25. Please use `text2vec_google` instead."
)
def text2vec_palm(
    project_id: str,
    api_endpoint: Optional[str] = None,
    model_id: Optional[str] = None,
    title_property: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecGoogleConfig` for vectorizing with the `text2vec-palm` model (deprecated).

    Emits the palm-to-google deprecation warning before building the config.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
    for detailed usage.

    Args:
        project_id: The project ID to use, REQUIRED.
        api_endpoint: The API endpoint to use without a leading scheme such as `http://`.
            Defaults to `None`, which uses the server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model
            to use as the title.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `api_endpoint` is not a valid URL.
    """
    # Warn at call time that callers should migrate to `text2vec_google`.
    _Warnings.palm_to_google_t2v()
    # `dimensions` and `taskType` are not exposed by this factory and are always `None`.
    config = _Text2VecGoogleConfig(
        projectId=project_id,
        apiEndpoint=api_endpoint,
        dimensions=None,
        modelId=model_id,
        vectorizeClassName=vectorize_collection_name,
        titleProperty=title_property,
        taskType=None,
    )
    return config
@staticmethod
def text2vec_google_aistudio(
    model_id: Optional[str] = None,
    title_property: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecGoogleConfig` for the `text2vec-google` model with a fixed API endpoint.

    The endpoint is always `generativelanguage.googleapis.com` and no project ID is sent.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
    for detailed usage.

    Args:
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model
            to use as the title.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected by the config model.
    """
    # `dimensions` and `taskType` are not exposed by this factory and are always `None`.
    config = _Text2VecGoogleConfig(
        projectId=None,
        apiEndpoint="generativelanguage.googleapis.com",
        dimensions=None,
        modelId=model_id,
        vectorizeClassName=vectorize_collection_name,
        titleProperty=title_property,
        taskType=None,
    )
    return config
@staticmethod
def text2vec_google(
    project_id: str,
    api_endpoint: Optional[str] = None,
    model_id: Optional[str] = None,
    title_property: Optional[str] = None,
    vectorize_collection_name: bool = True,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecGoogleConfig` object for use when vectorizing using the `text2vec-google` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
    for detailed usage.

    Args:
        project_id: The project ID to use, REQUIRED.
        api_endpoint: The API endpoint to use without a leading scheme such as `http://`.
            Defaults to `None`, which uses the server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model
            to use as the title.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        dimensions: The dimensionality of the vectors. Defaults to `None`, which uses the
            server-defined default.

    Raises:
        pydantic.ValidationError: If `api_endpoint` is not a valid URL.
    """
    # `dimensions` is appended last so existing positional callers are unaffected;
    # `taskType` is not exposed by this factory and is always `None`.
    return _Text2VecGoogleConfig(
        projectId=project_id,
        apiEndpoint=api_endpoint,
        dimensions=dimensions,
        modelId=model_id,
        vectorizeClassName=vectorize_collection_name,
        titleProperty=title_property,
        taskType=None,
    )
@staticmethod
@docstring_deprecated(
    deprecated_in="4.9.0",
    details=""" This method is deprecated and will be removed in Q2 '25. Please use :meth:`~weaviate.collections.classes.config._Vectorizer.multi2vec_google` instead. """,
)
@typing_deprecated(
    "This method is deprecated and will be removed in Q2 '25. Please use `multi2vec_google` instead."
)
def multi2vec_palm(
    *,
    location: str,
    project_id: str,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    dimensions: Optional[int] = None,
    model_id: Optional[str] = None,
    video_interval_seconds: Optional[int] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Multi2VecGoogleConfig` for multimodal vectorization (deprecated).

    Emits the palm-to-google deprecation warning before building the config.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings-multimodal)
    for detailed usage.

    Args:
        location: Where the model runs. REQUIRED.
        project_id: The project ID to use, REQUIRED.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        video_fields: The video fields to use in vectorization.
        dimensions: The number of dimensions to use. Defaults to `None`, which uses the
            server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        video_interval_seconds: Length of a video interval. Defaults to `None`, which uses
            the server-defined default.
        vectorize_collection_name: Not forwarded to the created config by this method.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected by the config model.
    """
    # Warn at call time that callers should migrate to `multi2vec_google`.
    _Warnings.palm_to_google_m2v()
    mapped_image_fields = _map_multi2vec_fields(image_fields)
    mapped_text_fields = _map_multi2vec_fields(text_fields)
    mapped_video_fields = _map_multi2vec_fields(video_fields)
    return _Multi2VecGoogleConfig(
        projectId=project_id,
        location=location,
        imageFields=mapped_image_fields,
        textFields=mapped_text_fields,
        videoFields=mapped_video_fields,
        dimensions=dimensions,
        modelId=model_id,
        videoIntervalSeconds=video_interval_seconds,
    )
@staticmethod
def multi2vec_google(
    *,
    location: str,
    project_id: str,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    dimensions: Optional[int] = None,
    model_id: Optional[str] = None,
    video_interval_seconds: Optional[int] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecGoogleConfig` object for multimodal vectorization with Google models.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings-multimodal)
    for detailed usage.

    Args:
        location: Where the model runs. REQUIRED.
        project_id: The project ID to use, REQUIRED.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        video_fields: The video fields to use in vectorization.
        dimensions: The number of dimensions to use. Defaults to `None`, which uses the
            server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        video_interval_seconds: Length of a video interval. Defaults to `None`, which uses
            the server-defined default.
        vectorize_collection_name: Not forwarded to the created config by this method.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected by the config model.
    """
    # `dimensions` mirrors the option offered by the deprecated `multi2vec_palm`, so
    # callers migrating to this method keep the same capability. It is keyword-only
    # with a `None` default, which leaves existing call sites unchanged.
    return _Multi2VecGoogleConfig(
        projectId=project_id,
        location=location,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
        videoFields=_map_multi2vec_fields(video_fields),
        dimensions=dimensions,
        modelId=model_id,
        videoIntervalSeconds=video_interval_seconds,
    )
@staticmethod
def text2vec_transformers(
    pooling_strategy: Literal["masked_mean", "cls"] = "masked_mean",
    dimensions: Optional[int] = None,
    vectorize_collection_name: bool = True,
    inference_url: Optional[str] = None,
    passage_inference_url: Optional[str] = None,
    query_inference_url: Optional[str] = None,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecTransformersConfig` for vectorizing with the `text2vec-transformers` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings)
    for detailed usage.

    Args:
        pooling_strategy: The pooling strategy to use. Defaults to `masked_mean`.
        dimensions: The number of dimensions for the generated embeddings. Defaults to
            `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        inference_url: The inference url to use where API requests should go. You can use
            either this OR passage/query_inference_url. Defaults to `None`, which uses the
            server-defined default.
        passage_inference_url: The inference url to use where passage API requests should
            go. You can use either this and query_inference_url OR inference_url. Defaults
            to `None`, which uses the server-defined default.
        query_inference_url: The inference url to use where query API requests should go.
            You can use either this and passage_inference_url OR inference_url. Defaults
            to `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If `pooling_strategy` is not one of the allowed values.
    """
    config = _Text2VecTransformersConfig(
        poolingStrategy=pooling_strategy,
        dimensions=dimensions,
        vectorizeClassName=vectorize_collection_name,
        inferenceUrl=inference_url,
        passageInferenceUrl=passage_inference_url,
        queryInferenceUrl=query_inference_url,
    )
    return config
@staticmethod
def text2vec_jinaai(
    model: Optional[Union[JinaModel, str]] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[str] = None,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecJinaConfig` for vectorizing with the `text2vec-jinaai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
            See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings#available-models)
            for more details.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to send the vectorization requests to. Defaults to `None`,
            which uses the server-defined default.
        dimensions: The number of dimensions for the generated embeddings. Defaults to
            `None`, which uses the server-defined default.
    """
    config = _Text2VecJinaConfig(
        model=model,
        vectorizeClassName=vectorize_collection_name,
        baseURL=base_url,
        dimensions=dimensions,
    )
    return config
@staticmethod
def multi2vec_jinaai(
    *,
    model: Optional[Union[JinaMultimodalModel, str]] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    dimensions: Optional[int] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Build a `_Multi2VecJinaConfig` for vectorizing with the `multi2vec-jinaai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Not forwarded to the created config by this method.
        base_url: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        dimensions: The number of dimensions for the generated embeddings (only available
            for some models). Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected by the config model.
    """
    mapped_image_fields = _map_multi2vec_fields(image_fields)
    mapped_text_fields = _map_multi2vec_fields(text_fields)
    return _Multi2VecJinaConfig(
        baseURL=base_url,
        model=model,
        dimensions=dimensions,
        imageFields=mapped_image_fields,
        textFields=mapped_text_fields,
    )
@staticmethod
def text2vec_voyageai(
    *,
    model: Optional[Union[VoyageModel, str]] = None,
    base_url: Optional[str] = None,
    truncate: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecVoyageConfig` for vectorizing with the `text2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
            See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings#available-models)
            for more details.
        base_url: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        truncate: Whether to truncate the input texts to fit within the context length.
            Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    # This factory does not expose `dimensions`, so the config always receives `None`.
    config = _Text2VecVoyageConfig(
        model=model,
        baseURL=base_url,
        truncate=truncate,
        vectorizeClassName=vectorize_collection_name,
        dimensions=None,
    )
    return config
@staticmethod
def text2vec_weaviate(
    *,
    model: Optional[Union[WeaviateModel, str]] = None,
    base_url: Optional[str] = None,
    vectorize_collection_name: bool = True,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecWeaviateConfig` vectorizer configuration.

    Args:
        model: The model to use; forwarded to the config as `model`. Defaults to `None`.
        base_url: The base URL; forwarded to the config as `baseURL`. Defaults to `None`.
        vectorize_collection_name: Forwarded to the config as `vectorizeClassName`.
            Defaults to `True`.
        dimensions: Forwarded to the config as `dimensions`. Defaults to `None`.
    """
    config = _Text2VecWeaviateConfig(
        model=model,
        baseURL=base_url,
        vectorizeClassName=vectorize_collection_name,
        dimensions=dimensions,
    )
    return config
@staticmethod
def text2vec_nvidia(
    *,
    model: Optional[str] = None,
    base_url: Optional[str] = None,
    truncate: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecNvidiaConfig` for vectorizing with the `text2vec-nvidia` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
            See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings#available-models)
            for more details.
        base_url: The base URL to use where API requests should go. Defaults to `None`,
            which uses the server-defined default.
        truncate: Whether to truncate the input texts to fit within the context length.
            Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecNvidiaConfig(
        model=model,
        baseURL=base_url,
        truncate=truncate,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
@staticmethod
def text2vec_model2vec(
    *,
    inference_url: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecModel2VecConfig` for vectorizing with the `text2vec-model2vec` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/model2vec/embeddings)
    for detailed usage.

    Args:
        inference_url: The inference url to use where API requests should go. Defaults to
            `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecModel2VecConfig(
        vectorizeClassName=vectorize_collection_name,
        inferenceUrl=inference_url,
    )
    return config