import warnings
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union, cast
from deprecation import deprecated as docstring_deprecated
from pydantic import AnyHttpUrl, BaseModel, Field, field_validator
from typing_extensions import TypeAlias
from typing_extensions import deprecated as typing_deprecated
from weaviate.collections.classes.config_base import _ConfigCreateModel, _EnumLikeStr
from ...warnings import _Warnings
# Literal aliases listing known model / option names per provider. They are used as
# type hints only; the factory methods also accept plain `str` for models.

# Cohere text embedding model names.
# See https://docs.cohere.com/docs/cohere-embed for reference
CohereModel: TypeAlias = Literal[
    "embed-v4.0",
    "embed-multilingual-v2.0",
    "embed-multilingual-v3.0",
    "embed-multilingual-light-v3.0",
    "small",
    "medium",
    "large",
    "multilingual-22-12",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-english-v3.0",
    "embed-english-light-v3.0",
]
# Cohere model names offered for the multimodal (`multi2vec-cohere`) vectorizer.
CohereMultimodalModel: TypeAlias = Literal[
    "embed-v4.0",
    "embed-multilingual-v3.0",
    "embed-multilingual-light-v3.0",
    "embed-english-v3.0",
    "embed-english-light-v3.0",
]
# Truncation strategies accepted by the Cohere config models.
CohereTruncation: TypeAlias = Literal["NONE", "START", "END", "LEFT", "RIGHT"]
# OpenAI text embedding model names.
OpenAIModel: TypeAlias = Literal[
    "text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"
]
# Jina AI text embedding model names.
JinaModel: TypeAlias = Literal[
    "jina-embeddings-v2-base-en",
    "jina-embeddings-v2-small-en",
    "jina-embeddings-v2-base-zh",
    "jina-embeddings-v2-base-es",
    "jina-embeddings-v2-base-code",
    "jina-embeddings-v3",
    "jina-embeddings-v4",
]
# Jina AI multimodal model names.
JinaMultimodalModel: TypeAlias = Literal["jina-clip-v1", "jina-clip-v2", "jina-embeddings-v4"]
# Voyage AI text embedding model names.
VoyageModel: TypeAlias = Literal[
    "voyage-3.5",
    "voyage-3.5-lite",
    "voyage-3",
    "voyage-3-lite",
    "voyage-context-3",
    "voyage-large-2",
    "voyage-code-2",
    "voyage-2",
    "voyage-law-2",
    "voyage-large-2-instruct",
    "voyage-finance-2",
    "voyage-multilingual-2",
]
# Voyage AI multimodal model names.
VoyageMultimodalModel: TypeAlias = Literal["voyage-multimodal-3",]
# Model identifiers offered for the AWS vectorizer.
AWSModel: TypeAlias = Literal[
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]
# AWS services the AWS vectorizer can be pointed at.
AWSService: TypeAlias = Literal[
    "bedrock",
    "sagemaker",
]
# Model names offered for the `text2vec-weaviate` vectorizer.
WeaviateModel: TypeAlias = Literal[
    "Snowflake/snowflake-arctic-embed-l-v2.0", "Snowflake/snowflake-arctic-embed-m-v1.5"
]
[docs]
class Vectorizers(str, Enum):
    """The available vectorization modules in Weaviate.

    These modules encode binary data into lists of floats called vectors.
    See the [docs](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules) for more details.

    Each member's value is the module name string as written in the collection config.

    Attributes:
        NONE: No vectorizer.
        TEXT2COLBERT_JINAAI: The `text2colbert-jinaai` module.
        TEXT2VEC_AWS: Weaviate module backed by AWS text-based embedding models.
        TEXT2VEC_COHERE: Weaviate module backed by Cohere text-based embedding models.
        TEXT2VEC_CONTEXTIONARY: Weaviate module backed by Contextionary text-based embedding models.
        TEXT2VEC_DATABRICKS: The `text2vec-databricks` module.
        TEXT2VEC_GPT4ALL: Weaviate module backed by GPT-4-All text-based embedding models.
        TEXT2VEC_HUGGINGFACE: Weaviate module backed by HuggingFace text-based embedding models.
        TEXT2VEC_MISTRAL: The `text2vec-mistral` module.
        TEXT2VEC_MORPH: The `text2vec-morph` module.
        TEXT2VEC_MODEL2VEC: The `text2vec-model2vec` module.
        TEXT2VEC_NVIDIA: Weaviate module backed by NVIDIA text-based embedding models.
        TEXT2VEC_OLLAMA: The `text2vec-ollama` module.
        TEXT2VEC_OPENAI: Weaviate module backed by OpenAI and Azure-OpenAI text-based embedding models.
        TEXT2VEC_PALM: Weaviate module backed by PaLM text-based embedding models.
        TEXT2VEC_TRANSFORMERS: Weaviate module backed by Transformers text-based embedding models.
        TEXT2VEC_JINAAI: Weaviate module backed by Jina AI text-based embedding models.
        TEXT2VEC_VOYAGEAI: Weaviate module backed by Voyage AI text-based embedding models.
        TEXT2VEC_WEAVIATE: Weaviate module backed by Weaviate's self-hosted text-based embedding models.
        IMG2VEC_NEURAL: Weaviate module backed by a ResNet-50 neural network for images.
        MULTI2VEC_AWS: The `multi2vec-aws` module.
        MULTI2VEC_CLIP: Weaviate module backed by a Sentence-BERT CLIP model for images and text.
        MULTI2VEC_COHERE: The `multi2vec-cohere` module.
        MULTI2VEC_JINAAI: The `multi2vec-jinaai` module.
        MULTI2MULTI_JINAAI: The `multi2multivec-jinaai` module.
        MULTI2VEC_BIND: Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.
        MULTI2VEC_PALM: Weaviate module backed by a palm model for images and text.
        MULTI2VEC_VOYAGEAI: Weaviate module backed by Voyage AI multimodal embedding models.
        MULTI2VEC_NVIDIA: Weaviate module backed by NVIDIA multimodal embedding models.
        REF2VEC_CENTROID: Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.
    """

    NONE = "none"
    TEXT2COLBERT_JINAAI = "text2colbert-jinaai"
    TEXT2VEC_AWS = "text2vec-aws"
    TEXT2VEC_COHERE = "text2vec-cohere"
    TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary"
    TEXT2VEC_DATABRICKS = "text2vec-databricks"
    TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
    TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
    TEXT2VEC_MISTRAL = "text2vec-mistral"
    TEXT2VEC_MORPH = "text2vec-morph"
    TEXT2VEC_MODEL2VEC = "text2vec-model2vec"
    TEXT2VEC_NVIDIA = "text2vec-nvidia"
    TEXT2VEC_OLLAMA = "text2vec-ollama"
    TEXT2VEC_OPENAI = "text2vec-openai"
    TEXT2VEC_PALM = "text2vec-palm"  # change to google once 1.27 is the lowest supported version
    TEXT2VEC_TRANSFORMERS = "text2vec-transformers"
    TEXT2VEC_JINAAI = "text2vec-jinaai"
    TEXT2VEC_VOYAGEAI = "text2vec-voyageai"
    TEXT2VEC_WEAVIATE = "text2vec-weaviate"
    IMG2VEC_NEURAL = "img2vec-neural"
    MULTI2VEC_AWS = "multi2vec-aws"
    MULTI2VEC_CLIP = "multi2vec-clip"
    MULTI2VEC_COHERE = "multi2vec-cohere"
    MULTI2VEC_JINAAI = "multi2vec-jinaai"
    MULTI2MULTI_JINAAI = "multi2multivec-jinaai"
    MULTI2VEC_BIND = "multi2vec-bind"
    MULTI2VEC_PALM = "multi2vec-palm"  # change to google once 1.27 is the lowest supported version
    MULTI2VEC_VOYAGEAI = "multi2vec-voyageai"
    MULTI2VEC_NVIDIA = "multi2vec-nvidia"
    REF2VEC_CENTROID = "ref2vec-centroid"
[docs]
class VectorDistances(str, Enum):
    """Vector similarity distance metric to be used in the `VectorIndexConfig` class.

    To ensure optimal search results, we recommend reviewing whether your model provider advises a
    specific distance metric and following their advice.

    Attributes:
        COSINE: Cosine distance: [reference](https://en.wikipedia.org/wiki/Cosine_similarity)
        DOT: Dot distance: [reference](https://en.wikipedia.org/wiki/Dot_product)
        L2_SQUARED: L2 squared distance: [reference](https://en.wikipedia.org/wiki/Euclidean_distance)
        HAMMING: Hamming distance: [reference](https://en.wikipedia.org/wiki/Hamming_distance)
        MANHATTAN: Manhattan distance: [reference](https://en.wikipedia.org/wiki/Taxicab_geometry)
    """

    # Member values are the metric names as plain strings (the enum also subclasses `str`).
    COSINE = "cosine"
    DOT = "dot"
    L2_SQUARED = "l2-squared"
    HAMMING = "hamming"
    MANHATTAN = "manhattan"
[docs]
class _VectorizerConfigCreate(_ConfigCreateModel):
    """Base model for all vectorizer configs; it only carries the module identifier."""

    # Required (`default=...`) module identifier; `exclude=True` keeps it out of pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(default=..., exclude=True)
[docs]
class _Text2ColbertJinaAIConfig(_VectorizerConfigCreate):
    """Settings for the `text2colbert-jinaai` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2COLBERT_JINAAI, frozen=True, exclude=True
    )
    vectorizeClassName: bool
    # All fields below are required at construction time but may be None.
    model: Optional[str]
    dimensions: Optional[int]
[docs]
class _Text2VecContextionaryConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-contextionary` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_CONTEXTIONARY, frozen=True, exclude=True
    )
    vectorizeClassName: bool
[docs]
class _Text2VecModel2VecConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-model2vec` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MODEL2VEC, frozen=True, exclude=True
    )
    vectorizeClassName: bool
    # Required at construction time but may be None.
    inferenceUrl: Optional[str]
[docs]
class _VectorizerCustomConfig(_VectorizerConfigCreate):
    """Config for a vectorizer module described by a caller-supplied settings dictionary."""

    # Raw module settings; None means "no settings".
    module_config: Optional[Dict[str, Any]]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the supplied module settings, or an empty dict when none were given."""
        settings = self.module_config
        return {} if settings is None else settings
[docs]
class _Text2VecAWSConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-aws` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_AWS, frozen=True, exclude=True
    )
    model: Optional[str]
    endpoint: Optional[str]
    region: str
    service: str
    vectorizeClassName: bool

    @field_validator("region")
    def _check_name(cls, r: str) -> str:
        """Reject the empty string, which the factory uses as a "not provided" placeholder."""
        if r != "":
            return r
        raise ValueError("region is a required argument and must be given")
[docs]
class _Text2VecAzureOpenAIConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-openai` module when targeting Azure OpenAI."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_OPENAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    resourceName: str
    deploymentId: str
    vectorizeClassName: bool
    dimensions: Optional[int]
    model: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, stringifying `baseURL` and flagging the config as Azure."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is not None:
            # Replace the URL object with its plain string form.
            payload["baseURL"] = url.unicode_string()
        payload["isAzure"] = True
        return payload
[docs]
class _Text2VecHuggingFaceConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-huggingface` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_HUGGINGFACE, frozen=True, exclude=True
    )
    model: Optional[str]
    passageModel: Optional[str]
    queryModel: Optional[str]
    endpointURL: Optional[AnyHttpUrl]
    waitForModel: Optional[bool]
    useGPU: Optional[bool]
    useCache: Optional[bool]
    vectorizeClassName: bool

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, nesting the boolean switches under an `options` key.

        `waitForModel`, `useGPU` and `useCache` are moved out of the top level into
        `options` when set; `endpointURL` is emitted as a plain string.
        """
        payload = super()._to_dict()
        # Move every switch that was explicitly set from the top level into `options`.
        options = {
            key: payload.pop(key)
            for key in ("waitForModel", "useGPU", "useCache")
            if getattr(self, key) is not None
        }
        if options:
            payload["options"] = options
        endpoint = self.endpointURL
        if endpoint is not None:
            payload["endpointURL"] = endpoint.unicode_string()
        return payload
[docs]
class _Text2VecMistralConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-mistral` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MISTRAL, frozen=True, exclude=True
    )
    model: Optional[str]
    vectorizeClassName: bool
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, emitting `baseURL` as a plain string when it is set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Text2VecMorphConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-morph` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MORPH, frozen=True, exclude=True
    )
    model: Optional[str]
    vectorizeClassName: bool
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, emitting `baseURL` as a plain string when it is set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Text2VecDatabricksConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-databricks` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_DATABRICKS, frozen=True, exclude=True
    )
    endpoint: str
    # Required at construction time but may be None.
    instruction: Optional[str]
    vectorizeClassName: bool
# Values accepted for the `type_` field of `_Text2VecOpenAIConfig`.
OpenAIType: TypeAlias = Literal["text", "code"]
[docs]
class _Text2VecOpenAIConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-openai` module when targeting OpenAI (not Azure)."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_OPENAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    dimensions: Optional[int]
    model: Optional[str]
    modelVersion: Optional[str]
    # Trailing underscore avoids shadowing the builtin; serialized under the key "type".
    type_: Optional[OpenAIType]
    vectorizeClassName: bool

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config: rename `type_` to `type`, stringify `baseURL`, set `isAzure` False."""
        payload = super()._to_dict()
        if self.type_ is not None:
            payload["type"] = payload.pop("type_")
        url = self.baseURL
        if url is not None:
            payload["baseURL"] = url.unicode_string()
        payload["isAzure"] = False
        return payload
[docs]
class _Text2VecCohereConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-cohere` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_COHERE, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    dimensions: Optional[int]
    truncate: Optional[CohereTruncation]
    vectorizeClassName: bool

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, emitting `baseURL` as a plain string when it is set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Text2VecGoogleConfig(_VectorizerConfigCreate):
    """Settings for the Google text vectorizer, registered under the `text2vec-palm` module name."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_PALM, frozen=True, exclude=True
    )
    # Optional fields are required at construction time but may be None.
    projectId: Optional[str]
    apiEndpoint: Optional[str]
    dimensions: Optional[int]
    modelId: Optional[str]
    vectorizeClassName: bool
    titleProperty: Optional[str]
[docs]
class _Text2VecTransformersConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-transformers` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_TRANSFORMERS, frozen=True, exclude=True
    )
    poolingStrategy: Literal["masked_mean", "cls"]
    vectorizeClassName: bool
    inferenceUrl: Optional[str]
    passageInferenceUrl: Optional[str]
    queryInferenceUrl: Optional[str]
    # The only field here with a default, so callers may omit it.
    dimensions: Optional[int] = None
[docs]
class _Text2VecGPT4AllConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-gpt4all` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_GPT4ALL, frozen=True, exclude=True
    )
    vectorizeClassName: bool
[docs]
class _Text2VecJinaConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-jinaai` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_JINAAI, frozen=True, exclude=True
    )
    # Typed as a plain string here, unlike the `AnyHttpUrl` used by several sibling configs.
    baseURL: Optional[str]
    dimensions: Optional[int]
    model: Optional[str]
    vectorizeClassName: bool
[docs]
class _Text2VecVoyageConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-voyageai` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_VOYAGEAI, frozen=True, exclude=True
    )
    dimensions: Optional[int]
    model: Optional[str]
    # Typed as a plain string here, unlike the `AnyHttpUrl` used by several sibling configs.
    baseURL: Optional[str]
    truncate: Optional[bool]
    vectorizeClassName: bool
[docs]
class _Text2VecNvidiaConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-nvidia` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_NVIDIA, frozen=True, exclude=True
    )
    model: Optional[str]
    # Typed as a plain string here, unlike the `AnyHttpUrl` used by several sibling configs.
    baseURL: Optional[str]
    truncate: Optional[bool]
    vectorizeClassName: bool
[docs]
class _Text2VecWeaviateConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-weaviate` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_WEAVIATE, frozen=True, exclude=True
    )
    model: Optional[str]
    # Typed as a plain string here, unlike the `AnyHttpUrl` used by several sibling configs.
    baseURL: Optional[str]
    vectorizeClassName: bool
    dimensions: Optional[int]
[docs]
class _Text2VecOllamaConfig(_VectorizerConfigCreate):
    """Settings for the `text2vec-ollama` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_OLLAMA, frozen=True, exclude=True
    )
    # Optional fields are required at construction time but may be None.
    model: Optional[str]
    apiEndpoint: Optional[str]
    vectorizeClassName: bool
[docs]
class _Img2VecNeuralConfig(_VectorizerConfigCreate):
    """Settings for the `img2vec-neural` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.IMG2VEC_NEURAL, frozen=True, exclude=True
    )
    # Names of the image properties to vectorize.
    imageFields: List[str]
[docs]
class Multi2VecField(BaseModel):
    """Use this class when defining the fields to use in the multi2vec vectorizers (e.g. `Multi2VecClip` and `Multi2VecBind`).

    Attributes:
        name: The name of the property to vectorize.
        weight: Optional weight of the field. It is excluded from this model's own dump
            and instead collected by `_Multi2VecBase._to_dict` into a `weights` mapping.
    """

    name: str
    weight: Optional[float] = Field(default=None, exclude=True)
[docs]
class _Multi2VecBase(_VectorizerConfigCreate):
    """Shared base for multi2vec configs: image/text field lists plus their serialization."""

    imageFields: Optional[List[Multi2VecField]]
    textFields: Optional[List[Multi2VecField]]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, flattening every `*Fields` attribute.

        Each non-None attribute whose name contains "Fields" is emitted as a list of
        field names. Weights that were set are gathered per attribute under a
        `weights` key, which is dropped again when no weight was given at all.
        """
        payload = super()._to_dict()
        payload["weights"] = {}
        for attr_name in type(self).model_fields:
            if "Fields" not in attr_name:
                continue
            entries = getattr(self, attr_name)
            if entries is None:
                continue
            entries = cast(List[Multi2VecField], entries)
            payload[attr_name] = [entry.name for entry in entries]
            # Only weights that were explicitly set are collected.
            given = [entry.weight for entry in entries if entry.weight is not None]
            if given:
                payload["weights"][attr_name] = given
        if not payload["weights"]:
            del payload["weights"]
        return payload
[docs]
class _Multi2VecCohereConfig(_Multi2VecBase):
    """Settings for the `multi2vec-cohere` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_COHERE, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    dimensions: Optional[int]
    truncate: Optional[CohereTruncation]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the multi2vec base, emitting `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Multi2VecJinaConfig(_Multi2VecBase):
    """Settings for the `multi2vec-jinaai` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_JINAAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    dimensions: Optional[int]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the multi2vec base, emitting `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Multi2VecAWSConfig(_Multi2VecBase):
    """Settings for the `multi2vec-aws` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_AWS, frozen=True, exclude=True
    )
    # All three are required at construction time but may be None.
    region: Optional[str]
    model: Optional[str]
    dimensions: Optional[int]
[docs]
class _Multi2MultiVecJinaConfig(_Multi2VecBase):
    """Settings for the `multi2multivec-jinaai` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2MULTI_JINAAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the multi2vec base, emitting `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Multi2VecClipConfig(_Multi2VecBase):
    """Settings for the `multi2vec-clip` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_CLIP, frozen=True, exclude=True
    )
    # Required at construction time but may be None.
    inferenceUrl: Optional[str]
[docs]
# NOTE(review): `_Multi2VecBase` already derives from `_VectorizerConfigCreate`, so the
# second base looks redundant; left unchanged here.
class _Multi2VecGoogleConfig(_Multi2VecBase, _VectorizerConfigCreate):
    """Settings for the Google multimodal vectorizer, registered under the `multi2vec-palm` module name."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_PALM, frozen=True, exclude=True
    )
    # Name contains "Fields", so `_Multi2VecBase._to_dict` flattens it like image/text fields.
    videoFields: Optional[List[Multi2VecField]]
    projectId: str
    location: Optional[str]
    modelId: Optional[str]
    dimensions: Optional[int]
    videoIntervalSeconds: Optional[int]
[docs]
class _Multi2VecBindConfig(_Multi2VecBase):
    """Settings for the `multi2vec-bind` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_BIND, frozen=True, exclude=True
    )
    # Extra modalities on top of the inherited image/text fields. Every name contains
    # "Fields", so `_Multi2VecBase._to_dict` flattens each one the same way.
    audioFields: Optional[List[Multi2VecField]]
    depthFields: Optional[List[Multi2VecField]]
    IMUFields: Optional[List[Multi2VecField]]
    thermalFields: Optional[List[Multi2VecField]]
    videoFields: Optional[List[Multi2VecField]]
[docs]
class _Multi2VecVoyageaiConfig(_Multi2VecBase):
    """Settings for the `multi2vec-voyageai` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_VOYAGEAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    truncation: Optional[bool]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the multi2vec base, emitting `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Multi2VecNvidiaConfig(_Multi2VecBase):
    """Settings for the `multi2vec-nvidia` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_NVIDIA, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    truncation: Optional[bool]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize via the multi2vec base, emitting `baseURL` as a plain string when set."""
        payload = super()._to_dict()
        url = self.baseURL
        if url is None:
            return payload
        payload["baseURL"] = url.unicode_string()
        return payload
[docs]
class _Ref2VecCentroidConfig(_VectorizerConfigCreate):
    """Settings for the `ref2vec-centroid` module."""

    # Fixed module identifier: frozen and excluded from pydantic dumps.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.REF2VEC_CENTROID, frozen=True, exclude=True
    )
    # Names of the reference properties used in vectorization.
    referenceProperties: List[str]
    # "mean" is the only value the type allows.
    method: Literal["mean"]
[docs]
def _map_multi2vec_fields(
    fields: Optional[Union[List[str], List[Multi2VecField]]],
) -> Optional[List[Multi2VecField]]:
    """Normalize a field list so that every entry is a `Multi2VecField`.

    Args:
        fields: Property names and/or ready-made `Multi2VecField` objects, or `None`.

    Returns:
        `None` when `fields` is `None`; otherwise a new list in which each plain string
        has been wrapped as `Multi2VecField(name=...)` and other entries are kept as-is.
    """
    if fields is None:
        return None
    normalized: List[Multi2VecField] = []
    for entry in fields:
        if isinstance(entry, str):
            normalized.append(Multi2VecField(name=entry))
        else:
            normalized.append(entry)
    return normalized
[docs]
class _Vectorizer:
"""Use this factory class to create the correct object for the `vectorizer_config` argument in the `collections.create()` method.
Each staticmethod provides options specific to the named vectorizer in the function's name. Under-the-hood data validation steps
will ensure that any mis-specifications will be caught before the request is sent to Weaviate.
"""
[docs]
@staticmethod
def none() -> _VectorizerConfigCreate:
    """Create a `_VectorizerConfigCreate` object with the vectorizer set to `Vectorizers.NONE`."""
    no_vectorizer = Vectorizers.NONE
    return _VectorizerConfigCreate(vectorizer=no_vectorizer)
[docs]
@staticmethod
def img2vec_neural(
    image_fields: List[str],
) -> _VectorizerConfigCreate:
    """Create an `_Img2VecNeuralConfig` object for use when vectorizing using the `img2vec-neural` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/img2vec-neural)
    for detailed usage.

    Args:
        image_fields: The image fields to use. This is a required field and must match the property fields
            of the collection that are defined as `DataType.BLOB`.

    Raises:
        pydantic.ValidationError: If `image_fields` is not a `list`.
    """
    config = _Img2VecNeuralConfig(imageFields=image_fields)
    return config
[docs]
@staticmethod
def multi2vec_clip(
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    interference_url: Optional[str] = None,
    inference_url: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecClipConfig` object for use when vectorizing using the `multi2vec-clip` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal)
    for detailed usage.

    Args:
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        interference_url: Deprecated misspelling of `inference_url`. Still honoured when it is the
            only one given, but emits a `DeprecationWarning`.
        inference_url: The inference url to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Accepted for signature compatibility; this method does not forward
            it to `_Multi2VecClipConfig`.

    Raises:
        ValueError: If both `interference_url` and `inference_url` are provided.
        pydantic.ValidationError: If `image_fields` or `text_fields` are not `None` or a `list`.
    """
    if interference_url is not None:
        if inference_url is not None:
            raise ValueError(
                "You have provided `interference_url` as well as `inference_url`. Please only provide `inference_url`, as `interference_url` is deprecated."
            )
        warnings.warn(
            message="""This parameter is deprecated and will be removed in a future release. Please use `inference_url` instead.""",
            category=DeprecationWarning,
            # Attribute the warning to the caller's line rather than to this module.
            stacklevel=2,
        )
        # Deprecated does not mean ignored: keep using the value the caller supplied.
        inference_url = interference_url
    return _Multi2VecClipConfig(
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
        inferenceUrl=inference_url,
    )
[docs]
@staticmethod
def multi2vec_bind(
    audio_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    depth_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    imu_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    thermal_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecBindConfig` object for use when vectorizing using the `multi2vec-bind` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/imagebind/embeddings-multimodal)
    for detailed usage.

    Args:
        audio_fields: The audio fields to use in vectorization.
        depth_fields: The depth fields to use in vectorization.
        image_fields: The image fields to use in vectorization.
        imu_fields: The IMU fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        thermal_fields: The thermal fields to use in vectorization.
        video_fields: The video fields to use in vectorization.
        vectorize_collection_name: Accepted for signature compatibility; this method does not forward
            it to `_Multi2VecBindConfig`.

    Raises:
        pydantic.ValidationError: If any of the `*_fields` are not `None` or a `list`.
    """
    # Normalize each modality so that plain property names become `Multi2VecField` objects.
    audio = _map_multi2vec_fields(audio_fields)
    depth = _map_multi2vec_fields(depth_fields)
    image = _map_multi2vec_fields(image_fields)
    imu = _map_multi2vec_fields(imu_fields)
    text = _map_multi2vec_fields(text_fields)
    thermal = _map_multi2vec_fields(thermal_fields)
    video = _map_multi2vec_fields(video_fields)
    return _Multi2VecBindConfig(
        imageFields=image,
        textFields=text,
        audioFields=audio,
        depthFields=depth,
        IMUFields=imu,
        thermalFields=thermal,
        videoFields=video,
    )
[docs]
@staticmethod
def ref2vec_centroid(
    reference_properties: List[str],
    method: Literal["mean"] = "mean",
) -> _VectorizerConfigCreate:
    """Create a `_Ref2VecCentroidConfig` object for use when vectorizing using the `ref2vec-centroid` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/ref2vec-centroid)
    for detailed usage.

    Args:
        reference_properties: The reference properties to use in vectorization, REQUIRED.
        method: The method to use in vectorization. Defaults to `mean`.

    Raises:
        pydantic.ValidationError: If `reference_properties` is not a `list`.
    """
    config = _Ref2VecCentroidConfig(method=method, referenceProperties=reference_properties)
    return config
[docs]
@staticmethod
def text2vec_aws(
    model: Optional[Union[AWSModel, str]] = None,
    region: str = "",  # cant have a non-default value after a default value, but we cant change the order for BC - will be validated in the model
    endpoint: Optional[str] = None,
    service: Union[AWSService, str] = "bedrock",
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecAWSConfig` object for use when vectorizing using the `text2vec-aws` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings)
    for detailed usage.

    Args:
        model: The model to use, REQUIRED for service "bedrock".
        region: The AWS region to run the model from, REQUIRED.
        endpoint: The endpoint to use, REQUIRED for service "sagemaker".
        service: The AWS service to use, options are "bedrock" and "sagemaker".
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `region` is left as the empty string (rejected by the
            `region` validator on `_Text2VecAWSConfig`).
    """
    config = _Text2VecAWSConfig(
        model=model,
        endpoint=endpoint,
        region=region,
        service=service,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_azure_openai(
    resource_name: str,
    deployment_id: str,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    dimensions: Optional[int] = None,
    model: Optional[str] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecAzureOpenAIConfig` object for use when vectorizing with Azure OpenAI.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai-azure/embeddings)
    for detailed usage.

    Args:
        resource_name: The resource name to use, REQUIRED.
        deployment_id: The deployment ID to use, REQUIRED.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        dimensions: The dimensionality of the vectors. Defaults to `None`, which uses the server-defined default.
        model: The model to use. Defaults to `None`.

    Raises:
        pydantic.ValidationError: If `resource_name` or `deployment_id` are not `str`.
    """
    config = _Text2VecAzureOpenAIConfig(
        baseURL=base_url,
        resourceName=resource_name,
        deploymentId=deployment_id,
        vectorizeClassName=vectorize_collection_name,
        dimensions=dimensions,
        model=model,
    )
    return config
[docs]
@staticmethod
def text2vec_contextionary(
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecContextionaryConfig` object for use when vectorizing using the `text2vec-contextionary` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-contextionary)
    for detailed usage.

    Args:
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `vectorize_collection_name` is not a `bool`.
    """
    config = _Text2VecContextionaryConfig(vectorizeClassName=vectorize_collection_name)
    return config
[docs]
@staticmethod
def custom(
    module_name: str, module_config: Optional[Dict[str, Any]] = None
) -> _VectorizerConfigCreate:
    """Create a `_VectorizerCustomConfig` object for use when vectorizing using a custom specification.

    Args:
        module_name: The name of the module to use, REQUIRED.
        module_config: The configuration to use for the module. Defaults to `None`, which uses the server-defined default.
    """
    # Wrap the free-form name so it is accepted by the `vectorizer` field.
    vectorizer_name = _EnumLikeStr(module_name)
    return _VectorizerCustomConfig(module_config=module_config, vectorizer=vectorizer_name)
[docs]
@staticmethod
def text2vec_cohere(
    model: Optional[Union[CohereModel, str]] = None,
    truncate: Optional[CohereTruncation] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecCohereConfig` object for use when vectorizing using the `text2vec-cohere` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If an argument fails validation by `_Text2VecCohereConfig`,
            e.g. `truncate` is not a valid value from the `CohereTruncation` type.
    """
    config = _Text2VecCohereConfig(
        baseURL=base_url,
        model=model,
        # This factory exposes no `dimensions` argument; the field is always sent as None.
        dimensions=None,
        truncate=truncate,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def multi2vec_cohere(
    *,
    model: Optional[Union[CohereMultimodalModel, str]] = None,
    truncate: Optional[CohereTruncation] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecCohereConfig` object for use when vectorizing using the `multi2vec-cohere` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Accepted for signature compatibility; this method does not forward
            it to `_Multi2VecCohereConfig`.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If an argument fails validation by `_Multi2VecCohereConfig`,
            e.g. `truncate` is not a valid value from the `CohereTruncation` type.
    """
    # Normalize plain property names into `Multi2VecField` objects first.
    images = _map_multi2vec_fields(image_fields)
    texts = _map_multi2vec_fields(text_fields)
    return _Multi2VecCohereConfig(
        imageFields=images,
        textFields=texts,
        baseURL=base_url,
        model=model,
        # This factory exposes no `dimensions` argument; the field is always sent as None.
        dimensions=None,
        truncate=truncate,
    )
[docs]
@staticmethod
def multi2vec_voyageai(
    *,
    model: Optional[Union[VoyageMultimodalModel, str]] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncation: Whether to truncate the input. Defaults to `None`, which uses the server-defined default.
        output_encoding: Deprecated, has no effect. Defaults to `None` so callers no longer have to pass it.
        vectorize_collection_name: Deprecated, has no effect.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If an argument fails validation by `_Multi2VecVoyageaiConfig`.
    """
    # `output_encoding` and `vectorize_collection_name` are intentionally not forwarded.
    return _Multi2VecVoyageaiConfig(
        baseURL=base_url,
        model=model,
        truncation=truncation,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
    )
[docs]
@staticmethod
def multi2vec_nvidia(
    *,
    model: Optional[str] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecNvidiaConfig` object for use when vectorizing using the `multi2vec-nvidia` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        truncation: Whether to truncate the input. Defaults to `None`, which uses the server-defined default.
        output_encoding: Deprecated, has no effect.
        vectorize_collection_name: Deprecated, has no effect.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If an argument fails validation by `_Multi2VecNvidiaConfig`.
    """
    # Normalize plain property names into `Multi2VecField` objects first.
    images = _map_multi2vec_fields(image_fields)
    texts = _map_multi2vec_fields(text_fields)
    return _Multi2VecNvidiaConfig(
        imageFields=images,
        textFields=texts,
        baseURL=base_url,
        model=model,
        truncation=truncation,
    )
[docs]
@staticmethod
def text2vec_databricks(
    *,
    endpoint: str,
    instruction: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecDatabricksConfig` object for use when vectorizing using the `text2vec-databricks` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/databricks/embeddings)
    for detailed usage.

    Args:
        endpoint: The endpoint to use. Required.
        instruction: The instruction strategy to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    config = _Text2VecDatabricksConfig(
        vectorizeClassName=vectorize_collection_name,
        instruction=instruction,
        endpoint=endpoint,
    )
    return config
[docs]
@staticmethod
def text2vec_gpt4all(
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecGPT4AllConfig` object for use when vectorizing using the `text2vec-gpt4all` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/gpt4all/embeddings)
    for detailed usage.

    Args:
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If `vectorize_collection_name` is not a `bool`.
    """
    config = _Text2VecGPT4AllConfig(vectorizeClassName=vectorize_collection_name)
    return config
[docs]
@staticmethod
def text2vec_huggingface(
    model: Optional[str] = None,
    passage_model: Optional[str] = None,
    query_model: Optional[str] = None,
    endpoint_url: Optional[AnyHttpUrl] = None,
    wait_for_model: Optional[bool] = None,
    use_gpu: Optional[bool] = None,
    use_cache: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecHuggingFaceConfig` object for use when vectorizing using the `text2vec-huggingface` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        passage_model: The passage model to use. Defaults to `None`, which uses the server-defined default.
        query_model: The query model to use. Defaults to `None`, which uses the server-defined default.
        endpoint_url: The endpoint URL to use. Defaults to `None`, which uses the server-defined default.
        wait_for_model: Whether to wait for the model to be loaded. Defaults to `None`, which uses the server-defined default.
        use_gpu: Whether to use the GPU. Defaults to `None`, which uses the server-defined default.
        use_cache: Whether to use the cache. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the arguments passed to the function are invalid.
            It is important to note that some of these variables are mutually exclusive.
            See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings#vectorizer-parameters) for more details.
    """
    config = _Text2VecHuggingFaceConfig(
        # Model selection.
        model=model,
        passageModel=passage_model,
        queryModel=query_model,
        # Endpoint and runtime options.
        endpointURL=endpoint_url,
        waitForModel=wait_for_model,
        useGPU=use_gpu,
        useCache=use_cache,
        # Collection-name handling.
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_mistral(
    *,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecMistralConfig` object for use when vectorizing using the `text2vec-mistral` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/mistral/embeddings)
    for detailed usage.

    Args:
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecMistralConfig(
        model=model,
        baseURL=base_url,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_ollama(
    *,
    api_endpoint: Optional[str] = None,
    model: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecOllamaConfig` object for use when vectorizing using the `text2vec-ollama` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/ollama/embeddings)
    for detailed usage.

    Args:
        api_endpoint: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
            Docker users may need to specify an alias, such as `http://host.docker.internal:11434` so that the container can access the host machine.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecOllamaConfig(
        model=model,
        apiEndpoint=api_endpoint,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_openai(
    model: Optional[Union[OpenAIModel, str]] = None,
    model_version: Optional[str] = None,
    type_: Optional[OpenAIType] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecOpenAIConfig` object for use when vectorizing using the `text2vec-openai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        model_version: The model version to use. Defaults to `None`, which uses the server-defined default.
        type_: The type of model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        dimensions: Number of dimensions. Applicable to v3 OpenAI models only. Defaults to `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If `type_` is not a valid value from the `OpenAIType` type.
    """
    config = _Text2VecOpenAIConfig(
        # Model identification.
        model=model,
        modelVersion=model_version,
        type_=type_,
        dimensions=dimensions,
        # Transport and collection-name handling.
        baseURL=base_url,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
@docstring_deprecated(
    deprecated_in="4.9.0",
    details="""
This method is deprecated and will be removed in Q2 '25. Please use :meth:`~weaviate.collections.classes.config._Vectorizer.text2vec_google` instead.
""",
)
@typing_deprecated(
    "This method is deprecated and will be removed in Q2 '25. Please use `text2vec_google` instead."
)
def text2vec_palm(
    project_id: str,
    api_endpoint: Optional[str] = None,
    model_id: Optional[str] = None,
    title_property: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecGoogleConfig` object through the deprecated `text2vec_palm` entry point.

    Calls `_Warnings.palm_to_google_t2v()` before building the config; use `text2vec_google` instead.
    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
    for detailed usage.

    Args:
        project_id: The project ID to use, REQUIRED.
        api_endpoint: The API endpoint to use without a leading scheme such as `http://`. Defaults to `None`, which uses the server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    # Warn first so the notice is raised even if config validation fails.
    _Warnings.palm_to_google_t2v()
    config = _Text2VecGoogleConfig(
        projectId=project_id,
        modelId=model_id,
        apiEndpoint=api_endpoint,
        titleProperty=title_property,
        vectorizeClassName=vectorize_collection_name,
        dimensions=None,  # this entry point does not expose dimensions
    )
    return config
[docs]
@staticmethod
def text2vec_google_aistudio(
    model_id: Optional[str] = None,
    title_property: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecGoogleConfig` object for use when vectorizing using the `text2vec-google` model.

    The API endpoint is fixed to `generativelanguage.googleapis.com` and no project ID is set.
    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
    for detailed usage.

    Args:
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    config = _Text2VecGoogleConfig(
        # Fixed values for this entry point.
        apiEndpoint="generativelanguage.googleapis.com",
        projectId=None,
        dimensions=None,
        # Caller-supplied values.
        modelId=model_id,
        titleProperty=title_property,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_google(
    project_id: str,
    api_endpoint: Optional[str] = None,
    model_id: Optional[str] = None,
    title_property: Optional[str] = None,
    vectorize_collection_name: bool = True,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecGoogleConfig` object for use when vectorizing using the `text2vec-google` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
    for detailed usage.

    Args:
        project_id: The project ID to use, REQUIRED.
        api_endpoint: The API endpoint to use without a leading scheme such as `http://`. Defaults to `None`, which uses the server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        dimensions: The dimensionality of the vectors. Defaults to `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    # `dimensions` was documented but previously hard-coded to `None`; it is now
    # forwarded, and the `None` default preserves the earlier behaviour.
    return _Text2VecGoogleConfig(
        projectId=project_id,
        apiEndpoint=api_endpoint,
        dimensions=dimensions,
        modelId=model_id,
        vectorizeClassName=vectorize_collection_name,
        titleProperty=title_property,
    )
[docs]
@staticmethod
@docstring_deprecated(
    deprecated_in="4.9.0",
    details="""
This method is deprecated and will be removed in Q2 '25. Please use :meth:`~weaviate.collections.classes.config._Vectorizer.multi2vec_google` instead.
""",
)
@typing_deprecated(
    "This method is deprecated and will be removed in Q2 '25. Please use `multi2vec_google` instead."
)
def multi2vec_palm(
    *,
    location: str,
    project_id: str,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    dimensions: Optional[int] = None,
    model_id: Optional[str] = None,
    video_interval_seconds: Optional[int] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecGoogleConfig` object through the deprecated `multi2vec_palm` entry point.

    Calls `_Warnings.palm_to_google_m2v()` before building the config; use `multi2vec_google` instead.
    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings-multimodal)
    for detailed usage.

    Args:
        location: Where the model runs. REQUIRED.
        project_id: The project ID to use, REQUIRED.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        video_fields: The video fields to use in vectorization.
        dimensions: The number of dimensions to use. Defaults to `None`, which uses the server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        video_interval_seconds: Length of a video interval. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Accepted but not forwarded to the config object by this method.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    # Warn first, then normalise the field lists in image/text/video order.
    _Warnings.palm_to_google_m2v()
    mapped_images = _map_multi2vec_fields(image_fields)
    mapped_texts = _map_multi2vec_fields(text_fields)
    mapped_videos = _map_multi2vec_fields(video_fields)
    return _Multi2VecGoogleConfig(
        location=location,
        projectId=project_id,
        modelId=model_id,
        dimensions=dimensions,
        videoIntervalSeconds=video_interval_seconds,
        imageFields=mapped_images,
        textFields=mapped_texts,
        videoFields=mapped_videos,
    )
[docs]
@staticmethod
def multi2vec_google(
    *,
    location: str,
    project_id: str,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    dimensions: Optional[int] = None,
    model_id: Optional[str] = None,
    video_interval_seconds: Optional[int] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecGoogleConfig` object for use when vectorizing using the `multi2vec-google` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings-multimodal)
    for detailed usage.

    Args:
        location: Where the model runs. REQUIRED.
        project_id: The project ID to use, REQUIRED.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.
        video_fields: The video fields to use in vectorization.
        dimensions: The number of dimensions to use. Defaults to `None`, which uses the server-defined default.
        model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
        video_interval_seconds: Length of a video interval. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Accepted but not forwarded to the config object by this method.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    # `dimensions` is forwarded for parity with the deprecated `multi2vec_palm`,
    # which builds the same config class; the `None` default keeps prior behaviour.
    return _Multi2VecGoogleConfig(
        projectId=project_id,
        location=location,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
        videoFields=_map_multi2vec_fields(video_fields),
        dimensions=dimensions,
        modelId=model_id,
        videoIntervalSeconds=video_interval_seconds,
    )
[docs]
@staticmethod
def text2vec_transformers(
    pooling_strategy: Literal["masked_mean", "cls"] = "masked_mean",
    dimensions: Optional[int] = None,
    vectorize_collection_name: bool = True,
    inference_url: Optional[str] = None,
    passage_inference_url: Optional[str] = None,
    query_inference_url: Optional[str] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecTransformersConfig` object for use when vectorizing using the `text2vec-transformers` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings)
    for detailed usage.

    Args:
        pooling_strategy: The pooling strategy to use. Defaults to `masked_mean`.
        dimensions: The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        inference_url: The inference url to use where API requests should go. You can use either this OR passage/query_inference_url. Defaults to `None`, which uses the server-defined default.
        passage_inference_url: The inference url to use where passage API requests should go. You can use either this and query_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default.
        query_inference_url: The inference url to use where query API requests should go. You can use either this and passage_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default.

    Raises:
        pydantic.ValidationError: If `pooling_strategy` is not one of the accepted values.
    """
    config = _Text2VecTransformersConfig(
        # Inference endpoints (single URL, or the passage/query pair).
        inferenceUrl=inference_url,
        passageInferenceUrl=passage_inference_url,
        queryInferenceUrl=query_inference_url,
        # Embedding options.
        poolingStrategy=pooling_strategy,
        dimensions=dimensions,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_jinaai(
    model: Optional[Union[JinaModel, str]] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[str] = None,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecJinaConfig` object for use when vectorizing using the `text2vec-jinaai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
            See the
            [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings#available-models) for more details.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
        base_url: The base URL to send the vectorization requests to. Defaults to `None`, which uses the server-defined default.
        dimensions: The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default.
    """
    config = _Text2VecJinaConfig(
        baseURL=base_url,
        model=model,
        dimensions=dimensions,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def multi2vec_jinaai(
    *,
    model: Optional[Union[JinaMultimodalModel, str]] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    dimensions: Optional[int] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecJinaConfig` object for use when vectorizing using the `multi2vec-jinaai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Accepted but not forwarded to the config object by this method.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        dimensions: The number of dimensions for the generated embeddings (only available for some models). Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        text_fields: The text fields to use in vectorization.

    Raises:
        pydantic.ValidationError: If the supplied values are rejected when the config object is validated.
    """
    # Normalise the field lists first (image before text, as in the call order).
    mapped_images = _map_multi2vec_fields(image_fields)
    mapped_texts = _map_multi2vec_fields(text_fields)
    return _Multi2VecJinaConfig(
        model=model,
        baseURL=base_url,
        dimensions=dimensions,
        imageFields=mapped_images,
        textFields=mapped_texts,
    )
[docs]
@staticmethod
def text2vec_voyageai(
    *,
    model: Optional[Union[VoyageModel, str]] = None,
    base_url: Optional[str] = None,
    truncate: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecVoyageConfig` object for use when vectorizing using the `text2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
            See the
            [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings#available-models) for more details.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        truncate: Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecVoyageConfig(
        baseURL=base_url,
        model=model,
        truncate=truncate,
        dimensions=None,  # this entry point does not expose dimensions
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_weaviate(
    *,
    model: Optional[Union[WeaviateModel, str]] = None,
    base_url: Optional[str] = None,
    vectorize_collection_name: bool = True,
    dimensions: Optional[int] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecWeaviateConfig` object from the given settings.

    TODO: link the official documentation once it is available.

    Args:
        model: The model to use, either a `WeaviateModel` value or an arbitrary string. Defaults to `None`.
        base_url: Forwarded to the config as `baseURL`. Defaults to `None`.
        vectorize_collection_name: Forwarded to the config as `vectorizeClassName`. Defaults to `True`.
        dimensions: Forwarded to the config as `dimensions`. Defaults to `None`.
    """
    config = _Text2VecWeaviateConfig(
        baseURL=base_url,
        model=model,
        dimensions=dimensions,
        vectorizeClassName=vectorize_collection_name,
    )
    return config
[docs]
@staticmethod
def text2vec_nvidia(
    *,
    model: Optional[str] = None,
    base_url: Optional[str] = None,
    truncate: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecNvidiaConfig` object for use when vectorizing using the `text2vec-nvidia` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings)
    for detailed usage.

    Args:
        model: The model to use. Defaults to `None`, which uses the server-defined default.
            See the
            [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings#available-models) for more details.
        base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        truncate: Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecNvidiaConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
        truncate=truncate,
    )
    return config
[docs]
@staticmethod
def text2vec_model2vec(
    *,
    inference_url: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Create a `_Text2VecModel2VecConfig` object for use when vectorizing using the `text2vec-model2vec` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/model2vec/embeddings)
    for detailed usage.

    Args:
        inference_url: The inference url to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    config = _Text2VecModel2VecConfig(
        inferenceUrl=inference_url,
        vectorizeClassName=vectorize_collection_name,
    )
    return config