Source code for weaviate.collections.collection.sync

import json
from dataclasses import asdict
from typing import Generic, List, Literal, Optional, Type, Union, overload

from weaviate.collections.aggregate import _AggregateCollection
from weaviate.collections.backups import _CollectionBackup
from weaviate.collections.batch.collection import _BatchCollectionWrapper
from weaviate.collections.classes.cluster import Shard
from weaviate.collections.classes.config import ConsistencyLevel
from weaviate.collections.classes.grpc import METADATA, PROPERTIES, REFERENCES
from weaviate.collections.classes.internal import (
    CrossReferences,
    References,
    ReturnProperties,
    ReturnReferences,
    TReferences,
)
from weaviate.collections.classes.types import Properties, TProperties
from weaviate.collections.cluster import _Cluster
from weaviate.collections.config import _ConfigCollection
from weaviate.collections.data import _DataCollection
from weaviate.collections.generate import _GenerateCollection
from weaviate.collections.iterator import _IteratorInputs, _ObjectIterator
from weaviate.collections.query import _QueryCollection
from weaviate.collections.tenants import _Tenants
from weaviate.connect import ConnectionV4
from weaviate.types import UUID

from .base import _CollectionBase


class Collection(Generic[Properties, References], _CollectionBase[Properties, References]):
    """Main entry point for working with a single collection in Weaviate.

    Instances of this class are what `client.collections.create` and `client.collections.get`
    hand back. Every operation that targets the collection is reachable through one of the
    namespaces listed below.

    There is normally no reason to construct this class directly, but importing it is handy
    for type hinting functions that accept a collection object.

    Attributes:
        `aggregate`
            Aggregation methods built on Weaviate's standard aggregation capabilities.
        `backup`
            Backup methods for backing up this collection.
        `batch`
            Functionality for uploading data to this specific collection in batches.
        `config`
            CRUD methods for the configuration of the collection.
        `data`
            CUD methods for the data stored in the collection.
        `generate`
            Query methods that use Weaviate's generative capabilities.
        `query`
            Query methods that use Weaviate's standard query capabilities.
        `tenants`
            CRUD methods for the tenants of a multi-tenancy-enabled collection.
    """

    # NOTE(review): earlier documentation also advertised `aggregate_group_by` and
    # `query_group_by` namespaces. Neither is assigned in this constructor -- confirm
    # whether the base class still provides them before re-documenting them.

    def __init__(
        self,
        connection: ConnectionV4,
        name: str,
        validate_arguments: bool,
        consistency_level: Optional[ConsistencyLevel] = None,
        tenant: Optional[str] = None,
        properties: Optional[Type[Properties]] = None,
        references: Optional[Type[References]] = None,
    ) -> None:
        super().__init__(
            connection,
            name,
            validate_arguments,
            consistency_level,
            tenant,
            properties,
            references,
        )

        self.__cluster = _Cluster(connection)

        # Built up front because the batch wrapper below is handed the very same
        # config namespace that is later exposed as `self.config`.
        collection_config = _ConfigCollection(
            connection=connection,
            name=name,
            tenant=tenant,
        )

        self.aggregate = _AggregateCollection(
            connection=connection,
            name=name,
            consistency_level=consistency_level,
            tenant=tenant,
        )
        """This namespace includes all the querying methods available to you when using Weaviate's standard aggregation capabilities."""

        self.backup = _CollectionBackup(
            connection=connection,
            name=name,
        )
        """This namespace includes all the backup methods available to you when backing up a collection in Weaviate."""

        self.batch = _BatchCollectionWrapper[Properties](
            connection,
            consistency_level,
            name,
            tenant,
            collection_config,
        )
        """This namespace contains all the functionality to upload data in batches to Weaviate for this specific collection."""

        self.config = collection_config
        """This namespace includes all the CRUD methods available to you when modifying the configuration of the collection in Weaviate."""

        self.data = _DataCollection[Properties](
            connection, name, consistency_level, tenant, validate_arguments
        )
        """This namespace includes all the CUD methods available to you when modifying the data of the collection in Weaviate."""

        self.generate = _GenerateCollection[Properties, References](
            connection=connection,
            name=name,
            consistency_level=consistency_level,
            tenant=tenant,
            properties=properties,
            references=references,
            validate_arguments=validate_arguments,
        )
        """This namespace includes all the querying methods available to you when using Weaviate's generative capabilities."""

        self.query = _QueryCollection[Properties, References](
            connection=connection,
            name=name,
            consistency_level=consistency_level,
            tenant=tenant,
            properties=properties,
            references=references,
            validate_arguments=validate_arguments,
        )
        """This namespace includes all the querying methods available to you when using Weaviate's standard query capabilities."""

        self.tenants = _Tenants(
            connection=connection,
            name=name,
            consistency_level=consistency_level,
            validate_arguments=validate_arguments,
        )
        """This namespace includes all the CRUD methods available to you when modifying the tenants of a multi-tenancy-enabled collection in Weaviate."""

    def __len__(self) -> int:
        """Return the object count reported by an aggregate-over-all request with `total_count=True`."""
        object_count = self.aggregate.over_all(total_count=True).total_count
        # `total_count` was requested explicitly, so it is expected to be populated;
        # the assert narrows the Optional for the declared `int` return type.
        assert object_count is not None
        return object_count

    def __str__(self) -> str:
        """Render the collection as its fetched configuration, pretty-printed as JSON."""
        rendered_config = json.dumps(asdict(self.config.get()), indent=2)
        return f"<weaviate.Collection config={rendered_config}>"

    def exists(self) -> bool:
        """Check if the collection exists in Weaviate.

        Returns:
            `True` when the simple configuration of the collection can be fetched, `False`
            when fetching it raises any `Exception`.
        """
        try:
            self.config.get(simple=True)
        except Exception:
            # Best-effort probe: any failure to fetch the config is reported as "not there".
            return False
        return True

    def shards(self) -> List[Shard]:
        """Get the statuses of all the shards of this collection.

        Returns:
            The list of shards belonging to this collection.

        Raises:
            `weaviate.WeaviateConnectionError`
                If the network connection to Weaviate fails.
            `weaviate.UnexpectedStatusCodeError`
                If Weaviate reports a non-OK status.
            `weaviate.EmptyResponseError`
                If the response is empty.
        """
        verbose_nodes = self.__cluster.nodes(self.name, output="verbose")
        # Flatten the per-node shard listings into a single list, node by node.
        all_shards: List[Shard] = []
        for node in verbose_nodes:
            all_shards.extend(node.shards)
        return all_shards

    @overload
    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Optional[PROPERTIES] = None,
        return_references: Literal[None] = None,
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> _ObjectIterator[Properties, References]: ...

    @overload
    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Optional[PROPERTIES] = None,
        return_references: REFERENCES,
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> _ObjectIterator[Properties, CrossReferences]: ...

    @overload
    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Optional[PROPERTIES] = None,
        return_references: Type[TReferences],
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> _ObjectIterator[Properties, TReferences]: ...

    @overload
    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Type[TProperties],
        return_references: Literal[None] = None,
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> _ObjectIterator[TProperties, References]: ...

    @overload
    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Type[TProperties],
        return_references: REFERENCES,
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> _ObjectIterator[TProperties, CrossReferences]: ...

    @overload
    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Type[TProperties],
        return_references: Type[TReferences],
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> _ObjectIterator[TProperties, TReferences]: ...

    def iterator(
        self,
        include_vector: bool = False,
        return_metadata: Optional[METADATA] = None,
        *,
        return_properties: Optional[ReturnProperties[TProperties]] = None,
        return_references: Optional[ReturnReferences[TReferences]] = None,
        after: Optional[UUID] = None,
        cache_size: Optional[int] = None,
    ) -> Union[
        _ObjectIterator[Properties, References],
        _ObjectIterator[Properties, CrossReferences],
        _ObjectIterator[Properties, TReferences],
        _ObjectIterator[TProperties, References],
        _ObjectIterator[TProperties, CrossReferences],
        _ObjectIterator[TProperties, TReferences],
    ]:
        """Return an iterator over the objects in the collection.

        The iterator keeps a record of the last object that it returned and uses it in each
        subsequent call to Weaviate. Once the collection is exhausted, the iterator exits.

        If `return_properties` is not provided, all the properties of each object are requested
        from Weaviate except for its vector, as this is an expensive operation. Specify
        `include_vector` to request the vector back as well. In addition, if
        `return_references=None` then none of the references are returned. Use
        `wvc.QueryReference` to specify which references to return.

        Arguments:
            `include_vector`
                Whether to include the vector in the metadata of the returned objects.
            `return_metadata`
                The metadata to return with each object.
            `return_properties`
                The properties to return with each object.
            `return_references`
                The references to return with each object.
            `after`
                The cursor to use to mark the initial starting point of the iterator in the
                collection.
            `cache_size`
                How many objects should be fetched in each request to Weaviate during the
                iteration. The default is 100.

        Raises:
            `weaviate.exceptions.WeaviateGRPCQueryError`:
                If the request to the Weaviate server fails.
        """
        # Bundle the per-request options once; the iterator drives `self.query` with them.
        iteration_inputs = _IteratorInputs(
            include_vector=include_vector,
            return_metadata=return_metadata,
            return_properties=return_properties,
            return_references=return_references,
            after=after,
        )
        return _ObjectIterator(self.query, iteration_inputs, cache_size=cache_size)