"""GraphQL `Aggregate` command."""
import json
from dataclasses import dataclass
from typing import List, Optional
from weaviate.util import _capitalize_first_letter, _sanitize_str, file_encoder_b64
from .filter import (
Filter,
GraphQL,
MediaType,
NearAudio,
NearDepth,
NearImage,
NearIMU,
NearObject,
NearText,
NearThermal,
NearVector,
NearVideo,
Where,
)
[docs]
@dataclass
class Hybrid:
    """Arguments of the GraphQL `hybrid` operator, rendered by `__str__`.

    Every field is read from the `content` dict given to `__init__` and is `None`
    when the corresponding key is absent.
    """

    query: Optional[str]  # content["query"]
    alpha: Optional[float]  # content["alpha"]
    vector: Optional[List[float]]  # content["vector"]
    properties: Optional[List[str]]  # content["properties"]
    target_vectors: Optional[List[str]]  # content["targetVectors"]
    # A single distance value, not a list of strings (it is rendered verbatim).
    max_vector_distance: Optional[float]  # content["maxVectorDistance"]

    def __init__(self, content: dict) -> None:
        """Read the hybrid arguments from `content`.

        Args:
            content: Dict that may contain the keys 'query', 'alpha', 'vector',
                'properties', 'targetVectors' and 'maxVectorDistance'. Missing keys
                leave the matching attribute as None.
        """
        self.query = content.get("query")
        self.alpha = content.get("alpha")
        self.vector = content.get("vector")
        self.properties = content.get("properties")
        self.target_vectors = content.get("targetVectors")
        self.max_vector_distance = content.get("maxVectorDistance")

    def __str__(self) -> str:
        """Render the arguments as a `hybrid:{...}` GraphQL fragment.

        Only arguments that were provided are emitted. They are collected first and
        joined afterwards so the fragment never starts with a stray separator when
        `query` is missing.
        """
        parts: List[str] = []
        if self.query is not None:
            parts.append(f"query: {_sanitize_str(self.query)}")
        if self.vector is not None:
            parts.append(f"vector: {self.vector}")
        if self.alpha is not None:
            parts.append(f"alpha: {self.alpha}")
        # Empty lists are skipped; json.dumps keeps the compact `["a","b"]` layout
        # while escaping quotes/backslashes inside the names.
        if self.properties:
            props = json.dumps(self.properties, separators=(",", ":"), ensure_ascii=False)
            parts.append(f"properties: {props}")
        if self.target_vectors:
            targets = json.dumps(self.target_vectors, separators=(",", ":"), ensure_ascii=False)
            parts.append(f"targetVectors: {targets}")
        if self.max_vector_distance is not None:
            parts.append(f"maxVectorDistance:{self.max_vector_distance}")
        return "hybrid:{" + ", ".join(parts) + "}"
[docs]
class AggregateBuilder(GraphQL):
"""AggregateBuilder class used to aggregate Weaviate objects."""
def __init__(self, class_name: str):
    """Create an AggregateBuilder for a single class.

    Args:
        class_name: Class name of the objects to be aggregated. It is stored after
            being passed through `_capitalize_first_letter`.
    """
    # Target of the query.
    self._class_name: str = _capitalize_first_letter(class_name)

    # Selection body.
    self._fields: List[str] = []
    self._with_meta_count: bool = False

    # Arguments placed inside the parentheses; `_uses_filter` records whether any
    # of the `with_*` methods that set it has been called.
    self._uses_filter: bool = False
    self._where: Optional[Where] = None
    self._group_by_properties: Optional[List[str]] = None
    self._near: Optional[Filter] = None
    self._hybrid: Optional[Hybrid] = None
    self._object_limit: Optional[int] = None
    self._limit: Optional[int] = None
    self._tenant: Optional[str] = None
[docs]
def with_tenant(self, tenant: str) -> "AggregateBuilder":
"""Sets a tenant for the query."""
if not isinstance(tenant, str):
raise TypeError("tenant must be of type str")
self._tenant = tenant
self._uses_filter = True
return self
[docs]
def with_object_limit(self, limit: int) -> "AggregateBuilder":
"""Set objectLimit to limit vector search results used within the aggregation query only when with near<MEDIA> filter.
Args:
limit: The object limit.
Returns:
Updated AggregateBuilder.
"""
self._object_limit = limit
return self
[docs]
def with_limit(self, limit: int) -> "AggregateBuilder":
"""Set limit to limit the number of returned results from the aggregation query.
Args:
limit: The limit.
Returns:
Updated AggregateBuilder.
"""
self._limit = limit
return self
[docs]
def with_fields(self, field: str) -> "AggregateBuilder":
"""Include a field in the aggregate query.
Args:
field: Field to include in the aggregate query. e.g. '<property_name> { count }'
Returns:
Updated AggregateBuilder.
"""
self._fields.append(field)
return self
[docs]
def with_where(self, content: dict) -> "AggregateBuilder":
    """Set the 'where' filter of the aggregate query.

    Args:
        content: The where filter to include in the aggregate query; it is wrapped
            in a `Where` object.

    Returns:
        Updated AggregateBuilder.
    """
    where_filter = Where(content)
    self._where = where_filter
    self._uses_filter = True
    return self
[docs]
def with_hybrid(self, content: dict) -> "AggregateBuilder":
    """Set the `hybrid` filter (bm25 and vector results combined by reciprocal ranking).

    Args:
        content: The content of the `hybrid` filter to set.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If a 'near' filter was already set.
    """
    near_already_set = self._near is not None
    if near_already_set:
        raise AttributeError("Cannot use 'hybrid' and 'near' filters simultaneously.")

    hybrid_filter = Hybrid(content)
    self._hybrid = hybrid_filter
    self._uses_filter = True
    return self
[docs]
def with_group_by_filter(self, properties: List[str]) -> "AggregateBuilder":
"""Add a group by filter to the query. Might requires the user to set an additional group by clause using `with_fields(..)`.
Args:
properties: The list of properties that are included in the group by filter.
Generates a filter like: 'groupBy: ["property1", "property2"]'
from a list ["property1", "property2"]
Returns:
Updated AggregateBuilder.
"""
self._group_by_properties = properties
self._uses_filter = True
return self
[docs]
def with_near_text(self, content: dict) -> "AggregateBuilder":
    """Set the `nearText` filter.

    This filter can be used with text modules (text2vec),
    e.g. text2vec-contextionary, text2vec-transformers.

    NOTE: The 'autocorrect' field is enabled only with the `text-spellcheck` Weaviate module.

    Args:
        content: The content of the `nearText` filter to set.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    # The 'near' check comes first so its message wins if both are somehow set.
    near_already_set = self._near is not None
    if near_already_set:
        raise AttributeError("Cannot use multiple 'near' filters.")
    hybrid_already_set = self._hybrid is not None
    if hybrid_already_set:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")

    near_filter = NearText(content)
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_vector(self, content: dict) -> "AggregateBuilder":
    """Set the `nearVector` filter.

    Args:
        content: The content of the `nearVector` filter to set.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    # The 'near' check comes first so its message wins if both are somehow set.
    near_already_set = self._near is not None
    if near_already_set:
        raise AttributeError("Cannot use multiple 'near' filters.")
    hybrid_already_set = self._hybrid is not None
    if hybrid_already_set:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")

    near_filter = NearVector(content)
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_object(self, content: dict) -> "AggregateBuilder":
    """Set the `nearObject` filter.

    Args:
        content: The content of the `nearObject` filter to set.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    # The 'near' check comes first so its message wins if both are somehow set.
    near_already_set = self._near is not None
    if near_already_set:
        raise AttributeError("Cannot use multiple 'near' filters.")
    hybrid_already_set = self._hybrid is not None
    if hybrid_already_set:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")

    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by `NearObject`, which is not visible here.
    near_filter = NearObject(content, True)
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_image(self, content: dict, encode: bool = True) -> "AggregateBuilder":
    """Set `nearImage` filter.

    Args:
        content: The content of the `nearImage` filter to set. The dict passed in is
            not modified.
        encode: Whether to encode the `content["image"]` to base64 and convert to string. If True, the
            `content["image"]` can be an image path or a file opened in binary read mode. If False,
            the `content["image"]` MUST be a base64 encoded string (NOT bytes, i.e. NOT binary
            string that looks like this: b'BASE64ENCODED' but simple 'BASE64ENCODED').
            By default True.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    if self._near is not None:
        raise AttributeError(
            "Cannot use multiple 'near' filters, or a 'near' filter along with a 'ask' filter!"
        )
    if self._hybrid is not None:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")
    if encode:
        # Encode into a shallow copy: writing back into the caller's dict would make
        # a second use of the same dict try to encode the already-encoded value.
        content = {**content, "image": file_encoder_b64(content["image"])}
    near_filter = NearImage(content)
    # Builder state is committed only after validation and construction succeeded,
    # so a failed call leaves the builder unchanged.
    self._media_type = MediaType.IMAGE
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_audio(self, content: dict, encode: bool = True) -> "AggregateBuilder":
    """Set `nearAudio` filter.

    Args:
        content: The content of the `nearAudio` filter to set. The media entry is read
            from the key `MediaType.AUDIO.value`. The dict passed in is not modified.
        encode: Whether to encode the audio entry to base64 and convert to string. If True, the
            entry can be an audio path or a file opened in binary read mode. If False,
            the entry MUST be a base64 encoded string (NOT bytes, i.e. NOT binary
            string that looks like this: b'BASE64ENCODED' but simple 'BASE64ENCODED').
            By default True.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    if self._near is not None:
        raise AttributeError(
            "Cannot use multiple 'near' filters, or a 'near' filter along with a 'ask' filter!"
        )
    if self._hybrid is not None:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")
    media_type = MediaType.AUDIO
    if encode:
        # Encode into a shallow copy: writing back into the caller's dict would make
        # a second use of the same dict try to encode the already-encoded value.
        key = media_type.value
        content = {**content, key: file_encoder_b64(content[key])}
    near_filter = NearAudio(content)
    # Builder state is committed only after validation and construction succeeded,
    # so a failed call leaves the builder unchanged.
    self._media_type = media_type
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_video(self, content: dict, encode: bool = True) -> "AggregateBuilder":
    """Set `nearVideo` filter.

    Args:
        content: The content of the `nearVideo` filter to set. The media entry is read
            from the key `MediaType.VIDEO.value`. The dict passed in is not modified.
        encode: Whether to encode the video entry to base64 and convert to string. If True, the
            entry can be a video path or a file opened in binary read mode. If False,
            the entry MUST be a base64 encoded string (NOT bytes, i.e. NOT binary
            string that looks like this: b'BASE64ENCODED' but simple 'BASE64ENCODED').
            By default True.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    if self._near is not None:
        raise AttributeError(
            "Cannot use multiple 'near' filters, or a 'near' filter along with a 'ask' filter!"
        )
    if self._hybrid is not None:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")
    media_type = MediaType.VIDEO
    if encode:
        # Encode into a shallow copy: writing back into the caller's dict would make
        # a second use of the same dict try to encode the already-encoded value.
        key = media_type.value
        content = {**content, key: file_encoder_b64(content[key])}
    near_filter = NearVideo(content)
    # Builder state is committed only after validation and construction succeeded,
    # so a failed call leaves the builder unchanged.
    self._media_type = media_type
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_depth(self, content: dict, encode: bool = True) -> "AggregateBuilder":
    """Set `nearDepth` filter.

    Args:
        content: The content of the `nearDepth` filter to set. The media entry is read
            from the key `MediaType.DEPTH.value`. The dict passed in is not modified.
        encode: Whether to encode the depth entry to base64 and convert to string. If True, the
            entry can be a depth file path or a file opened in binary read mode. If False,
            the entry MUST be a base64 encoded string (NOT bytes, i.e. NOT binary
            string that looks like this: b'BASE64ENCODED' but simple 'BASE64ENCODED').
            By default True.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    if self._near is not None:
        raise AttributeError(
            "Cannot use multiple 'near' filters, or a 'near' filter along with a 'ask' filter!"
        )
    if self._hybrid is not None:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")
    media_type = MediaType.DEPTH
    if encode:
        # Encode into a shallow copy: writing back into the caller's dict would make
        # a second use of the same dict try to encode the already-encoded value.
        key = media_type.value
        content = {**content, key: file_encoder_b64(content[key])}
    near_filter = NearDepth(content)
    # Builder state is committed only after validation and construction succeeded,
    # so a failed call leaves the builder unchanged.
    self._media_type = media_type
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_thermal(self, content: dict, encode: bool = True) -> "AggregateBuilder":
    """Set `nearThermal` filter.

    Args:
        content: The content of the `nearThermal` filter to set. The media entry is read
            from the key `MediaType.THERMAL.value`. The dict passed in is not modified.
        encode: Whether to encode the thermal entry to base64 and convert to string. If True, the
            entry can be a thermal file path or a file opened in binary read mode. If False,
            the entry MUST be a base64 encoded string (NOT bytes, i.e. NOT binary
            string that looks like this: b'BASE64ENCODED' but simple 'BASE64ENCODED').
            By default True.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    if self._near is not None:
        raise AttributeError(
            "Cannot use multiple 'near' filters, or a 'near' filter along with a 'ask' filter!"
        )
    if self._hybrid is not None:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")
    media_type = MediaType.THERMAL
    if encode:
        # Encode into a shallow copy: writing back into the caller's dict would make
        # a second use of the same dict try to encode the already-encoded value.
        key = media_type.value
        content = {**content, key: file_encoder_b64(content[key])}
    near_filter = NearThermal(content)
    # Builder state is committed only after validation and construction succeeded,
    # so a failed call leaves the builder unchanged.
    self._media_type = media_type
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def with_near_imu(self, content: dict, encode: bool = True) -> "AggregateBuilder":
    """Set `nearIMU` filter.

    Args:
        content: The content of the `nearIMU` filter to set. The media entry is read
            from the key `MediaType.IMU.value`. The dict passed in is not modified.
        encode: Whether to encode the IMU entry to base64 and convert to string. If True, the
            entry can be an IMU file path or a file opened in binary read mode. If False,
            the entry MUST be a base64 encoded string (NOT bytes, i.e. NOT binary
            string that looks like this: b'BASE64ENCODED' but simple 'BASE64ENCODED').
            By default True.

    Returns:
        Updated AggregateBuilder.

    Raises:
        AttributeError: If another 'near' filter or a 'hybrid' filter was already set.
    """
    if self._near is not None:
        raise AttributeError(
            "Cannot use multiple 'near' filters, or a 'near' filter along with a 'ask' filter!"
        )
    if self._hybrid is not None:
        raise AttributeError("Cannot use 'near' and 'hybrid' filters simultaneously.")
    media_type = MediaType.IMU
    if encode:
        # Encode into a shallow copy: writing back into the caller's dict would make
        # a second use of the same dict try to encode the already-encoded value.
        key = media_type.value
        content = {**content, key: file_encoder_b64(content[key])}
    near_filter = NearIMU(content)
    # Builder state is committed only after validation and construction succeeded,
    # so a failed call leaves the builder unchanged.
    self._media_type = media_type
    self._near = near_filter
    self._uses_filter = True
    return self
[docs]
def build(self) -> str:
"""Build the query and return the string.
Returns:
The GraphQL query as a string.
"""
# Path
query = f"{{Aggregate{{{self._class_name}"
# Filter
if self._uses_filter:
query += "("
if self._where is not None:
query += str(self._where)
if self._group_by_properties is not None:
query += f"groupBy: {json.dumps(self._group_by_properties)}"
if self._near is not None:
query += str(self._near)
if self._object_limit:
query += f"objectLimit: {self._object_limit}"
if self._tenant is not None:
query += f'tenant: "{self._tenant}"'
if self._limit is not None:
query += f"limit: {self._limit}"
if self._hybrid is not None:
query += str(self._hybrid)
query += ")"
# Body
query += "{"
if self._with_meta_count:
query += "meta{count}"
for field in self._fields:
query += field
# close
query += "}}}"
return query