# Source code for weaviate.collections.classes.aggregate

from dataclasses import dataclass
from typing import (
    Dict,
    List,
    Optional,
    Union,
)
from typing_extensions import TypeVar

from pydantic import BaseModel, Field

from weaviate.collections.classes.types import _WeaviateInput

# Type variable constrained to `int` or `float`.
N = TypeVar("N", int, float)



[docs]
@dataclass
class AggregateInteger:
    """The aggregation result for an int property.

    Every field is optional. NOTE(review): presumably a field is `None` when the
    corresponding metric was not requested or not returned — confirm against the
    code that builds this object.
    """

    count: Optional[int]  # number of objects that contain the property
    maximum: Optional[int]  # maximum value of the property
    mean: Optional[float]  # mean value of the property
    median: Optional[float]  # median value of the property
    minimum: Optional[int]  # minimum value of the property
    mode: Optional[int]  # mode value of the property
    sum_: Optional[int]  # sum of the property's values




[docs]
@dataclass
class AggregateNumber:
    """The aggregation result for a number property.

    Every field is optional. NOTE(review): presumably a field is `None` when the
    corresponding metric was not requested or not returned — confirm against the
    code that builds this object.
    """

    count: Optional[int]  # number of objects that contain the property
    maximum: Optional[float]  # maximum value of the property
    mean: Optional[float]  # mean value of the property
    median: Optional[float]  # median value of the property
    minimum: Optional[float]  # minimum value of the property
    mode: Optional[float]  # mode value of the property
    sum_: Optional[float]  # sum of the property's values




[docs]
@dataclass
class TopOccurrence:
    """The top occurrence of a text property.

    Pairs one value of the property with its occurrence count.
    """

    count: Optional[int]  # number of occurrences of `value`
    value: Optional[str]  # the occurring text value




[docs]
@dataclass
class AggregateText:
    """The aggregation result for a text property."""

    count: Optional[int]  # number of objects that contain the property
    top_occurrences: List[TopOccurrence]  # one entry per reported top occurrence




[docs]
@dataclass
class AggregateBoolean:
    """The aggregation result for a boolean property.

    Every field is optional. NOTE(review): presumably a field is `None` when the
    corresponding metric was not requested or not returned — confirm against the
    code that builds this object.
    """

    count: Optional[int]  # number of objects that contain the property
    percentage_false: Optional[float]  # percentage of objects with a false value
    percentage_true: Optional[float]  # percentage of objects with a true value
    total_false: Optional[int]  # total number of objects with a false value
    total_true: Optional[int]  # total number of objects with a true value



# Aggregate references currently bugged on Weaviate's side
# @dataclass
# class AggregateReference:
#     """The aggregation result for a cross-reference property."""

#     pointing_to: Optional[str]



[docs]
@dataclass
class AggregateDate:
    """The aggregation result for a date property.

    The date-valued metrics are carried as strings.
    NOTE(review): the exact string format is not visible in this module — confirm
    against the code that builds this object.
    """

    count: Optional[int]  # number of objects that contain the property
    maximum: Optional[str]  # maximum value of the property
    median: Optional[str]  # median value of the property
    minimum: Optional[str]  # minimum value of the property
    mode: Optional[str]  # mode value of the property



# Any one of the per-property aggregation result dataclasses of this module.
AggregateResult = Union[
    AggregateInteger,
    AggregateNumber,
    AggregateText,
    AggregateBoolean,
    AggregateDate,
    # AggregateReference, # Aggregate references currently bugged on Weaviate's side
]

# Mapping from a string key to an aggregation result.
# NOTE(review): the key is presumably the property name — confirm against the caller.
AProperties = Dict[str, AggregateResult]



[docs]
@dataclass
class AggregateReturn:
    """The aggregation result for a collection."""

    properties: AProperties  # aggregation results keyed by string (presumably property name)
    total_count: Optional[int]  # optional total count; may be None




[docs]
@dataclass
class GroupedBy:
    """The property that the collection was grouped by."""

    prop: str  # presumably the name of the grouping property — confirm against the caller
    value: str  # presumably the value of `prop` shared by the group — confirm against the caller




[docs]
@dataclass
class AggregateGroup:
    """The aggregation result for a collection grouped by a property.

    Represents a single group: what it was grouped by plus its aggregation results.
    """

    grouped_by: GroupedBy  # the property/value pair describing this group
    properties: AProperties  # aggregation results keyed by string (presumably property name)
    total_count: Optional[int]  # optional total count; may be None




[docs]
@dataclass
class AggregateGroupByReturn:
    """The aggregation results for a collection grouped by a property."""

    groups: List[AggregateGroup]  # one `AggregateGroup` per group



class _MetricsBase(BaseModel):
    """Fields shared by every metrics-selection model in this module."""

    property_name: str  # emitted by the subclasses' `to_gql` as the leading name of the fragment
    count: bool  # whether the subclasses' `to_gql` emit the `count` field


class _MetricsText(_MetricsBase):
    """Metrics selection for a text property, renderable as a GraphQL fragment."""

    # Whether to request the `occurs` field inside `topOccurrences`.
    top_occurrences_count: bool
    # Whether to request the `value` field inside `topOccurrences`.
    top_occurrences_value: bool
    # When not None, rendered as the `limit` argument of `topOccurrences`.
    min_occurrences: Optional[int]

    def to_gql(self) -> str:
        """Render the selection as `<property_name> { ... }`.

        The `topOccurrences` sub-selection is only emitted when at least one of its
        two fields is selected. Unselected fields contribute an empty string to the
        space-joined body, so the output may contain consecutive spaces.

        Returns:
            The GraphQL fragment as a string.
        """
        wants_top = self.top_occurrences_count or self.top_occurrences_value
        limit_arg = "" if self.min_occurrences is None else f"(limit: {self.min_occurrences})"

        # Every slot is always present (possibly empty) so the spacing of the result is fixed.
        slots = [
            "count" if self.count else "",
            f"topOccurrences{limit_arg} {{" if wants_top else "",
            "occurs" if self.top_occurrences_count else "",
            "value" if self.top_occurrences_value else "",
            "}" if wants_top else "",
        ]
        body = " ".join(slots)
        return f"{self.property_name} {{ {body} }}"


class _MetricsNum(_MetricsBase):
    """Metrics selection shared by the numeric models, renderable as a GraphQL fragment."""

    maximum: bool  # emit `maximum`
    mean: bool  # emit `mean`
    median: bool  # emit `median`
    minimum: bool  # emit `minimum`
    mode: bool  # emit `mode`
    sum_: bool  # emit `sum` (trailing underscore only on the Python side)

    def to_gql(self) -> str:
        """Render the selection as `<property_name> { ... }`.

        Unselected metrics contribute an empty string to the space-joined body, so
        the output may contain consecutive spaces.

        Returns:
            The GraphQL fragment as a string.
        """
        # (GraphQL field name, selected?) in the order the fields are emitted.
        selection = (
            ("count", self.count),
            ("maximum", self.maximum),
            ("mean", self.mean),
            ("median", self.median),
            ("minimum", self.minimum),
            ("mode", self.mode),
            ("sum", self.sum_),
        )
        body = " ".join(field if enabled else "" for field, enabled in selection)
        return f"{self.property_name} {{ {body} }}"


class _MetricsInteger(_MetricsNum):
    """Metrics selection for an integer property; adds nothing to `_MetricsNum`."""

    pass


class _MetricsNumber(_MetricsNum):
    """Metrics selection for a number property; adds nothing to `_MetricsNum`."""

    pass


class _MetricsBoolean(_MetricsBase):
    """Metrics selection for a boolean property, renderable as a GraphQL fragment."""

    percentage_false: bool  # emit `percentageFalse`
    percentage_true: bool  # emit `percentageTrue`
    total_false: bool  # emit `totalFalse`
    total_true: bool  # emit `totalTrue`

    def to_gql(self) -> str:
        """Render the selection as `<property_name> { ... }`.

        Unselected metrics contribute an empty string to the space-joined body, so
        the output may contain consecutive spaces.

        Returns:
            The GraphQL fragment as a string.
        """
        # (GraphQL field name, selected?) in the order the fields are emitted.
        selection = (
            ("count", self.count),
            ("percentageFalse", self.percentage_false),
            ("percentageTrue", self.percentage_true),
            ("totalFalse", self.total_false),
            ("totalTrue", self.total_true),
        )
        body = " ".join(field if enabled else "" for field, enabled in selection)
        return f"{self.property_name} {{ {body} }}"


class _MetricsDate(_MetricsBase):
    """Metrics selection for a date property, renderable as a GraphQL fragment."""

    maximum: bool  # emit `maximum`
    median: bool  # emit `median`
    minimum: bool  # emit `minimum`
    mode: bool  # emit `mode`

    def to_gql(self) -> str:
        """Render the selection as `<property_name> { ... }`.

        Unselected metrics contribute an empty string to the space-joined body, so
        the output may contain consecutive spaces.

        Returns:
            The GraphQL fragment as a string.
        """
        # (GraphQL field name, selected?) in the order the fields are emitted.
        selection = (
            ("count", self.count),
            ("maximum", self.maximum),
            ("median", self.median),
            ("minimum", self.minimum),
            ("mode", self.mode),
        )
        body = " ".join(field if enabled else "" for field, enabled in selection)
        return f"{self.property_name} {{ {body} }}"


# Aggregate references currently bugged on Weaviate's side
# class _MetricsReference(BaseModel):
#     property_name: str
#     pointing_to: bool

#     def to_gql(self) -> str:
#         body = " ".join(
#             [
#                 "pointingTo" if self.pointing_to else "",
#             ]
#         )
#         return f"{self.property_name} {{ {body} }}"


# Any one of the concrete metrics-selection models of this module.
_Metrics = Union[
    _MetricsText,
    _MetricsInteger,
    _MetricsNumber,
    _MetricsDate,
    _MetricsBoolean,
    # _MetricsReference, # Aggregate references currently bugged on Weaviate's side
]

# Either a single metrics selection or a list of them.
PropertiesMetrics = Union[_Metrics, List[_Metrics]]



[docs]
class GroupByAggregate(_WeaviateInput):
    """Define how the aggregation's group-by operation should be performed."""

    # NOTE(review): presumably the name of the property to group by — confirm against the caller.
    prop: str
    # Optional limit; defaults to None. NOTE(review): presumably caps the number of
    # returned groups — confirm against the code that consumes this object.
    limit: Optional[int] = Field(default=None)




[docs]
class Metrics:
    """Define the metrics to be returned based on a property when aggregating over a collection.

    Use the `__init__` method to define the name of the property to be aggregated on.
    Then use the `text`, `integer`, `number`, `boolean`, or `date_` methods to define the metrics to be returned.

    See [the docs](https://weaviate.io/developers/weaviate/search/aggregate) for more details!
    """

    def __init__(self, property_: str) -> None:
        """Remember the name of the property that the metrics will be defined for.

        Arguments:
            `property_`
                The name of the property to be aggregated on.
        """
        # Double leading underscore: name-mangled to `_Metrics__property` by Python.
        self.__property = property_


[docs]
    def text(
        self,
        count: bool = False,
        top_occurrences_count: bool = False,
        top_occurrences_value: bool = False,
        min_occurrences: Optional[int] = None,
    ) -> _MetricsText:
        """Select the metrics to be returned for a TEXT or TEXT_ARRAY property when aggregating over a collection.

        When none of the boolean flags is set, every metric is selected.
        `min_occurrences` does not take part in that check.

        Arguments:
            `count`
                Whether to include the number of objects that contain this property.
            `top_occurrences_count`
                Whether to include the number of the top occurrences of a property's value.
            `top_occurrences_value`
                Whether to include the value of the top occurrences of a property's value.
            `min_occurrences`
                Only include entries with more occurrences than the given limit.

        Returns:
            A `_MetricsText` object that includes the metrics to be returned.
        """
        flags = {
            "count": count,
            "top_occurrences_count": top_occurrences_count,
            "top_occurrences_value": top_occurrences_value,
        }
        if not any(flags.values()):
            # Nothing was asked for explicitly, so ask for everything.
            flags = dict.fromkeys(flags, True)
        return _MetricsText(
            property_name=self.__property,
            min_occurrences=min_occurrences,
            **flags,
        )



[docs]
    def integer(
        self,
        count: bool = False,
        maximum: bool = False,
        mean: bool = False,
        median: bool = False,
        minimum: bool = False,
        mode: bool = False,
        sum_: bool = False,
    ) -> _MetricsInteger:
        """Select the metrics to be returned for an INT or INT_ARRAY property when aggregating over a collection.

        When none of the flags is set, every metric is selected.

        Arguments:
            `count`
                Whether to include the number of objects that contain this property.
            `maximum`
                Whether to include the maximum value of this property.
            `mean`
                Whether to include the mean value of this property.
            `median`
                Whether to include the median value of this property.
            `minimum`
                Whether to include the minimum value of this property.
            `mode`
                Whether to include the mode value of this property.
            `sum_`
                Whether to include the sum of this property.

        Returns:
            A `_MetricsInteger` object that includes the metrics to be returned.
        """
        flags = {
            "count": count,
            "maximum": maximum,
            "mean": mean,
            "median": median,
            "minimum": minimum,
            "mode": mode,
            "sum_": sum_,
        }
        if not any(flags.values()):
            # Nothing was asked for explicitly, so ask for everything.
            flags = dict.fromkeys(flags, True)
        return _MetricsInteger(property_name=self.__property, **flags)



[docs]
    def number(
        self,
        count: bool = False,
        maximum: bool = False,
        mean: bool = False,
        median: bool = False,
        minimum: bool = False,
        mode: bool = False,
        sum_: bool = False,
    ) -> _MetricsNumber:
        """Select the metrics to be returned for a NUMBER or NUMBER_ARRAY property when aggregating over a collection.

        When none of the flags is set, every metric is selected.

        Arguments:
            `count`
                Whether to include the number of objects that contain this property.
            `maximum`
                Whether to include the maximum value of this property.
            `mean`
                Whether to include the mean value of this property.
            `median`
                Whether to include the median value of this property.
            `minimum`
                Whether to include the minimum value of this property.
            `mode`
                Whether to include the mode value of this property.
            `sum_`
                Whether to include the sum of this property.

        Returns:
            A `_MetricsNumber` object that includes the metrics to be returned.
        """
        flags = {
            "count": count,
            "maximum": maximum,
            "mean": mean,
            "median": median,
            "minimum": minimum,
            "mode": mode,
            "sum_": sum_,
        }
        if not any(flags.values()):
            # Nothing was asked for explicitly, so ask for everything.
            flags = dict.fromkeys(flags, True)
        return _MetricsNumber(property_name=self.__property, **flags)



[docs]
    def boolean(
        self,
        count: bool = False,
        percentage_false: bool = False,
        percentage_true: bool = False,
        total_false: bool = False,
        total_true: bool = False,
    ) -> _MetricsBoolean:
        """Select the metrics to be returned for a BOOL or BOOL_ARRAY property when aggregating over a collection.

        When none of the flags is set, every metric is selected.

        Arguments:
            `count`
                Whether to include the number of objects that contain this property.
            `percentage_false`
                Whether to include the percentage of objects that have a false value for this property.
            `percentage_true`
                Whether to include the percentage of objects that have a true value for this property.
            `total_false`
                Whether to include the total number of objects that have a false value for this property.
            `total_true`
                Whether to include the total number of objects that have a true value for this property.

        Returns:
            A `_MetricsBoolean` object that includes the metrics to be returned.
        """
        flags = {
            "count": count,
            "percentage_false": percentage_false,
            "percentage_true": percentage_true,
            "total_false": total_false,
            "total_true": total_true,
        }
        if not any(flags.values()):
            # Nothing was asked for explicitly, so ask for everything.
            flags = dict.fromkeys(flags, True)
        return _MetricsBoolean(property_name=self.__property, **flags)



[docs]
    def date_(
        self,
        count: bool = False,
        maximum: bool = False,
        median: bool = False,
        minimum: bool = False,
        mode: bool = False,
    ) -> _MetricsDate:
        """Select the metrics to be returned for a DATE or DATE_ARRAY property when aggregating over a collection.

        When none of the flags is set, every metric is selected.

        Arguments:
            `count`
                Whether to include the number of objects that contain this property.
            `maximum`
                Whether to include the maximum value of this property.
            `median`
                Whether to include the median value of this property.
            `minimum`
                Whether to include the minimum value of this property.
            `mode`
                Whether to include the mode value of this property.

        Returns:
            A `_MetricsDate` object that includes the metrics to be returned.
        """
        flags = {
            "count": count,
            "maximum": maximum,
            "median": median,
            "minimum": minimum,
            "mode": mode,
        }
        if not any(flags.values()):
            # Nothing was asked for explicitly, so ask for everything.
            flags = dict.fromkeys(flags, True)
        return _MetricsDate(property_name=self.__property, **flags)



    # Aggregate references currently bugged on Weaviate's side
    # def reference(
    #     self,
    #     pointing_to: bool = False,
    # ) -> _MetricsReference:
    #     """Define the metrics to be returned for a cross-reference property when aggregating over a collection.

    #     If none of the arguments are provided then all metrics will be returned.

    #     Arguments:
    #         `pointing_to`
    #             Whether to include the collection names that this property references.

    #     Returns:
    #         A `_MetricsReference` object that includes the metrics to be returned.
    #     """
    #     if not any([pointing_to]):
    #         pointing_to = True
    #     return _MetricsReference(
    #         property_name=self.__property,
    #         pointing_to=pointing_to,
    #     )