dify/api/core/evaluation/judgment/processor.py

"""Judgment condition processor for evaluation metrics.

Evaluates pass/fail judgment conditions against evaluation metric values.
Each condition uses ``variable_selector`` (``[node_id, metric_name]``) to
look up the metric value, then delegates the actual comparison to the
workflow condition engine (``graphon.utils.condition.processor``).

The processor is intentionally decoupled from evaluation frameworks and
runners.  It operates on plain ``dict`` mappings and can be invoked
anywhere that already has per-item metric results.
"""

import logging
from collections.abc import Sequence
from typing import Any, cast

from core.evaluation.entities.judgment_entity import (
    JudgmentCondition,
    JudgmentConditionResult,
    JudgmentConfig,
    JudgmentResult,
)
from graphon.utils.condition.entities import SupportedComparisonOperator
from graphon.utils.condition.processor import _evaluate_condition  # pyright: ignore[reportPrivateUsage]

# Module-level logger; used below to report condition evaluations that raised.
logger = logging.getLogger(__name__)

# Operators for which a ``None`` metric value is still handed to the condition
# engine instead of being reported as a missing metric (presumably because
# they take no expected operand -- hence "unary").
_UNARY_OPERATORS = frozenset({"null", "not null", "empty", "not empty"})


class JudgmentProcessor:
    """Stateless evaluator that applies judgment conditions to metric values.

    Every entry point is a static method; the class itself holds no state.
    """

    @staticmethod
    def evaluate(
        metric_values: dict[tuple[str, str], Any],
        config: JudgmentConfig,
    ) -> JudgmentResult:
        """Run the configured conditions and combine them into one verdict.

        Conditions are evaluated in order.  Evaluation stops as soon as the
        overall outcome is decided: at the first failing condition when the
        logical operator is ``"and"``, or at the first passing condition when
        it is ``"or"``.  In that case ``condition_results`` only contains the
        conditions evaluated up to that point.

        A configuration without any conditions passes.

        Args:
            metric_values: Mapping of ``(node_id, metric_name)`` to the metric
                value (e.g. ``{("node_abc", "faithfulness"): 0.85}``).
            config: Judgment configuration carrying ``logical_operator`` and
                the list of ``conditions``.

        Returns:
            A ``JudgmentResult`` holding the overall pass/fail flag, the
            logical operator used, and the per-condition results collected.
        """
        combinator = config.logical_operator

        def build(verdict: bool, details: list[JudgmentConditionResult]) -> JudgmentResult:
            # Single place where the aggregate result object is assembled.
            return JudgmentResult(
                passed=verdict,
                logical_operator=combinator,
                condition_results=details,
            )

        if not config.conditions:
            return build(True, [])

        outcomes: list[JudgmentConditionResult] = []
        for condition in config.conditions:
            outcome = JudgmentProcessor._evaluate_single_condition(metric_values, condition)
            outcomes.append(outcome)

            # Short-circuit once the combined verdict can no longer change.
            if combinator == "and":
                if not outcome.passed:
                    return build(False, outcomes)
            elif combinator == "or" and outcome.passed:
                return build(True, outcomes)

        # Anything other than "and" is combined with "any" semantics.
        aggregate = all if combinator == "and" else any
        return build(aggregate(item.passed for item in outcomes), outcomes)

    @staticmethod
    def _evaluate_single_condition(
        metric_values: dict[tuple[str, str], Any],
        condition: JudgmentCondition,
    ) -> JudgmentConditionResult:
        """Check one judgment condition against the available metric values.

        The first two entries of ``condition.variable_selector`` are used as
        ``(node_id, metric_name)`` to look up the metric.  The comparison
        itself is delegated to the workflow condition engine.  Any problem
        (selector too short, metric missing, coercion or comparison raising)
        yields a result with ``passed=False`` and ``error`` set rather than
        an exception.

        Args:
            metric_values: Mapping of ``(node_id, metric_name)`` to the metric
                value.
            condition: The condition to evaluate.

        Returns:
            A ``JudgmentConditionResult`` describing the outcome.
        """
        selector = condition.variable_selector
        operator = condition.comparison_operator

        def failure(message: str, observed: object = None) -> JudgmentConditionResult:
            # Shared shape of every non-passing result carrying an error text.
            return JudgmentConditionResult(
                variable_selector=selector,
                comparison_operator=operator,
                expected_value=condition.value,
                actual_value=observed,
                passed=False,
                error=message,
            )

        selector_length = len(selector)
        if selector_length < 2:
            return failure(f"variable_selector must have at least 2 elements, got {selector_length}")

        node_id = selector[0]
        metric_name = selector[1]
        actual_value = metric_values.get((node_id, metric_name))

        # A missing value is only acceptable for the unary operators, which
        # are still forwarded to the condition engine with ``None``.
        if actual_value is None and operator not in _UNARY_OPERATORS:
            return failure(f"Metric '{metric_name}' on node '{node_id}' not found in evaluation results")

        try:
            expected = condition.value

            # The numeric operators need an int/float/bool on the actual
            # side, so anything else (e.g. a numeric string) is converted
            # to float before being handed to the condition engine.
            is_numeric_comparison = operator in {"=", "≠", ">", "<", "≥", "≤"}
            already_numeric = isinstance(actual_value, (int, float, bool))
            if is_numeric_comparison and actual_value is not None and not already_numeric:
                comparable: object = float(actual_value)
            else:
                comparable = actual_value

            verdict = _evaluate_condition(
                operator=cast(SupportedComparisonOperator, operator),
                value=comparable,
                expected=cast(str | Sequence[str] | bool | Sequence[bool] | None, expected),
            )

            # The original (un-coerced) value is what gets reported back.
            return JudgmentConditionResult(
                variable_selector=selector,
                comparison_operator=operator,
                expected_value=expected,
                actual_value=actual_value,
                passed=verdict,
            )
        except Exception as exc:
            # Best-effort boundary: a failing condition must not abort the
            # whole judgment, so the error is logged and reported in-band.
            reason = str(exc)
            logger.warning(
                "Judgment condition evaluation failed for [%s, %s]: %s",
                node_id,
                metric_name,
                reason,
            )
            return failure(reason, actual_value)