mirror of
https://github.com/langgenius/dify.git
synced 2026-04-05 11:03:17 +08:00
161 lines
6.1 KiB
Python
161 lines
6.1 KiB
Python
"""Judgment condition processor for evaluation metrics.
|
|
|
|
Evaluates pass/fail judgment conditions against evaluation metric values.
|
|
Each condition uses ``variable_selector`` (``[node_id, metric_name]``) to
|
|
look up the metric value, then delegates the actual comparison to the
|
|
workflow condition engine (``graphon.utils.condition.processor``).
|
|
|
|
The processor is intentionally decoupled from evaluation frameworks and
|
|
runners. It operates on plain ``dict`` mappings and can be invoked
|
|
anywhere that already has per-item metric results.
|
|
"""
|
|
|
|
import logging
|
|
from collections.abc import Sequence
|
|
from typing import Any, cast
|
|
|
|
from core.evaluation.entities.judgment_entity import (
|
|
JudgmentCondition,
|
|
JudgmentConditionResult,
|
|
JudgmentConfig,
|
|
JudgmentResult,
|
|
)
|
|
from graphon.utils.condition.entities import SupportedComparisonOperator
|
|
from graphon.utils.condition.processor import _evaluate_condition # pyright: ignore[reportPrivateUsage]
|
|
|
|
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Comparison operators for which a missing (``None``) metric value is still
# handed to the condition engine instead of being reported as "not found".
_UNARY_OPERATORS = frozenset(("null", "not null", "empty", "not empty"))
|
|
|
|
|
|
class JudgmentProcessor:
    """Applies a ``JudgmentConfig`` to a mapping of metric values.

    Both methods are static; the class is used purely as a namespace.
    """

    @staticmethod
    def evaluate(
        metric_values: dict[tuple[str, str], Any],
        config: JudgmentConfig,
    ) -> JudgmentResult:
        """Combine every judgment condition in ``config`` into one verdict.

        Conditions are evaluated in order and evaluation stops early as soon
        as the outcome is decided: on the first failing condition for
        ``"and"``, on the first passing condition for ``"or"``.  Conditions
        after that point are not evaluated and therefore do not appear in
        ``condition_results``.

        Args:
            metric_values: Mapping of ``(node_id, metric_name)`` → metric
                value (e.g. ``{("node_abc", "faithfulness"): 0.85}``).
            config: The judgment configuration carrying ``logical_operator``
                and ``conditions``.

        Returns:
            JudgmentResult with the overall pass/fail flag, the logical
            operator that was applied, and the per-condition results that
            were actually evaluated.  A config without conditions passes
            with an empty result list.
        """
        combinator = config.logical_operator
        outcomes: list[JudgmentConditionResult] = []

        def _verdict(passed: bool) -> JudgmentResult:
            # Single construction point so every exit path reports the same
            # operator and the outcomes collected so far.
            return JudgmentResult(
                passed=passed,
                logical_operator=combinator,
                condition_results=outcomes,
            )

        # Nothing to check: treated as a pass.
        if not config.conditions:
            return _verdict(True)

        for condition in config.conditions:
            outcome = JudgmentProcessor._evaluate_single_condition(metric_values, condition)
            outcomes.append(outcome)

            # Short-circuit once the combined result can no longer change.
            if combinator == "and":
                if not outcome.passed:
                    return _verdict(False)
            elif combinator == "or" and outcome.passed:
                return _verdict(True)

        # No short-circuit happened; fold the collected outcomes.  Any
        # operator other than "and" is folded with ``any``.
        reducer = all if combinator == "and" else any
        return _verdict(reducer(item.passed for item in outcomes))

    @staticmethod
    def _evaluate_single_condition(
        metric_values: dict[tuple[str, str], Any],
        condition: JudgmentCondition,
    ) -> JudgmentConditionResult:
        """Check one judgment condition against the metric values.

        Steps:
            1. Take ``(node_id, metric_name)`` from the first two entries of
               ``variable_selector``.
            2. Fetch the matching value from ``metric_values``.
            3. Hand the comparison over to the workflow condition engine.

        Problems (selector too short, metric missing for a non-unary
        operator, or any exception raised while comparing) never propagate:
        they produce a result with ``passed=False`` and ``error`` set.
        """
        selector = condition.variable_selector
        operator = condition.comparison_operator

        def _failed(message: str, actual: Any) -> JudgmentConditionResult:
            # Shared shape for all failure results.
            return JudgmentConditionResult(
                variable_selector=selector,
                comparison_operator=operator,
                expected_value=condition.value,
                actual_value=actual,
                passed=False,
                error=message,
            )

        selector_length = len(selector)
        if selector_length < 2:
            return _failed(
                f"variable_selector must have at least 2 elements, got {selector_length}",
                None,
            )

        node_id = selector[0]
        metric_name = selector[1]
        actual_value = metric_values.get((node_id, metric_name))

        # A missing metric is only acceptable for the unary operators, which
        # are still forwarded to the condition engine with a ``None`` value.
        if actual_value is None and operator not in _UNARY_OPERATORS:
            return _failed(
                f"Metric '{metric_name}' on node '{node_id}' not found in evaluation results",
                None,
            )

        try:
            expected = condition.value

            # Numeric operators need a numeric left-hand side so that the
            # workflow engine's numeric assertions work; anything that is not
            # already a number (``bool`` is an ``int`` subclass) goes through
            # ``float``.
            comparable: object = actual_value
            if operator in {"=", "≠", ">", "<", "≥", "≤"}:
                if actual_value is not None and not isinstance(actual_value, (int, float)):
                    comparable = float(actual_value)

            verdict = _evaluate_condition(
                operator=cast(SupportedComparisonOperator, operator),
                value=comparable,
                expected=cast(str | Sequence[str] | bool | Sequence[bool] | None, expected),
            )

            # Report the original (un-coerced) metric value.
            return JudgmentConditionResult(
                variable_selector=selector,
                comparison_operator=operator,
                expected_value=expected,
                actual_value=actual_value,
                passed=verdict,
            )
        except Exception as exc:
            # Best effort: a condition that cannot be evaluated counts as a
            # failed condition rather than aborting the whole judgment.
            reason = str(exc)
            logger.warning(
                "Judgment condition evaluation failed for [%s, %s]: %s",
                node_id,
                metric_name,
                reason,
            )
            return _failed(reason, actual_value)
|