refactor(api): continue decoupling dify_graph from API concerns (#33580)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: WH-2099 <wh2099@pm.me>
This commit is contained in:
-LAN-
2026-03-25 20:32:24 +08:00
committed by GitHub
parent b7b9b003c9
commit 56593f20b0
487 changed files with 17999 additions and 9186 deletions

View File

@@ -6,11 +6,8 @@ import sqlalchemy as sa
from pydantic import BaseModel, Field
from sqlalchemy.orm import Mapped, mapped_column, relationship
from dify_graph.nodes.human_input.enums import (
DeliveryMethodType,
HumanInputFormKind,
HumanInputFormStatus,
)
from core.workflow.human_input_compat import DeliveryMethodType
from dify_graph.nodes.human_input.enums import HumanInputFormKind, HumanInputFormStatus
from libs.helper import generate_string
from .base import Base, DefaultFieldsMixin

View File

@@ -3,10 +3,11 @@ from __future__ import annotations
import json
import re
import uuid
from collections.abc import Mapping, Sequence
from collections.abc import Callable, Mapping, Sequence
from datetime import datetime
from decimal import Decimal
from enum import StrEnum, auto
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Literal, NotRequired, cast
from uuid import uuid4
@@ -26,6 +27,7 @@ from dify_graph.file import helpers as file_helpers
from extensions.storage.storage_type import StorageType
from libs.helper import generate_string # type: ignore[import-not-found]
from libs.uuid_utils import uuidv7
from models.utils.file_input_compat import build_file_from_input_mapping
from .account import Account, Tenant
from .base import Base, TypeBase, gen_uuidv4_string
@@ -57,6 +59,32 @@ if TYPE_CHECKING:
# --- TypedDict definitions for structured dict return types ---
@lru_cache(maxsize=1)
def _get_file_access_controller():
    # Process-wide singleton accessor: lru_cache(maxsize=1) memoizes the single
    # zero-argument call so the controller is constructed exactly once.
    # The import is deferred into the function body to break a circular import
    # between the models layer and core.app (same pattern as the lazy
    # `factories` imports elsewhere in this file).
    from core.app.file_access import DatabaseFileAccessController

    return DatabaseFileAccessController()
def _resolve_app_tenant_id(app_id: str) -> str:
    """Look up the tenant that owns *app_id* via the App table.

    Raises:
        ValueError: when no app row exists (or the stored tenant id is empty).
    """
    tenant_id = db.session.scalar(select(App.tenant_id).where(App.id == app_id))
    if tenant_id:
        return tenant_id
    raise ValueError(f"Unable to resolve tenant_id for app {app_id}")
def _build_app_tenant_resolver(app_id: str, owner_tenant_id: str | None = None) -> Callable[[], str]:
resolved_tenant_id = owner_tenant_id
def resolve_owner_tenant_id() -> str:
nonlocal resolved_tenant_id
if resolved_tenant_id is None:
resolved_tenant_id = _resolve_app_tenant_id(app_id)
return resolved_tenant_id
return resolve_owner_tenant_id
class EnabledConfig(TypedDict):
    """Serialized shape of a feature toggle: a single boolean ``enabled`` flag."""

    enabled: bool
@@ -1057,23 +1085,26 @@ class Conversation(Base):
@property
def inputs(self) -> dict[str, Any]:
inputs = self._inputs.copy()
# Compatibility bridge: stored input payloads may come from before or after the
# graph-layer file refactor. Newer rows may omit `tenant_id`, so keep tenant
# resolution at the SQLAlchemy model boundary instead of pushing ownership back
# into `dify_graph.file.File`.
tenant_resolver = _build_app_tenant_resolver(
app_id=self.app_id,
owner_tenant_id=cast(str | None, getattr(self, "_owner_tenant_id", None)),
)
# Convert file mapping to File object
for key, value in inputs.items():
# NOTE: It's not the best way to implement this, but it's the only way to avoid circular import for now.
from factories import file_factory
if (
isinstance(value, dict)
and cast(dict[str, Any], value).get("dify_model_identity") == FILE_MODEL_IDENTITY
):
value_dict = cast(dict[str, Any], value)
if value_dict["transfer_method"] == FileTransferMethod.TOOL_FILE:
value_dict["tool_file_id"] = value_dict["related_id"]
elif value_dict["transfer_method"] in [FileTransferMethod.LOCAL_FILE, FileTransferMethod.REMOTE_URL]:
value_dict["upload_file_id"] = value_dict["related_id"]
tenant_id = cast(str, value_dict.get("tenant_id", ""))
inputs[key] = file_factory.build_from_mapping(mapping=value_dict, tenant_id=tenant_id)
inputs[key] = build_file_from_input_mapping(
file_mapping=value_dict,
tenant_resolver=tenant_resolver,
)
elif isinstance(value, list):
value_list = cast(list[Any], value)
if all(
@@ -1086,15 +1117,12 @@ class Conversation(Base):
if not isinstance(item, dict):
continue
item_dict = cast(dict[str, Any], item)
if item_dict["transfer_method"] == FileTransferMethod.TOOL_FILE:
item_dict["tool_file_id"] = item_dict["related_id"]
elif item_dict["transfer_method"] in [
FileTransferMethod.LOCAL_FILE,
FileTransferMethod.REMOTE_URL,
]:
item_dict["upload_file_id"] = item_dict["related_id"]
tenant_id = cast(str, item_dict.get("tenant_id", ""))
file_list.append(file_factory.build_from_mapping(mapping=item_dict, tenant_id=tenant_id))
file_list.append(
build_file_from_input_mapping(
file_mapping=item_dict,
tenant_resolver=tenant_resolver,
)
)
inputs[key] = file_list
return inputs
@@ -1402,21 +1430,23 @@ class Message(Base):
@property
def inputs(self) -> dict[str, Any]:
inputs = self._inputs.copy()
# Compatibility bridge: message inputs are persisted as JSON and must remain
# readable across file payload shape changes. Do not assume `tenant_id`
# is serialized into each file mapping going forward.
tenant_resolver = _build_app_tenant_resolver(
app_id=self.app_id,
owner_tenant_id=cast(str | None, getattr(self, "_owner_tenant_id", None)),
)
for key, value in inputs.items():
# NOTE: It's not the best way to implement this, but it's the only way to avoid circular import for now.
from factories import file_factory
if (
isinstance(value, dict)
and cast(dict[str, Any], value).get("dify_model_identity") == FILE_MODEL_IDENTITY
):
value_dict = cast(dict[str, Any], value)
if value_dict["transfer_method"] == FileTransferMethod.TOOL_FILE:
value_dict["tool_file_id"] = value_dict["related_id"]
elif value_dict["transfer_method"] in [FileTransferMethod.LOCAL_FILE, FileTransferMethod.REMOTE_URL]:
value_dict["upload_file_id"] = value_dict["related_id"]
tenant_id = cast(str, value_dict.get("tenant_id", ""))
inputs[key] = file_factory.build_from_mapping(mapping=value_dict, tenant_id=tenant_id)
inputs[key] = build_file_from_input_mapping(
file_mapping=value_dict,
tenant_resolver=tenant_resolver,
)
elif isinstance(value, list):
value_list = cast(list[Any], value)
if all(
@@ -1429,15 +1459,12 @@ class Message(Base):
if not isinstance(item, dict):
continue
item_dict = cast(dict[str, Any], item)
if item_dict["transfer_method"] == FileTransferMethod.TOOL_FILE:
item_dict["tool_file_id"] = item_dict["related_id"]
elif item_dict["transfer_method"] in [
FileTransferMethod.LOCAL_FILE,
FileTransferMethod.REMOTE_URL,
]:
item_dict["upload_file_id"] = item_dict["related_id"]
tenant_id = cast(str, item_dict.get("tenant_id", ""))
file_list.append(file_factory.build_from_mapping(mapping=item_dict, tenant_id=tenant_id))
file_list.append(
build_file_from_input_mapping(
file_mapping=item_dict,
tenant_resolver=tenant_resolver,
)
)
inputs[key] = file_list
return inputs
@@ -1612,6 +1639,7 @@ class Message(Base):
"upload_file_id": message_file.upload_file_id,
},
tenant_id=current_app.tenant_id,
access_controller=_get_file_access_controller(),
)
elif message_file.transfer_method == FileTransferMethod.REMOTE_URL:
if message_file.url is None:
@@ -1625,6 +1653,7 @@ class Message(Base):
"url": message_file.url,
},
tenant_id=current_app.tenant_id,
access_controller=_get_file_access_controller(),
)
elif message_file.transfer_method == FileTransferMethod.TOOL_FILE:
if message_file.upload_file_id is None:
@@ -1639,6 +1668,7 @@ class Message(Base):
file = file_factory.build_from_mapping(
mapping=mapping,
tenant_id=current_app.tenant_id,
access_controller=_get_file_access_controller(),
)
else:
raise ValueError(

View File

@@ -0,0 +1,3 @@
from .file_input_compat import build_file_from_input_mapping
__all__ = ["build_file_from_input_mapping"]

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
from collections.abc import Callable, Mapping
from functools import lru_cache
from typing import Any
from core.workflow.file_reference import parse_file_reference
from dify_graph.file import File, FileTransferMethod
@lru_cache(maxsize=1)
def _get_file_access_controller():
    # Memoized singleton accessor: the zero-argument call is cached, so the
    # controller is built once per process. The import is deferred into the
    # function body to avoid a circular import with core.app (same reason the
    # `factories` import below is function-local).
    from core.app.file_access import DatabaseFileAccessController

    return DatabaseFileAccessController()
def resolve_file_record_id(file_mapping: Mapping[str, Any]) -> str | None:
reference = file_mapping.get("reference")
if isinstance(reference, str) and reference:
parsed_reference = parse_file_reference(reference)
if parsed_reference is not None:
return parsed_reference.record_id
related_id = file_mapping.get("related_id")
if isinstance(related_id, str) and related_id:
parsed_reference = parse_file_reference(related_id)
if parsed_reference is not None:
return parsed_reference.record_id
return None
def resolve_file_mapping_tenant_id(
    *,
    file_mapping: Mapping[str, Any],
    tenant_resolver: Callable[[], str],
) -> str:
    """Return the tenant id for a file payload.

    Prefers a non-empty ``tenant_id`` string embedded in the mapping (legacy
    rows); falls back to calling *tenant_resolver* for newer rows that omit it.
    """
    embedded = file_mapping.get("tenant_id")
    return embedded if isinstance(embedded, str) and embedded else tenant_resolver()
def build_file_from_stored_mapping(
    *,
    file_mapping: Mapping[str, Any],
    tenant_id: str,
) -> File:
    """
    Canonicalize a persisted file payload against the current tenant context.

    Stored JSON rows can outlive file schema changes, so storage-backed files
    are rebuilt through the workflow file factory rather than trusting the
    serialized metadata. A pure external ``REMOTE_URL`` payload without a
    backing upload record is validated and passed through as-is, because there
    is no server-owned record to rebind.
    """
    # NOTE: function-local import is the only way to avoid a circular import
    # between this compat module and `factories` for now.
    from factories import file_factory

    mapping = dict(file_mapping)
    mapping.pop("tenant_id", None)

    record_id = resolve_file_record_id(mapping)
    transfer_method = FileTransferMethod.value_of(mapping["transfer_method"])

    if record_id:
        # Each transfer method stores its canonical record id under a
        # different key; rewrite the one matching this payload.
        record_id_keys = {
            FileTransferMethod.TOOL_FILE: "tool_file_id",
            FileTransferMethod.LOCAL_FILE: "upload_file_id",
            FileTransferMethod.REMOTE_URL: "upload_file_id",
            FileTransferMethod.DATASOURCE_FILE: "datasource_file_id",
        }
        record_id_key = record_id_keys.get(transfer_method)
        if record_id_key is not None:
            mapping[record_id_key] = record_id

    if transfer_method == FileTransferMethod.REMOTE_URL and record_id is None:
        # External URL with no backing record: keep the payload, but make sure
        # `remote_url` is populated (legacy rows may only carry `url`).
        remote_url = mapping.get("remote_url")
        if not isinstance(remote_url, str) or not remote_url:
            url = mapping.get("url")
            if isinstance(url, str) and url:
                mapping["remote_url"] = url
        return File.model_validate(mapping)

    return file_factory.build_from_mapping(
        mapping=mapping,
        tenant_id=tenant_id,
        access_controller=_get_file_access_controller(),
    )
def build_file_from_input_mapping(
    *,
    file_mapping: Mapping[str, Any],
    tenant_resolver: Callable[[], str],
) -> File:
    """
    Rehydrate a persisted model input payload into a graph `File` object.

    This compatibility layer exists because model JSON rows can outlive file
    payload schema changes: legacy rows may carry `related_id` and `tenant_id`,
    while newer rows may only carry `reference`. Tenant-ownership resolution is
    kept here, at the model boundary, instead of being pushed back into
    `dify_graph.file.File`.
    """
    transfer_method = FileTransferMethod.value_of(file_mapping["transfer_method"])
    record_id = resolve_file_record_id(file_mapping)

    if transfer_method == FileTransferMethod.REMOTE_URL and record_id is None:
        # Pure external URL: no server-owned record, so no tenant to resolve.
        tenant_id = ""
    else:
        tenant_id = resolve_file_mapping_tenant_id(
            file_mapping=file_mapping,
            tenant_resolver=tenant_resolver,
        )

    return build_file_from_stored_mapping(
        file_mapping=file_mapping,
        tenant_id=tenant_id,
    )

View File

@@ -24,7 +24,8 @@ from sqlalchemy.orm import Mapped, mapped_column
from typing_extensions import deprecated
from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE
from dify_graph.constants import (
from core.workflow.human_input_compat import normalize_node_config_for_graph
from core.workflow.variable_prefixes import (
CONVERSATION_VARIABLE_NODE_ID,
SYSTEM_VARIABLE_NODE_ID,
)
@@ -57,6 +58,7 @@ from .base import Base, DefaultFieldsMixin, TypeBase
from .engine import db
from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType, WorkflowRunTriggeredFrom
from .types import EnumText, LongText, StringUUID
from .utils.file_input_compat import build_file_from_stored_mapping
logger = logging.getLogger(__name__)
@@ -64,6 +66,15 @@ SerializedWorkflowValue = dict[str, Any]
SerializedWorkflowVariables = dict[str, SerializedWorkflowValue]
def _resolve_workflow_app_tenant_id(app_id: str) -> str:
    """Resolve the tenant that owns *app_id* via the App table.

    The `.model` import is function-local to avoid an import cycle between the
    workflow and app model modules.

    Raises:
        ValueError: when the app row is missing or has no tenant id.
    """
    from .model import App

    resolved = db.session.scalar(select(App.tenant_id).where(App.id == app_id))
    if not resolved:
        raise ValueError(f"Unable to resolve tenant_id for app {app_id}")
    return resolved
class WorkflowContentDict(TypedDict):
graph: Mapping[str, Any]
features: dict[str, Any]
@@ -273,7 +284,7 @@ class Workflow(Base): # bug
node_config: dict[str, Any] = next(filter(lambda node: node["id"] == node_id, nodes))
except StopIteration:
raise NodeNotFoundError(node_id)
return NodeConfigDictAdapter.validate_python(node_config)
return NodeConfigDictAdapter.validate_python(normalize_node_config_for_graph(node_config))
@staticmethod
def get_node_type_from_node_config(node_config: NodeConfigDict) -> NodeType:
@@ -1565,10 +1576,9 @@ class WorkflowDraftVariable(Base):
def _loads_value(self) -> Segment:
value = json.loads(self.value)
return self.build_segment_with_type(self.value_type, value)
return self.build_segment_from_serialized_value(self.value_type, value)
@staticmethod
def rebuild_file_types(value: Any):
def _rebuild_file_types(self, value: Any):
# NOTE(QuantumGhost): Temporary workaround for structured data handling.
# By this point, `output` has been converted to dict by
# `WorkflowEntry.handle_special_values`, so we need to
@@ -1582,13 +1592,72 @@ class WorkflowDraftVariable(Base):
if isinstance(value, dict):
if not maybe_file_object(value):
return cast(Any, value)
return File.model_validate(value)
tenant_id = _resolve_workflow_app_tenant_id(self.app_id)
return build_file_from_stored_mapping(
file_mapping=cast(dict[str, Any], value),
tenant_id=tenant_id,
)
elif isinstance(value, list) and value:
value_list = cast(list[Any], value)
first: Any = value_list[0]
if not maybe_file_object(first):
return cast(Any, value)
file_list: list[File] = [File.model_validate(cast(dict[str, Any], i)) for i in value_list]
tenant_id = _resolve_workflow_app_tenant_id(self.app_id)
file_list: list[File] = []
for item in value_list:
file_list.append(
build_file_from_stored_mapping(
file_mapping=cast(dict[str, Any], item),
tenant_id=tenant_id,
)
)
return cast(Any, file_list)
else:
return cast(Any, value)
def build_segment_from_serialized_value(self, segment_type: SegmentType, value: Any) -> Segment:
    """Build a typed Segment from a deserialized draft-variable value.

    Persisted draft-variable rows may contain historical file payloads, so
    FILE / ARRAY_FILE values are routed through ``self._rebuild_file_types``
    to reconstruct canonical file metadata before the segment is built. All
    other segment types wrap the value as-is.

    Raises:
        TypeMismatchError: when the value's runtime type does not match the
            declared segment type.
    """
    if segment_type == SegmentType.FILE:
        if isinstance(value, File):
            return build_segment_with_type(segment_type, value)
        if isinstance(value, dict):
            return build_segment_with_type(segment_type, self._rebuild_file_types(value))
        raise TypeMismatchError(f"expected dict or File for FileSegment, got {type(value)}")

    if segment_type == SegmentType.ARRAY_FILE:
        if not isinstance(value, list):
            raise TypeMismatchError(f"expected list for ArrayFileSegment, got {type(value)}")
        return build_segment_with_type(segment_type=segment_type, value=self._rebuild_file_types(value))

    return build_segment_with_type(segment_type=segment_type, value=value)
@staticmethod
def rebuild_file_types(value: Any):
    """Lightweight structural reconstruction of file payloads.

    Class-level fallback kept for callers that only need shape-level
    rebuilding. Persisted draft-variable payloads should instead go through
    ``build_segment_from_serialized_value()`` so file metadata is rebuilt from
    canonical storage records. ``tenant_id`` is stripped before validation
    because the graph-layer ``File`` no longer carries tenant ownership.
    """
    if isinstance(value, dict):
        if not maybe_file_object(value):
            return cast(Any, value)
        stripped = {k: v for k, v in value.items() if k != "tenant_id"}
        return File.model_validate(stripped)

    if isinstance(value, list) and value:
        items = cast(list[Any], value)
        if not maybe_file_object(items[0]):
            return cast(Any, value)
        rebuilt: list[File] = []
        for entry in items:
            stripped = {k: v for k, v in cast(dict[str, Any], entry).items() if k != "tenant_id"}
            rebuilt.append(File.model_validate(stripped))
        return cast(Any, rebuilt)

    return cast(Any, value)