feat(api/repo): Allow configuring the repository implementation (#21458)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Authored by -LAN- on 2025-07-14 14:54:38 +08:00, committed by GitHub
parent b27c540379
commit 6eb155ae69
38 changed files with 2361 additions and 329 deletions

api/repositories/api_workflow_node_execution_repository.py

@@ -0,0 +1,197 @@
"""
Service-layer repository protocol for WorkflowNodeExecutionModel operations.
This module provides a protocol interface for service-layer operations on WorkflowNodeExecutionModel
that abstracts database queries currently done directly in service classes. This repository is
specifically designed for service-layer needs and is separate from the core domain repository.
The service repository handles operations that require access to database-specific fields like
tenant_id, app_id, triggered_from, etc., which are not part of the core domain model.
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Optional, Protocol
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
from models.workflow import WorkflowNodeExecutionModel
class DifyAPIWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository, Protocol):
"""
Protocol for service-layer operations on WorkflowNodeExecutionModel.
This repository provides database access patterns specifically needed by service classes,
handling queries that involve database-specific fields and multi-tenancy concerns.
Key responsibilities:
- Manages database operations for workflow node executions
- Handles multi-tenant data isolation
- Provides batch processing capabilities
- Supports execution lifecycle management
Implementation notes:
- Returns database models directly (WorkflowNodeExecutionModel)
- Handles tenant/app filtering automatically
- Provides service-specific query patterns
- Focuses on database operations without domain logic
- Supports cleanup and maintenance operations
"""
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> Optional[WorkflowNodeExecutionModel]:
"""
Get the most recent execution for a specific node.
This method finds the latest execution of a specific node within a workflow,
ordered by creation time. Used primarily for debugging and inspection purposes.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_id: The workflow identifier
node_id: The node identifier
Returns:
The most recent WorkflowNodeExecutionModel for the node, or None if not found
"""
...
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
This method retrieves all node executions that belong to a specific workflow run,
ordered by index in descending order for proper trace visualization.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_run_id: The workflow run identifier
Returns:
A sequence of WorkflowNodeExecutionModel instances ordered by index (desc)
"""
...
def get_execution_by_id(
self,
execution_id: str,
tenant_id: Optional[str] = None,
) -> Optional[WorkflowNodeExecutionModel]:
"""
Get a workflow node execution by its ID.
This method retrieves a specific execution by its unique identifier.
Tenant filtering is optional for cases where the execution ID is globally unique.
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
set `tenant_id` to prevent horizontal privilege escalation.
Args:
execution_id: The execution identifier
tenant_id: Optional tenant identifier for additional filtering
Returns:
The WorkflowNodeExecutionModel if found, or None if not found
"""
...
def delete_expired_executions(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> int:
"""
Delete workflow node executions that are older than the specified date.
This method is used for cleanup operations to remove expired executions
in batches to avoid overwhelming the database.
Args:
tenant_id: The tenant identifier
before_date: Delete executions created before this date
batch_size: Maximum number of executions to delete in one batch
Returns:
The number of executions deleted
"""
...
def delete_executions_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow node executions for a specific app.
This method is used when removing an app and all its related data.
Executions are deleted in batches to avoid overwhelming the database.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
batch_size: Maximum number of executions to delete in one batch
Returns:
The total number of executions deleted
"""
...
def get_expired_executions_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get a batch of expired workflow node executions for backup purposes.
This method retrieves expired executions without deleting them,
allowing the caller to backup the data before deletion.
Args:
tenant_id: The tenant identifier
before_date: Get executions created before this date
batch_size: Maximum number of executions to retrieve
Returns:
A sequence of WorkflowNodeExecutionModel instances
"""
...
def delete_executions_by_ids(
self,
execution_ids: Sequence[str],
) -> int:
"""
Delete workflow node executions by their IDs.
This method deletes specific executions by their IDs,
typically used after backing up the data.
This method does not perform tenant isolation checks. The caller is responsible for ensuring proper
data isolation between tenants. When execution IDs come from untrusted sources (e.g., API requests),
additional tenant validation should be implemented to prevent unauthorized access.
Args:
execution_ids: Sequence of execution IDs to delete
Returns:
The number of executions deleted
"""
...
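
Usage sketch (not part of the diff): a service class can resolve this protocol through the factory added below. The `extensions.ext_database` import path and the request/tenant variables are assumptions for illustration, not code from this change.

```python
from sqlalchemy.orm import sessionmaker

from extensions.ext_database import db  # assumed import path for the shared SQLAlchemy handle
from repositories.factory import DifyAPIRepositoryFactory

session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(session_maker)

# The execution ID arrives from an API request (untrusted), so tenant_id is
# passed to prevent horizontal privilege escalation between tenants.
execution = repo.get_execution_by_id(
    execution_id=untrusted_execution_id,  # hypothetical request parameter
    tenant_id=current_tenant_id,  # hypothetical value from the auth context
)
if execution is None:
    raise ValueError("Workflow node execution not found for this tenant.")
```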

api/repositories/api_workflow_run_repository.py

@@ -0,0 +1,181 @@
"""
API WorkflowRun Repository Protocol
This module defines the protocol for service-layer WorkflowRun operations.
The repository provides an abstraction layer for WorkflowRun database operations
used by service classes, separating service-layer concerns from core domain logic.
Key Features:
- Paginated workflow run queries with filtering
- Bulk deletion operations with OSS backup support
- Multi-tenant data isolation
- Expired record cleanup with data retention
- Service-layer specific query patterns
Usage:
This protocol should be used by service classes that need to perform
WorkflowRun database operations. It provides a clean interface that
hides implementation details and supports dependency injection.
Example:
```python
from repositories.factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
# Get paginated workflow runs
runs = repo.get_paginated_workflow_runs(
tenant_id="tenant-123",
app_id="app-456",
triggered_from="debugging",
limit=20
)
```
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Optional, Protocol
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.workflow import WorkflowRun
class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
"""
Protocol for service-layer WorkflowRun repository operations.
This protocol defines the interface for WorkflowRun database operations
that are specific to service-layer needs, including pagination, filtering,
and bulk operations with data backup support.
"""
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
limit: int = 20,
last_id: Optional[str] = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Retrieves workflow runs for a specific app and trigger source with
cursor-based pagination support. Used primarily for debugging and
workflow run listing in the UI.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "debugging", "app-run")
limit: Maximum number of records to return (default: 20)
last_id: Cursor for pagination - ID of the last record from previous page
Returns:
InfiniteScrollPagination object containing:
- data: List of WorkflowRun objects
- limit: Applied limit
- has_more: Boolean indicating if more records exist
Raises:
ValueError: If last_id is provided but the corresponding record doesn't exist
"""
...
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> Optional[WorkflowRun]:
"""
Get a specific workflow run by ID.
Retrieves a single workflow run with tenant and app isolation.
Used for workflow run detail views and execution tracking.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
run_id: Workflow run identifier
Returns:
WorkflowRun object if found, None otherwise
"""
...
def get_expired_runs_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowRun]:
"""
Get a batch of expired workflow runs for cleanup.
Retrieves workflow runs created before the specified date for
cleanup operations. Used by scheduled tasks to remove old data
while maintaining data retention policies.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
before_date: Only return runs created before this date
batch_size: Maximum number of records to return
Returns:
Sequence of WorkflowRun objects to be processed for cleanup
"""
...
def delete_runs_by_ids(
self,
run_ids: Sequence[str],
) -> int:
"""
Delete workflow runs by their IDs.
Performs bulk deletion of workflow runs by ID. This method should
be used after backing up the data to OSS storage for retention.
Args:
run_ids: Sequence of workflow run IDs to delete
Returns:
Number of records actually deleted
Note:
This method performs hard deletion. Ensure data is backed up
to OSS storage before calling this method for compliance with
data retention policies.
"""
...
def delete_runs_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow runs for a specific app.
Performs bulk deletion of all workflow runs associated with an app.
Used during app cleanup operations. Processes records in batches
to avoid memory issues and long-running transactions.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
batch_size: Number of records to process in each batch
Returns:
Total number of records deleted across all batches
Note:
This method performs hard deletion without backup. Use with caution
and ensure proper data retention policies are followed.
"""
...
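
A minimal sketch of the cursor contract (assuming `repo` was created via `DifyAPIRepositoryFactory.create_api_workflow_run_repository`; `process` is a hypothetical handler): the last ID of each page is fed back in as `last_id` until `has_more` is False.

```python
last_id = None
while True:
    page = repo.get_paginated_workflow_runs(
        tenant_id="tenant-123",
        app_id="app-456",
        triggered_from="debugging",
        limit=20,
        last_id=last_id,
    )
    for run in page.data:
        process(run)  # hypothetical per-run handler
    if not page.has_more:
        break
    last_id = page.data[-1].id  # cursor for the next page
```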

api/repositories/factory.py

@@ -0,0 +1,103 @@
"""
DifyAPI Repository Factory for creating repository instances.
This factory is specifically designed for DifyAPI repositories that handle
service-layer operations with dependency injection patterns.
"""
import logging
from sqlalchemy.orm import sessionmaker
from configs import dify_config
from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
logger = logging.getLogger(__name__)
class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
"""
Factory for creating DifyAPI repository instances based on configuration.
This factory handles the creation of repositories that are specifically designed
for service-layer operations and use dependency injection with sessionmaker
for better testability and separation of concerns.
"""
@classmethod
def create_api_workflow_node_execution_repository(
cls, session_maker: sessionmaker
) -> DifyAPIWorkflowNodeExecutionRepository:
"""
Create a DifyAPIWorkflowNodeExecutionRepository instance based on configuration.
This repository is designed for service-layer operations and uses dependency injection
with a sessionmaker for better testability and separation of concerns. It provides
database access patterns specifically needed by service classes, handling queries
that involve database-specific fields and multi-tenancy concerns.
Args:
session_maker: SQLAlchemy sessionmaker to inject for database session management.
Returns:
Configured DifyAPIWorkflowNodeExecutionRepository instance
Raises:
RepositoryImportError: If the configured repository cannot be imported or instantiated
"""
class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY
logger.debug(f"Creating DifyAPIWorkflowNodeExecutionRepository from: {class_path}")
try:
repository_class = cls._import_class(class_path)
cls._validate_repository_interface(repository_class, DifyAPIWorkflowNodeExecutionRepository)
# Service repository requires session_maker parameter
cls._validate_constructor_signature(repository_class, ["session_maker"])
return repository_class(session_maker=session_maker) # type: ignore[no-any-return]
except RepositoryImportError:
# Re-raise our custom errors as-is
raise
except Exception as e:
logger.exception("Failed to create DifyAPIWorkflowNodeExecutionRepository")
raise RepositoryImportError(
f"Failed to create DifyAPIWorkflowNodeExecutionRepository from '{class_path}': {e}"
) from e
@classmethod
def create_api_workflow_run_repository(cls, session_maker: sessionmaker) -> APIWorkflowRunRepository:
"""
Create an APIWorkflowRunRepository instance based on configuration.
This repository is designed for service-layer WorkflowRun operations and uses dependency
injection with a sessionmaker for better testability and separation of concerns. It provides
database access patterns specifically needed by service classes for workflow run management,
including pagination, filtering, and bulk operations.
Args:
session_maker: SQLAlchemy sessionmaker to inject for database session management.
Returns:
Configured APIWorkflowRunRepository instance
Raises:
RepositoryImportError: If the configured repository cannot be imported or instantiated
"""
class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY
logger.debug(f"Creating APIWorkflowRunRepository from: {class_path}")
try:
repository_class = cls._import_class(class_path)
cls._validate_repository_interface(repository_class, APIWorkflowRunRepository)
# Service repository requires session_maker parameter
cls._validate_constructor_signature(repository_class, ["session_maker"])
return repository_class(session_maker=session_maker) # type: ignore[no-any-return]
except RepositoryImportError:
# Re-raise our custom errors as-is
raise
except Exception as e:
logger.exception("Failed to create APIWorkflowRunRepository")
raise RepositoryImportError(f"Failed to create APIWorkflowRunRepository from '{class_path}': {e}") from e


@@ -0,0 +1,290 @@
"""
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
This module provides a concrete implementation of the service repository protocol
using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Optional
from sqlalchemy import delete, desc, select
from sqlalchemy.orm import Session, sessionmaker
from models.workflow import WorkflowNodeExecutionModel
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecutionRepository):
"""
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
This repository provides service-layer database operations for WorkflowNodeExecutionModel
using SQLAlchemy 2.0 style queries. It implements the DifyAPIWorkflowNodeExecutionRepository
protocol with the following features:
- Multi-tenancy data isolation through tenant_id filtering
- Direct database model operations without domain conversion
- Batch processing for efficient large-scale operations
- Optimized query patterns for common access patterns
- Dependency injection for better testability and maintainability
- Session management and transaction handling with proper cleanup
- Maintenance operations for data lifecycle management
- Thread-safe database operations using session-per-request pattern
"""
def __init__(self, session_maker: sessionmaker[Session]):
"""
Initialize the repository with a sessionmaker.
Args:
session_maker: SQLAlchemy sessionmaker for creating database sessions
"""
self._session_maker = session_maker
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> Optional[WorkflowNodeExecutionModel]:
"""
Get the most recent execution for a specific node.
This method replicates the query pattern from WorkflowService.get_node_last_run()
using SQLAlchemy 2.0 style syntax.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_id: The workflow identifier
node_id: The node identifier
Returns:
The most recent WorkflowNodeExecutionModel for the node, or None if not found
"""
stmt = (
select(WorkflowNodeExecutionModel)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
WorkflowNodeExecutionModel.workflow_id == workflow_id,
WorkflowNodeExecutionModel.node_id == node_id,
)
.order_by(desc(WorkflowNodeExecutionModel.created_at))
.limit(1)
)
with self._session_maker() as session:
return session.scalar(stmt)
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
This method replicates the query pattern from WorkflowRunService.get_workflow_run_node_executions()
using SQLAlchemy 2.0 style syntax.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_run_id: The workflow run identifier
Returns:
A sequence of WorkflowNodeExecutionModel instances ordered by index (desc)
"""
stmt = (
select(WorkflowNodeExecutionModel)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id,
)
.order_by(desc(WorkflowNodeExecutionModel.index))
)
with self._session_maker() as session:
return session.execute(stmt).scalars().all()
def get_execution_by_id(
self,
execution_id: str,
tenant_id: Optional[str] = None,
) -> Optional[WorkflowNodeExecutionModel]:
"""
Get a workflow node execution by its ID.
This method replicates the query pattern from WorkflowDraftVariableService
and WorkflowService.single_step_run_workflow_node() using SQLAlchemy 2.0 style syntax.
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
set `tenant_id` to prevent horizontal privilege escalation.
Args:
execution_id: The execution identifier
tenant_id: Optional tenant identifier for additional filtering
Returns:
The WorkflowNodeExecutionModel if found, or None if not found
"""
stmt = select(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id == execution_id)
# Add tenant filtering if provided
if tenant_id is not None:
stmt = stmt.where(WorkflowNodeExecutionModel.tenant_id == tenant_id)
with self._session_maker() as session:
return session.scalar(stmt)
def delete_expired_executions(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> int:
"""
Delete workflow node executions that are older than the specified date.
Args:
tenant_id: The tenant identifier
before_date: Delete executions created before this date
batch_size: Maximum number of executions to delete in one batch
Returns:
The number of executions deleted
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Find executions to delete in batches
stmt = (
select(WorkflowNodeExecutionModel.id)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.created_at < before_date,
)
.limit(batch_size)
)
execution_ids = session.execute(stmt).scalars().all()
if not execution_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = session.execute(delete_stmt)
session.commit()
total_deleted += result.rowcount
# If we deleted fewer than the batch size, we're done
if len(execution_ids) < batch_size:
break
return total_deleted
def delete_executions_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow node executions for a specific app.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
batch_size: Maximum number of executions to delete in one batch
Returns:
The total number of executions deleted
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Find executions to delete in batches
stmt = (
select(WorkflowNodeExecutionModel.id)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
)
.limit(batch_size)
)
execution_ids = session.execute(stmt).scalars().all()
if not execution_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = session.execute(delete_stmt)
session.commit()
total_deleted += result.rowcount
# If we deleted fewer than the batch size, we're done
if len(execution_ids) < batch_size:
break
return total_deleted
def get_expired_executions_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get a batch of expired workflow node executions for backup purposes.
Args:
tenant_id: The tenant identifier
before_date: Get executions created before this date
batch_size: Maximum number of executions to retrieve
Returns:
A sequence of WorkflowNodeExecutionModel instances
"""
stmt = (
select(WorkflowNodeExecutionModel)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.created_at < before_date,
)
.limit(batch_size)
)
with self._session_maker() as session:
return session.execute(stmt).scalars().all()
def delete_executions_by_ids(
self,
execution_ids: Sequence[str],
) -> int:
"""
Delete workflow node executions by their IDs.
Args:
execution_ids: Sequence of execution IDs to delete
Returns:
The number of executions deleted
"""
if not execution_ids:
return 0
with self._session_maker() as session:
stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = session.execute(stmt)
session.commit()
return result.rowcount
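
The maintenance methods above compose into the backup-then-delete loop the protocol docstrings describe. A minimal sketch, assuming `repo` is an instance of this class and `backup_to_storage` is a hypothetical persistence helper:

```python
from datetime import datetime, timedelta

cutoff = datetime.utcnow() - timedelta(days=30)  # hypothetical retention window

while True:
    batch = repo.get_expired_executions_batch(
        tenant_id="tenant-123",
        before_date=cutoff,
        batch_size=1000,
    )
    if not batch:
        break
    backup_to_storage(batch)  # hypothetical: archive records before deletion
    repo.delete_executions_by_ids([execution.id for execution in batch])
```

Each iteration deletes exactly the rows it just backed up, so the loop terminates once a fetch comes back empty.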


@@ -0,0 +1,202 @@
"""
SQLAlchemy API WorkflowRun Repository Implementation
This module provides the SQLAlchemy-based implementation of the APIWorkflowRunRepository
protocol. It handles service-layer WorkflowRun database operations using SQLAlchemy 2.0
style queries with proper session management and multi-tenant data isolation.
Key Features:
- SQLAlchemy 2.0 style queries for modern database operations
- Cursor-based pagination for efficient large dataset handling
- Bulk operations with batch processing for performance
- Multi-tenant data isolation and security
- Proper session management with dependency injection
Implementation Notes:
- Uses sessionmaker for consistent session management
- Implements cursor-based pagination using created_at timestamps
- Provides efficient bulk deletion with batch processing
- Maintains data consistency with proper transaction handling
"""
import logging
from collections.abc import Sequence
from datetime import datetime
from typing import Optional, cast
from sqlalchemy import delete, select
from sqlalchemy.orm import Session, sessionmaker
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.workflow import WorkflowRun
logger = logging.getLogger(__name__)
class DifyAPISQLAlchemyWorkflowRunRepository:
"""
SQLAlchemy implementation of APIWorkflowRunRepository.
Provides service-layer WorkflowRun database operations using SQLAlchemy 2.0
style queries. Supports dependency injection through sessionmaker and
maintains proper multi-tenant data isolation.
Args:
session_maker: SQLAlchemy sessionmaker instance for database connections
"""
def __init__(self, session_maker: sessionmaker[Session]) -> None:
"""
Initialize the repository with a sessionmaker.
Args:
session_maker: SQLAlchemy sessionmaker for database connections
"""
self._session_maker = session_maker
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
limit: int = 20,
last_id: Optional[str] = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Implements cursor-based pagination using created_at timestamps for
efficient handling of large datasets. Filters by tenant, app, and
trigger source for proper data isolation.
"""
with self._session_maker() as session:
# Build base query with filters
base_stmt = select(WorkflowRun).where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
WorkflowRun.triggered_from == triggered_from,
)
if last_id:
# Get the last workflow run for cursor-based pagination
last_run_stmt = base_stmt.where(WorkflowRun.id == last_id)
last_workflow_run = session.scalar(last_run_stmt)
if not last_workflow_run:
raise ValueError("Last workflow run does not exist")
# Get records created before the last run's timestamp
base_stmt = base_stmt.where(
WorkflowRun.created_at < last_workflow_run.created_at,
WorkflowRun.id != last_workflow_run.id,
)
# Newest records first; fetch one extra row to detect whether more pages exist
workflow_runs = session.scalars(base_stmt.order_by(WorkflowRun.created_at.desc()).limit(limit + 1)).all()
# Check if there are more records for pagination
has_more = len(workflow_runs) > limit
if has_more:
workflow_runs = workflow_runs[:-1]
return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> Optional[WorkflowRun]:
"""
Get a specific workflow run by ID with tenant and app isolation.
"""
with self._session_maker() as session:
stmt = select(WorkflowRun).where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
WorkflowRun.id == run_id,
)
return cast(Optional[WorkflowRun], session.scalar(stmt))
def get_expired_runs_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowRun]:
"""
Get a batch of expired workflow runs for cleanup operations.
"""
with self._session_maker() as session:
stmt = (
select(WorkflowRun)
.where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.created_at < before_date,
)
.limit(batch_size)
)
return cast(Sequence[WorkflowRun], session.scalars(stmt).all())
def delete_runs_by_ids(
self,
run_ids: Sequence[str],
) -> int:
"""
Delete workflow runs by their IDs using bulk deletion.
"""
if not run_ids:
return 0
with self._session_maker() as session:
stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
result = session.execute(stmt)
session.commit()
deleted_count = cast(int, result.rowcount)
logger.info(f"Deleted {deleted_count} workflow runs by IDs")
return deleted_count
def delete_runs_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow runs for a specific app in batches.
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Get a batch of run IDs to delete
stmt = (
select(WorkflowRun.id)
.where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
)
.limit(batch_size)
)
run_ids = session.scalars(stmt).all()
if not run_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
result = session.execute(delete_stmt)
session.commit()
batch_deleted = result.rowcount
total_deleted += batch_deleted
logger.info(f"Deleted batch of {batch_deleted} workflow runs for app {app_id}")
# If we deleted fewer records than the batch size, we're done
if batch_deleted < batch_size:
break
logger.info(f"Total deleted {total_deleted} workflow runs for app {app_id}")
return total_deleted
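
Since both implementations take an injected sessionmaker and are consumed through protocols, services can depend on the interface while tests substitute a double. A sketch under those assumptions (the classes below are hypothetical, and the test double implements only the method under test):

```python
from typing import Optional

from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository


class WorkflowRunLookup:
    """Hypothetical consumer that depends on the protocol, not a concrete class."""

    def __init__(self, repo: APIWorkflowRunRepository) -> None:
        self._repo = repo

    def find(self, tenant_id: str, app_id: str, run_id: str) -> Optional[WorkflowRun]:
        return self._repo.get_workflow_run_by_id(tenant_id, app_id, run_id)


class InMemoryWorkflowRunRepository:
    """Hypothetical test double keyed by run ID; only one protocol method is implemented."""

    def __init__(self, runs: dict[str, WorkflowRun]) -> None:
        self._runs = runs

    def get_workflow_run_by_id(self, tenant_id: str, app_id: str, run_id: str) -> Optional[WorkflowRun]:
        run = self._runs.get(run_id)
        if run is not None and run.tenant_id == tenant_id and run.app_id == app_id:
            return run
        return None
```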