mirror of
https://github.com/langgenius/dify.git
synced 2026-04-05 10:25:48 +08:00
feat(api/repo): Allow to config repository implementation (#21458)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
This commit is contained in:
0
api/repositories/__init__.py
Normal file
0
api/repositories/__init__.py
Normal file
197
api/repositories/api_workflow_node_execution_repository.py
Normal file
197
api/repositories/api_workflow_node_execution_repository.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Service-layer repository protocol for WorkflowNodeExecutionModel operations.
|
||||
|
||||
This module provides a protocol interface for service-layer operations on WorkflowNodeExecutionModel
|
||||
that abstracts database queries currently done directly in service classes. This repository is
|
||||
specifically designed for service-layer needs and is separate from the core domain repository.
|
||||
|
||||
The service repository handles operations that require access to database-specific fields like
|
||||
tenant_id, app_id, triggered_from, etc., which are not part of the core domain model.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Optional, Protocol
|
||||
|
||||
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
|
||||
from models.workflow import WorkflowNodeExecutionModel
|
||||
|
||||
|
||||
class DifyAPIWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository, Protocol):
    """
    Service-layer contract for reading and purging WorkflowNodeExecutionModel rows.

    Builds on the core WorkflowNodeExecutionRepository protocol and adds the
    queries that service classes need, which rely on database-only fields such
    as ``tenant_id`` and ``app_id``.

    What implementations are expected to provide:
    - Lookups scoped to a tenant and an app
    - Batched retrieval of expired rows (e.g. ahead of a backup)
    - Batched deletion for cleanup and app removal

    Design notes:
    - Database models (WorkflowNodeExecutionModel) are returned as-is
    - Only database access belongs here, no domain logic
    - ``get_execution_by_id`` without ``tenant_id`` and
      ``delete_executions_by_ids`` leave tenant isolation to the caller
    """

    def get_node_last_execution(
        self,
        tenant_id: str,
        app_id: str,
        workflow_id: str,
        node_id: str,
    ) -> Optional[WorkflowNodeExecutionModel]:
        """
        Fetch the latest execution recorded for one node of a workflow.

        "Latest" is decided by creation time. Mainly intended for debugging
        and inspection.

        Args:
            tenant_id: Tenant that owns the execution
            app_id: Application the workflow belongs to
            workflow_id: Workflow containing the node
            node_id: Node whose last execution is requested

        Returns:
            The newest matching WorkflowNodeExecutionModel, or None when the
            node has no recorded execution
        """
        ...

    def get_executions_by_workflow_run(
        self,
        tenant_id: str,
        app_id: str,
        workflow_run_id: str,
    ) -> Sequence[WorkflowNodeExecutionModel]:
        """
        List every node execution that belongs to one workflow run.

        Results are sorted by ``index`` in descending order, which is the
        order trace visualization expects.

        Args:
            tenant_id: Tenant that owns the run
            app_id: Application the run belongs to
            workflow_run_id: Workflow run whose node executions are wanted

        Returns:
            WorkflowNodeExecutionModel instances, highest index first
        """
        ...

    def get_execution_by_id(
        self,
        execution_id: str,
        tenant_id: Optional[str] = None,
    ) -> Optional[WorkflowNodeExecutionModel]:
        """
        Look up a single node execution by its unique identifier.

        Security note: with ``tenant_id=None`` no tenant check is applied, so
        the caller must guarantee tenant isolation itself. Whenever
        ``execution_id`` originates from an untrusted source (for example an
        API request), pass ``tenant_id`` to prevent horizontal privilege
        escalation.

        Args:
            execution_id: Identifier of the execution
            tenant_id: When given, the execution must also belong to this tenant

        Returns:
            The matching WorkflowNodeExecutionModel, or None if nothing matches
        """
        ...

    def delete_expired_executions(
        self,
        tenant_id: str,
        before_date: datetime,
        batch_size: int = 1000,
    ) -> int:
        """
        Remove a tenant's node executions created before a cutoff.

        Intended for cleanup jobs; rows are removed in batches so the
        database is not overwhelmed.

        Args:
            tenant_id: Tenant whose executions are cleaned up
            before_date: Executions created before this moment are deleted
            batch_size: Upper bound on rows deleted per batch

        Returns:
            How many executions were deleted
        """
        ...

    def delete_executions_by_app(
        self,
        tenant_id: str,
        app_id: str,
        batch_size: int = 1000,
    ) -> int:
        """
        Remove every node execution that belongs to an app.

        Used when an app and all of its data are being removed; rows are
        deleted in batches so the database is not overwhelmed.

        Args:
            tenant_id: Tenant that owns the app
            app_id: Application whose executions are deleted
            batch_size: Upper bound on rows deleted per batch

        Returns:
            Total number of executions deleted
        """
        ...

    def get_expired_executions_batch(
        self,
        tenant_id: str,
        before_date: datetime,
        batch_size: int = 1000,
    ) -> Sequence[WorkflowNodeExecutionModel]:
        """
        Read (without deleting) one batch of a tenant's expired executions.

        Lets the caller back the rows up before removing them.

        Args:
            tenant_id: Tenant whose executions are read
            before_date: Only executions created before this moment qualify
            batch_size: Upper bound on rows returned

        Returns:
            A sequence of WorkflowNodeExecutionModel instances
        """
        ...

    def delete_executions_by_ids(
        self,
        execution_ids: Sequence[str],
    ) -> int:
        """
        Remove the node executions with the given IDs.

        Typically called once the rows have been backed up.

        Security note: no tenant isolation is performed here. The caller is
        responsible for it, and IDs coming from untrusted sources (for
        example API requests) need additional tenant validation beforehand
        to prevent unauthorized access.

        Args:
            execution_ids: IDs of the executions to delete

        Returns:
            How many executions were deleted
        """
        ...
|
||||
181
api/repositories/api_workflow_run_repository.py
Normal file
181
api/repositories/api_workflow_run_repository.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
API WorkflowRun Repository Protocol
|
||||
|
||||
This module defines the protocol for service-layer WorkflowRun operations.
|
||||
The repository provides an abstraction layer for WorkflowRun database operations
|
||||
used by service classes, separating service-layer concerns from core domain logic.
|
||||
|
||||
Key Features:
|
||||
- Paginated workflow run queries with filtering
|
||||
- Bulk deletion operations with OSS backup support
|
||||
- Multi-tenant data isolation
|
||||
- Expired record cleanup with data retention
|
||||
- Service-layer specific query patterns
|
||||
|
||||
Usage:
|
||||
This protocol should be used by service classes that need to perform
|
||||
WorkflowRun database operations. It provides a clean interface that
|
||||
hides implementation details and supports dependency injection.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from repositories.factory import DifyAPIRepositoryFactory
|
||||
|
||||
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
|
||||
repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
|
||||
|
||||
# Get paginated workflow runs
|
||||
runs = repo.get_paginated_workflow_runs(
|
||||
tenant_id="tenant-123",
|
||||
app_id="app-456",
|
||||
triggered_from="debugging",
|
||||
limit=20
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Optional, Protocol
|
||||
|
||||
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from models.workflow import WorkflowRun
|
||||
|
||||
|
||||
class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
    """
    Service-layer contract for WorkflowRun persistence.

    Extends the core WorkflowExecutionRepository protocol with what service
    classes need on top of it: paginated listing with filters, single-run
    lookup, and bulk deletion that can be paired with a prior backup.
    """

    def get_paginated_workflow_runs(
        self,
        tenant_id: str,
        app_id: str,
        triggered_from: str,
        limit: int = 20,
        last_id: Optional[str] = None,
    ) -> InfiniteScrollPagination:
        """
        List workflow runs of an app, one cursor-based page at a time.

        Runs are restricted to a single trigger source. Primarily backs
        debugging views and the workflow run list in the UI.

        Args:
            tenant_id: Tenant identifier, used for multi-tenant isolation
            app_id: Application whose runs are listed
            triggered_from: Trigger source to keep (e.g., "debugging", "app-run")
            limit: Page size (default: 20)
            last_id: Pagination cursor, the ID of the final record of the previous page

        Returns:
            InfiniteScrollPagination carrying:
            - data: the WorkflowRun objects of this page
            - limit: the limit that was applied
            - has_more: whether further records exist

        Raises:
            ValueError: When last_id is given but no such record exists
        """
        ...

    def get_workflow_run_by_id(
        self,
        tenant_id: str,
        app_id: str,
        run_id: str,
    ) -> Optional[WorkflowRun]:
        """
        Look up one workflow run, scoped to a tenant and an app.

        Backs workflow run detail views and execution tracking.

        Args:
            tenant_id: Tenant identifier, used for multi-tenant isolation
            app_id: Application the run must belong to
            run_id: Identifier of the workflow run

        Returns:
            The WorkflowRun when it exists, otherwise None
        """
        ...

    def get_expired_runs_batch(
        self,
        tenant_id: str,
        before_date: datetime,
        batch_size: int = 1000,
    ) -> Sequence[WorkflowRun]:
        """
        Read one batch of a tenant's workflow runs that are due for cleanup.

        Scheduled tasks use this to find runs created before a cutoff so old
        data can be removed in line with data retention policies.

        Args:
            tenant_id: Tenant identifier, used for multi-tenant isolation
            before_date: Only runs created before this moment are returned
            batch_size: Upper bound on records returned

        Returns:
            The WorkflowRun objects to process for cleanup
        """
        ...

    def delete_runs_by_ids(
        self,
        run_ids: Sequence[str],
    ) -> int:
        """
        Bulk-delete the workflow runs with the given IDs.

        Args:
            run_ids: IDs of the workflow runs to delete

        Returns:
            Number of records actually deleted

        Note:
            This is a hard delete. Back the data up to OSS storage first so
            data retention policies are still met.
        """
        ...

    def delete_runs_by_app(
        self,
        tenant_id: str,
        app_id: str,
        batch_size: int = 1000,
    ) -> int:
        """
        Bulk-delete every workflow run that belongs to an app.

        Used during app cleanup. Records are processed in batches to avoid
        memory issues and long-running transactions.

        Args:
            tenant_id: Tenant identifier, used for multi-tenant isolation
            app_id: Application whose runs are deleted
            batch_size: Number of records handled per batch

        Returns:
            Total number of records deleted across all batches

        Note:
            This is a hard delete with no backup. Use with caution and make
            sure data retention policies are followed.
        """
        ...
|
||||
103
api/repositories/factory.py
Normal file
103
api/repositories/factory.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
DifyAPI Repository Factory for creating repository instances.
|
||||
|
||||
This factory is specifically designed for DifyAPI repositories that handle
|
||||
service-layer operations with dependency injection patterns.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from configs import dify_config
|
||||
from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError
|
||||
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
|
||||
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
|
||||
|
||||
# Module-level logger; records repository creation attempts and creation failures.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
    """
    Builds the service-layer (DifyAPI) repositories selected in configuration.

    Each factory method reads a class path from ``dify_config``, resolves and
    validates the class with the helpers inherited from
    DifyCoreRepositoryFactory, and instantiates it with the sessionmaker
    supplied by the caller (dependency injection).
    """

    @classmethod
    def create_api_workflow_node_execution_repository(
        cls, session_maker: sessionmaker
    ) -> DifyAPIWorkflowNodeExecutionRepository:
        """
        Instantiate the configured DifyAPIWorkflowNodeExecutionRepository.

        The implementation class is taken from
        ``dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY``. It is checked
        against the DifyAPIWorkflowNodeExecutionRepository interface and must
        accept a ``session_maker`` constructor argument.

        Args:
            session_maker: SQLAlchemy sessionmaker handed to the repository constructor.

        Returns:
            The newly created repository instance

        Raises:
            RepositoryImportError: If the configured repository cannot be imported or instantiated
        """
        dotted_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY
        logger.debug(f"Creating DifyAPIWorkflowNodeExecutionRepository from: {dotted_path}")

        try:
            impl_cls = cls._import_class(dotted_path)
            cls._validate_repository_interface(impl_cls, DifyAPIWorkflowNodeExecutionRepository)
            # Service repositories are constructed with a session_maker keyword.
            cls._validate_constructor_signature(impl_cls, ["session_maker"])
            repository = impl_cls(session_maker=session_maker)
        except RepositoryImportError:
            # Already the factory's own error type: propagate untouched.
            raise
        except Exception as exc:
            logger.exception("Failed to create DifyAPIWorkflowNodeExecutionRepository")
            raise RepositoryImportError(
                f"Failed to create DifyAPIWorkflowNodeExecutionRepository from '{dotted_path}': {exc}"
            ) from exc
        else:
            return repository  # type: ignore[no-any-return]

    @classmethod
    def create_api_workflow_run_repository(cls, session_maker: sessionmaker) -> APIWorkflowRunRepository:
        """
        Instantiate the configured APIWorkflowRunRepository.

        The implementation class is taken from
        ``dify_config.API_WORKFLOW_RUN_REPOSITORY``. It is checked against the
        APIWorkflowRunRepository interface and must accept a ``session_maker``
        constructor argument.

        Args:
            session_maker: SQLAlchemy sessionmaker handed to the repository constructor.

        Returns:
            The newly created repository instance

        Raises:
            RepositoryImportError: If the configured repository cannot be imported or instantiated
        """
        dotted_path = dify_config.API_WORKFLOW_RUN_REPOSITORY
        logger.debug(f"Creating APIWorkflowRunRepository from: {dotted_path}")

        try:
            impl_cls = cls._import_class(dotted_path)
            cls._validate_repository_interface(impl_cls, APIWorkflowRunRepository)
            # Service repositories are constructed with a session_maker keyword.
            cls._validate_constructor_signature(impl_cls, ["session_maker"])
            repository = impl_cls(session_maker=session_maker)
        except RepositoryImportError:
            # Already the factory's own error type: propagate untouched.
            raise
        except Exception as exc:
            logger.exception("Failed to create APIWorkflowRunRepository")
            raise RepositoryImportError(f"Failed to create APIWorkflowRunRepository from '{dotted_path}': {exc}") from exc
        else:
            return repository  # type: ignore[no-any-return]
|
||||
@@ -0,0 +1,290 @@
|
||||
"""
|
||||
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
|
||||
|
||||
This module provides a concrete implementation of the service repository protocol
|
||||
using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import delete, desc, select
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from models.workflow import WorkflowNodeExecutionModel
|
||||
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
|
||||
|
||||
|
||||
class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecutionRepository):
    """
    DifyAPIWorkflowNodeExecutionRepository backed by SQLAlchemy 2.0 style queries.

    Characteristics:

    - Works on WorkflowNodeExecutionModel rows directly, with no domain conversion
    - Tenant-scoped methods filter on ``tenant_id``
    - Receives its sessionmaker through the constructor (dependency injection)
    - Every call opens its own session via a ``with`` block, so the session
      is closed when the call finishes
    - Bulk deletions run in batches, committing after each batch
    """

    def __init__(self, session_maker: sessionmaker[Session]):
        """
        Store the sessionmaker used to open a session for each operation.

        Args:
            session_maker: SQLAlchemy sessionmaker for creating database sessions
        """
        self._session_maker = session_maker

    def get_node_last_execution(
        self,
        tenant_id: str,
        app_id: str,
        workflow_id: str,
        node_id: str,
    ) -> Optional[WorkflowNodeExecutionModel]:
        """
        Fetch the newest execution of one node, judged by ``created_at``.

        Mirrors the query previously issued by WorkflowService.get_node_last_run().

        Args:
            tenant_id: The tenant identifier
            app_id: The application identifier
            workflow_id: The workflow identifier
            node_id: The node identifier

        Returns:
            The most recent WorkflowNodeExecutionModel for the node, or None if not found
        """
        model = WorkflowNodeExecutionModel
        criteria = (
            model.tenant_id == tenant_id,
            model.app_id == app_id,
            model.workflow_id == workflow_id,
            model.node_id == node_id,
        )
        # Newest first, keep only the top row.
        query = select(model).where(*criteria).order_by(desc(model.created_at)).limit(1)

        with self._session_maker() as session:
            return session.scalar(query)

    def get_executions_by_workflow_run(
        self,
        tenant_id: str,
        app_id: str,
        workflow_run_id: str,
    ) -> Sequence[WorkflowNodeExecutionModel]:
        """
        List all node executions of a workflow run, highest ``index`` first.

        Mirrors the query previously issued by
        WorkflowRunService.get_workflow_run_node_executions().

        Args:
            tenant_id: The tenant identifier
            app_id: The application identifier
            workflow_run_id: The workflow run identifier

        Returns:
            A sequence of WorkflowNodeExecutionModel instances ordered by index (desc)
        """
        model = WorkflowNodeExecutionModel
        criteria = (
            model.tenant_id == tenant_id,
            model.app_id == app_id,
            model.workflow_run_id == workflow_run_id,
        )
        query = select(model).where(*criteria).order_by(desc(model.index))

        with self._session_maker() as session:
            return session.scalars(query).all()

    def get_execution_by_id(
        self,
        execution_id: str,
        tenant_id: Optional[str] = None,
    ) -> Optional[WorkflowNodeExecutionModel]:
        """
        Look up one node execution by ID, optionally restricted to a tenant.

        Mirrors the query pattern of WorkflowDraftVariableService and
        WorkflowService.single_step_run_workflow_node().

        Security note: with ``tenant_id=None`` the row is returned no matter
        which tenant owns it, so the caller must ensure tenant isolation.
        If ``execution_id`` comes from an untrusted source (e.g. an API
        request), pass ``tenant_id`` to prevent horizontal privilege escalation.

        Args:
            execution_id: The execution identifier
            tenant_id: Optional tenant identifier for additional filtering

        Returns:
            The WorkflowNodeExecutionModel if found, or None if not found
        """
        query = select(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id == execution_id)
        if tenant_id is not None:
            # Narrow the match to the tenant only when one was supplied.
            query = query.where(WorkflowNodeExecutionModel.tenant_id == tenant_id)

        with self._session_maker() as session:
            return session.scalar(query)

    def _delete_matching_in_batches(self, *criteria, batch_size: int) -> int:
        """
        Delete every row matching ``criteria``, ``batch_size`` rows at a time.

        Each round opens a new session, selects up to ``batch_size`` matching
        IDs, deletes those IDs and commits. The loop ends when a round finds
        no IDs or fewer than ``batch_size`` of them.

        Returns:
            The sum of the row counts reported by the DELETE statements
        """
        removed = 0
        finished = False

        while not finished:
            with self._session_maker() as session:
                id_query = select(WorkflowNodeExecutionModel.id).where(*criteria).limit(batch_size)
                batch_ids = session.scalars(id_query).all()

                if batch_ids:
                    outcome = session.execute(
                        delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(batch_ids))
                    )
                    session.commit()
                    removed += outcome.rowcount

                # An empty or short batch means nothing is left to fetch.
                finished = len(batch_ids) < batch_size or not batch_ids

        return removed

    def delete_expired_executions(
        self,
        tenant_id: str,
        before_date: datetime,
        batch_size: int = 1000,
    ) -> int:
        """
        Delete a tenant's node executions created before ``before_date``.

        Args:
            tenant_id: The tenant identifier
            before_date: Delete executions created before this date
            batch_size: Maximum number of executions to delete in one batch

        Returns:
            The number of executions deleted
        """
        return self._delete_matching_in_batches(
            WorkflowNodeExecutionModel.tenant_id == tenant_id,
            WorkflowNodeExecutionModel.created_at < before_date,
            batch_size=batch_size,
        )

    def delete_executions_by_app(
        self,
        tenant_id: str,
        app_id: str,
        batch_size: int = 1000,
    ) -> int:
        """
        Delete every node execution of an app, in batches.

        Args:
            tenant_id: The tenant identifier
            app_id: The application identifier
            batch_size: Maximum number of executions to delete in one batch

        Returns:
            The total number of executions deleted
        """
        return self._delete_matching_in_batches(
            WorkflowNodeExecutionModel.tenant_id == tenant_id,
            WorkflowNodeExecutionModel.app_id == app_id,
            batch_size=batch_size,
        )

    def get_expired_executions_batch(
        self,
        tenant_id: str,
        before_date: datetime,
        batch_size: int = 1000,
    ) -> Sequence[WorkflowNodeExecutionModel]:
        """
        Read up to ``batch_size`` of a tenant's executions created before ``before_date``.

        Nothing is deleted; the query applies no explicit ordering.

        Args:
            tenant_id: The tenant identifier
            before_date: Get executions created before this date
            batch_size: Maximum number of executions to retrieve

        Returns:
            A sequence of WorkflowNodeExecutionModel instances
        """
        model = WorkflowNodeExecutionModel
        query = (
            select(model)
            .where(model.tenant_id == tenant_id, model.created_at < before_date)
            .limit(batch_size)
        )

        with self._session_maker() as session:
            return session.scalars(query).all()

    def delete_executions_by_ids(
        self,
        execution_ids: Sequence[str],
    ) -> int:
        """
        Delete the node executions with the given IDs in one statement.

        No tenant filter is applied; the caller is responsible for tenant
        isolation. An empty ``execution_ids`` returns 0 without opening a session.

        Args:
            execution_ids: List of execution IDs to delete

        Returns:
            The number of executions deleted
        """
        if not execution_ids:
            return 0

        removal = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
        with self._session_maker() as session:
            outcome = session.execute(removal)
            session.commit()
            return outcome.rowcount
|
||||
202
api/repositories/sqlalchemy_api_workflow_run_repository.py
Normal file
202
api/repositories/sqlalchemy_api_workflow_run_repository.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
SQLAlchemy API WorkflowRun Repository Implementation
|
||||
|
||||
This module provides the SQLAlchemy-based implementation of the APIWorkflowRunRepository
|
||||
protocol. It handles service-layer WorkflowRun database operations using SQLAlchemy 2.0
|
||||
style queries with proper session management and multi-tenant data isolation.
|
||||
|
||||
Key Features:
|
||||
- SQLAlchemy 2.0 style queries for modern database operations
|
||||
- Cursor-based pagination for efficient large dataset handling
|
||||
- Bulk operations with batch processing for performance
|
||||
- Multi-tenant data isolation and security
|
||||
- Proper session management with dependency injection
|
||||
|
||||
Implementation Notes:
|
||||
- Uses sessionmaker for consistent session management
|
||||
- Implements cursor-based pagination using created_at timestamps
|
||||
- Provides efficient bulk deletion with batch processing
|
||||
- Maintains data consistency with proper transaction handling
|
||||
"""
|
||||
|
||||
import logging
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Optional, cast
|
||||
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from models.workflow import WorkflowRun
|
||||
|
||||
# Module-level logger; reports how many workflow runs the delete operations removed.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DifyAPISQLAlchemyWorkflowRunRepository:
    """
    SQLAlchemy implementation of APIWorkflowRunRepository.

    Provides service-layer WorkflowRun database operations using SQLAlchemy 2.0
    style queries. The sessionmaker is injected through the constructor and
    every operation opens its own session in a ``with`` block. Tenant-scoped
    methods filter on ``tenant_id`` (and ``app_id`` where applicable).

    Args:
        session_maker: SQLAlchemy sessionmaker instance for database connections
    """

    def __init__(self, session_maker: sessionmaker[Session]) -> None:
        """
        Initialize the repository with a sessionmaker.

        Args:
            session_maker: SQLAlchemy sessionmaker for database connections
        """
        self._session_maker = session_maker

    def get_paginated_workflow_runs(
        self,
        tenant_id: str,
        app_id: str,
        triggered_from: str,
        limit: int = 20,
        last_id: Optional[str] = None,
    ) -> InfiniteScrollPagination:
        """
        Get paginated workflow runs with filtering.

        Implements cursor-based (keyset) pagination over ``(created_at, id)``,
        newest first. Using the ID as a tie-breaker keeps the order stable and
        ensures runs sharing the cursor's ``created_at`` are not skipped
        between pages.

        Args:
            tenant_id: Tenant identifier for multi-tenant isolation
            app_id: Application identifier
            triggered_from: Trigger source to filter by
            limit: Maximum number of records to return
            last_id: ID of the last record of the previous page, or None for the first page

        Returns:
            InfiniteScrollPagination with the page data, the applied limit and
            a has_more flag

        Raises:
            ValueError: If last_id is given but no matching run exists for
                this tenant, app and trigger source
        """
        with self._session_maker() as session:
            # Filters shared by the cursor lookup and the page query.
            base_stmt = select(WorkflowRun).where(
                WorkflowRun.tenant_id == tenant_id,
                WorkflowRun.app_id == app_id,
                WorkflowRun.triggered_from == triggered_from,
            )

            if last_id:
                # Resolve the cursor row within the same tenant/app/trigger scope.
                last_workflow_run = session.scalar(base_stmt.where(WorkflowRun.id == last_id))

                if not last_workflow_run:
                    raise ValueError("Last workflow run not exists")

                # Keep rows strictly after the cursor in (created_at DESC, id DESC)
                # order; the second clause covers rows with an identical timestamp.
                base_stmt = base_stmt.where(
                    (WorkflowRun.created_at < last_workflow_run.created_at)
                    | (
                        (WorkflowRun.created_at == last_workflow_run.created_at)
                        & (WorkflowRun.id < last_workflow_run.id)
                    )
                )

            # Fetch one extra row to learn whether another page exists.
            page_stmt = base_stmt.order_by(WorkflowRun.created_at.desc(), WorkflowRun.id.desc()).limit(limit + 1)
            workflow_runs = session.scalars(page_stmt).all()

            has_more = len(workflow_runs) > limit
            if has_more:
                # Drop the probe row so the page holds exactly `limit` items.
                workflow_runs = workflow_runs[:limit]

            return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)

    def get_workflow_run_by_id(
        self,
        tenant_id: str,
        app_id: str,
        run_id: str,
    ) -> Optional[WorkflowRun]:
        """
        Get a specific workflow run by ID with tenant and app isolation.

        Args:
            tenant_id: Tenant identifier for multi-tenant isolation
            app_id: Application identifier
            run_id: Workflow run identifier

        Returns:
            The WorkflowRun if one matches all three identifiers, None otherwise
        """
        with self._session_maker() as session:
            stmt = select(WorkflowRun).where(
                WorkflowRun.tenant_id == tenant_id,
                WorkflowRun.app_id == app_id,
                WorkflowRun.id == run_id,
            )
            return cast(Optional[WorkflowRun], session.scalar(stmt))

    def get_expired_runs_batch(
        self,
        tenant_id: str,
        before_date: datetime,
        batch_size: int = 1000,
    ) -> Sequence[WorkflowRun]:
        """
        Get a batch of expired workflow runs for cleanup operations.

        The query applies no explicit ordering.

        Args:
            tenant_id: Tenant identifier for multi-tenant isolation
            before_date: Only runs created before this date are returned
            batch_size: Maximum number of records to return

        Returns:
            Up to ``batch_size`` WorkflowRun objects
        """
        with self._session_maker() as session:
            stmt = (
                select(WorkflowRun)
                .where(
                    WorkflowRun.tenant_id == tenant_id,
                    WorkflowRun.created_at < before_date,
                )
                .limit(batch_size)
            )
            return cast(Sequence[WorkflowRun], session.scalars(stmt).all())

    def delete_runs_by_ids(
        self,
        run_ids: Sequence[str],
    ) -> int:
        """
        Delete workflow runs by their IDs using bulk deletion.

        No tenant filter is applied. An empty ``run_ids`` returns 0 without
        opening a session.

        Args:
            run_ids: Sequence of workflow run IDs to delete

        Returns:
            The row count reported by the DELETE statement
        """
        if not run_ids:
            return 0

        with self._session_maker() as session:
            stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
            result = session.execute(stmt)
            session.commit()

            deleted_count = cast(int, result.rowcount)
            logger.info(f"Deleted {deleted_count} workflow runs by IDs")
            return deleted_count

    def delete_runs_by_app(
        self,
        tenant_id: str,
        app_id: str,
        batch_size: int = 1000,
    ) -> int:
        """
        Delete all workflow runs for a specific app in batches.

        Each round opens a new session, selects up to ``batch_size`` run IDs,
        deletes them and commits.

        Args:
            tenant_id: Tenant identifier for multi-tenant isolation
            app_id: Application identifier
            batch_size: Number of records to process in each batch

        Returns:
            Sum of the row counts reported by the DELETE statements
        """
        total_deleted = 0

        while True:
            with self._session_maker() as session:
                # Get a batch of run IDs to delete
                stmt = (
                    select(WorkflowRun.id)
                    .where(
                        WorkflowRun.tenant_id == tenant_id,
                        WorkflowRun.app_id == app_id,
                    )
                    .limit(batch_size)
                )
                run_ids = session.scalars(stmt).all()

                if not run_ids:
                    break

                # Delete the batch
                delete_stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
                result = session.execute(delete_stmt)
                session.commit()

                batch_deleted = result.rowcount
                total_deleted += batch_deleted

                logger.info(f"Deleted batch of {batch_deleted} workflow runs for app {app_id}")

                # Stop once the SELECT came back short. The DELETE's rowcount is
                # not used here: it can be lower than the fetched batch (rows
                # removed concurrently) or unavailable, which would end the
                # loop while matching rows still exist.
                if len(run_ids) < batch_size:
                    break

        logger.info(f"Total deleted {total_deleted} workflow runs for app {app_id}")
        return total_deleted
|
||||
Reference in New Issue
Block a user