fix: fix Cannot destructure property 'name' of 'value' as it is undefined… (#30991)

chore: bump version to 1.11.4 (#30961)
build: require node 24.13.0 (#30945)
2026-04-07 15:39:26 +08:00 · 2026-01-15 13:25:30 +08:00 · 2026-01-15 11:40:33 +08:00 · 2026-01-15 11:40:27 +08:00 · 2026-01-15 11:40:13 +08:00 · 2026-01-15 11:40:02 +08:00
324 changed files with 1675 additions and 17811 deletions
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -90,7 +90,7 @@ jobs:
        uses: actions/setup-node@v6
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
-          node-version: 22
+          node-version: 24
          cache: pnpm
          cache-dependency-path: ./web/pnpm-lock.yaml

--- a/.github/workflows/tool-test-sdks.yaml
+++ b/.github/workflows/tool-test-sdks.yaml
@@ -16,10 +16,6 @@ jobs:
    name: unit test for Node.js SDK
    runs-on: ubuntu-latest

-    strategy:
-      matrix:
-        node-version: [16, 18, 20, 22]
-
    defaults:
      run:
        working-directory: sdks/nodejs-client
@@ -29,10 +25,10 @@ jobs:
        with:
          persist-credentials: false

-      - name: Use Node.js ${{ matrix.node-version }}
+      - name: Use Node.js
        uses: actions/setup-node@v6
        with:
-          node-version: ${{ matrix.node-version }}
+          node-version: 24
          cache: ''
          cache-dependency-path: 'pnpm-lock.yaml'

--- a/.github/workflows/translate-i18n-claude.yml
+++ b/.github/workflows/translate-i18n-claude.yml
@@ -57,7 +57,7 @@ jobs:
      - name: Set up Node.js
        uses: actions/setup-node@v6
        with:
-          node-version: 'lts/*'
+          node-version: 24
          cache: pnpm
          cache-dependency-path: ./web/pnpm-lock.yaml

--- a/.github/workflows/web-tests.yml
+++ b/.github/workflows/web-tests.yml
@@ -31,7 +31,7 @@ jobs:
      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
-          node-version: 22
+          node-version: 24
          cache: pnpm
          cache-dependency-path: ./web/pnpm-lock.yaml

--- a/api/configs/feature/init.py
+++ b/api/configs/feature/init.py
@@ -959,16 +959,6 @@ class MailConfig(BaseSettings):
        default=None,
    )

-    ENABLE_TRIAL_APP: bool = Field(
-        description="Enable trial app",
-        default=False,
-    )
-
-    ENABLE_EXPLORE_BANNER: bool = Field(
-        description="Enable explore banner",
-        default=False,
-    )
-

 class RagEtlConfig(BaseSettings):
    """
--- a/api/controllers/console/init.py
+++ b/api/controllers/console/init.py
@@ -107,12 +107,10 @@ from .datasets.rag_pipeline import (

 # Import explore controllers
 from .explore import (
-    banner,
    installed_app,
    parameter,
    recommended_app,
    saved_message,
-    trial,
 )

 # Import tag controllers
@@ -147,7 +145,6 @@ __all__ = [
    "apikey",
    "app",
    "audio",
-    "banner",
    "billing",
    "bp",
    "completion",
@@ -201,7 +198,6 @@ __all__ = [
    "statistic",
    "tags",
    "tool_providers",
-    "trial",
    "trigger_providers",
    "version",
    "website",
--- a/api/controllers/console/admin.py
+++ b/api/controllers/console/admin.py
@@ -15,7 +15,7 @@ from controllers.console.wraps import only_edition_cloud
 from core.db.session_factory import session_factory
 from extensions.ext_database import db
 from libs.token import extract_access_token
-from models.model import App, ExporleBanner, InstalledApp, RecommendedApp, TrialApp
+from models.model import App, InstalledApp, RecommendedApp

 P = ParamSpec("P")
 R = TypeVar("R")
@@ -32,8 +32,6 @@ class InsertExploreAppPayload(BaseModel):
    language: str = Field(...)
    category: str = Field(...)
    position: int = Field(...)
-    can_trial: bool = Field(default=False)
-    trial_limit: int = Field(default=0)

    @field_validator("language")
    @classmethod
@@ -41,33 +39,11 @@ class InsertExploreAppPayload(BaseModel):
        return supported_language(value)


-class InsertExploreBannerPayload(BaseModel):
-    category: str = Field(...)
-    title: str = Field(...)
-    description: str = Field(...)
-    img_src: str = Field(..., alias="img-src")
-    language: str = Field(default="en-US")
-    link: str = Field(...)
-    sort: int = Field(...)
-
-    @field_validator("language")
-    @classmethod
-    def validate_language(cls, value: str) -> str:
-        return supported_language(value)
-
-    model_config = {"populate_by_name": True}
-
-
 console_ns.schema_model(
    InsertExploreAppPayload.__name__,
    InsertExploreAppPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
 )

-console_ns.schema_model(
-    InsertExploreBannerPayload.__name__,
-    InsertExploreBannerPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
-)
-

 def admin_required(view: Callable[P, R]):
    @wraps(view)
@@ -133,20 +109,6 @@ class InsertExploreAppListApi(Resource):
                )

                db.session.add(recommended_app)
-                if payload.can_trial:
-                    trial_app = db.session.execute(
-                        select(TrialApp).where(TrialApp.app_id == payload.app_id)
-                    ).scalar_one_or_none()
-                    if not trial_app:
-                        db.session.add(
-                            TrialApp(
-                                app_id=payload.app_id,
-                                tenant_id=app.tenant_id,
-                                trial_limit=payload.trial_limit,
-                            )
-                        )
-                    else:
-                        trial_app.trial_limit = payload.trial_limit

                app.is_public = True
                db.session.commit()
@@ -161,20 +123,6 @@ class InsertExploreAppListApi(Resource):
                recommended_app.category = payload.category
                recommended_app.position = payload.position

-                if payload.can_trial:
-                    trial_app = db.session.execute(
-                        select(TrialApp).where(TrialApp.app_id == payload.app_id)
-                    ).scalar_one_or_none()
-                    if not trial_app:
-                        db.session.add(
-                            TrialApp(
-                                app_id=payload.app_id,
-                                tenant_id=app.tenant_id,
-                                trial_limit=payload.trial_limit,
-                            )
-                        )
-                    else:
-                        trial_app.trial_limit = payload.trial_limit
                app.is_public = True

                db.session.commit()
@@ -220,62 +168,7 @@ class InsertExploreAppApi(Resource):
            for installed_app in installed_apps:
                session.delete(installed_app)

-            trial_app = session.execute(
-                select(TrialApp).where(TrialApp.app_id == recommended_app.app_id)
-            ).scalar_one_or_none()
-            if trial_app:
-                session.delete(trial_app)
-
        db.session.delete(recommended_app)
        db.session.commit()

        return {"result": "success"}, 204
-
-
-@console_ns.route("/admin/insert-explore-banner")
-class InsertExploreBannerApi(Resource):
-    @console_ns.doc("insert_explore_banner")
-    @console_ns.doc(description="Insert an explore banner")
-    @console_ns.expect(console_ns.models[InsertExploreBannerPayload.__name__])
-    @console_ns.response(201, "Banner inserted successfully")
-    @only_edition_cloud
-    @admin_required
-    def post(self):
-        payload = InsertExploreBannerPayload.model_validate(console_ns.payload)
-
-        content = {
-            "category": payload.category,
-            "title": payload.title,
-            "description": payload.description,
-            "img-src": payload.img_src,
-        }
-
-        banner = ExporleBanner(
-            content=content,
-            link=payload.link,
-            sort=payload.sort,
-            language=payload.language,
-        )
-        db.session.add(banner)
-        db.session.commit()
-
-        return {"result": "success"}, 201
-
-
-@console_ns.route("/admin/insert-explore-banner/<uuid:banner_id>")
-class DeleteExploreBannerApi(Resource):
-    @console_ns.doc("delete_explore_banner")
-    @console_ns.doc(description="Delete an explore banner")
-    @console_ns.doc(params={"banner_id": "Banner ID to delete"})
-    @console_ns.response(204, "Banner deleted successfully")
-    @only_edition_cloud
-    @admin_required
-    def delete(self, banner_id):
-        banner = db.session.execute(select(ExporleBanner).where(ExporleBanner.id == banner_id)).scalar_one_or_none()
-        if not banner:
-            raise NotFound(f"Banner '{banner_id}' is not found")
-
-        db.session.delete(banner)
-        db.session.commit()
-
-        return {"result": "success"}, 204
--- a/api/controllers/console/app/annotation.py
+++ b/api/controllers/console/app/annotation.py
@@ -272,7 +272,6 @@ class AnnotationExportApi(Resource):
    @account_initialization_required
    @edit_permission_required
    def get(self, app_id):
-
        app_id = str(app_id)
        annotation_list = AppAnnotationService.export_annotation_list_by_app_id(app_id)
        response_data = {"data": marshal(annotation_list, annotation_fields)}
@@ -360,6 +359,7 @@ class AnnotationBatchImportApi(Resource):
        file.seek(0, 2)  # Seek to end of file
        file_size = file.tell()
        file.seek(0)  # Reset to beginning
+
        max_size_bytes = dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT * 1024 * 1024
        if file_size > max_size_bytes:
            abort(
--- a/api/controllers/console/app/error.py
+++ b/api/controllers/console/app/error.py
@@ -115,9 +115,3 @@ class InvokeRateLimitError(BaseHTTPException):
    error_code = "rate_limit_error"
    description = "Rate Limit Error"
    code = 429
-
-
-class NeedAddIdsError(BaseHTTPException):
-    error_code = "need_add_ids"
-    description = "Need to add ids."
-    code = 400
--- a/api/controllers/console/app/message.py
+++ b/api/controllers/console/app/message.py
@@ -202,7 +202,6 @@ message_detail_model = console_ns.model(
        "status": fields.String,
        "error": fields.String,
        "parent_message_id": fields.String,
-        "generation_detail": fields.Raw,
    },
 )

--- a/api/controllers/console/app/wraps.py
+++ b/api/controllers/console/app/wraps.py
@@ -23,11 +23,6 @@ def _load_app_model(app_id: str) -> App | None:
    return app_model


-def _load_app_model_with_trial(app_id: str) -> App | None:
-    app_model = db.session.query(App).where(App.id == app_id, App.status == "normal").first()
-    return app_model
-
-
 def get_app_model(view: Callable[P, R] | None = None, *, mode: Union[AppMode, list[AppMode], None] = None):
    def decorator(view_func: Callable[P1, R1]):
        @wraps(view_func)
@@ -67,44 +62,3 @@ def get_app_model(view: Callable[P, R] | None = None, *, mode: Union[AppMode, li
        return decorator
    else:
        return decorator(view)
-
-
-def get_app_model_with_trial(view: Callable[P, R] | None = None, *, mode: Union[AppMode, list[AppMode], None] = None):
-    def decorator(view_func: Callable[P, R]):
-        @wraps(view_func)
-        def decorated_view(*args: P.args, **kwargs: P.kwargs):
-            if not kwargs.get("app_id"):
-                raise ValueError("missing app_id in path parameters")
-
-            app_id = kwargs.get("app_id")
-            app_id = str(app_id)
-
-            del kwargs["app_id"]
-
-            app_model = _load_app_model_with_trial(app_id)
-
-            if not app_model:
-                raise AppNotFoundError()
-
-            app_mode = AppMode.value_of(app_model.mode)
-
-            if mode is not None:
-                if isinstance(mode, list):
-                    modes = mode
-                else:
-                    modes = [mode]
-
-                if app_mode not in modes:
-                    mode_values = {m.value for m in modes}
-                    raise AppNotFoundError(f"App mode is not in the supported list: {mode_values}")
-
-            kwargs["app_model"] = app_model
-
-            return view_func(*args, **kwargs)
-
-        return decorated_view
-
-    if view is None:
-        return decorator
-    else:
-        return decorator(view)
--- a/api/controllers/console/auth/oauth.py
+++ b/api/controllers/console/auth/oauth.py
@@ -161,7 +161,10 @@ class OAuthCallback(Resource):
            ip_address=extract_remote_ip(request),
        )

-        response = redirect(f"{dify_config.CONSOLE_WEB_URL}?oauth_new_user={str(oauth_new_user).lower()}")
+        base_url = dify_config.CONSOLE_WEB_URL
+        query_char = "&" if "?" in base_url else "?"
+        target_url = f"{base_url}{query_char}oauth_new_user={str(oauth_new_user).lower()}"
+        response = redirect(target_url)

        set_access_token_to_cookie(request, response, token_pair.access_token)
        set_refresh_token_to_cookie(request, response, token_pair.refresh_token)
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -146,7 +146,6 @@ class DatasetUpdatePayload(BaseModel):
    embedding_model: str | None = None
    embedding_model_provider: str | None = None
    retrieval_model: dict[str, Any] | None = None
-    summary_index_setting: dict[str, Any] | None = None
    partial_member_list: list[dict[str, str]] | None = None
    external_retrieval_model: dict[str, Any] | None = None
    external_knowledge_id: str | None = None
--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@@ -39,10 +39,9 @@ from fields.document_fields import (
 from libs.datetime_utils import naive_utc_now
 from libs.login import current_account_with_tenant, login_required
 from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
-from models.dataset import DocumentPipelineExecutionLog, DocumentSegmentSummary
+from models.dataset import DocumentPipelineExecutionLog
 from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel
-from tasks.generate_summary_index_task import generate_summary_index_task

 from ..app.error import (
    ProviderModelCurrentlyNotSupportError,
@@ -105,10 +104,6 @@ class DocumentRenamePayload(BaseModel):
    name: str


-class GenerateSummaryPayload(BaseModel):
-    document_list: list[str]
-
-
 register_schema_models(
    console_ns,
    KnowledgeConfig,
@@ -116,7 +111,6 @@ register_schema_models(
    RetrievalModel,
    DocumentRetryPayload,
    DocumentRenamePayload,
-    GenerateSummaryPayload,
 )


@@ -301,97 +295,6 @@ class DatasetDocumentListApi(Resource):

        paginated_documents = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)
        documents = paginated_documents.items
-        
-        # Check if dataset has summary index enabled
-        has_summary_index = (
-            dataset.summary_index_setting
-            and dataset.summary_index_setting.get("enable") is True
-        )
-        
-        # Filter documents that need summary calculation
-        documents_need_summary = [doc for doc in documents if doc.need_summary is True]
-        document_ids_need_summary = [str(doc.id) for doc in documents_need_summary]
-        
-        # Calculate summary_index_status for documents that need summary (only if dataset summary index is enabled)
-        summary_status_map = {}
-        if has_summary_index and document_ids_need_summary:
-            # Get all segments for these documents (excluding qa_model and re_segment)
-            segments = (
-                db.session.query(DocumentSegment.id, DocumentSegment.document_id)
-                .where(
-                    DocumentSegment.document_id.in_(document_ids_need_summary),
-                    DocumentSegment.status != "re_segment",
-                    DocumentSegment.tenant_id == current_tenant_id,
-                )
-                .all()
-            )
-            
-            # Group segments by document_id
-            document_segments_map = {}
-            for segment in segments:
-                doc_id = str(segment.document_id)
-                if doc_id not in document_segments_map:
-                    document_segments_map[doc_id] = []
-                document_segments_map[doc_id].append(segment.id)
-            
-            # Get all summary records for these segments
-            all_segment_ids = [seg.id for seg in segments]
-            summaries = {}
-            if all_segment_ids:
-                summary_records = (
-                    db.session.query(DocumentSegmentSummary)
-                    .where(
-                        DocumentSegmentSummary.chunk_id.in_(all_segment_ids),
-                        DocumentSegmentSummary.dataset_id == dataset_id,
-                        DocumentSegmentSummary.enabled == True,  # Only count enabled summaries
-                    )
-                    .all()
-                )
-                summaries = {summary.chunk_id: summary.status for summary in summary_records}
-            
-            # Calculate summary_index_status for each document
-            for doc_id in document_ids_need_summary:
-                segment_ids = document_segments_map.get(doc_id, [])
-                if not segment_ids:
-                    # No segments, status is "GENERATING" (waiting to generate)
-                    summary_status_map[doc_id] = "GENERATING"
-                    continue
-                
-                # Count summary statuses for this document's segments
-                status_counts = {"completed": 0, "generating": 0, "error": 0, "not_started": 0}
-                for segment_id in segment_ids:
-                    status = summaries.get(segment_id, "not_started")
-                    if status in status_counts:
-                        status_counts[status] += 1
-                    else:
-                        status_counts["not_started"] += 1
-                
-                total_segments = len(segment_ids)
-                completed_count = status_counts["completed"]
-                generating_count = status_counts["generating"]
-                error_count = status_counts["error"]
-                
-                # Determine overall status (only three states: GENERATING, COMPLETED, ERROR)
-                if completed_count == total_segments:
-                    summary_status_map[doc_id] = "COMPLETED"
-                elif error_count > 0:
-                    # Has errors (even if some are completed or generating)
-                    summary_status_map[doc_id] = "ERROR"
-                elif generating_count > 0 or status_counts["not_started"] > 0:
-                    # Still generating or not started
-                    summary_status_map[doc_id] = "GENERATING"
-                else:
-                    # Default to generating
-                    summary_status_map[doc_id] = "GENERATING"
-        
-        # Add summary_index_status to each document
-        for document in documents:
-            if has_summary_index and document.need_summary is True:
-                document.summary_index_status = summary_status_map.get(str(document.id), "GENERATING")
-            else:
-                # Return null if summary index is not enabled or document doesn't need summary
-                document.summary_index_status = None
-        
        if fetch:
            for document in documents:
                completed_segments = (
@@ -490,7 +393,6 @@ class DatasetDocumentListApi(Resource):
        return {"result": "success"}, 204


-
@console_ns.route("/datasets/init")
 class DatasetInitApi(Resource):
    @console_ns.doc("init_dataset")
@@ -878,7 +780,6 @@ class DocumentApi(DocumentResource):
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
-                "need_summary": document.need_summary if document.need_summary is not None else False,
            }
        else:
            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
@@ -914,7 +815,6 @@ class DocumentApi(DocumentResource):
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
-                "need_summary": document.need_summary if document.need_summary is not None else False,
            }

        return response, 200
@@ -1282,211 +1182,3 @@ class DocumentPipelineExecutionLogApi(DocumentResource):
            "input_data": log.input_data,
            "datasource_node_id": log.datasource_node_id,
        }, 200
-
-
-@console_ns.route("/datasets/<uuid:dataset_id>/documents/generate-summary")
-class DocumentGenerateSummaryApi(Resource):
-    @console_ns.doc("generate_summary_for_documents")
-    @console_ns.doc(description="Generate summary index for documents")
-    @console_ns.doc(params={"dataset_id": "Dataset ID"})
-    @console_ns.expect(console_ns.models[GenerateSummaryPayload.__name__])
-    @console_ns.response(200, "Summary generation started successfully")
-    @console_ns.response(400, "Invalid request or dataset configuration")
-    @console_ns.response(403, "Permission denied")
-    @console_ns.response(404, "Dataset not found")
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @cloud_edition_billing_rate_limit_check("knowledge")
-    def post(self, dataset_id):
-        """
-        Generate summary index for specified documents.
-        
-        This endpoint checks if the dataset configuration supports summary generation
-        (indexing_technique must be 'high_quality' and summary_index_setting.enable must be true),
-        then asynchronously generates summary indexes for the provided documents.
-        """
-        current_user, _ = current_account_with_tenant()
-        dataset_id = str(dataset_id)
-        
-        # Get dataset
-        dataset = DatasetService.get_dataset(dataset_id)
-        if not dataset:
-            raise NotFound("Dataset not found.")
-        
-        # Check permissions
-        if not current_user.is_dataset_editor:
-            raise Forbidden()
-        
-        try:
-            DatasetService.check_dataset_permission(dataset, current_user)
-        except services.errors.account.NoPermissionError as e:
-            raise Forbidden(str(e))
-        
-        # Validate request payload
-        payload = GenerateSummaryPayload.model_validate(console_ns.payload or {})
-        document_list = payload.document_list
-        
-        if not document_list:
-            raise ValueError("document_list cannot be empty.")
-        
-        # Check if dataset configuration supports summary generation
-        if dataset.indexing_technique != "high_quality":
-            raise ValueError(
-                f"Summary generation is only available for 'high_quality' indexing technique. "
-                f"Current indexing technique: {dataset.indexing_technique}"
-            )
-        
-        summary_index_setting = dataset.summary_index_setting
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            raise ValueError(
-                "Summary index is not enabled for this dataset. "
-                "Please enable it in the dataset settings."
-            )
-        
-        # Verify all documents exist and belong to the dataset
-        documents = (
-            db.session.query(Document)
-            .filter(
-                Document.id.in_(document_list),
-                Document.dataset_id == dataset_id,
-            )
-            .all()
-        )
-        
-        if len(documents) != len(document_list):
-            found_ids = {doc.id for doc in documents}
-            missing_ids = set(document_list) - found_ids
-            raise NotFound(f"Some documents not found: {list(missing_ids)}")
-        
-        # Dispatch async tasks for each document
-        for document in documents:
-            # Skip qa_model documents as they don't generate summaries
-            if document.doc_form == "qa_model":
-                logger.info(
-                    f"Skipping summary generation for qa_model document {document.id}"
-                )
-                continue
-            
-            # Dispatch async task
-            generate_summary_index_task(dataset_id, document.id)
-            logger.info(
-                f"Dispatched summary generation task for document {document.id} in dataset {dataset_id}"
-            )
-        
-        return {"result": "success"}, 200
-
-
-@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/summary-status")
-class DocumentSummaryStatusApi(DocumentResource):
-    @console_ns.doc("get_document_summary_status")
-    @console_ns.doc(description="Get summary index generation status for a document")
-    @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
-    @console_ns.response(200, "Summary status retrieved successfully")
-    @console_ns.response(404, "Document not found")
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self, dataset_id, document_id):
-        """
-        Get summary index generation status for a document.
-        
-        Returns:
-        - total_segments: Total number of segments in the document
-        - summary_status: Dictionary with status counts
-          - completed: Number of summaries completed
-          - generating: Number of summaries being generated
-          - error: Number of summaries with errors
-          - not_started: Number of segments without summary records
-        - summaries: List of summary records with status and content preview
-        """
-        current_user, _ = current_account_with_tenant()
-        dataset_id = str(dataset_id)
-        document_id = str(document_id)
-        
-        # Get document
-        document = self.get_document(dataset_id, document_id)
-        
-        # Get dataset
-        dataset = DatasetService.get_dataset(dataset_id)
-        if not dataset:
-            raise NotFound("Dataset not found.")
-        
-        # Check permissions
-        try:
-            DatasetService.check_dataset_permission(dataset, current_user)
-        except services.errors.account.NoPermissionError as e:
-            raise Forbidden(str(e))
-        
-        # Get all segments for this document
-        segments = (
-            db.session.query(DocumentSegment)
-            .filter(
-                DocumentSegment.document_id == document_id,
-                DocumentSegment.dataset_id == dataset_id,
-                DocumentSegment.status == "completed",
-                DocumentSegment.enabled == True,
-            )
-            .all()
-        )
-        
-        total_segments = len(segments)
-        
-        # Get all summary records for these segments
-        segment_ids = [segment.id for segment in segments]
-        summaries = []
-        if segment_ids:
-            summaries = (
-                db.session.query(DocumentSegmentSummary)
-                .filter(
-                    DocumentSegmentSummary.document_id == document_id,
-                    DocumentSegmentSummary.dataset_id == dataset_id,
-                    DocumentSegmentSummary.chunk_id.in_(segment_ids),
-                    DocumentSegmentSummary.enabled == True,  # Only return enabled summaries
-                )
-                .all()
-            )
-        
-        # Create a mapping of chunk_id to summary
-        summary_map = {summary.chunk_id: summary for summary in summaries}
-        
-        # Count statuses
-        status_counts = {
-            "completed": 0,
-            "generating": 0,
-            "error": 0,
-            "not_started": 0,
-        }
-        
-        summary_list = []
-        for segment in segments:
-            summary = summary_map.get(segment.id)
-            if summary:
-                status = summary.status
-                status_counts[status] = status_counts.get(status, 0) + 1
-                summary_list.append({
-                    "segment_id": segment.id,
-                    "segment_position": segment.position,
-                    "status": summary.status,
-                    "summary_preview": summary.summary_content[:100] + "..." if summary.summary_content and len(summary.summary_content) > 100 else summary.summary_content,
-                    "error": summary.error,
-                    "created_at": int(summary.created_at.timestamp()) if summary.created_at else None,
-                    "updated_at": int(summary.updated_at.timestamp()) if summary.updated_at else None,
-                })
-            else:
-                status_counts["not_started"] += 1
-                summary_list.append({
-                    "segment_id": segment.id,
-                    "segment_position": segment.position,
-                    "status": "not_started",
-                    "summary_preview": None,
-                    "error": None,
-                    "created_at": None,
-                    "updated_at": None,
-                })
-        
-        return {
-            "total_segments": total_segments,
-            "summary_status": status_counts,
-            "summaries": summary_list,
-        }, 200
--- a/api/controllers/console/datasets/datasets_segments.py
+++ b/api/controllers/console/datasets/datasets_segments.py
@@ -32,7 +32,7 @@ from extensions.ext_redis import redis_client
 from fields.segment_fields import child_chunk_fields, segment_fields
 from libs.helper import escape_like_pattern
 from libs.login import current_account_with_tenant, login_required
-from models.dataset import ChildChunk, DocumentSegment, DocumentSegmentSummary
+from models.dataset import ChildChunk, DocumentSegment
 from models.model import UploadFile
 from services.dataset_service import DatasetService, DocumentService, SegmentService
 from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs
@@ -41,23 +41,6 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS
 from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task


-def _get_segment_with_summary(segment, dataset_id):
-    """Helper function to marshal segment and add summary information."""
-    segment_dict = marshal(segment, segment_fields)
-    # Query summary for this segment (only enabled summaries)
-    summary = (
-        db.session.query(DocumentSegmentSummary)
-        .where(
-            DocumentSegmentSummary.chunk_id == segment.id,
-            DocumentSegmentSummary.dataset_id == dataset_id,
-            DocumentSegmentSummary.enabled == True,  # Only return enabled summaries
-        )
-        .first()
-    )
-    segment_dict["summary"] = summary.summary_content if summary else None
-    return segment_dict
-
-
 class SegmentListQuery(BaseModel):
    limit: int = Field(default=20, ge=1, le=100)
    status: list[str] = Field(default_factory=list)
@@ -80,7 +63,6 @@ class SegmentUpdatePayload(BaseModel):
    keywords: list[str] | None = None
    regenerate_child_chunks: bool = False
    attachment_ids: list[str] | None = None
-    summary: str | None = None  # Summary content for summary index


 class BatchImportPayload(BaseModel):
@@ -198,34 +180,8 @@ class DatasetDocumentSegmentListApi(Resource):

        segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)

-        # Query summaries for all segments in this page (batch query for efficiency)
-        segment_ids = [segment.id for segment in segments.items]
-        summaries = {}
-        if segment_ids:
-            summary_records = (
-                db.session.query(DocumentSegmentSummary)
-                .where(
-                    DocumentSegmentSummary.chunk_id.in_(segment_ids),
-                    DocumentSegmentSummary.dataset_id == dataset_id,
-                )
-                .all()
-            )
-            # Only include enabled summaries
-            summaries = {
-                summary.chunk_id: summary.summary_content 
-                for summary in summary_records 
-                if summary.enabled is True
-            }
-
-        # Add summary to each segment
-        segments_with_summary = []
-        for segment in segments.items:
-            segment_dict = marshal(segment, segment_fields)
-            segment_dict["summary"] = summaries.get(segment.id)
-            segments_with_summary.append(segment_dict)
-
        response = {
-            "data": segments_with_summary,
+            "data": marshal(segments.items, segment_fields),
            "limit": limit,
            "total": segments.total,
            "total_pages": segments.pages,
@@ -371,7 +327,7 @@ class DatasetDocumentSegmentAddApi(Resource):
        payload_dict = payload.model_dump(exclude_none=True)
        SegmentService.segment_create_args_validate(payload_dict, document)
        segment = SegmentService.create_segment(payload_dict, document, dataset)
-        return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200
+        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200


@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>")
@@ -433,12 +389,10 @@ class DatasetDocumentSegmentUpdateApi(Resource):
        payload = SegmentUpdatePayload.model_validate(console_ns.payload or {})
        payload_dict = payload.model_dump(exclude_none=True)
        SegmentService.segment_create_args_validate(payload_dict, document)
-        
-        # Update segment (summary update with change detection is handled in SegmentService.update_segment)
        segment = SegmentService.update_segment(
            SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset
        )
-        return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200
+        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200

    @setup_required
    @login_required
--- a/api/controllers/console/datasets/hit_testing.py
+++ b/api/controllers/console/datasets/hit_testing.py
@@ -1,4 +1,4 @@
-from flask_restx import Resource, fields
+from flask_restx import Resource

 from controllers.common.schema import register_schema_model
 from libs.login import login_required
@@ -10,56 +10,17 @@ from ..wraps import (
    cloud_edition_billing_rate_limit_check,
    setup_required,
 )
-from fields.hit_testing_fields import (
-    child_chunk_fields,
-    document_fields,
-    files_fields,
-    hit_testing_record_fields,
-    segment_fields,
-)

 register_schema_model(console_ns, HitTestingPayload)


-def _get_or_create_model(model_name: str, field_def):
-    """Get or create a flask_restx model to avoid dict type issues in Swagger."""
-    existing = console_ns.models.get(model_name)
-    if existing is None:
-        existing = console_ns.model(model_name, field_def)
-    return existing
-
-
-# Register models for flask_restx to avoid dict type issues in Swagger
-document_model = _get_or_create_model("HitTestingDocument", document_fields)
-
-segment_fields_copy = segment_fields.copy()
-segment_fields_copy["document"] = fields.Nested(document_model)
-segment_model = _get_or_create_model("HitTestingSegment", segment_fields_copy)
-
-child_chunk_model = _get_or_create_model("HitTestingChildChunk", child_chunk_fields)
-files_model = _get_or_create_model("HitTestingFile", files_fields)
-
-hit_testing_record_fields_copy = hit_testing_record_fields.copy()
-hit_testing_record_fields_copy["segment"] = fields.Nested(segment_model)
-hit_testing_record_fields_copy["child_chunks"] = fields.List(fields.Nested(child_chunk_model))
-hit_testing_record_fields_copy["files"] = fields.List(fields.Nested(files_model))
-hit_testing_record_model = _get_or_create_model("HitTestingRecord", hit_testing_record_fields_copy)
-
-# Response model for hit testing API
-hit_testing_response_fields = {
-    "query": fields.String,
-    "records": fields.List(fields.Nested(hit_testing_record_model)),
-}
-hit_testing_response_model = _get_or_create_model("HitTestingResponse", hit_testing_response_fields)
-
-
@console_ns.route("/datasets/<uuid:dataset_id>/hit-testing")
 class HitTestingApi(Resource, DatasetsHitTestingBase):
    @console_ns.doc("test_dataset_retrieval")
    @console_ns.doc(description="Test dataset knowledge retrieval")
    @console_ns.doc(params={"dataset_id": "Dataset ID"})
    @console_ns.expect(console_ns.models[HitTestingPayload.__name__])
-    @console_ns.response(200, "Hit testing completed successfully", model=hit_testing_response_model)
+    @console_ns.response(200, "Hit testing completed successfully")
    @console_ns.response(404, "Dataset not found")
    @console_ns.response(400, "Invalid parameters")
    @setup_required
--- a/api/controllers/console/explore/banner.py
+++ b/api/controllers/console/explore/banner.py
@@ -1,43 +0,0 @@
-from flask import request
-from flask_restx import Resource
-
-from controllers.console import api
-from controllers.console.explore.wraps import explore_banner_enabled
-from extensions.ext_database import db
-from models.model import ExporleBanner
-
-
-class BannerApi(Resource):
-    """Resource for banner list."""
-
-    @explore_banner_enabled
-    def get(self):
-        """Get banner list."""
-        language = request.args.get("language", "en-US")
-
-        # Build base query for enabled banners
-        base_query = db.session.query(ExporleBanner).where(ExporleBanner.status == "enabled")
-
-        # Try to get banners in the requested language
-        banners = base_query.where(ExporleBanner.language == language).order_by(ExporleBanner.sort).all()
-
-        # Fallback to en-US if no banners found and language is not en-US
-        if not banners and language != "en-US":
-            banners = base_query.where(ExporleBanner.language == "en-US").order_by(ExporleBanner.sort).all()
-        # Convert banners to serializable format
-        result = []
-        for banner in banners:
-            banner_data = {
-                "id": banner.id,
-                "content": banner.content,  # Already parsed as JSON by SQLAlchemy
-                "link": banner.link,
-                "sort": banner.sort,
-                "status": banner.status,
-                "created_at": banner.created_at.isoformat() if banner.created_at else None,
-            }
-            result.append(banner_data)
-
-        return result
-
-
-api.add_resource(BannerApi, "/explore/banners")
--- a/api/controllers/console/explore/error.py
+++ b/api/controllers/console/explore/error.py
@@ -29,25 +29,3 @@ class AppAccessDeniedError(BaseHTTPException):
    error_code = "access_denied"
    description = "App access denied."
    code = 403
-
-
-class TrialAppNotAllowed(BaseHTTPException):
-    """*403* `Trial App Not Allowed`
-
-    Raise if the user has reached the trial app limit.
-    """
-
-    error_code = "trial_app_not_allowed"
-    code = 403
-    description = "the app is not allowed to be trial."
-
-
-class TrialAppLimitExceeded(BaseHTTPException):
-    """*403* `Trial App Limit Exceeded`
-
-    Raise if the user has exceeded the trial app limit.
-    """
-
-    error_code = "trial_app_limit_exceeded"
-    code = 403
-    description = "The user has exceeded the trial app limit."
--- a/api/controllers/console/explore/recommended_app.py
+++ b/api/controllers/console/explore/recommended_app.py
@@ -29,7 +29,6 @@ recommended_app_fields = {
    "category": fields.String,
    "position": fields.Integer,
    "is_listed": fields.Boolean,
-    "can_trial": fields.Boolean,
 }

 recommended_app_list_fields = {
--- a/api/controllers/console/explore/trial.py
+++ b/api/controllers/console/explore/trial.py
@@ -1,512 +0,0 @@
-import logging
-from typing import Any, cast
-
-from flask import request
-from flask_restx import Resource, marshal, marshal_with, reqparse
-from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
-
-import services
-from controllers.common.fields import Parameters as ParametersResponse
-from controllers.common.fields import Site as SiteResponse
-from controllers.console import api
-from controllers.console.app.error import (
-    AppUnavailableError,
-    AudioTooLargeError,
-    CompletionRequestError,
-    ConversationCompletedError,
-    NeedAddIdsError,
-    NoAudioUploadedError,
-    ProviderModelCurrentlyNotSupportError,
-    ProviderNotInitializeError,
-    ProviderNotSupportSpeechToTextError,
-    ProviderQuotaExceededError,
-    UnsupportedAudioTypeError,
-)
-from controllers.console.app.wraps import get_app_model_with_trial
-from controllers.console.explore.error import (
-    AppSuggestedQuestionsAfterAnswerDisabledError,
-    NotChatAppError,
-    NotCompletionAppError,
-    NotWorkflowAppError,
-)
-from controllers.console.explore.wraps import TrialAppResource, trial_feature_enable
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
-from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict
-from core.app.apps.base_app_queue_manager import AppQueueManager
-from core.app.entities.app_invoke_entities import InvokeFrom
-from core.errors.error import (
-    ModelCurrentlyNotSupportError,
-    ProviderTokenNotInitError,
-    QuotaExceededError,
-)
-from core.model_runtime.errors.invoke import InvokeError
-from core.workflow.graph_engine.manager import GraphEngineManager
-from extensions.ext_database import db
-from fields.app_fields import app_detail_fields_with_site
-from fields.dataset_fields import dataset_fields
-from fields.workflow_fields import workflow_fields
-from libs import helper
-from libs.helper import uuid_value
-from libs.login import current_user
-from models import Account
-from models.account import TenantStatus
-from models.model import AppMode, Site
-from models.workflow import Workflow
-from services.app_generate_service import AppGenerateService
-from services.app_service import AppService
-from services.audio_service import AudioService
-from services.dataset_service import DatasetService
-from services.errors.audio import (
-    AudioTooLargeServiceError,
-    NoAudioUploadedServiceError,
-    ProviderNotSupportSpeechToTextServiceError,
-    UnsupportedAudioTypeServiceError,
-)
-from services.errors.conversation import ConversationNotExistsError
-from services.errors.llm import InvokeRateLimitError
-from services.errors.message import (
-    MessageNotExistsError,
-    SuggestedQuestionsAfterAnswerDisabledError,
-)
-from services.message_service import MessageService
-from services.recommended_app_service import RecommendedAppService
-
-logger = logging.getLogger(__name__)
-
-
-class TrialAppWorkflowRunApi(TrialAppResource):
-    def post(self, trial_app):
-        """
-        Run workflow
-        """
-        app_model = trial_app
-        if not app_model:
-            raise NotWorkflowAppError()
-        app_mode = AppMode.value_of(app_model.mode)
-        if app_mode != AppMode.WORKFLOW:
-            raise NotWorkflowAppError()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
-        parser.add_argument("files", type=list, required=False, location="json")
-        args = parser.parse_args()
-        assert current_user is not None
-        try:
-            app_id = app_model.id
-            user_id = current_user.id
-            response = AppGenerateService.generate(
-                app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True
-            )
-            RecommendedAppService.add_trial_app_record(app_id, user_id)
-            return helper.compact_generate_response(response)
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        except QuotaExceededError:
-            raise ProviderQuotaExceededError()
-        except ModelCurrentlyNotSupportError:
-            raise ProviderModelCurrentlyNotSupportError()
-        except InvokeError as e:
-            raise CompletionRequestError(e.description)
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
-        except ValueError as e:
-            raise e
-        except Exception:
-            logger.exception("internal server error.")
-            raise InternalServerError()
-
-
-class TrialAppWorkflowTaskStopApi(TrialAppResource):
-    def post(self, trial_app, task_id: str):
-        """
-        Stop workflow task
-        """
-        app_model = trial_app
-        if not app_model:
-            raise NotWorkflowAppError()
-        app_mode = AppMode.value_of(app_model.mode)
-        if app_mode != AppMode.WORKFLOW:
-            raise NotWorkflowAppError()
-        assert current_user is not None
-
-        # Stop using both mechanisms for backward compatibility
-        # Legacy stop flag mechanism (without user check)
-        AppQueueManager.set_stop_flag_no_user_check(task_id)
-
-        # New graph engine command channel mechanism
-        GraphEngineManager.send_stop_command(task_id)
-
-        return {"result": "success"}
-
-
-class TrialChatApi(TrialAppResource):
-    @trial_feature_enable
-    def post(self, trial_app):
-        app_model = trial_app
-        app_mode = AppMode.value_of(app_model.mode)
-        if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
-            raise NotChatAppError()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("inputs", type=dict, required=True, location="json")
-        parser.add_argument("query", type=str, required=True, location="json")
-        parser.add_argument("files", type=list, required=False, location="json")
-        parser.add_argument("conversation_id", type=uuid_value, location="json")
-        parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
-        parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
-        args = parser.parse_args()
-
-        args["auto_generate_name"] = False
-
-        try:
-            if not isinstance(current_user, Account):
-                raise ValueError("current_user must be an Account instance")
-
-            # Get IDs before they might be detached from session
-            app_id = app_model.id
-            user_id = current_user.id
-
-            response = AppGenerateService.generate(
-                app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True
-            )
-            RecommendedAppService.add_trial_app_record(app_id, user_id)
-            return helper.compact_generate_response(response)
-        except services.errors.conversation.ConversationNotExistsError:
-            raise NotFound("Conversation Not Exists.")
-        except services.errors.conversation.ConversationCompletedError:
-            raise ConversationCompletedError()
-        except services.errors.app_model_config.AppModelConfigBrokenError:
-            logger.exception("App model config broken.")
-            raise AppUnavailableError()
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        except QuotaExceededError:
-            raise ProviderQuotaExceededError()
-        except ModelCurrentlyNotSupportError:
-            raise ProviderModelCurrentlyNotSupportError()
-        except InvokeError as e:
-            raise CompletionRequestError(e.description)
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
-        except ValueError as e:
-            raise e
-        except Exception:
-            logger.exception("internal server error.")
-            raise InternalServerError()
-
-
-class TrialMessageSuggestedQuestionApi(TrialAppResource):
-    @trial_feature_enable
-    def get(self, trial_app, message_id):
-        app_model = trial_app
-        app_mode = AppMode.value_of(app_model.mode)
-        if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
-            raise NotChatAppError()
-
-        message_id = str(message_id)
-
-        try:
-            if not isinstance(current_user, Account):
-                raise ValueError("current_user must be an Account instance")
-            questions = MessageService.get_suggested_questions_after_answer(
-                app_model=app_model, user=current_user, message_id=message_id, invoke_from=InvokeFrom.EXPLORE
-            )
-        except MessageNotExistsError:
-            raise NotFound("Message not found")
-        except ConversationNotExistsError:
-            raise NotFound("Conversation not found")
-        except SuggestedQuestionsAfterAnswerDisabledError:
-            raise AppSuggestedQuestionsAfterAnswerDisabledError()
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        except QuotaExceededError:
-            raise ProviderQuotaExceededError()
-        except ModelCurrentlyNotSupportError:
-            raise ProviderModelCurrentlyNotSupportError()
-        except InvokeError as e:
-            raise CompletionRequestError(e.description)
-        except Exception:
-            logger.exception("internal server error.")
-            raise InternalServerError()
-
-        return {"data": questions}
-
-
-class TrialChatAudioApi(TrialAppResource):
-    @trial_feature_enable
-    def post(self, trial_app):
-        app_model = trial_app
-
-        file = request.files["file"]
-
-        try:
-            if not isinstance(current_user, Account):
-                raise ValueError("current_user must be an Account instance")
-
-            # Get IDs before they might be detached from session
-            app_id = app_model.id
-            user_id = current_user.id
-
-            response = AudioService.transcript_asr(app_model=app_model, file=file, end_user=None)
-            RecommendedAppService.add_trial_app_record(app_id, user_id)
-            return response
-        except services.errors.app_model_config.AppModelConfigBrokenError:
-            logger.exception("App model config broken.")
-            raise AppUnavailableError()
-        except NoAudioUploadedServiceError:
-            raise NoAudioUploadedError()
-        except AudioTooLargeServiceError as e:
-            raise AudioTooLargeError(str(e))
-        except UnsupportedAudioTypeServiceError:
-            raise UnsupportedAudioTypeError()
-        except ProviderNotSupportSpeechToTextServiceError:
-            raise ProviderNotSupportSpeechToTextError()
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        except QuotaExceededError:
-            raise ProviderQuotaExceededError()
-        except ModelCurrentlyNotSupportError:
-            raise ProviderModelCurrentlyNotSupportError()
-        except InvokeError as e:
-            raise CompletionRequestError(e.description)
-        except ValueError as e:
-            raise e
-        except Exception as e:
-            logger.exception("internal server error.")
-            raise InternalServerError()
-
-
-class TrialChatTextApi(TrialAppResource):
-    @trial_feature_enable
-    def post(self, trial_app):
-        app_model = trial_app
-        try:
-            parser = reqparse.RequestParser()
-            parser.add_argument("message_id", type=str, required=False, location="json")
-            parser.add_argument("voice", type=str, location="json")
-            parser.add_argument("text", type=str, location="json")
-            parser.add_argument("streaming", type=bool, location="json")
-            args = parser.parse_args()
-
-            message_id = args.get("message_id", None)
-            text = args.get("text", None)
-            voice = args.get("voice", None)
-            if not isinstance(current_user, Account):
-                raise ValueError("current_user must be an Account instance")
-
-            # Get IDs before they might be detached from session
-            app_id = app_model.id
-            user_id = current_user.id
-
-            response = AudioService.transcript_tts(app_model=app_model, text=text, voice=voice, message_id=message_id)
-            RecommendedAppService.add_trial_app_record(app_id, user_id)
-            return response
-        except services.errors.app_model_config.AppModelConfigBrokenError:
-            logger.exception("App model config broken.")
-            raise AppUnavailableError()
-        except NoAudioUploadedServiceError:
-            raise NoAudioUploadedError()
-        except AudioTooLargeServiceError as e:
-            raise AudioTooLargeError(str(e))
-        except UnsupportedAudioTypeServiceError:
-            raise UnsupportedAudioTypeError()
-        except ProviderNotSupportSpeechToTextServiceError:
-            raise ProviderNotSupportSpeechToTextError()
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        except QuotaExceededError:
-            raise ProviderQuotaExceededError()
-        except ModelCurrentlyNotSupportError:
-            raise ProviderModelCurrentlyNotSupportError()
-        except InvokeError as e:
-            raise CompletionRequestError(e.description)
-        except ValueError as e:
-            raise e
-        except Exception as e:
-            logger.exception("internal server error.")
-            raise InternalServerError()
-
-
-class TrialCompletionApi(TrialAppResource):
-    @trial_feature_enable
-    def post(self, trial_app):
-        app_model = trial_app
-        if app_model.mode != "completion":
-            raise NotCompletionAppError()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("inputs", type=dict, required=True, location="json")
-        parser.add_argument("query", type=str, location="json", default="")
-        parser.add_argument("files", type=list, required=False, location="json")
-        parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
-        parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
-        args = parser.parse_args()
-
-        streaming = args["response_mode"] == "streaming"
-        args["auto_generate_name"] = False
-
-        try:
-            if not isinstance(current_user, Account):
-                raise ValueError("current_user must be an Account instance")
-
-            # Get IDs before they might be detached from session
-            app_id = app_model.id
-            user_id = current_user.id
-
-            response = AppGenerateService.generate(
-                app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=streaming
-            )
-
-            RecommendedAppService.add_trial_app_record(app_id, user_id)
-            return helper.compact_generate_response(response)
-        except services.errors.conversation.ConversationNotExistsError:
-            raise NotFound("Conversation Not Exists.")
-        except services.errors.conversation.ConversationCompletedError:
-            raise ConversationCompletedError()
-        except services.errors.app_model_config.AppModelConfigBrokenError:
-            logger.exception("App model config broken.")
-            raise AppUnavailableError()
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        except QuotaExceededError:
-            raise ProviderQuotaExceededError()
-        except ModelCurrentlyNotSupportError:
-            raise ProviderModelCurrentlyNotSupportError()
-        except InvokeError as e:
-            raise CompletionRequestError(e.description)
-        except ValueError as e:
-            raise e
-        except Exception:
-            logger.exception("internal server error.")
-            raise InternalServerError()
-
-
-class TrialSitApi(Resource):
-    """Resource for trial app sites."""
-
-    @trial_feature_enable
-    @get_app_model_with_trial
-    def get(self, app_model):
-        """Retrieve app site info.
-
-        Returns the site configuration for the application including theme, icons, and text.
-        """
-        site = db.session.query(Site).where(Site.app_id == app_model.id).first()
-
-        if not site:
-            raise Forbidden()
-
-        assert app_model.tenant
-        if app_model.tenant.status == TenantStatus.ARCHIVE:
-            raise Forbidden()
-
-        return SiteResponse.model_validate(site).model_dump(mode="json")
-
-
-class TrialAppParameterApi(Resource):
-    """Resource for app variables."""
-
-    @trial_feature_enable
-    @get_app_model_with_trial
-    def get(self, app_model):
-        """Retrieve app parameters."""
-
-        if app_model is None:
-            raise AppUnavailableError()
-
-        if app_model.mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
-            workflow = app_model.workflow
-            if workflow is None:
-                raise AppUnavailableError()
-
-            features_dict = workflow.features_dict
-            user_input_form = workflow.user_input_form(to_old_structure=True)
-        else:
-            app_model_config = app_model.app_model_config
-            if app_model_config is None:
-                raise AppUnavailableError()
-
-            features_dict = app_model_config.to_dict()
-
-            user_input_form = features_dict.get("user_input_form", [])
-
-        parameters = get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form)
-        return ParametersResponse.model_validate(parameters).model_dump(mode="json")
-
-
-class AppApi(Resource):
-    @trial_feature_enable
-    @get_app_model_with_trial
-    @marshal_with(app_detail_fields_with_site)
-    def get(self, app_model):
-        """Get app detail"""
-
-        app_service = AppService()
-        app_model = app_service.get_app(app_model)
-
-        return app_model
-
-
-class AppWorkflowApi(Resource):
-    @trial_feature_enable
-    @get_app_model_with_trial
-    @marshal_with(workflow_fields)
-    def get(self, app_model):
-        """Get workflow detail"""
-        if not app_model.workflow_id:
-            raise AppUnavailableError()
-
-        workflow = (
-            db.session.query(Workflow)
-            .where(
-                Workflow.id == app_model.workflow_id,
-            )
-            .first()
-        )
-        return workflow
-
-
-class DatasetListApi(Resource):
-    @trial_feature_enable
-    @get_app_model_with_trial
-    def get(self, app_model):
-        page = request.args.get("page", default=1, type=int)
-        limit = request.args.get("limit", default=20, type=int)
-        ids = request.args.getlist("ids")
-
-        tenant_id = app_model.tenant_id
-        if ids:
-            datasets, total = DatasetService.get_datasets_by_ids(ids, tenant_id)
-        else:
-            raise NeedAddIdsError()
-
-        data = cast(list[dict[str, Any]], marshal(datasets, dataset_fields))
-
-        response = {"data": data, "has_more": len(datasets) == limit, "limit": limit, "total": total, "page": page}
-        return response
-
-
-api.add_resource(TrialChatApi, "/trial-apps/<uuid:app_id>/chat-messages", endpoint="trial_app_chat_completion")
-
-api.add_resource(
-    TrialMessageSuggestedQuestionApi,
-    "/trial-apps/<uuid:app_id>/messages/<uuid:message_id>/suggested-questions",
-    endpoint="trial_app_suggested_question",
-)
-
-api.add_resource(TrialChatAudioApi, "/trial-apps/<uuid:app_id>/audio-to-text", endpoint="trial_app_audio")
-api.add_resource(TrialChatTextApi, "/trial-apps/<uuid:app_id>/text-to-audio", endpoint="trial_app_text")
-
-api.add_resource(TrialCompletionApi, "/trial-apps/<uuid:app_id>/completion-messages", endpoint="trial_app_completion")
-
-api.add_resource(TrialSitApi, "/trial-apps/<uuid:app_id>/site")
-
-api.add_resource(TrialAppParameterApi, "/trial-apps/<uuid:app_id>/parameters", endpoint="trial_app_parameters")
-
-api.add_resource(AppApi, "/trial-apps/<uuid:app_id>", endpoint="trial_app")
-
-api.add_resource(TrialAppWorkflowRunApi, "/trial-apps/<uuid:app_id>/workflows/run", endpoint="trial_app_workflow_run")
-api.add_resource(TrialAppWorkflowTaskStopApi, "/trial-apps/<uuid:app_id>/workflows/tasks/<string:task_id>/stop")
-
-api.add_resource(AppWorkflowApi, "/trial-apps/<uuid:app_id>/workflows", endpoint="trial_app_workflow")
-api.add_resource(DatasetListApi, "/trial-apps/<uuid:app_id>/datasets", endpoint="trial_app_datasets")
--- a/api/controllers/console/explore/wraps.py
+++ b/api/controllers/console/explore/wraps.py
@@ -2,15 +2,14 @@ from collections.abc import Callable
 from functools import wraps
 from typing import Concatenate, ParamSpec, TypeVar

-from flask import abort
 from flask_restx import Resource
 from werkzeug.exceptions import NotFound

-from controllers.console.explore.error import AppAccessDeniedError, TrialAppLimitExceeded, TrialAppNotAllowed
+from controllers.console.explore.error import AppAccessDeniedError
 from controllers.console.wraps import account_initialization_required
 from extensions.ext_database import db
 from libs.login import current_account_with_tenant, login_required
-from models import AccountTrialAppRecord, App, InstalledApp, TrialApp
+from models import InstalledApp
 from services.enterprise.enterprise_service import EnterpriseService
 from services.feature_service import FeatureService

@@ -72,61 +71,6 @@ def user_allowed_to_access_app(view: Callable[Concatenate[InstalledApp, P], R] |
    return decorator


-def trial_app_required(view: Callable[Concatenate[App, P], R] | None = None):
-    def decorator(view: Callable[Concatenate[App, P], R]):
-        @wraps(view)
-        def decorated(app_id: str, *args: P.args, **kwargs: P.kwargs):
-            current_user, _ = current_account_with_tenant()
-
-            trial_app = db.session.query(TrialApp).where(TrialApp.app_id == str(app_id)).first()
-
-            if trial_app is None:
-                raise TrialAppNotAllowed()
-            app = trial_app.app
-
-            if app is None:
-                raise TrialAppNotAllowed()
-
-            account_trial_app_record = (
-                db.session.query(AccountTrialAppRecord)
-                .where(AccountTrialAppRecord.account_id == current_user.id, AccountTrialAppRecord.app_id == app_id)
-                .first()
-            )
-            if account_trial_app_record:
-                if account_trial_app_record.count >= trial_app.trial_limit:
-                    raise TrialAppLimitExceeded()
-
-            return view(app, *args, **kwargs)
-
-        return decorated
-
-    if view:
-        return decorator(view)
-    return decorator
-
-
-def trial_feature_enable(view: Callable[..., R]) -> Callable[..., R]:
-    @wraps(view)
-    def decorated(*args, **kwargs):
-        features = FeatureService.get_system_features()
-        if not features.enable_trial_app:
-            abort(403, "Trial app feature is not enabled.")
-        return view(*args, **kwargs)
-
-    return decorated
-
-
-def explore_banner_enabled(view: Callable[..., R]) -> Callable[..., R]:
-    @wraps(view)
-    def decorated(*args, **kwargs):
-        features = FeatureService.get_system_features()
-        if not features.enable_explore_banner:
-            abort(403, "Explore banner feature is not enabled.")
-        return view(*args, **kwargs)
-
-    return decorated
-
-
 class InstalledAppResource(Resource):
    # must be reversed if there are multiple decorators

@@ -136,13 +80,3 @@ class InstalledAppResource(Resource):
        account_initialization_required,
        login_required,
    ]
-
-
-class TrialAppResource(Resource):
-    # must be reversed if there are multiple decorators
-
-    method_decorators = [
-        trial_app_required,
-        account_initialization_required,
-        login_required,
-    ]
--- a/api/controllers/console/wraps.py
+++ b/api/controllers/console/wraps.py
@@ -358,12 +358,14 @@ def annotation_import_rate_limit(view: Callable[P, R]):
    def decorated(*args: P.args, **kwargs: P.kwargs):
        _, current_tenant_id = current_account_with_tenant()
        current_time = int(time.time() * 1000)
+
        # Check per-minute rate limit
        minute_key = f"annotation_import_rate_limit:{current_tenant_id}:1min"
        redis_client.zadd(minute_key, {current_time: current_time})
        redis_client.zremrangebyscore(minute_key, 0, current_time - 60000)
        minute_count = redis_client.zcard(minute_key)
        redis_client.expire(minute_key, 120)  # 2 minutes TTL
+
        if minute_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE:
            abort(
                429,
@@ -377,6 +379,7 @@ def annotation_import_rate_limit(view: Callable[P, R]):
        redis_client.zremrangebyscore(hour_key, 0, current_time - 3600000)
        hour_count = redis_client.zcard(hour_key)
        redis_client.expire(hour_key, 7200)  # 2 hours TTL
+
        if hour_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:
            abort(
                429,
--- a/api/core/agent/agent_app_runner.py
+++ b/api/core/agent/agent_app_runner.py
@@ -1,380 +0,0 @@
-import logging
-from collections.abc import Generator
-from copy import deepcopy
-from typing import Any
-
-from core.agent.base_agent_runner import BaseAgentRunner
-from core.agent.entities import AgentEntity, AgentLog, AgentResult
-from core.agent.patterns.strategy_factory import StrategyFactory
-from core.app.apps.base_app_queue_manager import PublishFrom
-from core.app.entities.queue_entities import QueueAgentThoughtEvent, QueueMessageEndEvent, QueueMessageFileEvent
-from core.file import file_manager
-from core.model_runtime.entities import (
-    AssistantPromptMessage,
-    LLMResult,
-    LLMResultChunk,
-    LLMUsage,
-    PromptMessage,
-    PromptMessageContentType,
-    SystemPromptMessage,
-    TextPromptMessageContent,
-    UserPromptMessage,
-)
-from core.model_runtime.entities.message_entities import ImagePromptMessageContent, PromptMessageContentUnionTypes
-from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
-from core.tools.__base.tool import Tool
-from core.tools.entities.tool_entities import ToolInvokeMeta
-from core.tools.tool_engine import ToolEngine
-from models.model import Message
-
-logger = logging.getLogger(__name__)
-
-
-class AgentAppRunner(BaseAgentRunner):
-    def _create_tool_invoke_hook(self, message: Message):
-        """
-        Create a tool invoke hook that uses ToolEngine.agent_invoke.
-        This hook handles file creation and returns proper meta information.
-        """
-        # Get trace manager from app generate entity
-        trace_manager = self.application_generate_entity.trace_manager
-
-        def tool_invoke_hook(
-            tool: Tool, tool_args: dict[str, Any], tool_name: str
-        ) -> tuple[str, list[str], ToolInvokeMeta]:
-            """Hook that uses agent_invoke for proper file and meta handling."""
-            tool_invoke_response, message_files, tool_invoke_meta = ToolEngine.agent_invoke(
-                tool=tool,
-                tool_parameters=tool_args,
-                user_id=self.user_id,
-                tenant_id=self.tenant_id,
-                message=message,
-                invoke_from=self.application_generate_entity.invoke_from,
-                agent_tool_callback=self.agent_callback,
-                trace_manager=trace_manager,
-                app_id=self.application_generate_entity.app_config.app_id,
-                message_id=message.id,
-                conversation_id=self.conversation.id,
-            )
-
-            # Publish files and track IDs
-            for message_file_id in message_files:
-                self.queue_manager.publish(
-                    QueueMessageFileEvent(message_file_id=message_file_id),
-                    PublishFrom.APPLICATION_MANAGER,
-                )
-                self._current_message_file_ids.append(message_file_id)
-
-            return tool_invoke_response, message_files, tool_invoke_meta
-
-        return tool_invoke_hook
-
-    def run(self, message: Message, query: str, **kwargs: Any) -> Generator[LLMResultChunk, None, None]:
-        """
-        Run Agent application
-        """
-        self.query = query
-        app_generate_entity = self.application_generate_entity
-
-        app_config = self.app_config
-        assert app_config is not None, "app_config is required"
-        assert app_config.agent is not None, "app_config.agent is required"
-
-        # convert tools into ModelRuntime Tool format
-        tool_instances, _ = self._init_prompt_tools()
-
-        assert app_config.agent
-
-        # Create tool invoke hook for agent_invoke
-        tool_invoke_hook = self._create_tool_invoke_hook(message)
-
-        # Get instruction for ReAct strategy
-        instruction = self.app_config.prompt_template.simple_prompt_template or ""
-
-        # Use factory to create appropriate strategy
-        strategy = StrategyFactory.create_strategy(
-            model_features=self.model_features,
-            model_instance=self.model_instance,
-            tools=list(tool_instances.values()),
-            files=list(self.files),
-            max_iterations=app_config.agent.max_iteration,
-            context=self.build_execution_context(),
-            agent_strategy=self.config.strategy,
-            tool_invoke_hook=tool_invoke_hook,
-            instruction=instruction,
-        )
-
-        # Initialize state variables
-        current_agent_thought_id = None
-        has_published_thought = False
-        current_tool_name: str | None = None
-        self._current_message_file_ids: list[str] = []
-
-        # organize prompt messages
-        prompt_messages = self._organize_prompt_messages()
-
-        # Run strategy
-        generator = strategy.run(
-            prompt_messages=prompt_messages,
-            model_parameters=app_generate_entity.model_conf.parameters,
-            stop=app_generate_entity.model_conf.stop,
-            stream=True,
-        )
-
-        # Consume generator and collect result
-        result: AgentResult | None = None
-        try:
-            while True:
-                try:
-                    output = next(generator)
-                except StopIteration as e:
-                    # Generator finished, get the return value
-                    result = e.value
-                    break
-
-                if isinstance(output, LLMResultChunk):
-                    # Handle LLM chunk
-                    if current_agent_thought_id and not has_published_thought:
-                        self.queue_manager.publish(
-                            QueueAgentThoughtEvent(agent_thought_id=current_agent_thought_id),
-                            PublishFrom.APPLICATION_MANAGER,
-                        )
-                        has_published_thought = True
-
-                    yield output
-
-                elif isinstance(output, AgentLog):
-                    # Handle Agent Log using log_type for type-safe dispatch
-                    if output.status == AgentLog.LogStatus.START:
-                        if output.log_type == AgentLog.LogType.ROUND:
-                            # Start of a new round
-                            message_file_ids: list[str] = []
-                            current_agent_thought_id = self.create_agent_thought(
-                                message_id=message.id,
-                                message="",
-                                tool_name="",
-                                tool_input="",
-                                messages_ids=message_file_ids,
-                            )
-                            has_published_thought = False
-
-                        elif output.log_type == AgentLog.LogType.TOOL_CALL:
-                            if current_agent_thought_id is None:
-                                continue
-
-                            # Tool call start - extract data from structured fields
-                            current_tool_name = output.data.get("tool_name", "")
-                            tool_input = output.data.get("tool_args", {})
-
-                            self.save_agent_thought(
-                                agent_thought_id=current_agent_thought_id,
-                                tool_name=current_tool_name,
-                                tool_input=tool_input,
-                                thought=None,
-                                observation=None,
-                                tool_invoke_meta=None,
-                                answer=None,
-                                messages_ids=[],
-                            )
-                            self.queue_manager.publish(
-                                QueueAgentThoughtEvent(agent_thought_id=current_agent_thought_id),
-                                PublishFrom.APPLICATION_MANAGER,
-                            )
-
-                    elif output.status == AgentLog.LogStatus.SUCCESS:
-                        if output.log_type == AgentLog.LogType.THOUGHT:
-                            if current_agent_thought_id is None:
-                                continue
-
-                            thought_text = output.data.get("thought")
-                            self.save_agent_thought(
-                                agent_thought_id=current_agent_thought_id,
-                                tool_name=None,
-                                tool_input=None,
-                                thought=thought_text,
-                                observation=None,
-                                tool_invoke_meta=None,
-                                answer=None,
-                                messages_ids=[],
-                            )
-                            self.queue_manager.publish(
-                                QueueAgentThoughtEvent(agent_thought_id=current_agent_thought_id),
-                                PublishFrom.APPLICATION_MANAGER,
-                            )
-
-                        elif output.log_type == AgentLog.LogType.TOOL_CALL:
-                            if current_agent_thought_id is None:
-                                continue
-
-                            # Tool call finished
-                            tool_output = output.data.get("output")
-                            # Get meta from strategy output (now properly populated)
-                            tool_meta = output.data.get("meta")
-
-                            # Wrap tool_meta with tool_name as key (required by agent_service)
-                            if tool_meta and current_tool_name:
-                                tool_meta = {current_tool_name: tool_meta}
-
-                            self.save_agent_thought(
-                                agent_thought_id=current_agent_thought_id,
-                                tool_name=None,
-                                tool_input=None,
-                                thought=None,
-                                observation=tool_output,
-                                tool_invoke_meta=tool_meta,
-                                answer=None,
-                                messages_ids=self._current_message_file_ids,
-                            )
-                            # Clear message file ids after saving
-                            self._current_message_file_ids = []
-                            current_tool_name = None
-
-                            self.queue_manager.publish(
-                                QueueAgentThoughtEvent(agent_thought_id=current_agent_thought_id),
-                                PublishFrom.APPLICATION_MANAGER,
-                            )
-
-                        elif output.log_type == AgentLog.LogType.ROUND:
-                            if current_agent_thought_id is None:
-                                continue
-
-                            # Round finished - save LLM usage and answer
-                            llm_usage = output.metadata.get(AgentLog.LogMetadata.LLM_USAGE)
-                            llm_result = output.data.get("llm_result")
-                            final_answer = output.data.get("final_answer")
-
-                            self.save_agent_thought(
-                                agent_thought_id=current_agent_thought_id,
-                                tool_name=None,
-                                tool_input=None,
-                                thought=llm_result,
-                                observation=None,
-                                tool_invoke_meta=None,
-                                answer=final_answer,
-                                messages_ids=[],
-                                llm_usage=llm_usage,
-                            )
-                            self.queue_manager.publish(
-                                QueueAgentThoughtEvent(agent_thought_id=current_agent_thought_id),
-                                PublishFrom.APPLICATION_MANAGER,
-                            )
-
-        except Exception:
-            # Re-raise any other exceptions
-            raise
-
-        # Process final result
-        if isinstance(result, AgentResult):
-            final_answer = result.text
-            usage = result.usage or LLMUsage.empty_usage()
-
-            # Publish end event
-            self.queue_manager.publish(
-                QueueMessageEndEvent(
-                    llm_result=LLMResult(
-                        model=self.model_instance.model,
-                        prompt_messages=prompt_messages,
-                        message=AssistantPromptMessage(content=final_answer),
-                        usage=usage,
-                        system_fingerprint="",
-                    )
-                ),
-                PublishFrom.APPLICATION_MANAGER,
-            )
-
-    def _init_system_message(self, prompt_template: str, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
-        """
-        Initialize system message
-        """
-        if not prompt_template:
-            return prompt_messages or []
-
-        prompt_messages = prompt_messages or []
-
-        if prompt_messages and isinstance(prompt_messages[0], SystemPromptMessage):
-            prompt_messages[0] = SystemPromptMessage(content=prompt_template)
-            return prompt_messages
-
-        if not prompt_messages:
-            return [SystemPromptMessage(content=prompt_template)]
-
-        prompt_messages.insert(0, SystemPromptMessage(content=prompt_template))
-        return prompt_messages
-
-    def _organize_user_query(self, query: str, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
-        """
-        Organize user query
-        """
-        if self.files:
-            # get image detail config
-            image_detail_config = (
-                self.application_generate_entity.file_upload_config.image_config.detail
-                if (
-                    self.application_generate_entity.file_upload_config
-                    and self.application_generate_entity.file_upload_config.image_config
-                )
-                else None
-            )
-            image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW
-
-            prompt_message_contents: list[PromptMessageContentUnionTypes] = []
-            for file in self.files:
-                prompt_message_contents.append(
-                    file_manager.to_prompt_message_content(
-                        file,
-                        image_detail_config=image_detail_config,
-                    )
-                )
-            prompt_message_contents.append(TextPromptMessageContent(data=query))
-
-            prompt_messages.append(UserPromptMessage(content=prompt_message_contents))
-        else:
-            prompt_messages.append(UserPromptMessage(content=query))
-
-        return prompt_messages
-
-    def _clear_user_prompt_image_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
-        """
-        As for now, gpt supports both fc and vision at the first iteration.
-        We need to remove the image messages from the prompt messages at the first iteration.
-        """
-        prompt_messages = deepcopy(prompt_messages)
-
-        for prompt_message in prompt_messages:
-            if isinstance(prompt_message, UserPromptMessage):
-                if isinstance(prompt_message.content, list):
-                    prompt_message.content = "\n".join(
-                        [
-                            content.data
-                            if content.type == PromptMessageContentType.TEXT
-                            else "[image]"
-                            if content.type == PromptMessageContentType.IMAGE
-                            else "[file]"
-                            for content in prompt_message.content
-                        ]
-                    )
-
-        return prompt_messages
-
-    def _organize_prompt_messages(self):
-        # For ReAct strategy, use the agent prompt template
-        if self.config.strategy == AgentEntity.Strategy.CHAIN_OF_THOUGHT and self.config.prompt:
-            prompt_template = self.config.prompt.first_prompt
-        else:
-            prompt_template = self.app_config.prompt_template.simple_prompt_template or ""
-
-        self.history_prompt_messages = self._init_system_message(prompt_template, self.history_prompt_messages)
-        query_prompt_messages = self._organize_user_query(self.query or "", [])
-
-        self.history_prompt_messages = AgentHistoryPromptTransform(
-            model_config=self.model_config,
-            prompt_messages=[*query_prompt_messages, *self._current_thoughts],
-            history_messages=self.history_prompt_messages,
-            memory=self.memory,
-        ).get_prompt()
-
-        prompt_messages = [*self.history_prompt_messages, *query_prompt_messages, *self._current_thoughts]
-        if len(self._current_thoughts) != 0:
-            # clear messages after the first iteration
-            prompt_messages = self._clear_user_prompt_image_messages(prompt_messages)
-        return prompt_messages
--- a/api/core/agent/base_agent_runner.py
+++ b/api/core/agent/base_agent_runner.py
@@ -6,7 +6,7 @@ from typing import Union, cast

 from sqlalchemy import select

-from core.agent.entities import AgentEntity, AgentToolEntity, ExecutionContext
+from core.agent.entities import AgentEntity, AgentToolEntity
 from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
 from core.app.apps.agent_chat.app_config_manager import AgentChatAppConfig
 from core.app.apps.base_app_queue_manager import AppQueueManager
@@ -116,20 +116,9 @@ class BaseAgentRunner(AppRunner):
        features = model_schema.features if model_schema and model_schema.features else []
        self.stream_tool_call = ModelFeature.STREAM_TOOL_CALL in features
        self.files = application_generate_entity.files if ModelFeature.VISION in features else []
-        self.model_features = features
        self.query: str | None = ""
        self._current_thoughts: list[PromptMessage] = []

-    def build_execution_context(self) -> ExecutionContext:
-        """Build execution context."""
-        return ExecutionContext(
-            user_id=self.user_id,
-            app_id=self.app_config.app_id,
-            conversation_id=self.conversation.id,
-            message_id=self.message.id,
-            tenant_id=self.tenant_id,
-        )
-
    def _repack_app_generate_entity(
        self, app_generate_entity: AgentChatAppGenerateEntity
    ) -> AgentChatAppGenerateEntity:
--- a/api/core/agent/cot_agent_runner.py
+++ b/api/core/agent/cot_agent_runner.py
@@ -0,0 +1,437 @@
+import json
+import logging
+from abc import ABC, abstractmethod
+from collections.abc import Generator, Mapping, Sequence
+from typing import Any
+
+from core.agent.base_agent_runner import BaseAgentRunner
+from core.agent.entities import AgentScratchpadUnit
+from core.agent.output_parser.cot_output_parser import CotAgentOutputParser
+from core.app.apps.base_app_queue_manager import PublishFrom
+from core.app.entities.queue_entities import QueueAgentThoughtEvent, QueueMessageEndEvent, QueueMessageFileEvent
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessage,
+    PromptMessageTool,
+    ToolPromptMessage,
+    UserPromptMessage,
+)
+from core.ops.ops_trace_manager import TraceQueueManager
+from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
+from core.tools.__base.tool import Tool
+from core.tools.entities.tool_entities import ToolInvokeMeta
+from core.tools.tool_engine import ToolEngine
+from core.workflow.nodes.agent.exc import AgentMaxIterationError
+from models.model import Message
+
+logger = logging.getLogger(__name__)
+
+
+class CotAgentRunner(BaseAgentRunner, ABC):
+    _is_first_iteration = True
+    _ignore_observation_providers = ["wenxin"]
+    _historic_prompt_messages: list[PromptMessage]
+    _agent_scratchpad: list[AgentScratchpadUnit]
+    _instruction: str
+    _query: str
+    _prompt_messages_tools: Sequence[PromptMessageTool]
+
+    def run(
+        self,
+        message: Message,
+        query: str,
+        inputs: Mapping[str, str],
+    ) -> Generator:
+        """
+        Run the CoT (chain-of-thought) agent application.
+        """
+
+        app_generate_entity = self.application_generate_entity
+        self._repack_app_generate_entity(app_generate_entity)
+        self._init_react_state(query)
+
+        trace_manager = app_generate_entity.trace_manager
+
+        # ensure "Observation" is a stop word, unless the provider is in _ignore_observation_providers
+        if "Observation" not in app_generate_entity.model_conf.stop:
+            if app_generate_entity.model_conf.provider not in self._ignore_observation_providers:
+                app_generate_entity.model_conf.stop.append("Observation")
+
+        app_config = self.app_config
+        assert app_config.agent
+
+        # init instruction
+        inputs = inputs or {}
+        instruction = app_config.prompt_template.simple_prompt_template or ""
+        self._instruction = self._fill_in_inputs_from_external_data_tools(instruction, inputs)
+
+        iteration_step = 1
+        max_iteration_steps = min(app_config.agent.max_iteration, 99) + 1
+
+        # convert tools into ModelRuntime Tool format
+        tool_instances, prompt_messages_tools = self._init_prompt_tools()
+        self._prompt_messages_tools = prompt_messages_tools
+
+        function_call_state = True
+        llm_usage: dict[str, LLMUsage | None] = {"usage": None}
+        final_answer = ""
+        prompt_messages: list = []  # Initialize prompt_messages
+        agent_thought_id = ""  # Initialize agent_thought_id
+
+        def increase_usage(final_llm_usage_dict: dict[str, LLMUsage | None], usage: LLMUsage):
+            if not final_llm_usage_dict["usage"]:
+                final_llm_usage_dict["usage"] = usage
+            else:
+                llm_usage = final_llm_usage_dict["usage"]
+                llm_usage.prompt_tokens += usage.prompt_tokens
+                llm_usage.completion_tokens += usage.completion_tokens
+                llm_usage.total_tokens += usage.total_tokens
+                llm_usage.prompt_price += usage.prompt_price
+                llm_usage.completion_price += usage.completion_price
+                llm_usage.total_price += usage.total_price
+
+        model_instance = self.model_instance
+
+        while function_call_state and iteration_step <= max_iteration_steps:
+            # keep iterating until the model stops requesting tool calls
+            function_call_state = False
+
+            if iteration_step == max_iteration_steps:
+                # the last iteration, remove all tools
+                self._prompt_messages_tools = []
+
+            message_file_ids: list[str] = []
+
+            agent_thought_id = self.create_agent_thought(
+                message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids
+            )
+
+            if iteration_step > 1:
+                self.queue_manager.publish(
+                    QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER
+                )
+
+            # organize prompt messages, then recalc llm max tokens
+            prompt_messages = self._organize_prompt_messages()
+            self.recalc_llm_max_tokens(self.model_config, prompt_messages)
+            # invoke model
+            chunks = model_instance.invoke_llm(
+                prompt_messages=prompt_messages,
+                model_parameters=app_generate_entity.model_conf.parameters,
+                tools=[],
+                stop=app_generate_entity.model_conf.stop,
+                stream=True,
+                user=self.user_id,
+                callbacks=[],
+            )
+
+            usage_dict: dict[str, LLMUsage | None] = {}
+            react_chunks = CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)
+            scratchpad = AgentScratchpadUnit(
+                agent_response="",
+                thought="",
+                action_str="",
+                observation="",
+                action=None,
+            )
+
+            # publish agent thought if it's first iteration
+            if iteration_step == 1:
+                self.queue_manager.publish(
+                    QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER
+                )
+
+            for chunk in react_chunks:
+                if isinstance(chunk, AgentScratchpadUnit.Action):
+                    action = chunk
+                    # detect action
+                    assert scratchpad.agent_response is not None
+                    scratchpad.agent_response += json.dumps(chunk.model_dump())
+                    scratchpad.action_str = json.dumps(chunk.model_dump())
+                    scratchpad.action = action
+                else:
+                    assert scratchpad.agent_response is not None
+                    scratchpad.agent_response += chunk
+                    assert scratchpad.thought is not None
+                    scratchpad.thought += chunk
+                    yield LLMResultChunk(
+                        model=self.model_config.model,
+                        prompt_messages=prompt_messages,
+                        system_fingerprint="",
+                        delta=LLMResultChunkDelta(index=0, message=AssistantPromptMessage(content=chunk), usage=None),
+                    )
+
+            assert scratchpad.thought is not None
+            scratchpad.thought = scratchpad.thought.strip() or "I am thinking about how to help you"
+            self._agent_scratchpad.append(scratchpad)
+
+            # Check if max iteration is reached and model still wants to call tools
+            if iteration_step == max_iteration_steps and scratchpad.action:
+                if scratchpad.action.action_name.lower() != "final answer":
+                    raise AgentMaxIterationError(app_config.agent.max_iteration)
+
+            # get llm usage
+            if "usage" in usage_dict:
+                if usage_dict["usage"] is not None:
+                    increase_usage(llm_usage, usage_dict["usage"])
+            else:
+                usage_dict["usage"] = LLMUsage.empty_usage()
+
+            self.save_agent_thought(
+                agent_thought_id=agent_thought_id,
+                tool_name=(scratchpad.action.action_name if scratchpad.action and not scratchpad.is_final() else ""),
+                tool_input={scratchpad.action.action_name: scratchpad.action.action_input} if scratchpad.action else {},
+                tool_invoke_meta={},
+                thought=scratchpad.thought or "",
+                observation="",
+                answer=scratchpad.agent_response or "",
+                messages_ids=[],
+                llm_usage=usage_dict["usage"],
+            )
+
+            if not scratchpad.is_final():
+                self.queue_manager.publish(
+                    QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER
+                )
+
+            if not scratchpad.action:
+                # no action was extracted: leave the final answer empty and let the loop end
+                final_answer = ""
+            else:
+                if scratchpad.action.action_name.lower() == "final answer":
+                    # action is final answer, return final answer directly
+                    try:
+                        if isinstance(scratchpad.action.action_input, dict):
+                            final_answer = json.dumps(scratchpad.action.action_input, ensure_ascii=False)
+                        elif isinstance(scratchpad.action.action_input, str):
+                            final_answer = scratchpad.action.action_input
+                        else:
+                            final_answer = f"{scratchpad.action.action_input}"
+                    except TypeError:
+                        final_answer = f"{scratchpad.action.action_input}"
+                else:
+                    function_call_state = True
+                    # action is tool call, invoke tool
+                    tool_invoke_response, tool_invoke_meta = self._handle_invoke_action(
+                        action=scratchpad.action,
+                        tool_instances=tool_instances,
+                        message_file_ids=message_file_ids,
+                        trace_manager=trace_manager,
+                    )
+                    scratchpad.observation = tool_invoke_response
+                    scratchpad.agent_response = tool_invoke_response
+
+                    self.save_agent_thought(
+                        agent_thought_id=agent_thought_id,
+                        tool_name=scratchpad.action.action_name,
+                        tool_input={scratchpad.action.action_name: scratchpad.action.action_input},
+                        thought=scratchpad.thought or "",
+                        observation={scratchpad.action.action_name: tool_invoke_response},
+                        tool_invoke_meta={scratchpad.action.action_name: tool_invoke_meta.to_dict()},
+                        answer=scratchpad.agent_response,
+                        messages_ids=message_file_ids,
+                        llm_usage=usage_dict["usage"],
+                    )
+
+                    self.queue_manager.publish(
+                        QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER
+                    )
+
+                # update prompt tool message
+                for prompt_tool in self._prompt_messages_tools:
+                    self.update_prompt_message_tool(tool_instances[prompt_tool.name], prompt_tool)
+
+            iteration_step += 1
+
+        yield LLMResultChunk(
+            model=model_instance.model,
+            prompt_messages=prompt_messages,
+            delta=LLMResultChunkDelta(
+                index=0, message=AssistantPromptMessage(content=final_answer), usage=llm_usage["usage"]
+            ),
+            system_fingerprint="",
+        )
+
+        # save agent thought
+        self.save_agent_thought(
+            agent_thought_id=agent_thought_id,
+            tool_name="",
+            tool_input={},
+            tool_invoke_meta={},
+            thought=final_answer,
+            observation={},
+            answer=final_answer,
+            messages_ids=[],
+        )
+        # publish end event
+        self.queue_manager.publish(
+            QueueMessageEndEvent(
+                llm_result=LLMResult(
+                    model=model_instance.model,
+                    prompt_messages=prompt_messages,
+                    message=AssistantPromptMessage(content=final_answer),
+                    usage=llm_usage["usage"] or LLMUsage.empty_usage(),
+                    system_fingerprint="",
+                )
+            ),
+            PublishFrom.APPLICATION_MANAGER,
+        )
+
+    def _handle_invoke_action(
+        self,
+        action: AgentScratchpadUnit.Action,
+        tool_instances: Mapping[str, Tool],
+        message_file_ids: list[str],
+        trace_manager: TraceQueueManager | None = None,
+    ) -> tuple[str, ToolInvokeMeta]:
+        """
+        Run the tool requested by an agent action.
+
+        :param action: parsed action carrying the tool name and its input
+        :param tool_instances: available tools keyed by name
+        :param message_file_ids: list extended in place with the ids of files the tool produced
+        :param trace_manager: optional trace manager forwarded to the tool engine
+        :return: observation text and the tool invocation meta
+        """
+        requested_tool = action.action_name
+        tool = tool_instances.get(requested_tool)
+        if not tool:
+            # unknown tool: report it back as the observation instead of raising
+            error_text = f"there is not a tool named {requested_tool}"
+            return error_text, ToolInvokeMeta.error_instance(error_text)
+
+        tool_arguments = action.action_input
+        if isinstance(tool_arguments, str):
+            try:
+                tool_arguments = json.loads(tool_arguments)
+            except json.JSONDecodeError:
+                # not valid JSON: hand the raw string to the tool engine unchanged
+                pass
+
+        observation, produced_file_ids, invoke_meta = ToolEngine.agent_invoke(
+            tool=tool,
+            tool_parameters=tool_arguments,
+            user_id=self.user_id,
+            tenant_id=self.tenant_id,
+            message=self.message,
+            invoke_from=self.application_generate_entity.invoke_from,
+            agent_tool_callback=self.agent_callback,
+            trace_manager=trace_manager,
+        )
+
+        for file_id in produced_file_ids:
+            # announce each produced file, then record its id for the caller
+            self.queue_manager.publish(
+                QueueMessageFileEvent(message_file_id=file_id), PublishFrom.APPLICATION_MANAGER
+            )
+            message_file_ids.append(file_id)
+
+        return observation, invoke_meta
+
+    def _convert_dict_to_action(self, action: dict) -> AgentScratchpadUnit.Action:
+        """
+        Build an Action from a dict holding the "action" and "action_input" keys
+        """
+        name = action["action"]
+        payload = action["action_input"]
+        return AgentScratchpadUnit.Action(action_name=name, action_input=payload)
+
+    def _fill_in_inputs_from_external_data_tools(self, instruction: str, inputs: Mapping[str, Any]) -> str:
+        """
+        Substitute every ``{{key}}`` placeholder in the instruction with the matching input value
+
+        :param instruction: instruction template
+        :param inputs: values keyed by placeholder name
+        :return: instruction with the placeholders replaced
+        """
+        filled = instruction
+        for name, raw_value in inputs.items():
+            try:
+                placeholder = "{{" + format(name) + "}}"
+                filled = filled.replace(placeholder, str(raw_value))
+            except Exception:
+                # best effort: an input that cannot be rendered is skipped
+                continue
+
+        return filled
+
+    def _init_react_state(self, query):
+        """
+        init ReAct state
+
+        Stores the query, resets the agent scratchpad to an empty list and rebuilds the
+        historic prompt messages via ``_organize_historic_prompt_messages``.
+
+        :param query: query kept on the instance as ``self._query``
+        """
+        self._query = query
+        self._agent_scratchpad = []
+        self._historic_prompt_messages = self._organize_historic_prompt_messages()
+
+    @abstractmethod
+    def _organize_prompt_messages(self) -> list[PromptMessage]:
+        """
+        organize prompt messages
+
+        Abstract hook with no default body; concrete runners must override it.
+
+        :return: list of prompt messages
+        """
+
+    def _format_assistant_message(self, agent_scratchpad: list[AgentScratchpadUnit]) -> str:
+        """
+        Render scratchpad units as Thought / Action / Observation text
+
+        :param agent_scratchpad: scratchpad units, in order
+        :return: concatenated text; a final unit contributes only its "Final Answer" part
+        """
+        segments: list[str] = []
+        for unit in agent_scratchpad:
+            if unit.is_final():
+                segments.append(f"Final Answer: {unit.agent_response}")
+                continue
+
+            segments.append(f"Thought: {unit.thought}\n\n")
+            if unit.action_str:
+                segments.append(f"Action: {unit.action_str}\n\n")
+            if unit.observation:
+                segments.append(f"Observation: {unit.observation}\n\n")
+
+        return "".join(segments)
+
+    def _organize_historic_prompt_messages(
+        self, current_session_messages: list[PromptMessage] | None = None
+    ) -> list[PromptMessage]:
+        """
+        organize historic prompt messages
+
+        Replays ``self.history_prompt_messages`` and collapses the assistant / tool messages
+        found between two user messages into a single assistant message written in the
+        Thought / Action / Observation text format, then passes the result through
+        ``AgentHistoryPromptTransform``.
+
+        :param current_session_messages: messages of the current session, handed to the
+            transform as ``prompt_messages`` (an empty list when omitted)
+        :return: historic prompt messages produced by the transform
+        :raises NotImplementedError: if a tool message appears before any assistant message
+            of its turn
+        """
+        result: list[PromptMessage] = []
+        scratchpads: list[AgentScratchpadUnit] = []
+        current_scratchpad: AgentScratchpadUnit | None = None
+
+        for message in self.history_prompt_messages:
+            if isinstance(message, AssistantPromptMessage):
+                # only the first assistant message of a turn opens a scratchpad; later ones
+                # in the same turn reuse it and can only overwrite its action
+                if not current_scratchpad:
+                    assert isinstance(message.content, str)
+                    current_scratchpad = AgentScratchpadUnit(
+                        agent_response=message.content,
+                        thought=message.content or "I am thinking about how to help you",
+                        action_str="",
+                        action=None,
+                        observation=None,
+                    )
+                    scratchpads.append(current_scratchpad)
+                if message.tool_calls:
+                    # only the first tool call is recorded; a malformed call is logged and skipped
+                    try:
+                        current_scratchpad.action = AgentScratchpadUnit.Action(
+                            action_name=message.tool_calls[0].function.name,
+                            action_input=json.loads(message.tool_calls[0].function.arguments),
+                        )
+                        current_scratchpad.action_str = json.dumps(current_scratchpad.action.to_dict())
+                    except Exception:
+                        logger.exception("Failed to parse tool call from assistant message")
+            elif isinstance(message, ToolPromptMessage):
+                if current_scratchpad:
+                    assert isinstance(message.content, str)
+                    current_scratchpad.observation = message.content
+                else:
+                    # the previous text ("expected str type") described a different condition
+                    raise NotImplementedError("tool message has no preceding assistant message")
+            elif isinstance(message, UserPromptMessage):
+                # a user message closes the previous turn: flush its scratchpads first
+                if scratchpads:
+                    result.append(AssistantPromptMessage(content=self._format_assistant_message(scratchpads)))
+                    scratchpads = []
+                    current_scratchpad = None
+
+                result.append(message)
+
+        # flush the scratchpads of a trailing turn that no user message closed
+        if scratchpads:
+            result.append(AssistantPromptMessage(content=self._format_assistant_message(scratchpads)))
+
+        historic_prompts = AgentHistoryPromptTransform(
+            model_config=self.model_config,
+            prompt_messages=current_session_messages or [],
+            history_messages=result,
+            memory=self.memory,
+        ).get_prompt()
+        return historic_prompts
--- a/api/core/agent/cot_chat_agent_runner.py
+++ b/api/core/agent/cot_chat_agent_runner.py
@@ -0,0 +1,118 @@
+import json
+
+from core.agent.cot_agent_runner import CotAgentRunner
+from core.file import file_manager
+from core.model_runtime.entities import (
+    AssistantPromptMessage,
+    PromptMessage,
+    SystemPromptMessage,
+    TextPromptMessageContent,
+    UserPromptMessage,
+)
+from core.model_runtime.entities.message_entities import ImagePromptMessageContent, PromptMessageContentUnionTypes
+from core.model_runtime.utils.encoders import jsonable_encoder
+
+
+class CotChatAgentRunner(CotAgentRunner):
+    """
+    Chain-of-thought agent runner that emits role-based prompt messages: a system message,
+    the historic messages, the user query and, once the scratchpad is non-empty, the
+    rendered scratchpad followed by a "continue" user message.
+    """
+
+    def _organize_system_prompt(self) -> SystemPromptMessage:
+        """
+        Organize system prompt
+
+        Fills the ``{{instruction}}``, ``{{tools}}`` and ``{{tool_names}}`` placeholders of
+        the configured first prompt.
+
+        :return: system prompt message
+        :raises ValueError: if the agent configuration or its prompt is not set
+        """
+        # explicit checks instead of ``assert``: asserts vanish under ``python -O``
+        agent_config = self.app_config.agent
+        if not agent_config:
+            raise ValueError("Agent configuration is not set")
+        prompt_entity = agent_config.prompt
+        if not prompt_entity:
+            raise ValueError("Agent prompt configuration is not set")
+        first_prompt = prompt_entity.first_prompt
+
+        system_prompt = (
+            first_prompt.replace("{{instruction}}", self._instruction)
+            .replace("{{tools}}", json.dumps(jsonable_encoder(self._prompt_messages_tools)))
+            .replace("{{tool_names}}", ", ".join([tool.name for tool in self._prompt_messages_tools]))
+        )
+
+        return SystemPromptMessage(content=system_prompt)
+
+    def _organize_user_query(self, query, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
+        """
+        Organize user query
+
+        :param query: user query text
+        :param prompt_messages: list the user message is appended to (mutated in place)
+        :return: the same ``prompt_messages`` list
+        """
+        if not self.files:
+            prompt_messages.append(UserPromptMessage(content=query))
+            return prompt_messages
+
+        # get image detail config, falling back to LOW when none is configured
+        upload_config = self.application_generate_entity.file_upload_config
+        image_detail_config = (
+            upload_config.image_config.detail if upload_config and upload_config.image_config else None
+        ) or ImagePromptMessageContent.DETAIL.LOW
+
+        # files first, the query text last
+        prompt_message_contents: list[PromptMessageContentUnionTypes] = [
+            file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config)
+            for file in self.files
+        ]
+        prompt_message_contents.append(TextPromptMessageContent(data=query))
+
+        prompt_messages.append(UserPromptMessage(content=prompt_message_contents))
+        return prompt_messages
+
+    def _organize_prompt_messages(self) -> list[PromptMessage]:
+        """
+        Organize prompt messages
+
+        :return: system message, historic messages, query messages and, when the scratchpad
+            is non-empty, the rendered scratchpad plus a "continue" user message
+        """
+        # organize system prompt
+        system_message = self._organize_system_prompt()
+
+        # organize current assistant messages: the text is assembled first so the message
+        # is created with its final ``str`` content
+        agent_scratchpad = self._agent_scratchpad
+        assistant_messages: list[PromptMessage] = []
+        if agent_scratchpad:
+            segments: list[str] = []
+            for unit in agent_scratchpad:
+                if unit.is_final():
+                    segments.append(f"Final Answer: {unit.agent_response}")
+                    continue
+
+                segments.append(f"Thought: {unit.thought}\n\n")
+                if unit.action_str:
+                    segments.append(f"Action: {unit.action_str}\n\n")
+                if unit.observation:
+                    segments.append(f"Observation: {unit.observation}\n\n")
+
+            assistant_messages = [AssistantPromptMessage(content="".join(segments))]
+
+        # query messages
+        query_messages = self._organize_user_query(self._query, [])
+
+        if assistant_messages:
+            # organize historic prompt messages
+            historic_messages = self._organize_historic_prompt_messages(
+                [system_message, *query_messages, *assistant_messages, UserPromptMessage(content="continue")]
+            )
+            return [
+                system_message,
+                *historic_messages,
+                *query_messages,
+                *assistant_messages,
+                UserPromptMessage(content="continue"),
+            ]
+
+        # organize historic prompt messages
+        historic_messages = self._organize_historic_prompt_messages([system_message, *query_messages])
+        return [system_message, *historic_messages, *query_messages]
--- a/api/core/agent/cot_completion_agent_runner.py
+++ b/api/core/agent/cot_completion_agent_runner.py
@@ -0,0 +1,87 @@
+import json
+
+from core.agent.cot_agent_runner import CotAgentRunner
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessage,
+    TextPromptMessageContent,
+    UserPromptMessage,
+)
+from core.model_runtime.utils.encoders import jsonable_encoder
+
+
+class CotCompletionAgentRunner(CotAgentRunner):
+    """
+    Chain-of-thought agent runner that flattens instruction, history, scratchpad and query
+    into one text prompt carried by a single user message.
+    """
+
+    def _organize_instruction_prompt(self) -> str:
+        """
+        Organize instruction prompt
+
+        :return: first prompt with ``{{instruction}}``, ``{{tools}}`` and ``{{tool_names}}`` filled in
+        :raises ValueError: if the agent configuration or its prompt entity is not set
+        """
+        agent_config = self.app_config.agent
+        if agent_config is None:
+            raise ValueError("Agent configuration is not set")
+        prompt_entity = agent_config.prompt
+        if prompt_entity is None:
+            raise ValueError("prompt entity is not set")
+
+        rendered = prompt_entity.first_prompt.replace("{{instruction}}", self._instruction)
+        rendered = rendered.replace("{{tools}}", json.dumps(jsonable_encoder(self._prompt_messages_tools)))
+        rendered = rendered.replace("{{tool_names}}", ", ".join(tool.name for tool in self._prompt_messages_tools))
+        return rendered
+
+    def _organize_historic_prompt(self, current_session_messages: list[PromptMessage] | None = None) -> str:
+        """
+        Organize historic prompt
+
+        :param current_session_messages: forwarded to ``_organize_historic_prompt_messages``
+        :return: history rendered as text; user messages are prefixed with "Question: "
+        """
+        pieces: list[str] = []
+
+        for message in self._organize_historic_prompt_messages(current_session_messages):
+            if isinstance(message, UserPromptMessage):
+                pieces.append(f"Question: {message.content}\n\n")
+                continue
+            if not isinstance(message, AssistantPromptMessage):
+                continue
+
+            body = message.content
+            if isinstance(body, str):
+                pieces.append(body + "\n\n")
+            elif isinstance(body, list):
+                # only text parts are kept; they are concatenated without a separator
+                pieces.extend(part.data for part in body if isinstance(part, TextPromptMessageContent))
+
+        return "".join(pieces)
+
+    def _organize_prompt_messages(self) -> list[PromptMessage]:
+        """
+        Organize prompt messages
+
+        :return: one user message whose content is the fully rendered prompt
+        """
+        # instruction first, then history (same order as the placeholders are filled below)
+        instruction_prompt = self._organize_instruction_prompt()
+        history_text = self._organize_historic_prompt()
+
+        # render the current scratchpad
+        scratchpad_parts: list[str] = []
+        for unit in self._agent_scratchpad or []:
+            if unit.is_final():
+                scratchpad_parts.append(f"Final Answer: {unit.agent_response}")
+                continue
+
+            scratchpad_parts.append(f"Thought: {unit.thought}\n\n")
+            if unit.action_str:
+                scratchpad_parts.append(f"Action: {unit.action_str}\n\n")
+            if unit.observation:
+                scratchpad_parts.append(f"Observation: {unit.observation}\n\n")
+        scratchpad_text = "".join(scratchpad_parts)
+
+        question_text = f"Question: {self._query}"
+
+        # fill the remaining placeholders one after another
+        prompt = instruction_prompt.replace("{{historic_messages}}", history_text)
+        prompt = prompt.replace("{{agent_scratchpad}}", scratchpad_text)
+        prompt = prompt.replace("{{query}}", question_text)
+
+        return [UserPromptMessage(content=prompt)]
--- a/api/core/agent/entities.py
+++ b/api/core/agent/entities.py
@@ -1,5 +1,3 @@
-import uuid
-from collections.abc import Mapping
 from enum import StrEnum
 from typing import Any, Union

@@ -94,96 +92,3 @@ class AgentInvokeMessage(ToolInvokeMessage):
    """

    pass
-
-
-class ExecutionContext(BaseModel):
-    """Execution context containing trace and audit information.
-
-    This context carries all the IDs and metadata that are not part of
-    the core business logic but needed for tracing, auditing, and
-    correlation purposes.
-    """
-
-    user_id: str | None = None
-    app_id: str | None = None
-    conversation_id: str | None = None
-    message_id: str | None = None
-    tenant_id: str | None = None
-
-    @classmethod
-    def create_minimal(cls, user_id: str | None = None) -> "ExecutionContext":
-        """Create a minimal context with only essential fields."""
-        return cls(user_id=user_id)
-
-    def to_dict(self) -> dict[str, Any]:
-        """Convert to dictionary for passing to legacy code."""
-        return {
-            "user_id": self.user_id,
-            "app_id": self.app_id,
-            "conversation_id": self.conversation_id,
-            "message_id": self.message_id,
-            "tenant_id": self.tenant_id,
-        }
-
-    def with_updates(self, **kwargs) -> "ExecutionContext":
-        """Create a new context with updated fields."""
-        data = self.to_dict()
-        data.update(kwargs)
-
-        return ExecutionContext(
-            user_id=data.get("user_id"),
-            app_id=data.get("app_id"),
-            conversation_id=data.get("conversation_id"),
-            message_id=data.get("message_id"),
-            tenant_id=data.get("tenant_id"),
-        )
-
-
-class AgentLog(BaseModel):
-    """
-    Agent Log.
-    """
-
-    class LogType(StrEnum):
-        """Type of agent log entry."""
-
-        ROUND = "round"  # A complete iteration round
-        THOUGHT = "thought"  # LLM thinking/reasoning
-        TOOL_CALL = "tool_call"  # Tool invocation
-
-    class LogMetadata(StrEnum):
-        STARTED_AT = "started_at"
-        FINISHED_AT = "finished_at"
-        ELAPSED_TIME = "elapsed_time"
-        TOTAL_PRICE = "total_price"
-        TOTAL_TOKENS = "total_tokens"
-        PROVIDER = "provider"
-        CURRENCY = "currency"
-        LLM_USAGE = "llm_usage"
-        ICON = "icon"
-        ICON_DARK = "icon_dark"
-
-    class LogStatus(StrEnum):
-        START = "start"
-        ERROR = "error"
-        SUCCESS = "success"
-
-    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="The id of the log")
-    label: str = Field(..., description="The label of the log")
-    log_type: LogType = Field(..., description="The type of the log")
-    parent_id: str | None = Field(default=None, description="Leave empty for root log")
-    error: str | None = Field(default=None, description="The error message")
-    status: LogStatus = Field(..., description="The status of the log")
-    data: Mapping[str, Any] = Field(..., description="Detailed log data")
-    metadata: Mapping[LogMetadata, Any] = Field(default={}, description="The metadata of the log")
-
-
-class AgentResult(BaseModel):
-    """
-    Agent execution result.
-    """
-
-    text: str = Field(default="", description="The generated text")
-    files: list[Any] = Field(default_factory=list, description="Files produced during execution")
-    usage: Any | None = Field(default=None, description="LLM usage statistics")
-    finish_reason: str | None = Field(default=None, description="Reason for completion")
--- a/api/core/agent/patterns/README.md
+++ b/api/core/agent/patterns/README.md
@@ -1,55 +0,0 @@
-# Agent Patterns
-
-A unified agent pattern module that powers both Agent V2 workflow nodes and agent applications. Strategies share a common execution contract while adapting to model capabilities and tool availability.
-
-## Overview
-
-The module applies a strategy pattern around LLM/tool orchestration. `StrategyFactory` auto-selects the best implementation based on model features or an explicit agent strategy, and each strategy streams logs and usage consistently.
-
-## Key Features
-
- **Dual strategies**
-  - `FunctionCallStrategy`: uses native LLM function/tool calling when the model exposes `TOOL_CALL`, `MULTI_TOOL_CALL`, or `STREAM_TOOL_CALL`.
-  - `ReActStrategy`: ReAct (reasoning + acting) flow driven by `CotAgentOutputParser`, used when function calling is unavailable or explicitly requested.
- **Explicit or auto selection**
-  - `StrategyFactory.create_strategy` prefers an explicit `AgentEntity.Strategy` (FUNCTION_CALLING or CHAIN_OF_THOUGHT).
-  - Otherwise it falls back to function calling when tool-call features exist, or ReAct when they do not.
- **Unified execution contract**
-  - `AgentPattern.run` yields streaming `AgentLog` entries and `LLMResultChunk` data, returning an `AgentResult` with text, files, usage, and `finish_reason`.
-  - Iterations are configurable and hard-capped at 99 rounds; the last round forces a final answer by withholding tools.
- **Tool handling and hooks**
-  - Tools convert to `PromptMessageTool` objects before invocation.
-  - Optional `tool_invoke_hook` lets callers override tool execution (e.g., agent apps) while workflow runs use `ToolEngine.generic_invoke`.
-  - Tool outputs support text, links, JSON, variables, blobs, retriever resources, and file attachments; `target=="self"` files are reloaded into model context, others are returned as outputs.
- **File-aware arguments**
-  - Tool args accept `[File: <id>]` or `[Files: <id1, id2>]` placeholders that resolve to `File` objects before invocation, enabling models to reference uploaded files safely.
- **ReAct prompt shaping**
-  - System prompts replace `{{instruction}}`, `{{tools}}`, and `{{tool_names}}` placeholders.
-  - Adds `Observation` to stop sequences and appends scratchpad text so the model sees prior Thought/Action/Observation history.
- **Observability and accounting**
-  - Standardized `AgentLog` entries for rounds, model thoughts, and tool calls, including usage aggregation (`LLMUsage`) across streaming and non-streaming paths.
-
-## Architecture
-
-```
-agent/patterns/
-├── base.py              # Shared utilities: logging, usage, tool invocation, file handling
-├── function_call.py     # Native function-calling loop with tool execution
-├── react.py             # ReAct loop with CoT parsing and scratchpad wiring
-└── strategy_factory.py  # Strategy selection by model features or explicit override
-```
-
-## Usage
-
- For auto-selection:
-  - Call `StrategyFactory.create_strategy(model_features, model_instance, context, tools, files, ...)` and run the returned strategy with prompt messages and model params.
- For explicit behavior:
-  - Pass `agent_strategy=AgentEntity.Strategy.FUNCTION_CALLING` to force native calls (falls back to ReAct if unsupported), or `CHAIN_OF_THOUGHT` to force ReAct.
- Both strategies stream chunks and logs; collect the generator output until it returns an `AgentResult`.
-
-## Integration Points
-
- **Model runtime**: delegates to `ModelInstance.invoke_llm` for both streaming and non-streaming calls.
- **Tool system**: defaults to `ToolEngine.generic_invoke`, with `tool_invoke_hook` for custom callers.
- **Files**: flows through `File` objects for tool inputs/outputs and model-context attachments.
- **Execution context**: `ExecutionContext` fields (user/app/conversation/message) propagate to tool invocations and logging.
--- a/api/core/agent/patterns/__init__.py
+++ b/api/core/agent/patterns/__init__.py
@@ -1,19 +0,0 @@
-"""Agent patterns module.
-
-This module provides different strategies for agent execution:
- FunctionCallStrategy: Uses native function/tool calling
- ReActStrategy: Uses ReAct (Reasoning + Acting) approach
- StrategyFactory: Factory for creating strategies based on model features
-"""
-
-from .base import AgentPattern
-from .function_call import FunctionCallStrategy
-from .react import ReActStrategy
-from .strategy_factory import StrategyFactory
-
-__all__ = [
-    "AgentPattern",
-    "FunctionCallStrategy",
-    "ReActStrategy",
-    "StrategyFactory",
-]
--- a/api/core/agent/patterns/base.py
+++ b/api/core/agent/patterns/base.py
@@ -1,474 +0,0 @@
-"""Base class for agent strategies."""
-
-from __future__ import annotations
-
-import json
-import re
-import time
-from abc import ABC, abstractmethod
-from collections.abc import Callable, Generator
-from typing import TYPE_CHECKING, Any
-
-from core.agent.entities import AgentLog, AgentResult, ExecutionContext
-from core.file import File
-from core.model_manager import ModelInstance
-from core.model_runtime.entities import (
-    AssistantPromptMessage,
-    LLMResult,
-    LLMResultChunk,
-    LLMResultChunkDelta,
-    PromptMessage,
-    PromptMessageTool,
-)
-from core.model_runtime.entities.llm_entities import LLMUsage
-from core.model_runtime.entities.message_entities import TextPromptMessageContent
-from core.tools.entities.tool_entities import ToolInvokeMessage, ToolInvokeMeta
-
-if TYPE_CHECKING:
-    from core.tools.__base.tool import Tool
-
-# Type alias for tool invoke hook
-# Returns: (response_content, message_file_ids, tool_invoke_meta)
-ToolInvokeHook = Callable[["Tool", dict[str, Any], str], tuple[str, list[str], ToolInvokeMeta]]
-
-
-class AgentPattern(ABC):
-    """Base class for agent execution strategies."""
-
-    def __init__(
-        self,
-        model_instance: ModelInstance,
-        tools: list[Tool],
-        context: ExecutionContext,
-        max_iterations: int = 10,
-        workflow_call_depth: int = 0,
-        files: list[File] = [],
-        tool_invoke_hook: ToolInvokeHook | None = None,
-    ):
-        """Initialize the agent strategy."""
-        self.model_instance = model_instance
-        self.tools = tools
-        self.context = context
-        self.max_iterations = min(max_iterations, 99)  # Cap at 99 iterations
-        self.workflow_call_depth = workflow_call_depth
-        self.files: list[File] = files
-        self.tool_invoke_hook = tool_invoke_hook
-
-    @abstractmethod
-    def run(
-        self,
-        prompt_messages: list[PromptMessage],
-        model_parameters: dict[str, Any],
-        stop: list[str] = [],
-        stream: bool = True,
-    ) -> Generator[LLMResultChunk | AgentLog, None, AgentResult]:
-        """Execute the agent strategy."""
-        pass
-
-    def _accumulate_usage(self, total_usage: dict[str, Any], delta_usage: LLMUsage) -> None:
-        """Accumulate LLM usage statistics."""
-        if not total_usage.get("usage"):
-            # Create a copy to avoid modifying the original
-            total_usage["usage"] = LLMUsage(
-                prompt_tokens=delta_usage.prompt_tokens,
-                prompt_unit_price=delta_usage.prompt_unit_price,
-                prompt_price_unit=delta_usage.prompt_price_unit,
-                prompt_price=delta_usage.prompt_price,
-                completion_tokens=delta_usage.completion_tokens,
-                completion_unit_price=delta_usage.completion_unit_price,
-                completion_price_unit=delta_usage.completion_price_unit,
-                completion_price=delta_usage.completion_price,
-                total_tokens=delta_usage.total_tokens,
-                total_price=delta_usage.total_price,
-                currency=delta_usage.currency,
-                latency=delta_usage.latency,
-            )
-        else:
-            current: LLMUsage = total_usage["usage"]
-            current.prompt_tokens += delta_usage.prompt_tokens
-            current.completion_tokens += delta_usage.completion_tokens
-            current.total_tokens += delta_usage.total_tokens
-            current.prompt_price += delta_usage.prompt_price
-            current.completion_price += delta_usage.completion_price
-            current.total_price += delta_usage.total_price
-
-    def _extract_content(self, content: Any) -> str:
-        """Extract text content from message content."""
-        if isinstance(content, list):
-            # Content items are PromptMessageContentUnionTypes
-            text_parts = []
-            for c in content:
-                # Check if it's a TextPromptMessageContent (which has data attribute)
-                if isinstance(c, TextPromptMessageContent):
-                    text_parts.append(c.data)
-            return "".join(text_parts)
-        return str(content)
-
-    def _has_tool_calls(self, chunk: LLMResultChunk) -> bool:
-        """Check if chunk contains tool calls."""
-        # LLMResultChunk always has delta attribute
-        return bool(chunk.delta.message and chunk.delta.message.tool_calls)
-
-    def _has_tool_calls_result(self, result: LLMResult) -> bool:
-        """Check if result contains tool calls (non-streaming)."""
-        # LLMResult always has message attribute
-        return bool(result.message and result.message.tool_calls)
-
-    def _extract_tool_calls(self, chunk: LLMResultChunk) -> list[tuple[str, str, dict[str, Any]]]:
-        """Extract tool calls from streaming chunk."""
-        tool_calls: list[tuple[str, str, dict[str, Any]]] = []
-        if chunk.delta.message and chunk.delta.message.tool_calls:
-            for tool_call in chunk.delta.message.tool_calls:
-                if tool_call.function:
-                    try:
-                        args = json.loads(tool_call.function.arguments) if tool_call.function.arguments else {}
-                    except json.JSONDecodeError:
-                        args = {}
-                    tool_calls.append((tool_call.id or "", tool_call.function.name, args))
-        return tool_calls
-
-    def _extract_tool_calls_result(self, result: LLMResult) -> list[tuple[str, str, dict[str, Any]]]:
-        """Extract tool calls from non-streaming result."""
-        tool_calls = []
-        if result.message and result.message.tool_calls:
-            for tool_call in result.message.tool_calls:
-                if tool_call.function:
-                    try:
-                        args = json.loads(tool_call.function.arguments) if tool_call.function.arguments else {}
-                    except json.JSONDecodeError:
-                        args = {}
-                    tool_calls.append((tool_call.id or "", tool_call.function.name, args))
-        return tool_calls
-
-    def _extract_text_from_message(self, message: PromptMessage) -> str:
-        """Extract text content from a prompt message."""
-        # PromptMessage always has content attribute
-        content = message.content
-        if isinstance(content, str):
-            return content
-        elif isinstance(content, list):
-            # Extract text from content list
-            text_parts = []
-            for item in content:
-                if isinstance(item, TextPromptMessageContent):
-                    text_parts.append(item.data)
-            return " ".join(text_parts)
-        return ""
-
-    def _get_tool_metadata(self, tool_instance: Tool) -> dict[AgentLog.LogMetadata, Any]:
-        """Get metadata for a tool including provider and icon info."""
-        from core.tools.tool_manager import ToolManager
-
-        metadata: dict[AgentLog.LogMetadata, Any] = {}
-        if tool_instance.entity and tool_instance.entity.identity:
-            identity = tool_instance.entity.identity
-            if identity.provider:
-                metadata[AgentLog.LogMetadata.PROVIDER] = identity.provider
-
-            # Get icon using ToolManager for proper URL generation
-            tenant_id = self.context.tenant_id
-            if tenant_id and identity.provider:
-                try:
-                    provider_type = tool_instance.tool_provider_type()
-                    icon = ToolManager.get_tool_icon(tenant_id, provider_type, identity.provider)
-                    if isinstance(icon, str):
-                        metadata[AgentLog.LogMetadata.ICON] = icon
-                    elif isinstance(icon, dict):
-                        # Handle icon dict with background/content or light/dark variants
-                        metadata[AgentLog.LogMetadata.ICON] = icon
-                except Exception:
-                    # Fallback to identity.icon if ToolManager fails
-                    if identity.icon:
-                        metadata[AgentLog.LogMetadata.ICON] = identity.icon
-            elif identity.icon:
-                metadata[AgentLog.LogMetadata.ICON] = identity.icon
-        return metadata
-
-    def _create_log(
-        self,
-        label: str,
-        log_type: AgentLog.LogType,
-        status: AgentLog.LogStatus,
-        data: dict[str, Any] | None = None,
-        parent_id: str | None = None,
-        extra_metadata: dict[AgentLog.LogMetadata, Any] | None = None,
-    ) -> AgentLog:
-        """Create a new AgentLog with standard metadata."""
-        metadata: dict[AgentLog.LogMetadata, Any] = {
-            AgentLog.LogMetadata.STARTED_AT: time.perf_counter(),
-        }
-        if extra_metadata:
-            metadata.update(extra_metadata)
-
-        return AgentLog(
-            label=label,
-            log_type=log_type,
-            status=status,
-            data=data or {},
-            parent_id=parent_id,
-            metadata=metadata,
-        )
-
-    def _finish_log(
-        self,
-        log: AgentLog,
-        data: dict[str, Any] | None = None,
-        usage: LLMUsage | None = None,
-    ) -> AgentLog:
-        """Finish an AgentLog by updating its status and metadata."""
-        log.status = AgentLog.LogStatus.SUCCESS
-
-        if data is not None:
-            log.data = data
-
-        # Calculate elapsed time
-        started_at = log.metadata.get(AgentLog.LogMetadata.STARTED_AT, time.perf_counter())
-        finished_at = time.perf_counter()
-
-        # Update metadata
-        log.metadata = {
-            **log.metadata,
-            AgentLog.LogMetadata.FINISHED_AT: finished_at,
-            # Calculate elapsed time in seconds
-            AgentLog.LogMetadata.ELAPSED_TIME: round(finished_at - started_at, 4),
-        }
-
-        # Add usage information if provided
-        if usage:
-            log.metadata.update(
-                {
-                    AgentLog.LogMetadata.TOTAL_PRICE: usage.total_price,
-                    AgentLog.LogMetadata.CURRENCY: usage.currency,
-                    AgentLog.LogMetadata.TOTAL_TOKENS: usage.total_tokens,
-                    AgentLog.LogMetadata.LLM_USAGE: usage,
-                }
-            )
-
-        return log
-
-    def _replace_file_references(self, tool_args: dict[str, Any]) -> dict[str, Any]:
-        """
-        Replace file references in tool arguments with actual File objects.
-
-        Args:
-            tool_args: Dictionary of tool arguments
-
-        Returns:
-            Updated tool arguments with file references replaced
-        """
-        # Process each argument in the dictionary
-        processed_args: dict[str, Any] = {}
-        for key, value in tool_args.items():
-            processed_args[key] = self._process_file_reference(value)
-        return processed_args
-
-    def _process_file_reference(self, data: Any) -> Any:
-        """
-        Recursively process data to replace file references.
-        Supports both single file [File: file_id] and multiple files [Files: file_id1, file_id2, ...].
-
-        Args:
-            data: The data to process (can be dict, list, str, or other types)
-
-        Returns:
-            Processed data with file references replaced
-        """
-        single_file_pattern = re.compile(r"^\[File:\s*([^\]]+)\]$")
-        multiple_files_pattern = re.compile(r"^\[Files:\s*([^\]]+)\]$")
-
-        if isinstance(data, dict):
-            # Process dictionary recursively
-            return {key: self._process_file_reference(value) for key, value in data.items()}
-        elif isinstance(data, list):
-            # Process list recursively
-            return [self._process_file_reference(item) for item in data]
-        elif isinstance(data, str):
-            # Check for single file pattern [File: file_id]
-            single_match = single_file_pattern.match(data.strip())
-            if single_match:
-                file_id = single_match.group(1).strip()
-                # Find the file in self.files
-                for file in self.files:
-                    if file.id and str(file.id) == file_id:
-                        return file
-                # If file not found, return original value
-                return data
-
-            # Check for multiple files pattern [Files: file_id1, file_id2, ...]
-            multiple_match = multiple_files_pattern.match(data.strip())
-            if multiple_match:
-                file_ids_str = multiple_match.group(1).strip()
-                # Split by comma and strip whitespace
-                file_ids = [fid.strip() for fid in file_ids_str.split(",")]
-
-                # Find all matching files
-                matched_files: list[File] = []
-                for file_id in file_ids:
-                    for file in self.files:
-                        if file.id and str(file.id) == file_id:
-                            matched_files.append(file)
-                            break
-
-                # Return list of files if any were found, otherwise return original
-                return matched_files or data
-
-            return data
-        else:
-            # Return other types as-is
-            return data
-
-    def _create_text_chunk(self, text: str, prompt_messages: list[PromptMessage]) -> LLMResultChunk:
-        """Create a text chunk for streaming."""
-        return LLMResultChunk(
-            model=self.model_instance.model,
-            prompt_messages=prompt_messages,
-            delta=LLMResultChunkDelta(
-                index=0,
-                message=AssistantPromptMessage(content=text),
-                usage=None,
-            ),
-            system_fingerprint="",
-        )
-
-    def _invoke_tool(
-        self,
-        tool_instance: Tool,
-        tool_args: dict[str, Any],
-        tool_name: str,
-    ) -> tuple[str, list[File], ToolInvokeMeta | None]:
-        """
-        Invoke a tool and collect its response.
-
-        Args:
-            tool_instance: The tool instance to invoke
-            tool_args: Tool arguments
-            tool_name: Name of the tool
-
-        Returns:
-            Tuple of (response_content, tool_files, tool_invoke_meta)
-        """
-        # Process tool_args to replace file references with actual File objects
-        tool_args = self._replace_file_references(tool_args)
-
-        # If a tool invoke hook is set, use it instead of generic_invoke
-        if self.tool_invoke_hook:
-            response_content, _, tool_invoke_meta = self.tool_invoke_hook(tool_instance, tool_args, tool_name)
-            # Note: message_file_ids are stored in DB, we don't convert them to File objects here
-            # The caller (AgentAppRunner) handles file publishing
-            return response_content, [], tool_invoke_meta
-
-        # Default: use generic_invoke for workflow scenarios
-        # Import here to avoid circular import
-        from core.tools.tool_engine import DifyWorkflowCallbackHandler, ToolEngine
-
-        tool_response = ToolEngine().generic_invoke(
-            tool=tool_instance,
-            tool_parameters=tool_args,
-            user_id=self.context.user_id or "",
-            workflow_tool_callback=DifyWorkflowCallbackHandler(),
-            workflow_call_depth=self.workflow_call_depth,
-            app_id=self.context.app_id,
-            conversation_id=self.context.conversation_id,
-            message_id=self.context.message_id,
-        )
-
-        # Collect response and files
-        response_content = ""
-        tool_files: list[File] = []
-
-        for response in tool_response:
-            if response.type == ToolInvokeMessage.MessageType.TEXT:
-                assert isinstance(response.message, ToolInvokeMessage.TextMessage)
-                response_content += response.message.text
-
-            elif response.type == ToolInvokeMessage.MessageType.LINK:
-                # Handle link messages
-                if isinstance(response.message, ToolInvokeMessage.TextMessage):
-                    response_content += f"[Link: {response.message.text}]"
-
-            elif response.type == ToolInvokeMessage.MessageType.IMAGE:
-                # Handle image URL messages
-                if isinstance(response.message, ToolInvokeMessage.TextMessage):
-                    response_content += f"[Image: {response.message.text}]"
-
-            elif response.type == ToolInvokeMessage.MessageType.IMAGE_LINK:
-                # Handle image link messages
-                if isinstance(response.message, ToolInvokeMessage.TextMessage):
-                    response_content += f"[Image: {response.message.text}]"
-
-            elif response.type == ToolInvokeMessage.MessageType.BINARY_LINK:
-                # Handle binary file link messages
-                if isinstance(response.message, ToolInvokeMessage.TextMessage):
-                    filename = response.meta.get("filename", "file") if response.meta else "file"
-                    response_content += f"[File: {filename} - {response.message.text}]"
-
-            elif response.type == ToolInvokeMessage.MessageType.JSON:
-                # Handle JSON messages
-                if isinstance(response.message, ToolInvokeMessage.JsonMessage):
-                    response_content += json.dumps(response.message.json_object, ensure_ascii=False, indent=2)
-
-            elif response.type == ToolInvokeMessage.MessageType.BLOB:
-                # Handle blob messages - convert to text representation
-                if isinstance(response.message, ToolInvokeMessage.BlobMessage):
-                    mime_type = (
-                        response.meta.get("mime_type", "application/octet-stream")
-                        if response.meta
-                        else "application/octet-stream"
-                    )
-                    size = len(response.message.blob)
-                    response_content += f"[Binary data: {mime_type}, size: {size} bytes]"
-
-            elif response.type == ToolInvokeMessage.MessageType.VARIABLE:
-                # Handle variable messages
-                if isinstance(response.message, ToolInvokeMessage.VariableMessage):
-                    var_name = response.message.variable_name
-                    var_value = response.message.variable_value
-                    if isinstance(var_value, str):
-                        response_content += var_value
-                    else:
-                        response_content += f"[Variable {var_name}: {json.dumps(var_value, ensure_ascii=False)}]"
-
-            elif response.type == ToolInvokeMessage.MessageType.BLOB_CHUNK:
-                # Handle blob chunk messages - these are parts of a larger blob
-                if isinstance(response.message, ToolInvokeMessage.BlobChunkMessage):
-                    response_content += f"[Blob chunk {response.message.sequence}: {len(response.message.blob)} bytes]"
-
-            elif response.type == ToolInvokeMessage.MessageType.RETRIEVER_RESOURCES:
-                # Handle retriever resources messages
-                if isinstance(response.message, ToolInvokeMessage.RetrieverResourceMessage):
-                    response_content += response.message.context
-
-            elif response.type == ToolInvokeMessage.MessageType.FILE:
-                # Extract file from meta
-                if response.meta and "file" in response.meta:
-                    file = response.meta["file"]
-                    if isinstance(file, File):
-                        # Check if file is for model or tool output
-                        if response.meta.get("target") == "self":
-                            # File is for model - add to files for next prompt
-                            self.files.append(file)
-                            response_content += f"File '{file.filename}' has been loaded into your context."
-                        else:
-                            # File is tool output
-                            tool_files.append(file)
-
-        return response_content, tool_files, None
-
-    def _find_tool_by_name(self, tool_name: str) -> Tool | None:
-        """Find a tool instance by its name."""
-        for tool in self.tools:
-            if tool.entity.identity.name == tool_name:
-                return tool
-        return None
-
-    def _convert_tools_to_prompt_format(self) -> list[PromptMessageTool]:
-        """Convert tools to prompt message format."""
-        prompt_tools: list[PromptMessageTool] = []
-        for tool in self.tools:
-            prompt_tools.append(tool.to_prompt_message_tool())
-        return prompt_tools
-
-    def _update_usage_with_empty(self, llm_usage: dict[str, Any]) -> None:
-        """Initialize usage tracking with empty usage if not set."""
-        if "usage" not in llm_usage or llm_usage["usage"] is None:
-            llm_usage["usage"] = LLMUsage.empty_usage()
--- a/api/core/agent/patterns/function_call.py
+++ b/api/core/agent/patterns/function_call.py
@@ -1,299 +0,0 @@
-"""Function Call strategy implementation."""
-
-import json
-from collections.abc import Generator
-from typing import Any, Union
-
-from core.agent.entities import AgentLog, AgentResult
-from core.file import File
-from core.model_runtime.entities import (
-    AssistantPromptMessage,
-    LLMResult,
-    LLMResultChunk,
-    LLMResultChunkDelta,
-    LLMUsage,
-    PromptMessage,
-    PromptMessageTool,
-    ToolPromptMessage,
-)
-from core.tools.entities.tool_entities import ToolInvokeMeta
-
-from .base import AgentPattern
-
-
-class FunctionCallStrategy(AgentPattern):
-    """Function Call strategy using model's native tool calling capability."""
-
-    def run(
-        self,
-        prompt_messages: list[PromptMessage],
-        model_parameters: dict[str, Any],
-        stop: list[str] = [],
-        stream: bool = True,
-    ) -> Generator[LLMResultChunk | AgentLog, None, AgentResult]:
-        """Execute the function call agent strategy."""
-        # Convert tools to prompt format
-        prompt_tools: list[PromptMessageTool] = self._convert_tools_to_prompt_format()
-
-        # Initialize tracking
-        iteration_step: int = 1
-        max_iterations: int = self.max_iterations + 1
-        function_call_state: bool = True
-        total_usage: dict[str, LLMUsage | None] = {"usage": None}
-        messages: list[PromptMessage] = list(prompt_messages)  # Create mutable copy
-        final_text: str = ""
-        finish_reason: str | None = None
-        output_files: list[File] = []  # Track files produced by tools
-
-        while function_call_state and iteration_step <= max_iterations:
-            function_call_state = False
-            round_log = self._create_log(
-                label=f"ROUND {iteration_step}",
-                log_type=AgentLog.LogType.ROUND,
-                status=AgentLog.LogStatus.START,
-                data={},
-            )
-            yield round_log
-            # On last iteration, remove tools to force final answer
-            current_tools: list[PromptMessageTool] = [] if iteration_step == max_iterations else prompt_tools
-            model_log = self._create_log(
-                label=f"{self.model_instance.model} Thought",
-                log_type=AgentLog.LogType.THOUGHT,
-                status=AgentLog.LogStatus.START,
-                data={},
-                parent_id=round_log.id,
-                extra_metadata={
-                    AgentLog.LogMetadata.PROVIDER: self.model_instance.provider,
-                },
-            )
-            yield model_log
-
-            # Track usage for this round only
-            round_usage: dict[str, LLMUsage | None] = {"usage": None}
-
-            # Invoke model
-            chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = self.model_instance.invoke_llm(
-                prompt_messages=messages,
-                model_parameters=model_parameters,
-                tools=current_tools,
-                stop=stop,
-                stream=stream,
-                user=self.context.user_id,
-                callbacks=[],
-            )
-
-            # Process response
-            tool_calls, response_content, chunk_finish_reason = yield from self._handle_chunks(
-                chunks, round_usage, model_log
-            )
-            messages.append(self._create_assistant_message(response_content, tool_calls))
-
-            # Accumulate to total usage
-            round_usage_value = round_usage.get("usage")
-            if round_usage_value:
-                self._accumulate_usage(total_usage, round_usage_value)
-
-            # Update final text if no tool calls (this is likely the final answer)
-            if not tool_calls:
-                final_text = response_content
-
-            # Update finish reason
-            if chunk_finish_reason:
-                finish_reason = chunk_finish_reason
-
-            # Process tool calls
-            tool_outputs: dict[str, str] = {}
-            if tool_calls:
-                function_call_state = True
-                # Execute tools
-                for tool_call_id, tool_name, tool_args in tool_calls:
-                    tool_response, tool_files, _ = yield from self._handle_tool_call(
-                        tool_name, tool_args, tool_call_id, messages, round_log
-                    )
-                    tool_outputs[tool_name] = tool_response
-                    # Track files produced by tools
-                    output_files.extend(tool_files)
-            yield self._finish_log(
-                round_log,
-                data={
-                    "llm_result": response_content,
-                    "tool_calls": [
-                        {"name": tc[1], "args": tc[2], "output": tool_outputs.get(tc[1], "")} for tc in tool_calls
-                    ]
-                    if tool_calls
-                    else [],
-                    "final_answer": final_text if not function_call_state else None,
-                },
-                usage=round_usage.get("usage"),
-            )
-            iteration_step += 1
-
-        # Return final result
-        from core.agent.entities import AgentResult
-
-        return AgentResult(
-            text=final_text,
-            files=output_files,
-            usage=total_usage.get("usage") or LLMUsage.empty_usage(),
-            finish_reason=finish_reason,
-        )
-
-    def _handle_chunks(
-        self,
-        chunks: Union[Generator[LLMResultChunk, None, None], LLMResult],
-        llm_usage: dict[str, LLMUsage | None],
-        start_log: AgentLog,
-    ) -> Generator[
-        LLMResultChunk | AgentLog,
-        None,
-        tuple[list[tuple[str, str, dict[str, Any]]], str, str | None],
-    ]:
-        """Handle LLM response chunks and extract tool calls and content.
-
-        Returns a tuple of (tool_calls, response_content, finish_reason).
-        """
-        tool_calls: list[tuple[str, str, dict[str, Any]]] = []
-        response_content: str = ""
-        finish_reason: str | None = None
-        if isinstance(chunks, Generator):
-            # Streaming response
-            for chunk in chunks:
-                # Extract tool calls
-                if self._has_tool_calls(chunk):
-                    tool_calls.extend(self._extract_tool_calls(chunk))
-
-                # Extract content
-                if chunk.delta.message and chunk.delta.message.content:
-                    response_content += self._extract_content(chunk.delta.message.content)
-
-                # Track usage
-                if chunk.delta.usage:
-                    self._accumulate_usage(llm_usage, chunk.delta.usage)
-
-                # Capture finish reason
-                if chunk.delta.finish_reason:
-                    finish_reason = chunk.delta.finish_reason
-
-                yield chunk
-        else:
-            # Non-streaming response
-            result: LLMResult = chunks
-
-            if self._has_tool_calls_result(result):
-                tool_calls.extend(self._extract_tool_calls_result(result))
-
-            if result.message and result.message.content:
-                response_content += self._extract_content(result.message.content)
-
-            if result.usage:
-                self._accumulate_usage(llm_usage, result.usage)
-
-            # Convert to streaming format
-            yield LLMResultChunk(
-                model=result.model,
-                prompt_messages=result.prompt_messages,
-                delta=LLMResultChunkDelta(index=0, message=result.message, usage=result.usage),
-            )
-        yield self._finish_log(
-            start_log,
-            data={
-                "result": response_content,
-            },
-            usage=llm_usage.get("usage"),
-        )
-        return tool_calls, response_content, finish_reason
-
-    def _create_assistant_message(
-        self, content: str, tool_calls: list[tuple[str, str, dict[str, Any]]] | None = None
-    ) -> AssistantPromptMessage:
-        """Create assistant message with tool calls."""
-        if tool_calls is None:
-            return AssistantPromptMessage(content=content)
-        return AssistantPromptMessage(
-            content=content or "",
-            tool_calls=[
-                AssistantPromptMessage.ToolCall(
-                    id=tc[0],
-                    type="function",
-                    function=AssistantPromptMessage.ToolCall.ToolCallFunction(name=tc[1], arguments=json.dumps(tc[2])),
-                )
-                for tc in tool_calls
-            ],
-        )
-
-    def _handle_tool_call(
-        self,
-        tool_name: str,
-        tool_args: dict[str, Any],
-        tool_call_id: str,
-        messages: list[PromptMessage],
-        round_log: AgentLog,
-    ) -> Generator[AgentLog, None, tuple[str, list[File], ToolInvokeMeta | None]]:
-        """Handle a single tool call and return response with files and meta."""
-        # Find tool
-        tool_instance = self._find_tool_by_name(tool_name)
-        if not tool_instance:
-            raise ValueError(f"Tool {tool_name} not found")
-
-        # Get tool metadata (provider, icon, etc.)
-        tool_metadata = self._get_tool_metadata(tool_instance)
-
-        # Create tool call log
-        tool_call_log = self._create_log(
-            label=f"CALL {tool_name}",
-            log_type=AgentLog.LogType.TOOL_CALL,
-            status=AgentLog.LogStatus.START,
-            data={
-                "tool_call_id": tool_call_id,
-                "tool_name": tool_name,
-                "tool_args": tool_args,
-            },
-            parent_id=round_log.id,
-            extra_metadata=tool_metadata,
-        )
-        yield tool_call_log
-
-        # Invoke tool using base class method with error handling
-        try:
-            response_content, tool_files, tool_invoke_meta = self._invoke_tool(tool_instance, tool_args, tool_name)
-
-            yield self._finish_log(
-                tool_call_log,
-                data={
-                    **tool_call_log.data,
-                    "output": response_content,
-                    "files": len(tool_files),
-                    "meta": tool_invoke_meta.to_dict() if tool_invoke_meta else None,
-                },
-            )
-            final_content = response_content or "Tool executed successfully"
-            # Add tool response to messages
-            messages.append(
-                ToolPromptMessage(
-                    content=final_content,
-                    tool_call_id=tool_call_id,
-                    name=tool_name,
-                )
-            )
-            return response_content, tool_files, tool_invoke_meta
-        except Exception as e:
-            # Tool invocation failed, yield error log
-            error_message = str(e)
-            tool_call_log.status = AgentLog.LogStatus.ERROR
-            tool_call_log.error = error_message
-            tool_call_log.data = {
-                **tool_call_log.data,
-                "error": error_message,
-            }
-            yield tool_call_log
-
-            # Add error message to conversation
-            error_content = f"Tool execution failed: {error_message}"
-            messages.append(
-                ToolPromptMessage(
-                    content=error_content,
-                    tool_call_id=tool_call_id,
-                    name=tool_name,
-                )
-            )
-            return error_content, [], None
--- a/api/core/agent/patterns/react.py
+++ b/api/core/agent/patterns/react.py
@@ -1,418 +0,0 @@
-"""ReAct strategy implementation."""
-
-from __future__ import annotations
-
-import json
-from collections.abc import Generator
-from typing import TYPE_CHECKING, Any, Union
-
-from core.agent.entities import AgentLog, AgentResult, AgentScratchpadUnit, ExecutionContext
-from core.agent.output_parser.cot_output_parser import CotAgentOutputParser
-from core.file import File
-from core.model_manager import ModelInstance
-from core.model_runtime.entities import (
-    AssistantPromptMessage,
-    LLMResult,
-    LLMResultChunk,
-    LLMResultChunkDelta,
-    PromptMessage,
-    SystemPromptMessage,
-)
-
-from .base import AgentPattern, ToolInvokeHook
-
-if TYPE_CHECKING:
-    from core.tools.__base.tool import Tool
-
-
-class ReActStrategy(AgentPattern):
-    """ReAct strategy using reasoning and acting approach."""
-
-    def __init__(
-        self,
-        model_instance: ModelInstance,
-        tools: list[Tool],
-        context: ExecutionContext,
-        max_iterations: int = 10,
-        workflow_call_depth: int = 0,
-        files: list[File] = [],
-        tool_invoke_hook: ToolInvokeHook | None = None,
-        instruction: str = "",
-    ):
-        """Initialize the ReAct strategy with instruction support."""
-        super().__init__(
-            model_instance=model_instance,
-            tools=tools,
-            context=context,
-            max_iterations=max_iterations,
-            workflow_call_depth=workflow_call_depth,
-            files=files,
-            tool_invoke_hook=tool_invoke_hook,
-        )
-        self.instruction = instruction
-
-    def run(
-        self,
-        prompt_messages: list[PromptMessage],
-        model_parameters: dict[str, Any],
-        stop: list[str] = [],
-        stream: bool = True,
-    ) -> Generator[LLMResultChunk | AgentLog, None, AgentResult]:
-        """Execute the ReAct agent strategy."""
-        # Initialize tracking
-        agent_scratchpad: list[AgentScratchpadUnit] = []
-        iteration_step: int = 1
-        max_iterations: int = self.max_iterations + 1
-        react_state: bool = True
-        total_usage: dict[str, Any] = {"usage": None}
-        output_files: list[File] = []  # Track files produced by tools
-        final_text: str = ""
-        finish_reason: str | None = None
-
-        # Add "Observation" to stop sequences
-        if "Observation" not in stop:
-            stop = stop.copy()
-            stop.append("Observation")
-
-        while react_state and iteration_step <= max_iterations:
-            react_state = False
-            round_log = self._create_log(
-                label=f"ROUND {iteration_step}",
-                log_type=AgentLog.LogType.ROUND,
-                status=AgentLog.LogStatus.START,
-                data={},
-            )
-            yield round_log
-
-            # Build prompt with/without tools based on iteration
-            include_tools = iteration_step < max_iterations
-            current_messages = self._build_prompt_with_react_format(
-                prompt_messages, agent_scratchpad, include_tools, self.instruction
-            )
-
-            model_log = self._create_log(
-                label=f"{self.model_instance.model} Thought",
-                log_type=AgentLog.LogType.THOUGHT,
-                status=AgentLog.LogStatus.START,
-                data={},
-                parent_id=round_log.id,
-                extra_metadata={
-                    AgentLog.LogMetadata.PROVIDER: self.model_instance.provider,
-                },
-            )
-            yield model_log
-
-            # Track usage for this round only
-            round_usage: dict[str, Any] = {"usage": None}
-
-            # Use current messages directly (files are handled by base class if needed)
-            messages_to_use = current_messages
-
-            # Invoke model
-            chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = self.model_instance.invoke_llm(
-                prompt_messages=messages_to_use,
-                model_parameters=model_parameters,
-                stop=stop,
-                stream=stream,
-                user=self.context.user_id or "",
-                callbacks=[],
-            )
-
-            # Process response
-            scratchpad, chunk_finish_reason = yield from self._handle_chunks(
-                chunks, round_usage, model_log, current_messages
-            )
-            agent_scratchpad.append(scratchpad)
-
-            # Accumulate to total usage
-            round_usage_value = round_usage.get("usage")
-            if round_usage_value:
-                self._accumulate_usage(total_usage, round_usage_value)
-
-            # Update finish reason
-            if chunk_finish_reason:
-                finish_reason = chunk_finish_reason
-
-            # Check if we have an action to execute
-            if scratchpad.action and scratchpad.action.action_name.lower() != "final answer":
-                react_state = True
-                # Execute tool
-                observation, tool_files = yield from self._handle_tool_call(
-                    scratchpad.action, current_messages, round_log
-                )
-                scratchpad.observation = observation
-                # Track files produced by tools
-                output_files.extend(tool_files)
-
-                # Add observation to scratchpad for display
-                yield self._create_text_chunk(f"\nObservation: {observation}\n", current_messages)
-            else:
-                # Extract final answer
-                if scratchpad.action and scratchpad.action.action_input:
-                    final_answer = scratchpad.action.action_input
-                    if isinstance(final_answer, dict):
-                        final_answer = json.dumps(final_answer, ensure_ascii=False)
-                    final_text = str(final_answer)
-                elif scratchpad.thought:
-                    # If no action but we have thought, use thought as final answer
-                    final_text = scratchpad.thought
-
-            yield self._finish_log(
-                round_log,
-                data={
-                    "thought": scratchpad.thought,
-                    "action": scratchpad.action_str if scratchpad.action else None,
-                    "observation": scratchpad.observation or None,
-                    "final_answer": final_text if not react_state else None,
-                },
-                usage=round_usage.get("usage"),
-            )
-            iteration_step += 1
-
-        # Return final result
-
-        from core.agent.entities import AgentResult
-
-        return AgentResult(
-            text=final_text, files=output_files, usage=total_usage.get("usage"), finish_reason=finish_reason
-        )
-
-    def _build_prompt_with_react_format(
-        self,
-        original_messages: list[PromptMessage],
-        agent_scratchpad: list[AgentScratchpadUnit],
-        include_tools: bool = True,
-        instruction: str = "",
-    ) -> list[PromptMessage]:
-        """Build prompt messages with ReAct format."""
-        # Copy messages to avoid modifying original
-        messages = list(original_messages)
-
-        # Find and update the system prompt that should already exist
-        system_prompt_found = False
-        for i, msg in enumerate(messages):
-            if isinstance(msg, SystemPromptMessage):
-                system_prompt_found = True
-                # The system prompt from frontend already has the template, just replace placeholders
-
-                # Format tools
-                tools_str = ""
-                tool_names = []
-                if include_tools and self.tools:
-                    # Convert tools to prompt message tools format
-                    prompt_tools = [tool.to_prompt_message_tool() for tool in self.tools]
-                    tool_names = [tool.name for tool in prompt_tools]
-
-                    # Format tools as JSON for comprehensive information
-                    from core.model_runtime.utils.encoders import jsonable_encoder
-
-                    tools_str = json.dumps(jsonable_encoder(prompt_tools), indent=2)
-                    tool_names_str = ", ".join(f'"{name}"' for name in tool_names)
-                else:
-                    tools_str = "No tools available"
-                    tool_names_str = ""
-
-                # Replace placeholders in the existing system prompt
-                updated_content = msg.content
-                assert isinstance(updated_content, str)
-                updated_content = updated_content.replace("{{instruction}}", instruction)
-                updated_content = updated_content.replace("{{tools}}", tools_str)
-                updated_content = updated_content.replace("{{tool_names}}", tool_names_str)
-
-                # Create new SystemPromptMessage with updated content
-                messages[i] = SystemPromptMessage(content=updated_content)
-                break
-
-        # If no system prompt found, that's unexpected but add scratchpad anyway
-        if not system_prompt_found:
-            # This shouldn't happen if frontend is working correctly
-            pass
-
-        # Format agent scratchpad
-        scratchpad_str = ""
-        if agent_scratchpad:
-            scratchpad_parts: list[str] = []
-            for unit in agent_scratchpad:
-                if unit.thought:
-                    scratchpad_parts.append(f"Thought: {unit.thought}")
-                if unit.action_str:
-                    scratchpad_parts.append(f"Action:\n```\n{unit.action_str}\n```")
-                if unit.observation:
-                    scratchpad_parts.append(f"Observation: {unit.observation}")
-            scratchpad_str = "\n".join(scratchpad_parts)
-
-        # If there's a scratchpad, append it to the last message
-        if scratchpad_str:
-            messages.append(AssistantPromptMessage(content=scratchpad_str))
-
-        return messages
-
-    def _handle_chunks(
-        self,
-        chunks: Union[Generator[LLMResultChunk, None, None], LLMResult],
-        llm_usage: dict[str, Any],
-        model_log: AgentLog,
-        current_messages: list[PromptMessage],
-    ) -> Generator[
-        LLMResultChunk | AgentLog,
-        None,
-        tuple[AgentScratchpadUnit, str | None],
-    ]:
-        """Handle LLM response chunks and extract action/thought.
-
-        Returns a tuple of (scratchpad_unit, finish_reason).
-        """
-        usage_dict: dict[str, Any] = {}
-
-        # Convert non-streaming to streaming format if needed
-        if isinstance(chunks, LLMResult):
-            # Create a generator from the LLMResult
-            def result_to_chunks() -> Generator[LLMResultChunk, None, None]:
-                yield LLMResultChunk(
-                    model=chunks.model,
-                    prompt_messages=chunks.prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=0,
-                        message=chunks.message,
-                        usage=chunks.usage,
-                        finish_reason=None,  # LLMResult doesn't have finish_reason, only streaming chunks do
-                    ),
-                    system_fingerprint=chunks.system_fingerprint or "",
-                )
-
-            streaming_chunks = result_to_chunks()
-        else:
-            streaming_chunks = chunks
-
-        react_chunks = CotAgentOutputParser.handle_react_stream_output(streaming_chunks, usage_dict)
-
-        # Initialize scratchpad unit
-        scratchpad = AgentScratchpadUnit(
-            agent_response="",
-            thought="",
-            action_str="",
-            observation="",
-            action=None,
-        )
-
-        finish_reason: str | None = None
-
-        # Process chunks
-        for chunk in react_chunks:
-            if isinstance(chunk, AgentScratchpadUnit.Action):
-                # Action detected
-                action_str = json.dumps(chunk.model_dump())
-                scratchpad.agent_response = (scratchpad.agent_response or "") + action_str
-                scratchpad.action_str = action_str
-                scratchpad.action = chunk
-
-                yield self._create_text_chunk(json.dumps(chunk.model_dump()), current_messages)
-            else:
-                # Text chunk
-                chunk_text = str(chunk)
-                scratchpad.agent_response = (scratchpad.agent_response or "") + chunk_text
-                scratchpad.thought = (scratchpad.thought or "") + chunk_text
-
-                yield self._create_text_chunk(chunk_text, current_messages)
-
-        # Update usage
-        if usage_dict.get("usage"):
-            if llm_usage.get("usage"):
-                self._accumulate_usage(llm_usage, usage_dict["usage"])
-            else:
-                llm_usage["usage"] = usage_dict["usage"]
-
-        # Clean up thought
-        scratchpad.thought = (scratchpad.thought or "").strip() or "I am thinking about how to help you"
-
-        # Finish model log
-        yield self._finish_log(
-            model_log,
-            data={
-                "thought": scratchpad.thought,
-                "action": scratchpad.action_str if scratchpad.action else None,
-            },
-            usage=llm_usage.get("usage"),
-        )
-
-        return scratchpad, finish_reason
-
-    def _handle_tool_call(
-        self,
-        action: AgentScratchpadUnit.Action,
-        prompt_messages: list[PromptMessage],
-        round_log: AgentLog,
-    ) -> Generator[AgentLog, None, tuple[str, list[File]]]:
-        """Handle tool call and return observation with files."""
-        tool_name = action.action_name
-        tool_args: dict[str, Any] | str = action.action_input
-
-        # Find tool instance first to get metadata
-        tool_instance = self._find_tool_by_name(tool_name)
-        tool_metadata = self._get_tool_metadata(tool_instance) if tool_instance else {}
-
-        # Start tool log with tool metadata
-        tool_log = self._create_log(
-            label=f"CALL {tool_name}",
-            log_type=AgentLog.LogType.TOOL_CALL,
-            status=AgentLog.LogStatus.START,
-            data={
-                "tool_name": tool_name,
-                "tool_args": tool_args,
-            },
-            parent_id=round_log.id,
-            extra_metadata=tool_metadata,
-        )
-        yield tool_log
-
-        if not tool_instance:
-            # Finish tool log with error
-            yield self._finish_log(
-                tool_log,
-                data={
-                    **tool_log.data,
-                    "error": f"Tool {tool_name} not found",
-                },
-            )
-            return f"Tool {tool_name} not found", []
-
-        # Ensure tool_args is a dict
-        tool_args_dict: dict[str, Any]
-        if isinstance(tool_args, str):
-            try:
-                tool_args_dict = json.loads(tool_args)
-            except json.JSONDecodeError:
-                tool_args_dict = {"input": tool_args}
-        elif not isinstance(tool_args, dict):
-            tool_args_dict = {"input": str(tool_args)}
-        else:
-            tool_args_dict = tool_args
-
-        # Invoke tool using base class method with error handling
-        try:
-            response_content, tool_files, tool_invoke_meta = self._invoke_tool(tool_instance, tool_args_dict, tool_name)
-
-            # Finish tool log
-            yield self._finish_log(
-                tool_log,
-                data={
-                    **tool_log.data,
-                    "output": response_content,
-                    "files": len(tool_files),
-                    "meta": tool_invoke_meta.to_dict() if tool_invoke_meta else None,
-                },
-            )
-
-            return response_content or "Tool executed successfully", tool_files
-        except Exception as e:
-            # Tool invocation failed, yield error log
-            error_message = str(e)
-            tool_log.status = AgentLog.LogStatus.ERROR
-            tool_log.error = error_message
-            tool_log.data = {
-                **tool_log.data,
-                "error": error_message,
-            }
-            yield tool_log
-
-            return f"Tool execution failed: {error_message}", []
--- a/api/core/agent/patterns/strategy_factory.py
+++ b/api/core/agent/patterns/strategy_factory.py
@@ -1,107 +0,0 @@
-"""Strategy factory for creating agent strategies."""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from core.agent.entities import AgentEntity, ExecutionContext
-from core.file.models import File
-from core.model_manager import ModelInstance
-from core.model_runtime.entities.model_entities import ModelFeature
-
-from .base import AgentPattern, ToolInvokeHook
-from .function_call import FunctionCallStrategy
-from .react import ReActStrategy
-
-if TYPE_CHECKING:
-    from core.tools.__base.tool import Tool
-
-
-class StrategyFactory:
-    """Factory for creating agent strategies based on model features."""
-
-    # Tool calling related features
-    TOOL_CALL_FEATURES = {ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL}
-
-    @staticmethod
-    def create_strategy(
-        model_features: list[ModelFeature],
-        model_instance: ModelInstance,
-        context: ExecutionContext,
-        tools: list[Tool],
-        files: list[File],
-        max_iterations: int = 10,
-        workflow_call_depth: int = 0,
-        agent_strategy: AgentEntity.Strategy | None = None,
-        tool_invoke_hook: ToolInvokeHook | None = None,
-        instruction: str = "",
-    ) -> AgentPattern:
-        """
-        Create an appropriate strategy based on model features.
-
-        Args:
-            model_features: List of model features/capabilities
-            model_instance: Model instance to use
-            context: Execution context containing trace/audit information
-            tools: Available tools
-            files: Available files
-            max_iterations: Maximum iterations for the strategy
-            workflow_call_depth: Depth of workflow calls
-            agent_strategy: Optional explicit strategy override
-            tool_invoke_hook: Optional hook for custom tool invocation (e.g., agent_invoke)
-            instruction: Optional instruction for ReAct strategy
-
-        Returns:
-            AgentStrategy instance
-        """
-        # If explicit strategy is provided and it's Function Calling, try to use it if supported
-        if agent_strategy == AgentEntity.Strategy.FUNCTION_CALLING:
-            if set(model_features) & StrategyFactory.TOOL_CALL_FEATURES:
-                return FunctionCallStrategy(
-                    model_instance=model_instance,
-                    context=context,
-                    tools=tools,
-                    files=files,
-                    max_iterations=max_iterations,
-                    workflow_call_depth=workflow_call_depth,
-                    tool_invoke_hook=tool_invoke_hook,
-                )
-            # Fallback to ReAct if FC is requested but not supported
-
-        # If explicit strategy is Chain of Thought (ReAct)
-        if agent_strategy == AgentEntity.Strategy.CHAIN_OF_THOUGHT:
-            return ReActStrategy(
-                model_instance=model_instance,
-                context=context,
-                tools=tools,
-                files=files,
-                max_iterations=max_iterations,
-                workflow_call_depth=workflow_call_depth,
-                tool_invoke_hook=tool_invoke_hook,
-                instruction=instruction,
-            )
-
-        # Default auto-selection logic
-        if set(model_features) & StrategyFactory.TOOL_CALL_FEATURES:
-            # Model supports native function calling
-            return FunctionCallStrategy(
-                model_instance=model_instance,
-                context=context,
-                tools=tools,
-                files=files,
-                max_iterations=max_iterations,
-                workflow_call_depth=workflow_call_depth,
-                tool_invoke_hook=tool_invoke_hook,
-            )
-        else:
-            # Use ReAct strategy for models without function calling
-            return ReActStrategy(
-                model_instance=model_instance,
-                context=context,
-                tools=tools,
-                files=files,
-                max_iterations=max_iterations,
-                workflow_call_depth=workflow_call_depth,
-                tool_invoke_hook=tool_invoke_hook,
-                instruction=instruction,
-            )
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@@ -4,7 +4,6 @@ import re
 import time
 from collections.abc import Callable, Generator, Mapping
 from contextlib import contextmanager
-from dataclasses import dataclass, field
 from threading import Thread
 from typing import Any, Union

@@ -20,7 +19,6 @@ from core.app.entities.app_invoke_entities import (
    InvokeFrom,
 )
 from core.app.entities.queue_entities import (
-    ChunkType,
    MessageQueueMessage,
    QueueAdvancedChatMessageEndEvent,
    QueueAgentLogEvent,
@@ -72,122 +70,13 @@ from core.workflow.runtime import GraphRuntimeState
 from core.workflow.system_variable import SystemVariable
 from extensions.ext_database import db
 from libs.datetime_utils import naive_utc_now
-from models import Account, Conversation, EndUser, LLMGenerationDetail, Message, MessageFile
+from models import Account, Conversation, EndUser, Message, MessageFile
 from models.enums import CreatorUserRole
 from models.workflow import Workflow

 logger = logging.getLogger(__name__)


-@dataclass
-class StreamEventBuffer:
-    """
-    Buffer for recording stream events in order to reconstruct the generation sequence.
-    Records the exact order of text chunks, thoughts, and tool calls as they stream.
-    """
-
-    # Accumulated reasoning content (each thought block is a separate element)
-    reasoning_content: list[str] = field(default_factory=list)
-    # Current reasoning buffer (accumulates until we see a different event type)
-    _current_reasoning: str = ""
-    # Tool calls with their details
-    tool_calls: list[dict] = field(default_factory=list)
-    # Tool call ID to index mapping for updating results
-    _tool_call_id_map: dict[str, int] = field(default_factory=dict)
-    # Sequence of events in stream order
-    sequence: list[dict] = field(default_factory=list)
-    # Current position in answer text
-    _content_position: int = 0
-    # Track last event type to detect transitions
-    _last_event_type: str | None = None
-
-    def _flush_current_reasoning(self) -> None:
-        """Flush accumulated reasoning to the list and add to sequence."""
-        if self._current_reasoning.strip():
-            self.reasoning_content.append(self._current_reasoning.strip())
-            self.sequence.append({"type": "reasoning", "index": len(self.reasoning_content) - 1})
-            self._current_reasoning = ""
-
-    def record_text_chunk(self, text: str) -> None:
-        """Record a text chunk event."""
-        if not text:
-            return
-
-        # Flush any pending reasoning first
-        if self._last_event_type == "thought":
-            self._flush_current_reasoning()
-
-        text_len = len(text)
-        start_pos = self._content_position
-
-        # If last event was also content, extend it; otherwise create new
-        if self.sequence and self.sequence[-1].get("type") == "content":
-            self.sequence[-1]["end"] = start_pos + text_len
-        else:
-            self.sequence.append({"type": "content", "start": start_pos, "end": start_pos + text_len})
-
-        self._content_position += text_len
-        self._last_event_type = "content"
-
-    def record_thought_chunk(self, text: str) -> None:
-        """Record a thought/reasoning chunk event."""
-        if not text:
-            return
-
-        # Accumulate thought content
-        self._current_reasoning += text
-        self._last_event_type = "thought"
-
-    def record_tool_call(self, tool_call_id: str, tool_name: str, tool_arguments: str) -> None:
-        """Record a tool call event."""
-        if not tool_call_id:
-            return
-
-        # Flush any pending reasoning first
-        if self._last_event_type == "thought":
-            self._flush_current_reasoning()
-
-        # Check if this tool call already exists (we might get multiple chunks)
-        if tool_call_id in self._tool_call_id_map:
-            idx = self._tool_call_id_map[tool_call_id]
-            # Update arguments if provided
-            if tool_arguments:
-                self.tool_calls[idx]["arguments"] = tool_arguments
-        else:
-            # New tool call
-            tool_call = {
-                "id": tool_call_id or "",
-                "name": tool_name or "",
-                "arguments": tool_arguments or "",
-                "result": "",
-                "elapsed_time": None,
-            }
-            self.tool_calls.append(tool_call)
-            idx = len(self.tool_calls) - 1
-            self._tool_call_id_map[tool_call_id] = idx
-            self.sequence.append({"type": "tool_call", "index": idx})
-
-        self._last_event_type = "tool_call"
-
-    def record_tool_result(self, tool_call_id: str, result: str, tool_elapsed_time: float | None = None) -> None:
-        """Record a tool result event (update existing tool call)."""
-        if not tool_call_id:
-            return
-        if tool_call_id in self._tool_call_id_map:
-            idx = self._tool_call_id_map[tool_call_id]
-            self.tool_calls[idx]["result"] = result
-            self.tool_calls[idx]["elapsed_time"] = tool_elapsed_time
-
-    def finalize(self) -> None:
-        """Finalize the buffer, flushing any pending data."""
-        if self._last_event_type == "thought":
-            self._flush_current_reasoning()
-
-    def has_data(self) -> bool:
-        """Check if there's any meaningful data recorded."""
-        return bool(self.reasoning_content or self.tool_calls or self.sequence)
-
-
 class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
    """
    AdvancedChatAppGenerateTaskPipeline is a class that generate stream output and state management for Application.
@@ -255,8 +144,6 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
        self._workflow_run_id: str = ""
        self._draft_var_saver_factory = draft_var_saver_factory
        self._graph_runtime_state: GraphRuntimeState | None = None
-        # Stream event buffer for recording generation sequence
-        self._stream_buffer = StreamEventBuffer()
        self._seed_graph_runtime_state_from_queue_manager()

    def process(self) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
@@ -496,7 +383,7 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
        queue_message: Union[WorkflowQueueMessage, MessageQueueMessage] | None = None,
        **kwargs,
    ) -> Generator[StreamResponse, None, None]:
-        """Handle text chunk events and record to stream buffer for sequence reconstruction."""
+        """Handle text chunk events."""
        delta_text = event.text
        if delta_text is None:
            return
@@ -518,52 +405,9 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
        if tts_publisher and queue_message:
            tts_publisher.publish(queue_message)

-        tool_call = event.tool_call
-        tool_result = event.tool_result
-        tool_payload = tool_call or tool_result
-        tool_call_id = tool_payload.id if tool_payload and tool_payload.id else ""
-        tool_name = tool_payload.name if tool_payload and tool_payload.name else ""
-        tool_arguments = tool_call.arguments if tool_call and tool_call.arguments else ""
-        tool_files = tool_result.files if tool_result else []
-        tool_elapsed_time = tool_result.elapsed_time if tool_result else None
-        tool_icon = tool_payload.icon if tool_payload else None
-        tool_icon_dark = tool_payload.icon_dark if tool_payload else None
-        # Record stream event based on chunk type
-        chunk_type = event.chunk_type or ChunkType.TEXT
-        match chunk_type:
-            case ChunkType.TEXT:
-                self._stream_buffer.record_text_chunk(delta_text)
-                self._task_state.answer += delta_text
-            case ChunkType.THOUGHT:
-                # Reasoning should not be part of final answer text
-                self._stream_buffer.record_thought_chunk(delta_text)
-            case ChunkType.TOOL_CALL:
-                self._stream_buffer.record_tool_call(
-                    tool_call_id=tool_call_id,
-                    tool_name=tool_name,
-                    tool_arguments=tool_arguments,
-                )
-            case ChunkType.TOOL_RESULT:
-                self._stream_buffer.record_tool_result(
-                    tool_call_id=tool_call_id,
-                    result=delta_text,
-                    tool_elapsed_time=tool_elapsed_time,
-                )
-                self._task_state.answer += delta_text
-            case _:
-                pass
+        self._task_state.answer += delta_text
        yield self._message_cycle_manager.message_to_stream_response(
-            answer=delta_text,
-            message_id=self._message_id,
-            from_variable_selector=event.from_variable_selector,
-            chunk_type=event.chunk_type.value if event.chunk_type else None,
-            tool_call_id=tool_call_id or None,
-            tool_name=tool_name or None,
-            tool_arguments=tool_arguments or None,
-            tool_files=tool_files,
-            tool_elapsed_time=tool_elapsed_time,
-            tool_icon=tool_icon,
-            tool_icon_dark=tool_icon_dark,
+            answer=delta_text, message_id=self._message_id, from_variable_selector=event.from_variable_selector
        )

    def _handle_iteration_start_event(
@@ -931,7 +775,6 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):

        # If there are assistant files, remove markdown image links from answer
        answer_text = self._task_state.answer
-        answer_text = self._strip_think_blocks(answer_text)
        if self._recorded_files:
            # Remove markdown image links since we're storing files separately
            answer_text = re.sub(r"!\[.*?\]\(.*?\)", "", answer_text).strip()
@@ -983,54 +826,6 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
        ]
        session.add_all(message_files)

-        # Save generation detail (reasoning/tool calls/sequence) from stream buffer
-        self._save_generation_detail(session=session, message=message)
-
-    @staticmethod
-    def _strip_think_blocks(text: str) -> str:
-        """Remove <think>...</think> blocks (including their content) from text."""
-        if not text or "<think" not in text.lower():
-            return text
-
-        clean_text = re.sub(r"<think[^>]*>.*?</think>", "", text, flags=re.IGNORECASE | re.DOTALL)
-        clean_text = re.sub(r"\n\s*\n", "\n\n", clean_text).strip()
-        return clean_text
-
-    def _save_generation_detail(self, *, session: Session, message: Message) -> None:
-        """
-        Save LLM generation detail for Chatflow using stream event buffer.
-        The buffer records the exact order of events as they streamed,
-        allowing accurate reconstruction of the generation sequence.
-        """
-        # Finalize the stream buffer to flush any pending data
-        self._stream_buffer.finalize()
-
-        # Only save if there's meaningful data
-        if not self._stream_buffer.has_data():
-            return
-
-        reasoning_content = self._stream_buffer.reasoning_content
-        tool_calls = self._stream_buffer.tool_calls
-        sequence = self._stream_buffer.sequence
-
-        # Check if generation detail already exists for this message
-        existing = session.query(LLMGenerationDetail).filter_by(message_id=message.id).first()
-
-        if existing:
-            existing.reasoning_content = json.dumps(reasoning_content) if reasoning_content else None
-            existing.tool_calls = json.dumps(tool_calls) if tool_calls else None
-            existing.sequence = json.dumps(sequence) if sequence else None
-        else:
-            generation_detail = LLMGenerationDetail(
-                tenant_id=self._application_generate_entity.app_config.tenant_id,
-                app_id=self._application_generate_entity.app_config.app_id,
-                message_id=message.id,
-                reasoning_content=json.dumps(reasoning_content) if reasoning_content else None,
-                tool_calls=json.dumps(tool_calls) if tool_calls else None,
-                sequence=json.dumps(sequence) if sequence else None,
-            )
-            session.add(generation_detail)
-
    def _seed_graph_runtime_state_from_queue_manager(self) -> None:
        """Bootstrap the cached runtime state from the queue manager when present."""
        candidate = self._base_task_pipeline.queue_manager.graph_runtime_state
--- a/api/core/app/apps/agent_chat/app_runner.py
+++ b/api/core/app/apps/agent_chat/app_runner.py
@@ -3,8 +3,10 @@ from typing import cast

 from sqlalchemy import select

-from core.agent.agent_app_runner import AgentAppRunner
+from core.agent.cot_chat_agent_runner import CotChatAgentRunner
+from core.agent.cot_completion_agent_runner import CotCompletionAgentRunner
 from core.agent.entities import AgentEntity
+from core.agent.fc_agent_runner import FunctionCallAgentRunner
 from core.app.apps.agent_chat.app_config_manager import AgentChatAppConfig
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.apps.base_app_runner import AppRunner
@@ -12,7 +14,8 @@ from core.app.entities.app_invoke_entities import AgentChatAppGenerateEntity
 from core.app.entities.queue_entities import QueueAnnotationReplyEvent
 from core.memory.token_buffer_memory import TokenBufferMemory
 from core.model_manager import ModelInstance
-from core.model_runtime.entities.model_entities import ModelFeature
+from core.model_runtime.entities.llm_entities import LLMMode
+from core.model_runtime.entities.model_entities import ModelFeature, ModelPropertyKey
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.moderation.base import ModerationError
 from extensions.ext_database import db
@@ -191,7 +194,22 @@ class AgentChatAppRunner(AppRunner):
            raise ValueError("Message not found")
        db.session.close()

-        runner = AgentAppRunner(
+        runner_cls: type[FunctionCallAgentRunner] | type[CotChatAgentRunner] | type[CotCompletionAgentRunner]
+        # start agent runner
+        if agent_entity.strategy == AgentEntity.Strategy.CHAIN_OF_THOUGHT:
+            # check LLM mode
+            if model_schema.model_properties.get(ModelPropertyKey.MODE) == LLMMode.CHAT:
+                runner_cls = CotChatAgentRunner
+            elif model_schema.model_properties.get(ModelPropertyKey.MODE) == LLMMode.COMPLETION:
+                runner_cls = CotCompletionAgentRunner
+            else:
+                raise ValueError(f"Invalid LLM mode: {model_schema.model_properties.get(ModelPropertyKey.MODE)}")
+        elif agent_entity.strategy == AgentEntity.Strategy.FUNCTION_CALLING:
+            runner_cls = FunctionCallAgentRunner
+        else:
+            raise ValueError(f"Invalid agent strategy: {agent_entity.strategy}")
+
+        runner = runner_cls(
            tenant_id=app_config.tenant_id,
            application_generate_entity=application_generate_entity,
            conversation=conversation_result,
--- a/api/core/app/apps/common/workflow_response_converter.py
+++ b/api/core/app/apps/common/workflow_response_converter.py
@@ -671,7 +671,7 @@ class WorkflowResponseConverter:
            task_id=task_id,
            data=AgentLogStreamResponse.Data(
                node_execution_id=event.node_execution_id,
-                message_id=event.id,
+                id=event.id,
                parent_id=event.parent_id,
                label=event.label,
                error=event.error,
--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@@ -13,7 +13,6 @@ from core.app.apps.common.workflow_response_converter import WorkflowResponseCon
 from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity
 from core.app.entities.queue_entities import (
    AppQueueEvent,
-    ChunkType,
    MessageQueueMessage,
    QueueAgentLogEvent,
    QueueErrorEvent,
@@ -484,33 +483,11 @@ class WorkflowAppGenerateTaskPipeline(GraphRuntimeStateSupport):
        if delta_text is None:
            return

-        tool_call = event.tool_call
-        tool_result = event.tool_result
-        tool_payload = tool_call or tool_result
-        tool_call_id = tool_payload.id if tool_payload and tool_payload.id else None
-        tool_name = tool_payload.name if tool_payload and tool_payload.name else None
-        tool_arguments = tool_call.arguments if tool_call else None
-        tool_elapsed_time = tool_result.elapsed_time if tool_result else None
-        tool_files = tool_result.files if tool_result else []
-        tool_icon = tool_payload.icon if tool_payload else None
-        tool_icon_dark = tool_payload.icon_dark if tool_payload else None
-
        # only publish tts message at text chunk streaming
        if tts_publisher and queue_message:
            tts_publisher.publish(queue_message)

-        yield self._text_chunk_to_stream_response(
-            text=delta_text,
-            from_variable_selector=event.from_variable_selector,
-            chunk_type=event.chunk_type,
-            tool_call_id=tool_call_id,
-            tool_name=tool_name,
-            tool_arguments=tool_arguments,
-            tool_files=tool_files,
-            tool_elapsed_time=tool_elapsed_time,
-            tool_icon=tool_icon,
-            tool_icon_dark=tool_icon_dark,
-        )
+        yield self._text_chunk_to_stream_response(delta_text, from_variable_selector=event.from_variable_selector)

    def _handle_agent_log_event(self, event: QueueAgentLogEvent, **kwargs) -> Generator[StreamResponse, None, None]:
        """Handle agent log events."""
@@ -673,61 +650,16 @@ class WorkflowAppGenerateTaskPipeline(GraphRuntimeStateSupport):
        session.add(workflow_app_log)

    def _text_chunk_to_stream_response(
-        self,
-        text: str,
-        from_variable_selector: list[str] | None = None,
-        chunk_type: ChunkType | None = None,
-        tool_call_id: str | None = None,
-        tool_name: str | None = None,
-        tool_arguments: str | None = None,
-        tool_files: list[str] | None = None,
-        tool_error: str | None = None,
-        tool_elapsed_time: float | None = None,
-        tool_icon: str | dict | None = None,
-        tool_icon_dark: str | dict | None = None,
+        self, text: str, from_variable_selector: list[str] | None = None
    ) -> TextChunkStreamResponse:
        """
        Handle completed event.
        :param text: text
        :return:
        """
-        from core.app.entities.task_entities import ChunkType as ResponseChunkType
-
-        response_chunk_type = ResponseChunkType(chunk_type.value) if chunk_type else ResponseChunkType.TEXT
-
-        data = TextChunkStreamResponse.Data(
-            text=text,
-            from_variable_selector=from_variable_selector,
-            chunk_type=response_chunk_type,
-        )
-
-        if response_chunk_type == ResponseChunkType.TOOL_CALL:
-            data = data.model_copy(
-                update={
-                    "tool_call_id": tool_call_id,
-                    "tool_name": tool_name,
-                    "tool_arguments": tool_arguments,
-                    "tool_icon": tool_icon,
-                    "tool_icon_dark": tool_icon_dark,
-                }
-            )
-        elif response_chunk_type == ResponseChunkType.TOOL_RESULT:
-            data = data.model_copy(
-                update={
-                    "tool_call_id": tool_call_id,
-                    "tool_name": tool_name,
-                    "tool_arguments": tool_arguments,
-                    "tool_files": tool_files,
-                    "tool_error": tool_error,
-                    "tool_elapsed_time": tool_elapsed_time,
-                    "tool_icon": tool_icon,
-                    "tool_icon_dark": tool_icon_dark,
-                }
-            )
-
        response = TextChunkStreamResponse(
            task_id=self._application_generate_entity.task_id,
-            data=data,
+            data=TextChunkStreamResponse.Data(text=text, from_variable_selector=from_variable_selector),
        )

        return response
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@@ -463,20 +463,12 @@ class WorkflowBasedAppRunner:
                )
            )
        elif isinstance(event, NodeRunStreamChunkEvent):
-            from core.app.entities.queue_entities import ChunkType as QueueChunkType
-
-            if event.is_final and not event.chunk:
-                return
-
            self._publish_event(
                QueueTextChunkEvent(
                    text=event.chunk,
                    from_variable_selector=list(event.selector),
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
-                    chunk_type=QueueChunkType(event.chunk_type.value),
-                    tool_call=event.tool_call,
-                    tool_result=event.tool_result,
                )
            )
        elif isinstance(event, NodeRunRetrieverResourceEvent):
--- a/api/core/app/entities/llm_generation_entities.py
+++ b/api/core/app/entities/llm_generation_entities.py
@@ -1,70 +0,0 @@
-"""
-LLM Generation Detail entities.
-
-Defines the structure for storing and transmitting LLM generation details
-including reasoning content, tool calls, and their sequence.
-"""
-
-from typing import Literal
-
-from pydantic import BaseModel, Field
-
-
-class ContentSegment(BaseModel):
-    """Represents a content segment in the generation sequence."""
-
-    type: Literal["content"] = "content"
-    start: int = Field(..., description="Start position in the text")
-    end: int = Field(..., description="End position in the text")
-
-
-class ReasoningSegment(BaseModel):
-    """Represents a reasoning segment in the generation sequence."""
-
-    type: Literal["reasoning"] = "reasoning"
-    index: int = Field(..., description="Index into reasoning_content array")
-
-
-class ToolCallSegment(BaseModel):
-    """Represents a tool call segment in the generation sequence."""
-
-    type: Literal["tool_call"] = "tool_call"
-    index: int = Field(..., description="Index into tool_calls array")
-
-
-SequenceSegment = ContentSegment | ReasoningSegment | ToolCallSegment
-
-
-class ToolCallDetail(BaseModel):
-    """Represents a tool call with its arguments and result."""
-
-    id: str = Field(default="", description="Unique identifier for the tool call")
-    name: str = Field(..., description="Name of the tool")
-    arguments: str = Field(default="", description="JSON string of tool arguments")
-    result: str = Field(default="", description="Result from the tool execution")
-    elapsed_time: float | None = Field(default=None, description="Elapsed time in seconds")
-
-
-class LLMGenerationDetailData(BaseModel):
-    """
-    Domain model for LLM generation detail.
-
-    Contains the structured data for reasoning content, tool calls,
-    and their display sequence.
-    """
-
-    reasoning_content: list[str] = Field(default_factory=list, description="List of reasoning segments")
-    tool_calls: list[ToolCallDetail] = Field(default_factory=list, description="List of tool call details")
-    sequence: list[SequenceSegment] = Field(default_factory=list, description="Display order of segments")
-
-    def is_empty(self) -> bool:
-        """Check if there's any meaningful generation detail."""
-        return not self.reasoning_content and not self.tool_calls
-
-    def to_response_dict(self) -> dict:
-        """Convert to dictionary for API response."""
-        return {
-            "reasoning_content": self.reasoning_content,
-            "tool_calls": [tc.model_dump() for tc in self.tool_calls],
-            "sequence": [seg.model_dump() for seg in self.sequence],
-        }
--- a/api/core/app/entities/queue_entities.py
+++ b/api/core/app/entities/queue_entities.py
@@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field

 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
-from core.workflow.entities import AgentNodeStrategyInit, ToolCall, ToolResult
+from core.workflow.entities import AgentNodeStrategyInit
 from core.workflow.enums import WorkflowNodeExecutionMetadataKey
 from core.workflow.nodes import NodeType

@@ -177,17 +177,6 @@ class QueueLoopCompletedEvent(AppQueueEvent):
    error: str | None = None


-class ChunkType(StrEnum):
-    """Stream chunk type for LLM-related events."""
-
-    TEXT = "text"  # Normal text streaming
-    TOOL_CALL = "tool_call"  # Tool call arguments streaming
-    TOOL_RESULT = "tool_result"  # Tool execution result
-    THOUGHT = "thought"  # Agent thinking process (ReAct)
-    THOUGHT_START = "thought_start"  # Agent thought start
-    THOUGHT_END = "thought_end"  # Agent thought end
-
-
 class QueueTextChunkEvent(AppQueueEvent):
    """
    QueueTextChunkEvent entity
@@ -202,16 +191,6 @@ class QueueTextChunkEvent(AppQueueEvent):
    in_loop_id: str | None = None
    """loop id if node is in loop"""

-    # Extended fields for Agent/Tool streaming
-    chunk_type: ChunkType = ChunkType.TEXT
-    """type of the chunk"""
-
-    # Tool streaming payloads
-    tool_call: ToolCall | None = None
-    """structured tool call info"""
-    tool_result: ToolResult | None = None
-    """structured tool result info"""
-

 class QueueAgentMessageEvent(AppQueueEvent):
    """
--- a/api/core/app/entities/task_entities.py
+++ b/api/core/app/entities/task_entities.py
@@ -113,38 +113,6 @@ class MessageStreamResponse(StreamResponse):
    answer: str
    from_variable_selector: list[str] | None = None

-    # Extended fields for Agent/Tool streaming (imported at runtime to avoid circular import)
-    chunk_type: str | None = None
-    """type of the chunk: text, tool_call, tool_result, thought"""
-
-    # Tool call fields (when chunk_type == "tool_call")
-    tool_call_id: str | None = None
-    """unique identifier for this tool call"""
-    tool_name: str | None = None
-    """name of the tool being called"""
-    tool_arguments: str | None = None
-    """accumulated tool arguments JSON"""
-
-    # Tool result fields (when chunk_type == "tool_result")
-    tool_files: list[str] | None = None
-    """file IDs produced by tool"""
-    tool_error: str | None = None
-    """error message if tool failed"""
-    tool_elapsed_time: float | None = None
-    """elapsed time spent executing the tool"""
-    tool_icon: str | dict | None = None
-    """icon of the tool"""
-    tool_icon_dark: str | dict | None = None
-    """dark theme icon of the tool"""
-
-    def model_dump(self, *args, **kwargs) -> dict[str, object]:
-        kwargs.setdefault("exclude_none", True)
-        return super().model_dump(*args, **kwargs)
-
-    def model_dump_json(self, *args, **kwargs) -> str:
-        kwargs.setdefault("exclude_none", True)
-        return super().model_dump_json(*args, **kwargs)
-

 class MessageAudioStreamResponse(StreamResponse):
    """
@@ -614,17 +582,6 @@ class LoopNodeCompletedStreamResponse(StreamResponse):
    data: Data


-class ChunkType(StrEnum):
-    """Stream chunk type for LLM-related events."""
-
-    TEXT = "text"  # Normal text streaming
-    TOOL_CALL = "tool_call"  # Tool call arguments streaming
-    TOOL_RESULT = "tool_result"  # Tool execution result
-    THOUGHT = "thought"  # Agent thinking process (ReAct)
-    THOUGHT_START = "thought_start"  # Agent thought start
-    THOUGHT_END = "thought_end"  # Agent thought end
-
-
 class TextChunkStreamResponse(StreamResponse):
    """
    TextChunkStreamResponse entity
@@ -638,36 +595,6 @@ class TextChunkStreamResponse(StreamResponse):
        text: str
        from_variable_selector: list[str] | None = None

-        # Extended fields for Agent/Tool streaming
-        chunk_type: ChunkType = ChunkType.TEXT
-        """type of the chunk"""
-
-        # Tool call fields (when chunk_type == TOOL_CALL)
-        tool_call_id: str | None = None
-        """unique identifier for this tool call"""
-        tool_name: str | None = None
-        """name of the tool being called"""
-        tool_arguments: str | None = None
-        """accumulated tool arguments JSON"""
-
-        # Tool result fields (when chunk_type == TOOL_RESULT)
-        tool_files: list[str] | None = None
-        """file IDs produced by tool"""
-        tool_error: str | None = None
-        """error message if tool failed"""
-
-        # Tool elapsed time fields (when chunk_type == TOOL_RESULT)
-        tool_elapsed_time: float | None = None
-        """elapsed time spent executing the tool"""
-
-        def model_dump(self, *args, **kwargs) -> dict[str, object]:
-            kwargs.setdefault("exclude_none", True)
-            return super().model_dump(*args, **kwargs)
-
-        def model_dump_json(self, *args, **kwargs) -> str:
-            kwargs.setdefault("exclude_none", True)
-            return super().model_dump_json(*args, **kwargs)
-
    event: StreamEvent = StreamEvent.TEXT_CHUNK
    data: Data

@@ -816,7 +743,7 @@ class AgentLogStreamResponse(StreamResponse):
        """

        node_execution_id: str
-        message_id: str
+        id: str
        label: str
        parent_id: str | None = None
        error: str | None = None
--- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
+++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
@@ -1,5 +1,4 @@
 import logging
-import re
 import time
 from collections.abc import Generator
 from threading import Thread
@@ -59,7 +58,7 @@ from core.prompt.utils.prompt_template_parser import PromptTemplateParser
 from events.message_event import message_was_created
 from extensions.ext_database import db
 from libs.datetime_utils import naive_utc_now
-from models.model import AppMode, Conversation, LLMGenerationDetail, Message, MessageAgentThought
+from models.model import AppMode, Conversation, Message, MessageAgentThought

 logger = logging.getLogger(__name__)

@@ -69,8 +68,6 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
    EasyUIBasedGenerateTaskPipeline is a class that generate stream output and state management for Application.
    """

-    _THINK_PATTERN = re.compile(r"<think[^>]*>(.*?)</think>", re.IGNORECASE | re.DOTALL)
-
    _task_state: EasyUITaskState
    _application_generate_entity: Union[ChatAppGenerateEntity, CompletionAppGenerateEntity, AgentChatAppGenerateEntity]

@@ -412,136 +409,11 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
                )
            )

-        # Save LLM generation detail if there's reasoning_content
-        self._save_generation_detail(session=session, message=message, llm_result=llm_result)
-
        message_was_created.send(
            message,
            application_generate_entity=self._application_generate_entity,
        )

-    def _save_generation_detail(self, *, session: Session, message: Message, llm_result: LLMResult) -> None:
-        """
-        Save LLM generation detail for Completion/Chat/Agent-Chat applications.
-        For Agent-Chat, also merges MessageAgentThought records.
-        """
-        import json
-
-        reasoning_list: list[str] = []
-        tool_calls_list: list[dict] = []
-        sequence: list[dict] = []
-        answer = message.answer or ""
-
-        # Check if this is Agent-Chat mode by looking for agent thoughts
-        agent_thoughts = (
-            session.query(MessageAgentThought)
-            .filter_by(message_id=message.id)
-            .order_by(MessageAgentThought.position.asc())
-            .all()
-        )
-
-        if agent_thoughts:
-            # Agent-Chat mode: merge MessageAgentThought records
-            content_pos = 0
-            cleaned_answer_parts: list[str] = []
-            for thought in agent_thoughts:
-                # Add thought/reasoning
-                if thought.thought:
-                    reasoning_text = thought.thought
-                    if "<think" in reasoning_text.lower():
-                        clean_text, extracted_reasoning = self._split_reasoning_from_answer(reasoning_text)
-                        if extracted_reasoning:
-                            reasoning_text = extracted_reasoning
-                            thought.thought = clean_text or extracted_reasoning
-                    reasoning_list.append(reasoning_text)
-                    sequence.append({"type": "reasoning", "index": len(reasoning_list) - 1})
-
-                # Add tool calls
-                if thought.tool:
-                    tool_calls_list.append(
-                        {
-                            "name": thought.tool,
-                            "arguments": thought.tool_input or "",
-                            "result": thought.observation or "",
-                        }
-                    )
-                    sequence.append({"type": "tool_call", "index": len(tool_calls_list) - 1})
-
-                # Add answer content if present
-                if thought.answer:
-                    content_text = thought.answer
-                    if "<think" in content_text.lower():
-                        clean_answer, extracted_reasoning = self._split_reasoning_from_answer(content_text)
-                        if extracted_reasoning:
-                            reasoning_list.append(extracted_reasoning)
-                            sequence.append({"type": "reasoning", "index": len(reasoning_list) - 1})
-                        content_text = clean_answer
-                        thought.answer = clean_answer or content_text
-
-                    if content_text:
-                        start = content_pos
-                        end = content_pos + len(content_text)
-                        sequence.append({"type": "content", "start": start, "end": end})
-                        content_pos = end
-                        cleaned_answer_parts.append(content_text)
-
-            if cleaned_answer_parts:
-                merged_answer = "".join(cleaned_answer_parts)
-                message.answer = merged_answer
-                llm_result.message.content = merged_answer
-        else:
-            # Completion/Chat mode: use reasoning_content from llm_result
-            reasoning_content = llm_result.reasoning_content
-            if not reasoning_content and answer:
-                # Extract reasoning from <think> blocks and clean the final answer
-                clean_answer, reasoning_content = self._split_reasoning_from_answer(answer)
-                if reasoning_content:
-                    answer = clean_answer
-                    llm_result.message.content = clean_answer
-                    llm_result.reasoning_content = reasoning_content
-                    message.answer = clean_answer
-            if reasoning_content:
-                reasoning_list = [reasoning_content]
-                # Content comes first, then reasoning
-                if answer:
-                    sequence.append({"type": "content", "start": 0, "end": len(answer)})
-                sequence.append({"type": "reasoning", "index": 0})
-
-        # Only save if there's meaningful generation detail
-        if not reasoning_list and not tool_calls_list:
-            return
-
-        # Check if generation detail already exists
-        existing = session.query(LLMGenerationDetail).filter_by(message_id=message.id).first()
-
-        if existing:
-            existing.reasoning_content = json.dumps(reasoning_list) if reasoning_list else None
-            existing.tool_calls = json.dumps(tool_calls_list) if tool_calls_list else None
-            existing.sequence = json.dumps(sequence) if sequence else None
-        else:
-            generation_detail = LLMGenerationDetail(
-                tenant_id=self._application_generate_entity.app_config.tenant_id,
-                app_id=self._application_generate_entity.app_config.app_id,
-                message_id=message.id,
-                reasoning_content=json.dumps(reasoning_list) if reasoning_list else None,
-                tool_calls=json.dumps(tool_calls_list) if tool_calls_list else None,
-                sequence=json.dumps(sequence) if sequence else None,
-            )
-            session.add(generation_detail)
-
-    @classmethod
-    def _split_reasoning_from_answer(cls, text: str) -> tuple[str, str]:
-        """
-        Extract reasoning segments from <think> blocks and return (clean_text, reasoning).
-        """
-        matches = cls._THINK_PATTERN.findall(text)
-        reasoning_content = "\n".join(match.strip() for match in matches) if matches else ""
-
-        clean_text = cls._THINK_PATTERN.sub("", text)
-        clean_text = re.sub(r"\n\s*\n", "\n\n", clean_text).strip()
-
-        return clean_text, reasoning_content or ""
-
    def _handle_stop(self, event: QueueStopEvent):
        """
        Handle stop.
--- a/api/core/app/task_pipeline/message_cycle_manager.py
+++ b/api/core/app/task_pipeline/message_cycle_manager.py
@@ -232,31 +232,15 @@ class MessageCycleManager:
        answer: str,
        message_id: str,
        from_variable_selector: list[str] | None = None,
-        chunk_type: str | None = None,
-        tool_call_id: str | None = None,
-        tool_name: str | None = None,
-        tool_arguments: str | None = None,
-        tool_files: list[str] | None = None,
-        tool_error: str | None = None,
-        tool_elapsed_time: float | None = None,
-        tool_icon: str | dict | None = None,
-        tool_icon_dark: str | dict | None = None,
        event_type: StreamEvent | None = None,
    ) -> MessageStreamResponse:
        """
        Message to stream response.
        :param answer: answer
        :param message_id: message id
-        :param from_variable_selector: from variable selector
-        :param chunk_type: type of the chunk (text, function_call, tool_result, thought)
-        :param tool_call_id: unique identifier for this tool call
-        :param tool_name: name of the tool being called
-        :param tool_arguments: accumulated tool arguments JSON
-        :param tool_files: file IDs produced by tool
-        :param tool_error: error message if tool failed
        :return:
        """
-        response = MessageStreamResponse(
+        return MessageStreamResponse(
            task_id=self._application_generate_entity.task_id,
            id=message_id,
            answer=answer,
@@ -264,35 +248,6 @@ class MessageCycleManager:
            event=event_type or StreamEvent.MESSAGE,
        )

-        if chunk_type:
-            response = response.model_copy(update={"chunk_type": chunk_type})
-
-        if chunk_type == "tool_call":
-            response = response.model_copy(
-                update={
-                    "tool_call_id": tool_call_id,
-                    "tool_name": tool_name,
-                    "tool_arguments": tool_arguments,
-                    "tool_icon": tool_icon,
-                    "tool_icon_dark": tool_icon_dark,
-                }
-            )
-        elif chunk_type == "tool_result":
-            response = response.model_copy(
-                update={
-                    "tool_call_id": tool_call_id,
-                    "tool_name": tool_name,
-                    "tool_arguments": tool_arguments,
-                    "tool_files": tool_files,
-                    "tool_error": tool_error,
-                    "tool_elapsed_time": tool_elapsed_time,
-                    "tool_icon": tool_icon,
-                    "tool_icon_dark": tool_icon_dark,
-                }
-            )
-
-        return response
-
    def message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse:
        """
        Message replace to stream response.
--- a/api/core/callback_handler/index_tool_callback_handler.py
+++ b/api/core/callback_handler/index_tool_callback_handler.py
@@ -5,6 +5,7 @@ from sqlalchemy import select

 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.entities.app_invoke_entities import InvokeFrom
+from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
 from core.rag.index_processor.constant.index_type import IndexStructureType
 from core.rag.models.document import Document
@@ -89,8 +90,6 @@ class DatasetIndexToolCallbackHandler:
    # TODO(-LAN-): Improve type check
    def return_retriever_resource_info(self, resource: Sequence[RetrievalSourceMetadata]):
        """Handle return_retriever_resource_info."""
-        from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
-
        self._queue_manager.publish(
            QueueRetrieverResourcesEvent(retriever_resources=resource), PublishFrom.APPLICATION_MANAGER
        )
--- a/api/core/entities/knowledge_entities.py
+++ b/api/core/entities/knowledge_entities.py
@@ -3,7 +3,6 @@ from pydantic import BaseModel, Field, field_validator

 class PreviewDetail(BaseModel):
    content: str
-    summary: str | None = None
    child_chunks: list[str] | None = None


--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -311,18 +311,14 @@ class IndexingRunner:
        qa_preview_texts: list[QAPreviewDetail] = []

        total_segments = 0
-        # doc_form represents the segmentation method (general, parent-child, QA)
        index_type = doc_form
        index_processor = IndexProcessorFactory(index_type).init_index_processor()
-        # one extract_setting is one source document
        for extract_setting in extract_settings:
            # extract
            processing_rule = DatasetProcessRule(
                mode=tmp_processing_rule["mode"], rules=json.dumps(tmp_processing_rule["rules"])
            )
-            # Extract document content
            text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"])
-            # Cleaning and segmentation
            documents = index_processor.transform(
                text_docs,
                current_user=None,
@@ -365,12 +361,6 @@ class IndexingRunner:

        if doc_form and doc_form == "qa_model":
            return IndexingEstimate(total_segments=total_segments * 20, qa_preview=qa_preview_texts, preview=[])
-
-        # Generate summary preview
-        summary_index_setting = tmp_processing_rule["summary_index_setting"] if "summary_index_setting" in tmp_processing_rule else None
-        if summary_index_setting and summary_index_setting.get('enable') and preview_texts:
-            preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting)
-
        return IndexingEstimate(total_segments=total_segments, preview=preview_texts)

    def _extract(
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@@ -434,6 +434,3 @@ INSTRUCTION_GENERATE_TEMPLATE_PROMPT = """The output of this prompt is not as ex
 You should edit the prompt according to the IDEAL OUTPUT."""

 INSTRUCTION_GENERATE_TEMPLATE_CODE = """Please fix the errors in the {{#error_message#}}."""
-
-DEFAULT_GENERATOR_SUMMARY_PROMPT = """
-You are a helpful assistant that summarizes long pieces of text into concise summaries. Given the following text, generate a brief summary that captures the main points and key information. The summary should be clear, concise, and written in complete sentences. """
--- a/api/core/rag/datasource/retrieval_service.py
+++ b/api/core/rag/datasource/retrieval_service.py
@@ -392,69 +392,6 @@ class RetrievalService:
            records = []
            include_segment_ids = set()
            segment_child_map = {}
-            segment_file_map = {}
-            segment_summary_map = {}  # Map segment_id to summary content
-            summary_segment_ids = set()  # Track segments retrieved via summary
-            with Session(bind=db.engine, expire_on_commit=False) as session:
-                # Process documents
-                for document in documents:
-                    segment_id = None
-                    attachment_info = None
-                    child_chunk = None
-                    document_id = document.metadata.get("document_id")
-                    if document_id not in dataset_documents:
-                        continue
-
-                    dataset_document = dataset_documents[document_id]
-                    if not dataset_document:
-                        continue
-
-                    if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
-                        # Handle parent-child documents
-                        if document.metadata.get("doc_type") == DocType.IMAGE:
-                            attachment_info_dict = cls.get_segment_attachment_info(
-                                dataset_document.dataset_id,
-                                dataset_document.tenant_id,
-                                document.metadata.get("doc_id") or "",
-                                session,
-                            )
-                            if attachment_info_dict:
-                                attachment_info = attachment_info_dict["attachment_info"]
-                                segment_id = attachment_info_dict["segment_id"]
-                        else:
-                            # Check if this is a summary document
-                            is_summary = document.metadata.get("is_summary", False)
-                            if is_summary:
-                                # For summary documents, find the original chunk via original_chunk_id
-                                original_chunk_id = document.metadata.get("original_chunk_id")
-                                if not original_chunk_id:
-                                    continue
-                                segment_id = original_chunk_id
-                                # Track that this segment was retrieved via summary
-                                summary_segment_ids.add(segment_id)
-                            else:
-                                # For normal documents, find by child chunk index_node_id
-                                child_index_node_id = document.metadata.get("doc_id")
-                                child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id == child_index_node_id)
-                                child_chunk = session.scalar(child_chunk_stmt)
-
-                                if not child_chunk:
-                                    continue
-                                segment_id = child_chunk.segment_id
-
-                        if not segment_id:
-                            continue
-
-                        segment = (
-                            session.query(DocumentSegment)
-                            .where(
-                                DocumentSegment.dataset_id == dataset_document.dataset_id,
-                                DocumentSegment.enabled == True,
-                                DocumentSegment.status == "completed",
-                                DocumentSegment.id == segment_id,
-                            )
-                            .first()
-                        )

            valid_dataset_documents = {}
            image_doc_ids: list[Any] = []
@@ -570,47 +507,7 @@ class RetrievalService:
                                max_score = max(
                                    max_score, file_document.metadata.get("score", 0.0) if file_document else 0.0
                                )
-                                segment = session.scalar(document_segment_stmt)
-                                if segment:
-                                    segment_file_map[segment.id] = [attachment_info]
-                        else:
-                            # Check if this is a summary document
-                            is_summary = document.metadata.get("is_summary", False)
-                            if is_summary:
-                                # For summary documents, find the original chunk via original_chunk_id
-                                original_chunk_id = document.metadata.get("original_chunk_id")
-                                if not original_chunk_id:
-                                    continue
-                                # Track that this segment was retrieved via summary
-                                summary_segment_ids.add(original_chunk_id)
-                                document_segment_stmt = select(DocumentSegment).where(
-                                    DocumentSegment.dataset_id == dataset_document.dataset_id,
-                                    DocumentSegment.enabled == True,
-                                    DocumentSegment.status == "completed",
-                                    DocumentSegment.id == original_chunk_id,
-                                )
-                                segment = session.scalar(document_segment_stmt)
-                            else:
-                                # For normal documents, find by index_node_id
-                                index_node_id = document.metadata.get("doc_id")
-                                if not index_node_id:
-                                    continue
-                                document_segment_stmt = select(DocumentSegment).where(
-                                    DocumentSegment.dataset_id == dataset_document.dataset_id,
-                                    DocumentSegment.enabled == True,
-                                    DocumentSegment.status == "completed",
-                                    DocumentSegment.index_node_id == index_node_id,
-                                )
-                                segment = session.scalar(document_segment_stmt)

-                        if not segment:
-                            continue
-                        if segment.id not in include_segment_ids:
-                            include_segment_ids.add(segment.id)
-                            record = {
-                                "segment": segment,
-                                "score": document.metadata.get("score"),  # type: ignore
-                            }
                            map_detail = {
                                "max_score": max_score,
                                "child_chunks": child_chunk_details,
@@ -645,23 +542,6 @@ class RetrievalService:
                if record["segment"].id in attachment_map:
                    record["files"] = attachment_map[record["segment"].id]  # type: ignore[assignment]

-            # Batch query summaries for segments retrieved via summary (only enabled summaries)
-            if summary_segment_ids:
-                from models.dataset import DocumentSegmentSummary
-
-                summaries = (
-                    session.query(DocumentSegmentSummary)
-                    .filter(
-                        DocumentSegmentSummary.chunk_id.in_(summary_segment_ids),
-                        DocumentSegmentSummary.status == "completed",
-                        DocumentSegmentSummary.enabled == True,  # Only retrieve enabled summaries
-                    )
-                    .all()
-                )
-                for summary in summaries:
-                    if summary.summary_content:
-                        segment_summary_map[summary.chunk_id] = summary.summary_content
-
            result: list[RetrievalSegments] = []
            for record in records:
                # Extract segment
@@ -696,16 +576,9 @@ class RetrievalService:
                    else None
                )

-                # Extract summary if this segment was retrieved via summary
-                summary_content = segment_summary_map.get(segment.id)
-
                # Create RetrievalSegments object
                retrieval_segment = RetrievalSegments(
-                    segment=segment,
-                    child_chunks=child_chunks_list,
-                    score=score,
-                    files=files,
-                    summary=summary_content
+                    segment=segment, child_chunks=child_chunks_list, score=score, files=files
                )
                result.append(retrieval_segment)

--- a/api/core/rag/embedding/retrieval.py
+++ b/api/core/rag/embedding/retrieval.py
@@ -20,4 +20,3 @@ class RetrievalSegments(BaseModel):
    child_chunks: list[RetrievalChildChunk] | None = None
    score: float | None = None
    files: list[dict[str, str | int]] | None = None
-    summary: str | None = None  # Summary content if retrieved via summary index
--- a/api/core/rag/index_processor/index_processor_base.py
+++ b/api/core/rag/index_processor/index_processor_base.py
@@ -13,7 +13,6 @@ from urllib.parse import unquote, urlparse
 import httpx

 from configs import dify_config
-from core.entities.knowledge_entities import PreviewDetail
 from core.helper import ssrf_proxy
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.index_processor.constant.doc_type import DocType
@@ -46,15 +45,6 @@ class BaseIndexProcessor(ABC):
    def transform(self, documents: list[Document], current_user: Account | None = None, **kwargs) -> list[Document]:
        raise NotImplementedError

-    @abstractmethod
-    def generate_summary_preview(self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict) -> list[PreviewDetail]:
-        """
-        For each segment in preview_texts, generate a summary using LLM and attach it to the segment.
-        The summary can be stored in a new attribute, e.g., summary.
-        This method should be implemented by subclasses.
-        """
-        raise NotImplementedError
-
    @abstractmethod
    def load(
        self,
--- a/api/core/rag/index_processor/processor/paragraph_index_processor.py
+++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py
@@ -1,13 +1,9 @@
 """Paragraph index processor."""

-import logging
 import uuid
 from collections.abc import Mapping
 from typing import Any

-logger = logging.getLogger(__name__)
-
-from core.entities.knowledge_entities import PreviewDetail
 from core.rag.cleaner.clean_processor import CleanProcessor
 from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.datasource.retrieval_service import RetrievalService
@@ -21,19 +17,12 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor
 from core.rag.models.document import AttachmentDocument, Document, MultimodalGeneralStructureChunk
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from core.tools.utils.text_processing_utils import remove_leading_symbols
-from extensions.ext_database import db
 from libs import helper
 from models.account import Account
-from models.dataset import Dataset, DatasetProcessRule, DocumentSegment
+from models.dataset import Dataset, DatasetProcessRule
 from models.dataset import Document as DatasetDocument
 from services.account_service import AccountService
 from services.entities.knowledge_entities.knowledge_entities import Rule
-from services.summary_index_service import SummaryIndexService
-from core.llm_generator.prompts import DEFAULT_GENERATOR_SUMMARY_PROMPT
-from core.model_runtime.entities.message_entities import UserPromptMessage
-from core.model_runtime.entities.model_entities import ModelType
-from core.provider_manager import ProviderManager
-from core.model_manager import ModelInstance


 class ParagraphIndexProcessor(BaseIndexProcessor):
@@ -119,29 +108,6 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
                keyword.add_texts(documents)

    def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs):
-        # Note: Summary indexes are now disabled (not deleted) when segments are disabled.
-        # This method is called for actual deletion scenarios (e.g., when segment is deleted).
-        # For disable operations, disable_summaries_for_segments is called directly in the task.
-        # Only delete summaries if explicitly requested (e.g., when segment is actually deleted)
-        delete_summaries = kwargs.get("delete_summaries", False)
-        if delete_summaries:
-            if node_ids:
-                # Find segments by index_node_id
-                segments = (
-                    db.session.query(DocumentSegment)
-                    .filter(
-                        DocumentSegment.dataset_id == dataset.id,
-                        DocumentSegment.index_node_id.in_(node_ids),
-                    )
-                    .all()
-                )
-                segment_ids = [segment.id for segment in segments]
-                if segment_ids:
-                    SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids)
-            else:
-                # Delete all summaries for the dataset
-                SummaryIndexService.delete_summaries_for_segments(dataset, None)
-
        if dataset.indexing_technique == "high_quality":
            vector = Vector(dataset)
            if node_ids:
@@ -261,70 +227,3 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
            }
        else:
            raise ValueError("Chunks is not a list")
-
-    def generate_summary_preview(self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict) -> list[PreviewDetail]:
-        """
-        For each segment, concurrently call generate_summary to generate a summary
-        and write it to the summary attribute of PreviewDetail.
-        """
-        import concurrent.futures
-        from flask import current_app
-
-        # Capture Flask app context for worker threads
-        flask_app = None
-        try:
-            flask_app = current_app._get_current_object()  # type: ignore
-        except RuntimeError:
-            logger.warning("No Flask application context available, summary generation may fail")
-
-        def process(preview: PreviewDetail) -> None:
-            """Generate summary for a single preview item."""
-            try:
-                if flask_app:
-                    # Ensure Flask app context in worker thread
-                    with flask_app.app_context():
-                        summary = self.generate_summary(tenant_id, preview.content, summary_index_setting)
-                        preview.summary = summary
-                else:
-                    # Fallback: try without app context (may fail)
-                    summary = self.generate_summary(tenant_id, preview.content, summary_index_setting)
-                    preview.summary = summary
-            except Exception as e:
-                logger.error(f"Failed to generate summary for preview: {str(e)}")
-                # Don't fail the entire preview if summary generation fails
-                preview.summary = None
-
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            list(executor.map(process, preview_texts))
-        return preview_texts
-
-    @staticmethod
-    def generate_summary(tenant_id: str, text: str, summary_index_setting: dict = None) -> str:
-        """
-        Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt.
-        """
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            raise ValueError("summary_index_setting is required and must be enabled to generate summary.")
-
-        model_name = summary_index_setting.get("model_name")
-        model_provider_name = summary_index_setting.get("model_provider_name")
-        summary_prompt = summary_index_setting.get("summary_prompt")
-
-        # Import default summary prompt
-        if not summary_prompt:
-            summary_prompt = DEFAULT_GENERATOR_SUMMARY_PROMPT
-
-        prompt = f"{summary_prompt}\n{text}"
-
-        provider_manager = ProviderManager()
-        provider_model_bundle = provider_manager.get_provider_model_bundle(tenant_id, model_provider_name, ModelType.LLM)
-        model_instance = ModelInstance(provider_model_bundle, model_name)
-        prompt_messages = [UserPromptMessage(content=prompt)]
-
-        result = model_instance.invoke_llm(
-            prompt_messages=prompt_messages,
-            model_parameters={},
-            stream=False
-        )
-
-        return getattr(result.message, "content", "")
--- a/api/core/rag/index_processor/processor/parent_child_index_processor.py
+++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py
@@ -25,7 +25,6 @@ from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegm
 from models.dataset import Document as DatasetDocument
 from services.account_service import AccountService
 from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
-from services.summary_index_service import SummaryIndexService


 class ParentChildIndexProcessor(BaseIndexProcessor):
@@ -136,29 +135,6 @@ class ParentChildIndexProcessor(BaseIndexProcessor):

    def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs):
        # node_ids is segment's node_ids
-        # Note: Summary indexes are now disabled (not deleted) when segments are disabled.
-        # This method is called for actual deletion scenarios (e.g., when segment is deleted).
-        # For disable operations, disable_summaries_for_segments is called directly in the task.
-        # Only delete summaries if explicitly requested (e.g., when segment is actually deleted)
-        delete_summaries = kwargs.get("delete_summaries", False)
-        if delete_summaries:
-            if node_ids:
-                # Find segments by index_node_id
-                segments = (
-                    db.session.query(DocumentSegment)
-                    .filter(
-                        DocumentSegment.dataset_id == dataset.id,
-                        DocumentSegment.index_node_id.in_(node_ids),
-                    )
-                    .all()
-                )
-                segment_ids = [segment.id for segment in segments]
-                if segment_ids:
-                    SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids)
-            else:
-                # Delete all summaries for the dataset
-                SummaryIndexService.delete_summaries_for_segments(dataset, None)
-
        if dataset.indexing_technique == "high_quality":
            delete_child_chunks = kwargs.get("delete_child_chunks") or False
            precomputed_child_node_ids = kwargs.get("precomputed_child_node_ids")
--- a/api/core/rag/index_processor/processor/qa_index_processor.py
+++ b/api/core/rag/index_processor/processor/qa_index_processor.py
@@ -25,10 +25,9 @@ from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from core.tools.utils.text_processing_utils import remove_leading_symbols
 from libs import helper
 from models.account import Account
-from models.dataset import Dataset, DocumentSegment
+from models.dataset import Dataset
 from models.dataset import Document as DatasetDocument
 from services.entities.knowledge_entities.knowledge_entities import Rule
-from services.summary_index_service import SummaryIndexService

 logger = logging.getLogger(__name__)

@@ -145,30 +144,6 @@ class QAIndexProcessor(BaseIndexProcessor):
                vector.create_multimodal(multimodal_documents)

    def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs):
-        # Note: Summary indexes are now disabled (not deleted) when segments are disabled.
-        # This method is called for actual deletion scenarios (e.g., when segment is deleted).
-        # For disable operations, disable_summaries_for_segments is called directly in the task.
-        # Note: qa_model doesn't generate summaries, but we clean them for completeness
-        # Only delete summaries if explicitly requested (e.g., when segment is actually deleted)
-        delete_summaries = kwargs.get("delete_summaries", False)
-        if delete_summaries:
-            if node_ids:
-                # Find segments by index_node_id
-                segments = (
-                    db.session.query(DocumentSegment)
-                    .filter(
-                        DocumentSegment.dataset_id == dataset.id,
-                        DocumentSegment.index_node_id.in_(node_ids),
-                    )
-                    .all()
-                )
-                segment_ids = [segment.id for segment in segments]
-                if segment_ids:
-                    SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids)
-            else:
-                # Delete all summaries for the dataset
-                SummaryIndexService.delete_summaries_for_segments(dataset, None)
-
        vector = Vector(dataset)
        if node_ids:
            vector.delete_by_ids(node_ids)
--- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py
+++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py
@@ -29,7 +29,6 @@ from models import (
    Account,
    CreatorUserRole,
    EndUser,
-    LLMGenerationDetail,
    WorkflowNodeExecutionModel,
    WorkflowNodeExecutionTriggeredFrom,
 )
@@ -458,113 +457,6 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
            session.merge(db_model)
            session.flush()

-            # Save LLMGenerationDetail for LLM nodes with successful execution
-            if (
-                domain_model.node_type == NodeType.LLM
-                and domain_model.status == WorkflowNodeExecutionStatus.SUCCEEDED
-                and domain_model.outputs is not None
-            ):
-                self._save_llm_generation_detail(session, domain_model)
-
-    def _save_llm_generation_detail(self, session, execution: WorkflowNodeExecution) -> None:
-        """
-        Save LLM generation detail for LLM nodes.
-        Extracts reasoning_content, tool_calls, and sequence from outputs and metadata.
-        """
-        outputs = execution.outputs or {}
-        metadata = execution.metadata or {}
-
-        reasoning_list = self._extract_reasoning(outputs)
-        tool_calls_list = self._extract_tool_calls(metadata.get(WorkflowNodeExecutionMetadataKey.AGENT_LOG))
-
-        if not reasoning_list and not tool_calls_list:
-            return
-
-        sequence = self._build_generation_sequence(outputs.get("text", ""), reasoning_list, tool_calls_list)
-        self._upsert_generation_detail(session, execution, reasoning_list, tool_calls_list, sequence)
-
-    def _extract_reasoning(self, outputs: Mapping[str, Any]) -> list[str]:
-        """Extract reasoning_content as a clean list of non-empty strings."""
-        reasoning_content = outputs.get("reasoning_content")
-        if isinstance(reasoning_content, str):
-            trimmed = reasoning_content.strip()
-            return [trimmed] if trimmed else []
-        if isinstance(reasoning_content, list):
-            return [item.strip() for item in reasoning_content if isinstance(item, str) and item.strip()]
-        return []
-
-    def _extract_tool_calls(self, agent_log: Any) -> list[dict[str, str]]:
-        """Extract tool call records from agent logs."""
-        if not agent_log or not isinstance(agent_log, list):
-            return []
-
-        tool_calls: list[dict[str, str]] = []
-        for log in agent_log:
-            log_data = log.data if hasattr(log, "data") else (log.get("data", {}) if isinstance(log, dict) else {})
-            tool_name = log_data.get("tool_name")
-            if tool_name and str(tool_name).strip():
-                tool_calls.append(
-                    {
-                        "id": log_data.get("tool_call_id", ""),
-                        "name": tool_name,
-                        "arguments": json.dumps(log_data.get("tool_args", {})),
-                        "result": str(log_data.get("output", "")),
-                    }
-                )
-        return tool_calls
-
-    def _build_generation_sequence(
-        self, text: str, reasoning_list: list[str], tool_calls_list: list[dict[str, str]]
-    ) -> list[dict[str, Any]]:
-        """Build a simple content/reasoning/tool_call sequence."""
-        sequence: list[dict[str, Any]] = []
-        if text:
-            sequence.append({"type": "content", "start": 0, "end": len(text)})
-        for index in range(len(reasoning_list)):
-            sequence.append({"type": "reasoning", "index": index})
-        for index in range(len(tool_calls_list)):
-            sequence.append({"type": "tool_call", "index": index})
-        return sequence
-
-    def _upsert_generation_detail(
-        self,
-        session,
-        execution: WorkflowNodeExecution,
-        reasoning_list: list[str],
-        tool_calls_list: list[dict[str, str]],
-        sequence: list[dict[str, Any]],
-    ) -> None:
-        """Insert or update LLMGenerationDetail with serialized fields."""
-        existing = (
-            session.query(LLMGenerationDetail)
-            .filter_by(
-                workflow_run_id=execution.workflow_execution_id,
-                node_id=execution.node_id,
-            )
-            .first()
-        )
-
-        reasoning_json = json.dumps(reasoning_list) if reasoning_list else None
-        tool_calls_json = json.dumps(tool_calls_list) if tool_calls_list else None
-        sequence_json = json.dumps(sequence) if sequence else None
-
-        if existing:
-            existing.reasoning_content = reasoning_json
-            existing.tool_calls = tool_calls_json
-            existing.sequence = sequence_json
-            return
-
-        generation_detail = LLMGenerationDetail(
-            tenant_id=self._tenant_id,
-            app_id=self._app_id,
-            workflow_run_id=execution.workflow_execution_id,
-            node_id=execution.node_id,
-            reasoning_content=reasoning_json,
-            tool_calls=tool_calls_json,
-            sequence=sequence_json,
-        )
-        session.add(generation_detail)
-
    def get_db_models_by_workflow_run(
        self,
        workflow_run_id: str,
--- a/api/core/tools/__base/tool.py
+++ b/api/core/tools/__base/tool.py
@@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
    from models.model import File

-from core.model_runtime.entities.message_entities import PromptMessageTool
 from core.tools.__base.tool_runtime import ToolRuntime
 from core.tools.entities.tool_entities import (
    ToolEntity,
@@ -155,60 +154,6 @@ class Tool(ABC):

        return parameters

-    def to_prompt_message_tool(self) -> PromptMessageTool:
-        message_tool = PromptMessageTool(
-            name=self.entity.identity.name,
-            description=self.entity.description.llm if self.entity.description else "",
-            parameters={
-                "type": "object",
-                "properties": {},
-                "required": [],
-            },
-        )
-
-        parameters = self.get_merged_runtime_parameters()
-        for parameter in parameters:
-            if parameter.form != ToolParameter.ToolParameterForm.LLM:
-                continue
-
-            parameter_type = parameter.type.as_normal_type()
-            if parameter.type in {
-                ToolParameter.ToolParameterType.SYSTEM_FILES,
-                ToolParameter.ToolParameterType.FILE,
-                ToolParameter.ToolParameterType.FILES,
-            }:
-                # Determine the description based on parameter type
-                if parameter.type == ToolParameter.ToolParameterType.FILE:
-                    file_format_desc = " Input the file id with format: [File: file_id]."
-                else:
-                    file_format_desc = "Input the file id with format: [Files: file_id1, file_id2, ...]. "
-
-                message_tool.parameters["properties"][parameter.name] = {
-                    "type": "string",
-                    "description": (parameter.llm_description or "") + file_format_desc,
-                }
-                continue
-            enum = []
-            if parameter.type == ToolParameter.ToolParameterType.SELECT:
-                enum = [option.value for option in parameter.options] if parameter.options else []
-
-            message_tool.parameters["properties"][parameter.name] = (
-                {
-                    "type": parameter_type,
-                    "description": parameter.llm_description or "",
-                }
-                if parameter.input_schema is None
-                else parameter.input_schema
-            )
-
-            if len(enum) > 0:
-                message_tool.parameters["properties"][parameter.name]["enum"] = enum
-
-            if parameter.required:
-                message_tool.parameters["required"].append(parameter.name)
-
-        return message_tool
-
    def create_image_message(
        self,
        image: str,
--- a/api/core/workflow/entities/init.py
+++ b/api/core/workflow/entities/init.py
@@ -1,16 +1,11 @@
 from .agent import AgentNodeStrategyInit
 from .graph_init_params import GraphInitParams
-from .tool_entities import ToolCall, ToolCallResult, ToolResult, ToolResultStatus
 from .workflow_execution import WorkflowExecution
 from .workflow_node_execution import WorkflowNodeExecution

 __all__ = [
    "AgentNodeStrategyInit",
    "GraphInitParams",
-    "ToolCall",
-    "ToolCallResult",
-    "ToolResult",
-    "ToolResultStatus",
    "WorkflowExecution",
    "WorkflowNodeExecution",
 ]
--- a/api/core/workflow/entities/tool_entities.py
+++ b/api/core/workflow/entities/tool_entities.py
@@ -1,39 +0,0 @@
-from enum import StrEnum
-
-from pydantic import BaseModel, Field
-
-from core.file import File
-
-
-class ToolResultStatus(StrEnum):
-    SUCCESS = "success"
-    ERROR = "error"
-
-
-class ToolCall(BaseModel):
-    id: str | None = Field(default=None, description="Unique identifier for this tool call")
-    name: str | None = Field(default=None, description="Name of the tool being called")
-    arguments: str | None = Field(default=None, description="Accumulated tool arguments JSON")
-    icon: str | dict | None = Field(default=None, description="Icon of the tool")
-    icon_dark: str | dict | None = Field(default=None, description="Dark theme icon of the tool")
-
-
-class ToolResult(BaseModel):
-    id: str | None = Field(default=None, description="Identifier of the tool call this result belongs to")
-    name: str | None = Field(default=None, description="Name of the tool")
-    output: str | None = Field(default=None, description="Tool output text, error or success message")
-    files: list[str] = Field(default_factory=list, description="File produced by tool")
-    status: ToolResultStatus | None = Field(default=ToolResultStatus.SUCCESS, description="Tool execution status")
-    elapsed_time: float | None = Field(default=None, description="Elapsed seconds spent executing the tool")
-    icon: str | dict | None = Field(default=None, description="Icon of the tool")
-    icon_dark: str | dict | None = Field(default=None, description="Dark theme icon of the tool")
-
-
-class ToolCallResult(BaseModel):
-    id: str | None = Field(default=None, description="Identifier for the tool call")
-    name: str | None = Field(default=None, description="Name of the tool")
-    arguments: str | None = Field(default=None, description="Accumulated tool arguments JSON")
-    output: str | None = Field(default=None, description="Tool output text, error or success message")
-    files: list[File] = Field(default_factory=list, description="File produced by tool")
-    status: ToolResultStatus = Field(default=ToolResultStatus.SUCCESS, description="Tool execution status")
-    elapsed_time: float | None = Field(default=None, description="Elapsed seconds spent executing the tool")
--- a/api/core/workflow/enums.py
+++ b/api/core/workflow/enums.py
@@ -251,8 +251,6 @@ class WorkflowNodeExecutionMetadataKey(StrEnum):
    ERROR_STRATEGY = "error_strategy"  # node in continue on error mode return the field
    LOOP_VARIABLE_MAP = "loop_variable_map"  # single loop variable output
    DATASOURCE_INFO = "datasource_info"
-    LLM_CONTENT_SEQUENCE = "llm_content_sequence"
-    LLM_TRACE = "llm_trace"
    COMPLETED_REASON = "completed_reason"  # completed reason for loop node


--- a/api/core/workflow/graph_engine/response_coordinator/coordinator.py
+++ b/api/core/workflow/graph_engine/response_coordinator/coordinator.py
@@ -16,13 +16,7 @@ from pydantic import BaseModel, Field

 from core.workflow.enums import NodeExecutionType, NodeState
 from core.workflow.graph import Graph
-from core.workflow.graph_events import (
-    ChunkType,
-    NodeRunStreamChunkEvent,
-    NodeRunSucceededEvent,
-    ToolCall,
-    ToolResult,
-)
+from core.workflow.graph_events import NodeRunStreamChunkEvent, NodeRunSucceededEvent
 from core.workflow.nodes.base.template import TextSegment, VariableSegment
 from core.workflow.runtime import VariablePool

@@ -327,24 +321,11 @@ class ResponseStreamCoordinator:
        selector: Sequence[str],
        chunk: str,
        is_final: bool = False,
-        chunk_type: ChunkType = ChunkType.TEXT,
-        tool_call: ToolCall | None = None,
-        tool_result: ToolResult | None = None,
    ) -> NodeRunStreamChunkEvent:
        """Create a stream chunk event with consistent structure.

        For selectors with special prefixes (sys, env, conversation), we use the
        active response node's information since these are not actual node IDs.
-
-        Args:
-            node_id: The node ID to attribute the event to
-            execution_id: The execution ID for this node
-            selector: The variable selector
-            chunk: The chunk content
-            is_final: Whether this is the final chunk
-            chunk_type: The semantic type of the chunk being streamed
-            tool_call: Structured data for tool_call chunks
-            tool_result: Structured data for tool_result chunks
        """
        # Check if this is a special selector that doesn't correspond to a node
        if selector and selector[0] not in self._graph.nodes and self._active_session:
@@ -357,9 +338,6 @@ class ResponseStreamCoordinator:
                selector=selector,
                chunk=chunk,
                is_final=is_final,
-                chunk_type=chunk_type,
-                tool_call=tool_call,
-                tool_result=tool_result,
            )

        # Standard case: selector refers to an actual node
@@ -371,9 +349,6 @@ class ResponseStreamCoordinator:
            selector=selector,
            chunk=chunk,
            is_final=is_final,
-            chunk_type=chunk_type,
-            tool_call=tool_call,
-            tool_result=tool_result,
        )

    def _process_variable_segment(self, segment: VariableSegment) -> tuple[Sequence[NodeRunStreamChunkEvent], bool]:
@@ -381,8 +356,6 @@ class ResponseStreamCoordinator:

        Handles both regular node selectors and special system selectors (sys, env, conversation).
        For special selectors, we attribute the output to the active response node.
-
-        For object-type variables, automatically streams all child fields that have stream events.
        """
        events: list[NodeRunStreamChunkEvent] = []
        source_selector_prefix = segment.selector[0] if segment.selector else ""
@@ -391,81 +364,60 @@ class ResponseStreamCoordinator:
        # Determine which node to attribute the output to
        # For special selectors (sys, env, conversation), use the active response node
        # For regular selectors, use the source node
-        active_session = self._active_session
-        special_selector = bool(active_session and source_selector_prefix not in self._graph.nodes)
-        output_node_id = active_session.node_id if special_selector and active_session else source_selector_prefix
+        if self._active_session and source_selector_prefix not in self._graph.nodes:
+            # Special selector - use active response node
+            output_node_id = self._active_session.node_id
+        else:
+            # Regular node selector
+            output_node_id = source_selector_prefix
        execution_id = self._get_or_create_execution_id(output_node_id)

-        # Check if there's a direct stream for this selector
-        has_direct_stream = (
-            tuple(segment.selector) in self._stream_buffers or tuple(segment.selector) in self._closed_streams
-        )
-
-        stream_targets = [segment.selector] if has_direct_stream else sorted(self._find_child_streams(segment.selector))
-
-        if stream_targets:
-            all_complete = True
-
-            for target_selector in stream_targets:
-                while self._has_unread_stream(target_selector):
-                    if event := self._pop_stream_chunk(target_selector):
-                        events.append(
-                            self._rewrite_stream_event(
-                                event=event,
-                                output_node_id=output_node_id,
-                                execution_id=execution_id,
-                                special_selector=bool(special_selector),
-                            )
-                        )
-
-                if not self._is_stream_closed(target_selector):
-                    all_complete = False
-
-            is_complete = all_complete
-
-        # Fallback: check if scalar value exists in variable pool
-        if not is_complete and not has_direct_stream:
-            if value := self._variable_pool.get(segment.selector):
-                # Process scalar value
-                is_last_segment = bool(
-                    self._active_session
-                    and self._active_session.index == len(self._active_session.template.segments) - 1
-                )
-                events.append(
-                    self._create_stream_chunk_event(
-                        node_id=output_node_id,
-                        execution_id=execution_id,
-                        selector=segment.selector,
-                        chunk=value.markdown,
-                        is_final=is_last_segment,
+        # Stream all available chunks
+        while self._has_unread_stream(segment.selector):
+            if event := self._pop_stream_chunk(segment.selector):
+                # For special selectors, we need to update the event to use
+                # the active response node's information
+                if self._active_session and source_selector_prefix not in self._graph.nodes:
+                    response_node = self._graph.nodes[self._active_session.node_id]
+                    # Create a new event with the response node's information
+                    # but keep the original selector
+                    updated_event = NodeRunStreamChunkEvent(
+                        id=execution_id,
+                        node_id=response_node.id,
+                        node_type=response_node.node_type,
+                        selector=event.selector,  # Keep original selector
+                        chunk=event.chunk,
+                        is_final=event.is_final,
                    )
+                    events.append(updated_event)
+                else:
+                    # Regular node selector - use event as is
+                    events.append(event)
+
+        # Check if this is the last chunk by looking ahead
+        stream_closed = self._is_stream_closed(segment.selector)
+        # Check if stream is closed to determine if segment is complete
+        if stream_closed:
+            is_complete = True
+
+        elif value := self._variable_pool.get(segment.selector):
+            # Process scalar value
+            is_last_segment = bool(
+                self._active_session and self._active_session.index == len(self._active_session.template.segments) - 1
+            )
+            events.append(
+                self._create_stream_chunk_event(
+                    node_id=output_node_id,
+                    execution_id=execution_id,
+                    selector=segment.selector,
+                    chunk=value.markdown,
+                    is_final=is_last_segment,
                )
-                is_complete = True
+            )
+            is_complete = True

        return events, is_complete

-    def _rewrite_stream_event(
-        self,
-        event: NodeRunStreamChunkEvent,
-        output_node_id: str,
-        execution_id: str,
-        special_selector: bool,
-    ) -> NodeRunStreamChunkEvent:
-        """Rewrite event to attribute to active response node when selector is special."""
-        if not special_selector:
-            return event
-
-        return self._create_stream_chunk_event(
-            node_id=output_node_id,
-            execution_id=execution_id,
-            selector=event.selector,
-            chunk=event.chunk,
-            is_final=event.is_final,
-            chunk_type=event.chunk_type,
-            tool_call=event.tool_call,
-            tool_result=event.tool_result,
-        )
-
    def _process_text_segment(self, segment: TextSegment) -> Sequence[NodeRunStreamChunkEvent]:
        """Process a text segment. Returns (events, is_complete)."""
        assert self._active_session is not None
@@ -561,36 +513,6 @@ class ResponseStreamCoordinator:

    # ============= Internal Stream Management Methods =============

-    def _find_child_streams(self, parent_selector: Sequence[str]) -> list[tuple[str, ...]]:
-        """Find all child stream selectors that are descendants of the parent selector.
-
-        For example, if parent_selector is ['llm', 'generation'], this will find:
-        - ['llm', 'generation', 'content']
-        - ['llm', 'generation', 'tool_calls']
-        - ['llm', 'generation', 'tool_results']
-        - ['llm', 'generation', 'thought']
-
-        Args:
-            parent_selector: The parent selector to search for children
-
-        Returns:
-            List of child selector tuples found in stream buffers or closed streams
-        """
-        parent_key = tuple(parent_selector)
-        parent_len = len(parent_key)
-        child_streams: set[tuple[str, ...]] = set()
-
-        # Search in both active buffers and closed streams
-        all_selectors = set(self._stream_buffers.keys()) | self._closed_streams
-
-        for selector_key in all_selectors:
-            # Check if this selector is a direct child of the parent
-            # Direct child means: len(child) == len(parent) + 1 and child starts with parent
-            if len(selector_key) == parent_len + 1 and selector_key[:parent_len] == parent_key:
-                child_streams.add(selector_key)
-
-        return sorted(child_streams)
-
    def _append_stream_chunk(self, selector: Sequence[str], event: NodeRunStreamChunkEvent) -> None:
        """
        Append a stream chunk to the internal buffer.
--- a/api/core/workflow/graph_events/init.py
+++ b/api/core/workflow/graph_events/init.py
@@ -36,7 +36,6 @@ from .loop import (

 # Node events
 from .node import (
-    ChunkType,
    NodeRunExceptionEvent,
    NodeRunFailedEvent,
    NodeRunPauseRequestedEvent,
@@ -45,13 +44,10 @@ from .node import (
    NodeRunStartedEvent,
    NodeRunStreamChunkEvent,
    NodeRunSucceededEvent,
-    ToolCall,
-    ToolResult,
 )

 __all__ = [
    "BaseGraphEvent",
-    "ChunkType",
    "GraphEngineEvent",
    "GraphNodeEventBase",
    "GraphRunAbortedEvent",
@@ -77,6 +73,4 @@ __all__ = [
    "NodeRunStartedEvent",
    "NodeRunStreamChunkEvent",
    "NodeRunSucceededEvent",
-    "ToolCall",
-    "ToolResult",
 ]
--- a/api/core/workflow/graph_events/node.py
+++ b/api/core/workflow/graph_events/node.py
@@ -1,11 +1,10 @@
 from collections.abc import Sequence
 from datetime import datetime
-from enum import StrEnum

 from pydantic import Field

 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
-from core.workflow.entities import AgentNodeStrategyInit, ToolCall, ToolResult
+from core.workflow.entities import AgentNodeStrategyInit
 from core.workflow.entities.pause_reason import PauseReason

 from .base import GraphNodeEventBase
@@ -22,39 +21,13 @@ class NodeRunStartedEvent(GraphNodeEventBase):
    provider_id: str = ""


-class ChunkType(StrEnum):
-    """Stream chunk type for LLM-related events."""
-
-    TEXT = "text"  # Normal text streaming
-    TOOL_CALL = "tool_call"  # Tool call arguments streaming
-    TOOL_RESULT = "tool_result"  # Tool execution result
-    THOUGHT = "thought"  # Agent thinking process (ReAct)
-    THOUGHT_START = "thought_start"  # Agent thought start
-    THOUGHT_END = "thought_end"  # Agent thought end
-
-
 class NodeRunStreamChunkEvent(GraphNodeEventBase):
-    """Stream chunk event for workflow node execution."""
-
-    # Base fields
+    # Spec-compliant fields
    selector: Sequence[str] = Field(
        ..., description="selector identifying the output location (e.g., ['nodeA', 'text'])"
    )
    chunk: str = Field(..., description="the actual chunk content")
    is_final: bool = Field(default=False, description="indicates if this is the last chunk")
-    chunk_type: ChunkType = Field(default=ChunkType.TEXT, description="type of the chunk")
-
-    # Tool call fields (when chunk_type == TOOL_CALL)
-    tool_call: ToolCall | None = Field(
-        default=None,
-        description="structured payload for tool_call chunks",
-    )
-
-    # Tool result fields (when chunk_type == TOOL_RESULT)
-    tool_result: ToolResult | None = Field(
-        default=None,
-        description="structured payload for tool_result chunks",
-    )


 class NodeRunRetrieverResourceEvent(GraphNodeEventBase):
--- a/api/core/workflow/node_events/init.py
+++ b/api/core/workflow/node_events/init.py
@@ -13,21 +13,16 @@ from .loop import (
    LoopSucceededEvent,
 )
 from .node import (
-    ChunkType,
    ModelInvokeCompletedEvent,
    PauseRequestedEvent,
    RunRetrieverResourceEvent,
    RunRetryEvent,
    StreamChunkEvent,
    StreamCompletedEvent,
-    ThoughtChunkEvent,
-    ToolCallChunkEvent,
-    ToolResultChunkEvent,
 )

 __all__ = [
    "AgentLogEvent",
-    "ChunkType",
    "IterationFailedEvent",
    "IterationNextEvent",
    "IterationStartedEvent",
@@ -44,7 +39,4 @@ __all__ = [
    "RunRetryEvent",
    "StreamChunkEvent",
    "StreamCompletedEvent",
-    "ThoughtChunkEvent",
-    "ToolCallChunkEvent",
-    "ToolResultChunkEvent",
 ]
--- a/api/core/workflow/node_events/node.py
+++ b/api/core/workflow/node_events/node.py
@@ -1,13 +1,11 @@
 from collections.abc import Sequence
 from datetime import datetime
-from enum import StrEnum

 from pydantic import Field

 from core.file import File
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
-from core.workflow.entities import ToolCall, ToolResult
 from core.workflow.entities.pause_reason import PauseReason
 from core.workflow.node_events import NodeRunResult

@@ -34,60 +32,13 @@ class RunRetryEvent(NodeEventBase):
    start_at: datetime = Field(..., description="Retry start time")


-class ChunkType(StrEnum):
-    """Stream chunk type for LLM-related events."""
-
-    TEXT = "text"  # Normal text streaming
-    TOOL_CALL = "tool_call"  # Tool call arguments streaming
-    TOOL_RESULT = "tool_result"  # Tool execution result
-    THOUGHT = "thought"  # Agent thinking process (ReAct)
-    THOUGHT_START = "thought_start"  # Agent thought start
-    THOUGHT_END = "thought_end"  # Agent thought end
-
-
 class StreamChunkEvent(NodeEventBase):
-    """Base stream chunk event - normal text streaming output."""
-
+    # Spec-compliant fields
    selector: Sequence[str] = Field(
        ..., description="selector identifying the output location (e.g., ['nodeA', 'text'])"
    )
    chunk: str = Field(..., description="the actual chunk content")
    is_final: bool = Field(default=False, description="indicates if this is the last chunk")
-    chunk_type: ChunkType = Field(default=ChunkType.TEXT, description="type of the chunk")
-    tool_call: ToolCall | None = Field(default=None, description="structured payload for tool_call chunks")
-    tool_result: ToolResult | None = Field(default=None, description="structured payload for tool_result chunks")
-
-
-class ToolCallChunkEvent(StreamChunkEvent):
-    """Tool call streaming event - tool call arguments streaming output."""
-
-    chunk_type: ChunkType = Field(default=ChunkType.TOOL_CALL, frozen=True)
-    tool_call: ToolCall | None = Field(default=None, description="structured tool call payload")
-
-
-class ToolResultChunkEvent(StreamChunkEvent):
-    """Tool result event - tool execution result."""
-
-    chunk_type: ChunkType = Field(default=ChunkType.TOOL_RESULT, frozen=True)
-    tool_result: ToolResult | None = Field(default=None, description="structured tool result payload")
-
-
-class ThoughtStartChunkEvent(StreamChunkEvent):
-    """Agent thought start streaming event - Agent thinking process (ReAct)."""
-
-    chunk_type: ChunkType = Field(default=ChunkType.THOUGHT_START, frozen=True)
-
-
-class ThoughtEndChunkEvent(StreamChunkEvent):
-    """Agent thought end streaming event - Agent thinking process (ReAct)."""
-
-    chunk_type: ChunkType = Field(default=ChunkType.THOUGHT_END, frozen=True)
-
-
-class ThoughtChunkEvent(StreamChunkEvent):
-    """Agent thought streaming event - Agent thinking process (ReAct)."""
-
-    chunk_type: ChunkType = Field(default=ChunkType.THOUGHT, frozen=True)


 class StreamCompletedEvent(NodeEventBase):
--- a/api/core/workflow/nodes/base/node.py
+++ b/api/core/workflow/nodes/base/node.py
@@ -48,9 +48,6 @@ from core.workflow.node_events import (
    RunRetrieverResourceEvent,
    StreamChunkEvent,
    StreamCompletedEvent,
-    ThoughtChunkEvent,
-    ToolCallChunkEvent,
-    ToolResultChunkEvent,
 )
 from core.workflow.runtime import GraphRuntimeState
 from libs.datetime_utils import naive_utc_now
@@ -567,8 +564,6 @@ class Node(Generic[NodeDataT]):

    @_dispatch.register
    def _(self, event: StreamChunkEvent) -> NodeRunStreamChunkEvent:
-        from core.workflow.graph_events import ChunkType
-
        return NodeRunStreamChunkEvent(
            id=self.execution_id,
            node_id=self._node_id,
@@ -576,60 +571,6 @@ class Node(Generic[NodeDataT]):
            selector=event.selector,
            chunk=event.chunk,
            is_final=event.is_final,
-            chunk_type=ChunkType(event.chunk_type.value),
-            tool_call=event.tool_call,
-            tool_result=event.tool_result,
-        )
-
-    @_dispatch.register
-    def _(self, event: ToolCallChunkEvent) -> NodeRunStreamChunkEvent:
-        from core.workflow.graph_events import ChunkType
-
-        return NodeRunStreamChunkEvent(
-            id=self._node_execution_id,
-            node_id=self._node_id,
-            node_type=self.node_type,
-            selector=event.selector,
-            chunk=event.chunk,
-            is_final=event.is_final,
-            chunk_type=ChunkType.TOOL_CALL,
-            tool_call=event.tool_call,
-        )
-
-    @_dispatch.register
-    def _(self, event: ToolResultChunkEvent) -> NodeRunStreamChunkEvent:
-        from core.workflow.entities import ToolResult, ToolResultStatus
-        from core.workflow.graph_events import ChunkType
-
-        tool_result = event.tool_result or ToolResult()
-        status: ToolResultStatus = tool_result.status or ToolResultStatus.SUCCESS
-        tool_result = tool_result.model_copy(
-            update={"status": status, "files": tool_result.files or []},
-        )
-
-        return NodeRunStreamChunkEvent(
-            id=self._node_execution_id,
-            node_id=self._node_id,
-            node_type=self.node_type,
-            selector=event.selector,
-            chunk=event.chunk,
-            is_final=event.is_final,
-            chunk_type=ChunkType.TOOL_RESULT,
-            tool_result=tool_result,
-        )
-
-    @_dispatch.register
-    def _(self, event: ThoughtChunkEvent) -> NodeRunStreamChunkEvent:
-        from core.workflow.graph_events import ChunkType
-
-        return NodeRunStreamChunkEvent(
-            id=self._node_execution_id,
-            node_id=self._node_id,
-            node_type=self.node_type,
-            selector=event.selector,
-            chunk=event.chunk,
-            is_final=event.is_final,
-            chunk_type=ChunkType.THOUGHT,
        )

    @_dispatch.register
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@@ -62,21 +62,6 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
        inputs = {"variable_selector": variable_selector}
        process_data = {"documents": value if isinstance(value, list) else [value]}

-        # Ensure storage_key is loaded for File objects
-        files_to_check = value if isinstance(value, list) else [value]
-        files_needing_storage_key = [
-            f for f in files_to_check
-            if isinstance(f, File) and not f.storage_key and f.related_id
-        ]
-        if files_needing_storage_key:
-            from factories.file_factory import StorageKeyLoader
-            from extensions.ext_database import db
-            from sqlalchemy.orm import Session
-            
-            with Session(bind=db.engine) as session:
-                storage_key_loader = StorageKeyLoader(session, tenant_id=self.tenant_id)
-                storage_key_loader.load_storage_keys(files_needing_storage_key)
-
        try:
            if isinstance(value, list):
                extracted_text_list = list(map(_extract_text_from_file, value))
@@ -430,15 +415,6 @@ def _download_file_content(file: File) -> bytes:
            response.raise_for_status()
            return response.content
        else:
-            # Check if storage_key is set
-            if not file.storage_key:
-                raise FileDownloadError(f"File storage_key is missing for file: {file.filename}")
-            
-            # Check if file exists before downloading
-            from extensions.ext_storage import storage
-            if not storage.exists(file.storage_key):
-                raise FileDownloadError(f"File not found in storage: {file.storage_key}")
-            
            return file_manager.download(file)
    except Exception as e:
        raise FileDownloadError(f"Error downloading file: {str(e)}") from e
--- a/api/core/workflow/nodes/knowledge_index/entities.py
+++ b/api/core/workflow/nodes/knowledge_index/entities.py
@@ -158,5 +158,3 @@ class KnowledgeIndexNodeData(BaseNodeData):
    type: str = "knowledge-index"
    chunk_structure: str
    index_chunk_variable_selector: list[str]
-    indexing_technique: str | None = None
-    summary_index_setting: dict | None = None
--- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
+++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
@@ -1,11 +1,9 @@
-import concurrent.futures
 import datetime
 import logging
 import time
 from collections.abc import Mapping
 from typing import Any

-from flask import current_app
 from sqlalchemy import func, select

 from core.app.entities.app_invoke_entities import InvokeFrom
@@ -18,9 +16,7 @@ from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.base.template import Template
 from core.workflow.runtime import VariablePool
 from extensions.ext_database import db
-from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary
-from services.summary_index_service import SummaryIndexService
-from tasks.generate_summary_index_task import generate_summary_index_task
+from models.dataset import Dataset, Document, DocumentSegment

 from .entities import KnowledgeIndexNodeData
 from .exc import (
@@ -71,18 +67,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
        # index knowledge
        try:
            if is_preview:
-                # Preview mode: generate summaries for chunks directly without saving to database
-                # Format preview and generate summaries on-the-fly
-                # Get indexing_technique and summary_index_setting from node_data (workflow graph config)
-                # or fallback to dataset if not available in node_data
-                indexing_technique = node_data.indexing_technique or dataset.indexing_technique
-                summary_index_setting = node_data.summary_index_setting or dataset.summary_index_setting
-                
-                outputs = self._get_preview_output_with_summaries(
-                    node_data.chunk_structure, chunks, dataset=dataset,
-                    indexing_technique=indexing_technique,
-                    summary_index_setting=summary_index_setting
-                )
+                outputs = self._get_preview_output(node_data.chunk_structure, chunks)
                return NodeRunResult(
                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
                    inputs=variables,
@@ -178,9 +163,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):

        db.session.commit()

-        # Generate summary index if enabled
-        self._handle_summary_index_generation(dataset, document, variable_pool)
-
        return {
            "dataset_id": ds_id_value,
            "dataset_name": dataset_name_value,
@@ -191,269 +173,9 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
            "display_status": "completed",
        }

-    def _handle_summary_index_generation(
-        self,
-        dataset: Dataset,
-        document: Document,
-        variable_pool: VariablePool,
-    ) -> None:
-        """
-        Handle summary index generation based on mode (debug/preview or production).
-
-        Args:
-            dataset: Dataset containing the document
-            document: Document to generate summaries for
-            variable_pool: Variable pool to check invoke_from
-        """
-        # Only generate summary index for high_quality indexing technique
-        if dataset.indexing_technique != "high_quality":
-            return
-
-        # Check if summary index is enabled
-        summary_index_setting = dataset.summary_index_setting
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            return
-
-        # Skip qa_model documents
-        if document.doc_form == "qa_model":
-            return
-
-        # Determine if in preview/debug mode
-        invoke_from = variable_pool.get(["sys", SystemVariableKey.INVOKE_FROM])
-        is_preview = invoke_from and invoke_from.value == InvokeFrom.DEBUGGER
-
-        # Determine if only parent chunks should be processed
-        only_parent_chunks = dataset.chunk_structure == "parent_child_index"
-
-        if is_preview:
-            try:
-                # Query segments that need summary generation
-                query = db.session.query(DocumentSegment).filter_by(
-                    dataset_id=dataset.id,
-                    document_id=document.id,
-                    status="completed",
-                    enabled=True,
-                )
-                segments = query.all()
-
-                if not segments:
-                    logger.info(f"No segments found for document {document.id}")
-                    return
-
-                # Filter segments based on mode
-                segments_to_process = []
-                for segment in segments:
-                    # Skip if summary already exists
-                    existing_summary = (
-                        db.session.query(DocumentSegmentSummary)
-                        .filter_by(chunk_id=segment.id, dataset_id=dataset.id, status="completed")
-                        .first()
-                    )
-                    if existing_summary:
-                        continue
-
-                    # For parent-child mode, all segments are parent chunks, so process all
-                    segments_to_process.append(segment)
-
-                if not segments_to_process:
-                    logger.info(f"No segments need summary generation for document {document.id}")
-                    return
-
-                # Use ThreadPoolExecutor for concurrent generation
-                flask_app = current_app._get_current_object()  # type: ignore
-                max_workers = min(10, len(segments_to_process))  # Limit to 10 workers
-
-                def process_segment(segment: DocumentSegment) -> None:
-                    """Process a single segment in a thread with Flask app context."""
-                    with flask_app.app_context():
-                        try:
-                            SummaryIndexService.generate_and_vectorize_summary(
-                                segment, dataset, summary_index_setting
-                            )
-                        except Exception as e:
-                            logger.error(f"Failed to generate summary for segment {segment.id}: {str(e)}")
-                            # Continue processing other segments
-
-                with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-                    futures = [
-                        executor.submit(process_segment, segment) for segment in segments_to_process
-                    ]
-                    # Wait for all tasks to complete
-                    concurrent.futures.wait(futures, timeout=300)
-
-                logger.info(
-                    f"Successfully generated summary index for {len(segments_to_process)} segments "
-                    f"in document {document.id}"
-                )
-            except Exception as e:
-                logger.exception(f"Failed to generate summary index for document {document.id}: {str(e)}")
-                # Don't fail the entire indexing process if summary generation fails
-        else:
-            # Production mode: asynchronous generation
-            logger.info(f"Queuing summary index generation task for document {document.id} (production mode)")
-            try:
-                generate_summary_index_task.delay(dataset.id, document.id, None)
-                logger.info(f"Summary index generation task queued for document {document.id}")
-            except Exception as e:
-                logger.exception(f"Failed to queue summary index generation task for document {document.id}: {str(e)}")
-                # Don't fail the entire indexing process if task queuing fails
-
-    def _get_preview_output_with_summaries(
-        self, chunk_structure: str, chunks: Any, dataset: Dataset,
-        indexing_technique: str | None = None,
-        summary_index_setting: dict | None = None
-    ) -> Mapping[str, Any]:
-        """
-        Generate preview output with summaries for chunks in preview mode.
-        This method generates summaries on-the-fly without saving to database.
-        
-        Args:
-            chunk_structure: Chunk structure type
-            chunks: Chunks to generate preview for
-            dataset: Dataset object (for tenant_id)
-            indexing_technique: Indexing technique from node config or dataset
-            summary_index_setting: Summary index setting from node config or dataset
-        """
+    def _get_preview_output(self, chunk_structure: str, chunks: Any) -> Mapping[str, Any]:
        index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
-        preview_output = index_processor.format_preview(chunks)
-        
-        # Check if summary index is enabled
-        if indexing_technique != "high_quality":
-            return preview_output
-        
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            return preview_output
-        
-        # Generate summaries for chunks
-        if "preview" in preview_output and isinstance(preview_output["preview"], list):
-            chunk_count = len(preview_output["preview"])
-            logger.info(
-                f"Generating summaries for {chunk_count} chunks in preview mode "
-                f"(dataset: {dataset.id})"
-            )
-            # Use ParagraphIndexProcessor's generate_summary method
-            from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
-            
-            # Get Flask app for application context in worker threads
-            flask_app = None
-            try:
-                flask_app = current_app._get_current_object()  # type: ignore
-            except RuntimeError:
-                logger.warning("No Flask application context available, summary generation may fail")
-            
-            def generate_summary_for_chunk(preview_item: dict) -> None:
-                """Generate summary for a single chunk."""
-                if "content" in preview_item:
-                    try:
-                        # Set Flask application context in worker thread
-                        if flask_app:
-                            with flask_app.app_context():
-                                summary = ParagraphIndexProcessor.generate_summary(
-                                    tenant_id=dataset.tenant_id,
-                                    text=preview_item["content"],
-                                    summary_index_setting=summary_index_setting,
-                                )
-                                if summary:
-                                    preview_item["summary"] = summary
-                        else:
-                            # Fallback: try without app context (may fail)
-                            summary = ParagraphIndexProcessor.generate_summary(
-                                tenant_id=dataset.tenant_id,
-                                text=preview_item["content"],
-                                summary_index_setting=summary_index_setting,
-                            )
-                            if summary:
-                                preview_item["summary"] = summary
-                    except Exception as e:
-                        logger.error(f"Failed to generate summary for chunk: {str(e)}")
-                        # Don't fail the entire preview if summary generation fails
-            
-            # Generate summaries concurrently using ThreadPoolExecutor
-            # Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total)
-            timeout_seconds = min(300, 60 * len(preview_output["preview"]))
-            with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_output["preview"]))) as executor:
-                futures = [
-                    executor.submit(generate_summary_for_chunk, preview_item)
-                    for preview_item in preview_output["preview"]
-                ]
-                # Wait for all tasks to complete with timeout
-                done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds)
-                
-                # Cancel tasks that didn't complete in time
-                if not_done:
-                    logger.warning(
-                        f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s. "
-                        "Cancelling remaining tasks..."
-                    )
-                    for future in not_done:
-                        future.cancel()
-                    # Wait a bit for cancellation to take effect
-                    concurrent.futures.wait(not_done, timeout=5)
-            
-            completed_count = sum(1 for item in preview_output["preview"] if item.get("summary") is not None)
-            logger.info(
-                f"Completed summary generation for preview chunks: {completed_count}/{len(preview_output['preview'])} succeeded"
-            )
-        
-        return preview_output
-
-    def _get_preview_output(
-        self, chunk_structure: str, chunks: Any, dataset: Dataset | None = None, variable_pool: VariablePool | None = None
-    ) -> Mapping[str, Any]:
-        index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
-        preview_output = index_processor.format_preview(chunks)
-        
-        # If dataset is provided, try to enrich preview with summaries
-        if dataset and variable_pool:
-            document_id = variable_pool.get(["sys", SystemVariableKey.DOCUMENT_ID])
-            if document_id:
-                document = db.session.query(Document).filter_by(id=document_id.value).first()
-                if document:
-                    # Query summaries for this document
-                    summaries = (
-                        db.session.query(DocumentSegmentSummary)
-                        .filter_by(
-                            dataset_id=dataset.id,
-                            document_id=document.id,
-                            status="completed",
-                            enabled=True,
-                        )
-                        .all()
-                    )
-                    
-                    if summaries:
-                        # Create a map of segment content to summary for matching
-                        # Use content matching as chunks in preview might not be indexed yet
-                        summary_by_content = {}
-                        for summary in summaries:
-                            segment = (
-                                db.session.query(DocumentSegment)
-                                .filter_by(id=summary.chunk_id, dataset_id=dataset.id)
-                                .first()
-                            )
-                            if segment:
-                                # Normalize content for matching (strip whitespace)
-                                normalized_content = segment.content.strip()
-                                summary_by_content[normalized_content] = summary.summary_content
-                        
-                        # Enrich preview with summaries by content matching
-                        if "preview" in preview_output and isinstance(preview_output["preview"], list):
-                            matched_count = 0
-                            for preview_item in preview_output["preview"]:
-                                if "content" in preview_item:
-                                    # Normalize content for matching
-                                    normalized_chunk_content = preview_item["content"].strip()
-                                    if normalized_chunk_content in summary_by_content:
-                                        preview_item["summary"] = summary_by_content[normalized_chunk_content]
-                                        matched_count += 1
-                            
-                            if matched_count > 0:
-                                logger.info(
-                                    f"Enriched preview with {matched_count} existing summaries "
-                                    f"(dataset: {dataset.id}, document: {document.id})"
-                                )
-        
-        return preview_output
+        return index_processor.format_preview(chunks)

    @classmethod
    def version(cls) -> str:
--- a/api/core/workflow/nodes/llm/init.py
+++ b/api/core/workflow/nodes/llm/init.py
@@ -3,7 +3,6 @@ from .entities import (
    LLMNodeCompletionModelPromptTemplate,
    LLMNodeData,
    ModelConfig,
-    ToolMetadata,
    VisionConfig,
 )
 from .node import LLMNode
@@ -14,6 +13,5 @@ __all__ = [
    "LLMNodeCompletionModelPromptTemplate",
    "LLMNodeData",
    "ModelConfig",
-    "ToolMetadata",
    "VisionConfig",
 ]
--- a/api/core/workflow/nodes/llm/entities.py
+++ b/api/core/workflow/nodes/llm/entities.py
@@ -1,17 +1,10 @@
-import re
 from collections.abc import Mapping, Sequence
 from typing import Any, Literal

-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, Field, field_validator

-from core.agent.entities import AgentLog, AgentResult
-from core.file import File
 from core.model_runtime.entities import ImagePromptMessageContent, LLMMode
-from core.model_runtime.entities.llm_entities import LLMUsage
 from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
-from core.tools.entities.tool_entities import ToolProviderType
-from core.workflow.entities import ToolCall, ToolCallResult
-from core.workflow.node_events import AgentLogEvent
 from core.workflow.nodes.base import BaseNodeData
 from core.workflow.nodes.base.entities import VariableSelector

@@ -65,268 +58,6 @@ class LLMNodeCompletionModelPromptTemplate(CompletionModelPromptTemplate):
    jinja2_text: str | None = None


-class ToolMetadata(BaseModel):
-    """
-    Tool metadata for LLM node with tool support.
-
-    Defines the essential fields needed for tool configuration,
-    particularly the 'type' field to identify tool provider type.
-    """
-
-    # Core fields
-    enabled: bool = True
-    type: ToolProviderType = Field(..., description="Tool provider type: builtin, api, mcp, workflow")
-    provider_name: str = Field(..., description="Tool provider name/identifier")
-    tool_name: str = Field(..., description="Tool name")
-
-    # Optional fields
-    plugin_unique_identifier: str | None = Field(None, description="Plugin unique identifier for plugin tools")
-    credential_id: str | None = Field(None, description="Credential ID for tools requiring authentication")
-
-    # Configuration fields
-    parameters: dict[str, Any] = Field(default_factory=dict, description="Tool parameters")
-    settings: dict[str, Any] = Field(default_factory=dict, description="Tool settings configuration")
-    extra: dict[str, Any] = Field(default_factory=dict, description="Extra tool configuration like custom description")
-
-
-class ModelTraceSegment(BaseModel):
-    """Model invocation trace segment with token usage and output."""
-
-    text: str | None = Field(None, description="Model output text content")
-    reasoning: str | None = Field(None, description="Reasoning/thought content from model")
-    tool_calls: list[ToolCall] = Field(default_factory=list, description="Tool calls made by the model")
-
-
-class ToolTraceSegment(BaseModel):
-    """Tool invocation trace segment with call details and result."""
-
-    id: str | None = Field(default=None, description="Unique identifier for this tool call")
-    name: str | None = Field(default=None, description="Name of the tool being called")
-    arguments: str | None = Field(default=None, description="Accumulated tool arguments JSON")
-    output: str | None = Field(default=None, description="Tool call result")
-
-
-class LLMTraceSegment(BaseModel):
-    """
-    Streaming trace segment for LLM tool-enabled runs.
-
-    Represents alternating model and tool invocations in sequence:
-    model -> tool -> model -> tool -> ...
-
-    Each segment records its execution duration.
-    """
-
-    type: Literal["model", "tool"]
-    duration: float = Field(..., description="Execution duration in seconds")
-    usage: LLMUsage | None = Field(default=None, description="Token usage statistics for this model call")
-    output: ModelTraceSegment | ToolTraceSegment = Field(..., description="Output of the segment")
-
-    # Common metadata for both model and tool segments
-    provider: str | None = Field(default=None, description="Model or tool provider identifier")
-    name: str | None = Field(default=None, description="Name of the model or tool")
-    icon: str | None = Field(default=None, description="Icon for the provider")
-    icon_dark: str | None = Field(default=None, description="Dark theme icon for the provider")
-    error: str | None = Field(default=None, description="Error message if segment failed")
-    status: Literal["success", "error"] | None = Field(default=None, description="Tool execution status")
-
-
-class LLMGenerationData(BaseModel):
-    """Generation data from LLM invocation with tools.
-
-    For multi-turn tool calls like: thought1 -> text1 -> tool_call1 -> thought2 -> text2 -> tool_call2
-    - reasoning_contents: [thought1, thought2, ...] - one element per turn
-    - tool_calls: [{id, name, arguments, result}, ...] - all tool calls with results
-    """
-
-    text: str = Field(..., description="Accumulated text content from all turns")
-    reasoning_contents: list[str] = Field(default_factory=list, description="Reasoning content per turn")
-    tool_calls: list[ToolCallResult] = Field(default_factory=list, description="Tool calls with results")
-    sequence: list[dict[str, Any]] = Field(default_factory=list, description="Ordered segments for rendering")
-    usage: LLMUsage = Field(..., description="LLM usage statistics")
-    finish_reason: str | None = Field(None, description="Finish reason from LLM")
-    files: list[File] = Field(default_factory=list, description="Generated files")
-    trace: list[LLMTraceSegment] = Field(default_factory=list, description="Streaming trace in emitted order")
-
-
-class ThinkTagStreamParser:
-    """Lightweight state machine to split streaming chunks by <think> tags."""
-
-    _START_PATTERN = re.compile(r"<think(?:\s[^>]*)?>", re.IGNORECASE)
-    _END_PATTERN = re.compile(r"</think>", re.IGNORECASE)
-    _START_PREFIX = "<think"
-    _END_PREFIX = "</think"
-
-    def __init__(self):
-        self._buffer = ""
-        self._in_think = False
-
-    @staticmethod
-    def _suffix_prefix_len(text: str, prefix: str) -> int:
-        """Return length of the longest suffix of `text` that is a prefix of `prefix`."""
-        max_len = min(len(text), len(prefix) - 1)
-        for i in range(max_len, 0, -1):
-            if text[-i:].lower() == prefix[:i].lower():
-                return i
-        return 0
-
-    def process(self, chunk: str) -> list[tuple[str, str]]:
-        """
-        Split incoming chunk into ('thought' | 'text', content) tuples.
-        Content excludes the <think> tags themselves and handles split tags across chunks.
-        """
-        parts: list[tuple[str, str]] = []
-        self._buffer += chunk
-
-        while self._buffer:
-            if self._in_think:
-                end_match = self._END_PATTERN.search(self._buffer)
-                if end_match:
-                    thought_text = self._buffer[: end_match.start()]
-                    if thought_text:
-                        parts.append(("thought", thought_text))
-                    parts.append(("thought_end", ""))
-                    self._buffer = self._buffer[end_match.end() :]
-                    self._in_think = False
-                    continue
-
-                hold_len = self._suffix_prefix_len(self._buffer, self._END_PREFIX)
-                emit = self._buffer[: len(self._buffer) - hold_len]
-                if emit:
-                    parts.append(("thought", emit))
-                self._buffer = self._buffer[-hold_len:] if hold_len > 0 else ""
-                break
-
-            start_match = self._START_PATTERN.search(self._buffer)
-            if start_match:
-                prefix = self._buffer[: start_match.start()]
-                if prefix:
-                    parts.append(("text", prefix))
-                self._buffer = self._buffer[start_match.end() :]
-                parts.append(("thought_start", ""))
-                self._in_think = True
-                continue
-
-            hold_len = self._suffix_prefix_len(self._buffer, self._START_PREFIX)
-            emit = self._buffer[: len(self._buffer) - hold_len]
-            if emit:
-                parts.append(("text", emit))
-            self._buffer = self._buffer[-hold_len:] if hold_len > 0 else ""
-            break
-
-        cleaned_parts: list[tuple[str, str]] = []
-        for kind, content in parts:
-            # Extra safeguard: strip any stray tags that slipped through.
-            content = self._START_PATTERN.sub("", content)
-            content = self._END_PATTERN.sub("", content)
-            if content or kind in {"thought_start", "thought_end"}:
-                cleaned_parts.append((kind, content))
-
-        return cleaned_parts
-
-    def flush(self) -> list[tuple[str, str]]:
-        """Flush remaining buffer when the stream ends."""
-        if not self._buffer:
-            return []
-        kind = "thought" if self._in_think else "text"
-        content = self._buffer
-        # Drop dangling partial tags instead of emitting them
-        if content.lower().startswith(self._START_PREFIX) or content.lower().startswith(self._END_PREFIX):
-            content = ""
-        self._buffer = ""
-        if not content and not self._in_think:
-            return []
-        # Strip any complete tags that might still be present.
-        content = self._START_PATTERN.sub("", content)
-        content = self._END_PATTERN.sub("", content)
-
-        result: list[tuple[str, str]] = []
-        if content:
-            result.append((kind, content))
-        if self._in_think:
-            result.append(("thought_end", ""))
-            self._in_think = False
-        return result
-
-
-class StreamBuffers(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    think_parser: ThinkTagStreamParser = Field(default_factory=ThinkTagStreamParser)
-    pending_thought: list[str] = Field(default_factory=list)
-    pending_content: list[str] = Field(default_factory=list)
-    pending_tool_calls: list[ToolCall] = Field(default_factory=list)
-    current_turn_reasoning: list[str] = Field(default_factory=list)
-    reasoning_per_turn: list[str] = Field(default_factory=list)
-
-
-class TraceState(BaseModel):
-    trace_segments: list[LLMTraceSegment] = Field(default_factory=list)
-    tool_trace_map: dict[str, LLMTraceSegment] = Field(default_factory=dict)
-    tool_call_index_map: dict[str, int] = Field(default_factory=dict)
-    model_segment_start_time: float | None = Field(default=None, description="Start time for current model segment")
-    pending_usage: LLMUsage | None = Field(default=None, description="Pending usage for current model segment")
-
-
-class AggregatedResult(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    text: str = ""
-    files: list[File] = Field(default_factory=list)
-    usage: LLMUsage = Field(default_factory=LLMUsage.empty_usage)
-    finish_reason: str | None = None
-
-
-class AgentContext(BaseModel):
-    agent_logs: list[AgentLogEvent] = Field(default_factory=list)
-    agent_result: AgentResult | None = None
-
-
-class ToolOutputState(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    stream: StreamBuffers = Field(default_factory=StreamBuffers)
-    trace: TraceState = Field(default_factory=TraceState)
-    aggregate: AggregatedResult = Field(default_factory=AggregatedResult)
-    agent: AgentContext = Field(default_factory=AgentContext)
-
-
-class ToolLogPayload(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    tool_name: str = ""
-    tool_call_id: str = ""
-    tool_args: dict[str, Any] = Field(default_factory=dict)
-    tool_output: Any = None
-    tool_error: Any = None
-    files: list[Any] = Field(default_factory=list)
-    meta: dict[str, Any] = Field(default_factory=dict)
-
-    @classmethod
-    def from_log(cls, log: AgentLog) -> "ToolLogPayload":
-        data = log.data or {}
-        return cls(
-            tool_name=data.get("tool_name", ""),
-            tool_call_id=data.get("tool_call_id", ""),
-            tool_args=data.get("tool_args") or {},
-            tool_output=data.get("output"),
-            tool_error=data.get("error"),
-            files=data.get("files") or [],
-            meta=data.get("meta") or {},
-        )
-
-    @classmethod
-    def from_mapping(cls, data: Mapping[str, Any]) -> "ToolLogPayload":
-        return cls(
-            tool_name=data.get("tool_name", ""),
-            tool_call_id=data.get("tool_call_id", ""),
-            tool_args=data.get("tool_args") or {},
-            tool_output=data.get("output"),
-            tool_error=data.get("error"),
-            files=data.get("files") or [],
-            meta=data.get("meta") or {},
-        )
-
-
 class LLMNodeData(BaseNodeData):
    model: ModelConfig
    prompt_template: Sequence[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate
@@ -355,10 +86,6 @@ class LLMNodeData(BaseNodeData):
        ),
    )

-    # Tool support
-    tools: Sequence[ToolMetadata] = Field(default_factory=list)
-    max_iterations: int | None = Field(default=None, description="Maximum number of iterations for the LLM node")
-
    @field_validator("prompt_config", mode="before")
    @classmethod
    def convert_none_prompt_config(cls, v: Any):
--- a/api/core/workflow/nodes/llm/node.py
+++ b/api/core/workflow/nodes/llm/node.py
--- a/api/extensions/ext_celery.py
+++ b/api/extensions/ext_celery.py
@@ -102,8 +102,6 @@ def init_app(app: DifyApp) -> Celery:
    imports = [
        "tasks.async_workflow_tasks",  # trigger workers
        "tasks.trigger_processing_tasks",  # async trigger processing
-        "tasks.generate_summary_index_task",  # summary index generation
-        "tasks.regenerate_summary_index_task",  # summary index regeneration
    ]
    day = dify_config.CELERY_BEAT_SCHEDULER_TIME

--- a/api/fields/conversation_fields.py
+++ b/api/fields/conversation_fields.py
@@ -169,7 +169,6 @@ class MessageDetail(ResponseModel):
    status: str
    error: str | None = None
    parent_message_id: str | None = None
-    generation_detail: JSONValue | None = Field(default=None, validation_alias="generation_detail_dict")

    @field_validator("inputs", mode="before")
    @classmethod
--- a/api/fields/dataset_fields.py
+++ b/api/fields/dataset_fields.py
@@ -39,14 +39,6 @@ dataset_retrieval_model_fields = {
    "score_threshold_enabled": fields.Boolean,
    "score_threshold": fields.Float,
 }
-
-dataset_summary_index_fields = {
-    "enable": fields.Boolean,
-    "model_name": fields.String,
-    "model_provider_name": fields.String,
-    "summary_prompt": fields.String,
-}
-
 external_retrieval_model_fields = {
    "top_k": fields.Integer,
    "score_threshold": fields.Float,
@@ -91,7 +83,6 @@ dataset_detail_fields = {
    "embedding_model_provider": fields.String,
    "embedding_available": fields.Boolean,
    "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields),
-    "summary_index_setting": fields.Nested(dataset_summary_index_fields),
    "tags": fields.List(fields.Nested(tag_fields)),
    "doc_form": fields.String,
    "external_knowledge_info": fields.Nested(external_knowledge_info_fields),
--- a/api/fields/document_fields.py
+++ b/api/fields/document_fields.py
@@ -33,8 +33,6 @@ document_fields = {
    "hit_count": fields.Integer,
    "doc_form": fields.String,
    "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
-    "summary_index_status": fields.String,  # Summary index generation status: "waiting", "generating", "completed", "partial_error", or null if not enabled
-    "need_summary": fields.Boolean,  # Whether this document needs summary index generation
 }

 document_with_segments_fields = {
@@ -62,8 +60,6 @@ document_with_segments_fields = {
    "completed_segments": fields.Integer,
    "total_segments": fields.Integer,
    "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
-    "summary_index_status": fields.String,  # Summary index generation status: "waiting", "generating", "completed", "partial_error", or null if not enabled
-    "need_summary": fields.Boolean,  # Whether this document needs summary index generation
 }

 dataset_and_document_fields = {
--- a/api/fields/hit_testing_fields.py
+++ b/api/fields/hit_testing_fields.py
@@ -58,5 +58,4 @@ hit_testing_record_fields = {
    "score": fields.Float,
    "tsne_position": fields.Raw,
    "files": fields.List(fields.Nested(files_fields)),
-    "summary": fields.String,  # Summary content if retrieved via summary index
 }
--- a/api/fields/message_fields.py
+++ b/api/fields/message_fields.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 from datetime import datetime
 from typing import TypeAlias
+from uuid import uuid4

 from pydantic import BaseModel, ConfigDict, Field, field_validator

@@ -20,8 +21,8 @@ class SimpleFeedback(ResponseModel):


 class RetrieverResource(ResponseModel):
-    id: str
-    message_id: str
+    id: str = Field(default_factory=lambda: str(uuid4()))
+    message_id: str = Field(default_factory=lambda: str(uuid4()))
    position: int
    dataset_id: str | None = None
    dataset_name: str | None = None
@@ -59,7 +60,6 @@ class MessageListItem(ResponseModel):
    message_files: list[MessageFile]
    status: str
    error: str | None = None
-    generation_detail: JSONValueType | None = Field(default=None, validation_alias="generation_detail_dict")

    @field_validator("inputs", mode="before")
    @classmethod
--- a/api/fields/segment_fields.py
+++ b/api/fields/segment_fields.py
@@ -49,5 +49,4 @@ segment_fields = {
    "stopped_at": TimestampField,
    "child_chunks": fields.List(fields.Nested(child_chunk_fields)),
    "attachments": fields.List(fields.Nested(attachment_fields)),
-    "summary": fields.String,  # Summary content for the segment
 }
--- a/api/fields/workflow_run_fields.py
+++ b/api/fields/workflow_run_fields.py
@@ -81,7 +81,6 @@ workflow_run_detail_fields = {
    "inputs": fields.Raw(attribute="inputs_dict"),
    "status": fields.String,
    "outputs": fields.Raw(attribute="outputs_dict"),
-    "outputs_as_generation": fields.Boolean,
    "error": fields.String,
    "elapsed_time": fields.Float,
    "total_tokens": fields.Integer,
@@ -130,7 +129,6 @@ workflow_run_node_execution_fields = {
    "inputs_truncated": fields.Boolean,
    "outputs_truncated": fields.Boolean,
    "process_data_truncated": fields.Boolean,
-    "generation_detail": fields.Raw,
 }

 workflow_run_node_execution_list_fields = {
--- a/api/migrations/versions/2025_12_10_1504-8a7f2ad7c23e_add_workflow_runs_created_at_idx.py
+++ b/api/migrations/versions/2025_12_10_1504-8a7f2ad7c23e_add_workflow_runs_created_at_idx.py
@@ -1,29 +0,0 @@
-"""Add index on workflow_runs.created_at
-
-Revision ID: 8a7f2ad7c23e
-Revises: d57accd375ae
-Create Date: 2025-12-10 15:04:00.000000
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "8a7f2ad7c23e"
-down_revision = "d57accd375ae"
-branch_labels = None
-depends_on = None
-
-
-def upgrade():
-    with op.batch_alter_table("workflow_runs", schema=None) as batch_op:
-        batch_op.create_index(
-            batch_op.f("workflow_runs_created_at_idx"),
-            ["created_at"],
-            unique=False,
-        )
-
-
-def downgrade():
-    with op.batch_alter_table("workflow_runs", schema=None) as batch_op:
-        batch_op.drop_index(batch_op.f("workflow_runs_created_at_idx"))
--- a/api/migrations/versions/2025_12_15_1614-6bb0832495f0_alter_table_pipeline_recommended_.py
+++ b/api/migrations/versions/2025_12_15_1614-6bb0832495f0_alter_table_pipeline_recommended_.py
@@ -1,64 +0,0 @@
-"""Alter table pipeline_recommended_plugins add column type
-
-Revision ID: 6bb0832495f0
-Revises: 7bb281b7a422
-Create Date: 2025-12-15 16:14:38.482072
-
-"""
-from alembic import op
-import models as models
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = '6bb0832495f0'
-down_revision = '7bb281b7a422'
-branch_labels = None
-depends_on = None
-
-
-def upgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    with op.batch_alter_table('app_triggers', schema=None) as batch_op:
-        batch_op.alter_column('provider_name',
-               existing_type=sa.VARCHAR(length=255),
-               nullable=False,
-               existing_server_default=sa.text("''::character varying"))
-
-    with op.batch_alter_table('operation_logs', schema=None) as batch_op:
-        batch_op.alter_column('content',
-               existing_type=postgresql.JSON(astext_type=sa.Text()),
-               nullable=False)
-
-    with op.batch_alter_table('pipeline_recommended_plugins', schema=None) as batch_op:
-        batch_op.add_column(sa.Column('type', sa.String(length=50), nullable=True))
-
-    with op.batch_alter_table('providers', schema=None) as batch_op:
-        batch_op.alter_column('quota_used',
-               existing_type=sa.BIGINT(),
-               nullable=False)
-
-    # ### end Alembic commands ###
-
-def downgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    with op.batch_alter_table('providers', schema=None) as batch_op:
-        batch_op.alter_column('quota_used',
-               existing_type=sa.BIGINT(),
-               nullable=True)
-
-    with op.batch_alter_table('pipeline_recommended_plugins', schema=None) as batch_op:
-        batch_op.drop_column('type')
-
-    with op.batch_alter_table('operation_logs', schema=None) as batch_op:
-        batch_op.alter_column('content',
-               existing_type=postgresql.JSON(astext_type=sa.Text()),
-               nullable=True)
-
-    with op.batch_alter_table('app_triggers', schema=None) as batch_op:
-        batch_op.alter_column('provider_name',
-               existing_type=sa.VARCHAR(length=255),
-               nullable=True,
-               existing_server_default=sa.text("''::character varying"))
-
-    # ### end Alembic commands ###
--- a/api/migrations/versions/2025_12_16_1424-2536f83803a8_add_type_column_not_null_default_tool.py
+++ b/api/migrations/versions/2025_12_16_1424-2536f83803a8_add_type_column_not_null_default_tool.py
@@ -1,33 +0,0 @@
-"""add type column not null default tool
-
-Revision ID: 2536f83803a8
-Revises: 6bb0832495f0
-Create Date: 2025-12-16 14:24:40.740253
-
-"""
-from alembic import op
-import models as models
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = '2536f83803a8'
-down_revision = '6bb0832495f0'
-branch_labels = None
-depends_on = None
-
-
-def upgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    with op.batch_alter_table('pipeline_recommended_plugins', schema=None) as batch_op:
-        batch_op.add_column(sa.Column('type', sa.String(length=50), nullable=False, server_default='tool'))
-
-    # ### end Alembic commands ###
-
-
-def downgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    with op.batch_alter_table('pipeline_recommended_plugins', schema=None) as batch_op:
-        batch_op.drop_column('type')
-
-    # ### end Alembic commands ###
--- a/api/migrations/versions/2025_12_30_1617-85c8b4a64f53_add_llm_generation_detail_table.py
+++ b/api/migrations/versions/2025_12_30_1617-85c8b4a64f53_add_llm_generation_detail_table.py
@@ -1,46 +0,0 @@
-"""add llm generation detail table.
-
-Revision ID: 85c8b4a64f53
-Revises: 7bb281b7a422
-Create Date: 2025-12-10 16:17:46.597669
-
-"""
-from alembic import op
-import models as models
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = '85c8b4a64f53'
-down_revision = '7df29de0f6be'
-branch_labels = None
-depends_on = None
-
-
-def upgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('llm_generation_details',
-    sa.Column('id', models.types.StringUUID(), nullable=False),
-    sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
-    sa.Column('app_id', models.types.StringUUID(), nullable=False),
-    sa.Column('message_id', models.types.StringUUID(), nullable=True),
-    sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True),
-    sa.Column('node_id', sa.String(length=255), nullable=True),
-    sa.Column('reasoning_content', models.types.LongText(), nullable=True),
-    sa.Column('tool_calls', models.types.LongText(), nullable=True),
-    sa.Column('sequence', models.types.LongText(), nullable=True),
-    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
-    sa.CheckConstraint('(message_id IS NOT NULL AND workflow_run_id IS NULL AND node_id IS NULL) OR (message_id IS NULL AND workflow_run_id IS NOT NULL AND node_id IS NOT NULL)', name=op.f('llm_generation_details_ck_llm_generation_detail_assoc_mode_check')),
-    sa.PrimaryKeyConstraint('id', name='llm_generation_detail_pkey'),
-    sa.UniqueConstraint('message_id', name=op.f('llm_generation_details_message_id_key'))
-    )
-    with op.batch_alter_table('llm_generation_details', schema=None) as batch_op:
-        batch_op.create_index('idx_llm_generation_detail_message', ['message_id'], unique=False)
-        batch_op.create_index('idx_llm_generation_detail_workflow', ['workflow_run_id', 'node_id'], unique=False)
-
-
-
-def downgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_table('llm_generation_details')
-    # ### end Alembic commands ###
--- a/api/migrations/versions/2026_01_09_1110-f9f6d18a37f9_add_table_explore_banner_and_trial.py
+++ b/api/migrations/versions/2026_01_09_1110-f9f6d18a37f9_add_table_explore_banner_and_trial.py
@@ -1,73 +0,0 @@
-"""add table explore banner and trial
-
-Revision ID: f9f6d18a37f9
-Revises: 7df29de0f6be
-Create Date: 2026-01-09 11:10:18.079355
-
-"""
-from alembic import op
-import models as models
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = 'f9f6d18a37f9'
-down_revision = '7df29de0f6be'
-branch_labels = None
-depends_on = None
-
-
-def upgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('account_trial_app_records',
-    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
-    sa.Column('account_id', models.types.StringUUID(), nullable=False),
-    sa.Column('app_id', models.types.StringUUID(), nullable=False),
-    sa.Column('count', sa.Integer(), nullable=False),
-    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
-    sa.PrimaryKeyConstraint('id', name='user_trial_app_pkey'),
-    sa.UniqueConstraint('account_id', 'app_id', name='unique_account_trial_app_record')
-    )
-    with op.batch_alter_table('account_trial_app_records', schema=None) as batch_op:
-        batch_op.create_index('account_trial_app_record_account_id_idx', ['account_id'], unique=False)
-        batch_op.create_index('account_trial_app_record_app_id_idx', ['app_id'], unique=False)
-
-    op.create_table('exporle_banners',
-    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
-    sa.Column('content', sa.JSON(), nullable=False),
-    sa.Column('link', sa.String(length=255), nullable=False),
-    sa.Column('sort', sa.Integer(), nullable=False),
-    sa.Column('status', sa.String(length=255), server_default=sa.text("'enabled'::character varying"), nullable=False),
-    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
-    sa.Column('language', sa.String(length=255), server_default=sa.text("'en-US'::character varying"), nullable=False),
-    sa.PrimaryKeyConstraint('id', name='exporler_banner_pkey')
-    )
-    op.create_table('trial_apps',
-    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
-    sa.Column('app_id', models.types.StringUUID(), nullable=False),
-    sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
-    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
-    sa.Column('trial_limit', sa.Integer(), nullable=False),
-    sa.PrimaryKeyConstraint('id', name='trial_app_pkey'),
-    sa.UniqueConstraint('app_id', name='unique_trail_app_id')
-    )
-    with op.batch_alter_table('trial_apps', schema=None) as batch_op:
-        batch_op.create_index('trial_app_app_id_idx', ['app_id'], unique=False)
-        batch_op.create_index('trial_app_tenant_id_idx', ['tenant_id'], unique=False)
-    # ### end Alembic commands ###
-
-
-def downgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    with op.batch_alter_table('trial_apps', schema=None) as batch_op:
-        batch_op.drop_index('trial_app_tenant_id_idx')
-        batch_op.drop_index('trial_app_app_id_idx')
-
-    op.drop_table('trial_apps')
-    op.drop_table('exporle_banners')
-    with op.batch_alter_table('account_trial_app_records', schema=None) as batch_op:
-        batch_op.drop_index('account_trial_app_record_app_id_idx')
-        batch_op.drop_index('account_trial_app_record_account_id_idx')
-
-    op.drop_table('account_trial_app_records')
-    # ### end Alembic commands ###
--- a/api/migrations/versions/2026_01_12_1358-562dcce7d77c_add_summaryindex_feature.py
+++ b/api/migrations/versions/2026_01_12_1358-562dcce7d77c_add_summaryindex_feature.py
@@ -1,69 +0,0 @@
-"""add SummaryIndex feature
-
-Revision ID: 562dcce7d77c
-Revises: 03ea244985ce
-Create Date: 2026-01-12 13:58:40.584802
-
-"""
-from alembic import op
-import models as models
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = '562dcce7d77c'
-down_revision = '03ea244985ce'
-branch_labels = None
-depends_on = None
-
-
-def upgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('document_segment_summary',
-    sa.Column('id', models.types.StringUUID(), nullable=False),
-    sa.Column('dataset_id', models.types.StringUUID(), nullable=False),
-    sa.Column('document_id', models.types.StringUUID(), nullable=False),
-    sa.Column('chunk_id', models.types.StringUUID(), nullable=False),
-    sa.Column('summary_content', models.types.LongText(), nullable=True),
-    sa.Column('summary_index_node_id', sa.String(length=255), nullable=True),
-    sa.Column('summary_index_node_hash', sa.String(length=255), nullable=True),
-    sa.Column('status', sa.String(length=32), server_default=sa.text("'generating'"), nullable=False),
-    sa.Column('error', models.types.LongText(), nullable=True),
-    sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False),
-    sa.Column('disabled_at', sa.DateTime(), nullable=True),
-    sa.Column('disabled_by', models.types.StringUUID(), nullable=True),
-    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
-    sa.PrimaryKeyConstraint('id', name='document_segment_summary_pkey')
-    )
-    with op.batch_alter_table('document_segment_summary', schema=None) as batch_op:
-        batch_op.create_index('document_segment_summary_chunk_id_idx', ['chunk_id'], unique=False)
-        batch_op.create_index('document_segment_summary_dataset_id_idx', ['dataset_id'], unique=False)
-        batch_op.create_index('document_segment_summary_document_id_idx', ['document_id'], unique=False)
-        batch_op.create_index('document_segment_summary_status_idx', ['status'], unique=False)
-
-    with op.batch_alter_table('datasets', schema=None) as batch_op:
-        batch_op.add_column(sa.Column('summary_index_setting', models.types.AdjustedJSON(), nullable=True))
-
-    with op.batch_alter_table('documents', schema=None) as batch_op:
-        batch_op.add_column(sa.Column('need_summary', sa.Boolean(), server_default=sa.text('false'), nullable=True))
-
-    # ### end Alembic commands ###
-
-
-def downgrade():
-    # ### commands auto generated by Alembic - please adjust! ###
-    with op.batch_alter_table('documents', schema=None) as batch_op:
-        batch_op.drop_column('need_summary')
-
-    with op.batch_alter_table('datasets', schema=None) as batch_op:
-        batch_op.drop_column('summary_index_setting')
-
-    with op.batch_alter_table('document_segment_summary', schema=None) as batch_op:
-        batch_op.drop_index('document_segment_summary_status_idx')
-        batch_op.drop_index('document_segment_summary_document_id_idx')
-        batch_op.drop_index('document_segment_summary_dataset_id_idx')
-        batch_op.drop_index('document_segment_summary_chunk_id_idx')
-
-    op.drop_table('document_segment_summary')
-    # ### end Alembic commands ###
--- a/api/models/__init__.py
+++ b/api/models/__init__.py
@@ -35,7 +35,6 @@ from .enums import (
    WorkflowTriggerStatus,
 )
 from .model import (
-    AccountTrialAppRecord,
    ApiRequest,
    ApiToken,
    App,
@@ -48,10 +47,8 @@ from .model import (
    DatasetRetrieverResource,
    DifySetup,
    EndUser,
-    ExporleBanner,
    IconType,
    InstalledApp,
-    LLMGenerationDetail,
    Message,
    MessageAgentThought,
    MessageAnnotation,
@@ -65,7 +62,6 @@ from .model import (
    TagBinding,
    TenantCreditPool,
    TraceAppConfig,
-    TrialApp,
    UploadFile,
 )
 from .oauth import DatasourceOauthParamConfig, DatasourceProvider
@@ -118,7 +114,6 @@ __all__ = [
    "Account",
    "AccountIntegrate",
    "AccountStatus",
-    "AccountTrialAppRecord",
    "ApiRequest",
    "ApiToken",
    "ApiToolProvider",
@@ -155,13 +150,11 @@ __all__ = [
    "DocumentSegment",
    "Embedding",
    "EndUser",
-    "ExporleBanner",
    "ExternalKnowledgeApis",
    "ExternalKnowledgeBindings",
    "IconType",
    "InstalledApp",
    "InvitationCode",
-    "LLMGenerationDetail",
    "LoadBalancingModelConfig",
    "Message",
    "MessageAgentThought",
@@ -195,7 +188,6 @@ __all__ = [
    "ToolLabelBinding",
    "ToolModelInvoke",
    "TraceAppConfig",
-    "TrialApp",
    "TriggerOAuthSystemClient",
    "TriggerOAuthTenantClient",
    "TriggerSubscription",
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -72,7 +72,6 @@ class Dataset(Base):
    keyword_number = mapped_column(sa.Integer, nullable=True, server_default=sa.text("10"))
    collection_binding_id = mapped_column(StringUUID, nullable=True)
    retrieval_model = mapped_column(AdjustedJSON, nullable=True)
-    summary_index_setting = mapped_column(AdjustedJSON, nullable=True)
    built_in_field_enabled = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))
    icon_info = mapped_column(AdjustedJSON, nullable=True)
    runtime_mode = mapped_column(sa.String(255), nullable=True, server_default=sa.text("'general'"))
@@ -420,7 +419,6 @@ class Document(Base):
    doc_metadata = mapped_column(AdjustedJSON, nullable=True)
    doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'"))
    doc_language = mapped_column(String(255), nullable=True)
-    need_summary: Mapped[bool | None] = mapped_column(sa.Boolean, nullable=True, server_default=sa.text("false"))

    DATA_SOURCES = ["upload_file", "notion_import", "website_crawl"]

@@ -1569,34 +1567,3 @@ class SegmentAttachmentBinding(Base):
    segment_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    attachment_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
-
-
-class DocumentSegmentSummary(Base):
-    __tablename__ = "document_segment_summary"
-    __table_args__ = (
-        sa.PrimaryKeyConstraint("id", name="document_segment_summary_pkey"),
-        sa.Index("document_segment_summary_dataset_id_idx", "dataset_id"),
-        sa.Index("document_segment_summary_document_id_idx", "document_id"),
-        sa.Index("document_segment_summary_chunk_id_idx", "chunk_id"),
-        sa.Index("document_segment_summary_status_idx", "status"),
-    )
-
-    id: Mapped[str] = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4()))
-    dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-    document_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-    # corresponds to DocumentSegment.id or parent chunk id
-    chunk_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-    summary_content: Mapped[str] = mapped_column(LongText, nullable=True)
-    summary_index_node_id: Mapped[str] = mapped_column(String(255), nullable=True)
-    summary_index_node_hash: Mapped[str] = mapped_column(String(255), nullable=True)
-    status: Mapped[str] = mapped_column(String(32), nullable=False, server_default=sa.text("'generating'"))
-    error: Mapped[str] = mapped_column(LongText, nullable=True)
-    enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
-    disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    disabled_by = mapped_column(StringUUID, nullable=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp())
-
-    def __repr__(self):
-        return f"<DocumentSegmentSummary id={self.id} chunk_id={self.chunk_id} status={self.status}>"
-
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -33,8 +33,6 @@ from .provider_ids import GenericProviderID
 from .types import LongText, StringUUID

 if TYPE_CHECKING:
-    from core.app.entities.llm_generation_entities import LLMGenerationDetailData
-
    from .workflow import Workflow


@@ -605,64 +603,6 @@ class InstalledApp(TypeBase):
        return tenant


-class TrialApp(Base):
-    __tablename__ = "trial_apps"
-    __table_args__ = (
-        sa.PrimaryKeyConstraint("id", name="trial_app_pkey"),
-        sa.Index("trial_app_app_id_idx", "app_id"),
-        sa.Index("trial_app_tenant_id_idx", "tenant_id"),
-        sa.UniqueConstraint("app_id", name="unique_trail_app_id"),
-    )
-
-    id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
-    app_id = mapped_column(StringUUID, nullable=False)
-    tenant_id = mapped_column(StringUUID, nullable=False)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
-    trial_limit = mapped_column(sa.Integer, nullable=False, default=3)
-
-    @property
-    def app(self) -> App | None:
-        app = db.session.query(App).where(App.id == self.app_id).first()
-        return app
-
-
-class AccountTrialAppRecord(Base):
-    __tablename__ = "account_trial_app_records"
-    __table_args__ = (
-        sa.PrimaryKeyConstraint("id", name="user_trial_app_pkey"),
-        sa.Index("account_trial_app_record_account_id_idx", "account_id"),
-        sa.Index("account_trial_app_record_app_id_idx", "app_id"),
-        sa.UniqueConstraint("account_id", "app_id", name="unique_account_trial_app_record"),
-    )
-    id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
-    account_id = mapped_column(StringUUID, nullable=False)
-    app_id = mapped_column(StringUUID, nullable=False)
-    count = mapped_column(sa.Integer, nullable=False, default=0)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
-
-    @property
-    def app(self) -> App | None:
-        app = db.session.query(App).where(App.id == self.app_id).first()
-        return app
-
-    @property
-    def user(self) -> Account | None:
-        user = db.session.query(Account).where(Account.id == self.account_id).first()
-        return user
-
-
-class ExporleBanner(Base):
-    __tablename__ = "exporle_banners"
-    __table_args__ = (sa.PrimaryKeyConstraint("id", name="exporler_banner_pkey"),)
-    id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
-    content = mapped_column(sa.JSON, nullable=False)
-    link = mapped_column(String(255), nullable=False)
-    sort = mapped_column(sa.Integer, nullable=False)
-    status = mapped_column(sa.String(255), nullable=False, server_default=sa.text("'enabled'::character varying"))
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
-    language = mapped_column(String(255), nullable=False, server_default=sa.text("'en-US'::character varying"))
-
-
 class OAuthProviderApp(TypeBase):
    """
    Globally shared OAuth provider app information.
@@ -1264,18 +1204,6 @@ class Message(Base):
            .all()
        )

-    # FIXME (Novice) -- It's easy to cause N+1 query problem here.
-    @property
-    def generation_detail(self) -> dict[str, Any] | None:
-        """
-        Get LLM generation detail for this message.
-        Returns the detail as a dictionary or None if not found.
-        """
-        detail = db.session.query(LLMGenerationDetail).filter_by(message_id=self.id).first()
-        if detail:
-            return detail.to_dict()
-        return None
-
    @property
    def retriever_resources(self) -> Any:
        return self.message_metadata_dict.get("retriever_resources") if self.message_metadata else []
@@ -2179,87 +2107,3 @@ class TenantCreditPool(Base):

    def has_sufficient_credits(self, required_credits: int) -> bool:
        return self.remaining_credits >= required_credits
-
-
-class LLMGenerationDetail(Base):
-    """
-    Store LLM generation details including reasoning process and tool calls.
-
-    Association (choose one):
-    - For apps with Message: use message_id (one-to-one)
-    - For Workflow: use workflow_run_id + node_id (one run may have multiple LLM nodes)
-    """
-
-    __tablename__ = "llm_generation_details"
-    __table_args__ = (
-        sa.PrimaryKeyConstraint("id", name="llm_generation_detail_pkey"),
-        sa.Index("idx_llm_generation_detail_message", "message_id"),
-        sa.Index("idx_llm_generation_detail_workflow", "workflow_run_id", "node_id"),
-        sa.CheckConstraint(
-            "(message_id IS NOT NULL AND workflow_run_id IS NULL AND node_id IS NULL)"
-            " OR "
-            "(message_id IS NULL AND workflow_run_id IS NOT NULL AND node_id IS NOT NULL)",
-            name="ck_llm_generation_detail_assoc_mode",
-        ),
-    )
-
-    id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()))
-    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-
-    # Association fields (choose one)
-    message_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, unique=True)
-    workflow_run_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
-    node_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
-
-    # Core data as JSON strings
-    reasoning_content: Mapped[str | None] = mapped_column(LongText)
-    tool_calls: Mapped[str | None] = mapped_column(LongText)
-    sequence: Mapped[str | None] = mapped_column(LongText)
-
-    created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
-
-    def to_domain_model(self) -> LLMGenerationDetailData:
-        """Convert to Pydantic domain model with proper validation."""
-        from core.app.entities.llm_generation_entities import LLMGenerationDetailData
-
-        return LLMGenerationDetailData(
-            reasoning_content=json.loads(self.reasoning_content) if self.reasoning_content else [],
-            tool_calls=json.loads(self.tool_calls) if self.tool_calls else [],
-            sequence=json.loads(self.sequence) if self.sequence else [],
-        )
-
-    def to_dict(self) -> dict[str, Any]:
-        """Convert to dictionary for API response."""
-        return self.to_domain_model().to_response_dict()
-
-    @classmethod
-    def from_domain_model(
-        cls,
-        data: LLMGenerationDetailData,
-        *,
-        tenant_id: str,
-        app_id: str,
-        message_id: str | None = None,
-        workflow_run_id: str | None = None,
-        node_id: str | None = None,
-    ) -> LLMGenerationDetail:
-        """Create from Pydantic domain model."""
-        # Enforce association mode at object creation time as well.
-        message_mode = message_id is not None
-        workflow_mode = workflow_run_id is not None or node_id is not None
-        if message_mode and workflow_mode:
-            raise ValueError("LLMGenerationDetail cannot set both message_id and workflow_run_id/node_id.")
-        if not message_mode and not (workflow_run_id and node_id):
-            raise ValueError("LLMGenerationDetail requires either message_id or workflow_run_id+node_id.")
-
-        return cls(
-            tenant_id=tenant_id,
-            app_id=app_id,
-            message_id=message_id,
-            workflow_run_id=workflow_run_id,
-            node_id=node_id,
-            reasoning_content=json.dumps(data.reasoning_content) if data.reasoning_content else None,
-            tool_calls=json.dumps([tc.model_dump() for tc in data.tool_calls]) if data.tool_calls else None,
-            sequence=json.dumps([seg.model_dump() for seg in data.sequence]) if data.sequence else None,
-        )
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@@ -59,37 +59,6 @@ from .types import EnumText, LongText, StringUUID
 logger = logging.getLogger(__name__)


-def is_generation_outputs(outputs: Mapping[str, Any]) -> bool:
-    if not outputs:
-        return False
-
-    allowed_sequence_types = {"reasoning", "content", "tool_call"}
-
-    def valid_sequence_item(item: Mapping[str, Any]) -> bool:
-        return isinstance(item, Mapping) and item.get("type") in allowed_sequence_types
-
-    def valid_value(value: Any) -> bool:
-        if not isinstance(value, Mapping):
-            return False
-
-        content = value.get("content")
-        reasoning_content = value.get("reasoning_content")
-        tool_calls = value.get("tool_calls")
-        sequence = value.get("sequence")
-
-        return (
-            isinstance(content, str)
-            and isinstance(reasoning_content, list)
-            and all(isinstance(item, str) for item in reasoning_content)
-            and isinstance(tool_calls, list)
-            and all(isinstance(item, Mapping) for item in tool_calls)
-            and isinstance(sequence, list)
-            and all(valid_sequence_item(item) for item in sequence)
-        )
-
-    return all(valid_value(value) for value in outputs.values())
-
-
 class WorkflowType(StrEnum):
    """
    Workflow Type Enum
@@ -698,10 +667,6 @@ class WorkflowRun(Base):
    def workflow(self):
        return db.session.query(Workflow).where(Workflow.id == self.workflow_id).first()

-    @property
-    def outputs_as_generation(self):
-        return is_generation_outputs(self.outputs_dict)
-
    def to_dict(self):
        return {
            "id": self.id,
@@ -715,7 +680,6 @@ class WorkflowRun(Base):
            "inputs": self.inputs_dict,
            "status": self.status,
            "outputs": self.outputs_dict,
-            "outputs_as_generation": self.outputs_as_generation,
            "error": self.error,
            "elapsed_time": self.elapsed_time,
            "total_tokens": self.total_tokens,
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dify-api"
-version = "1.11.3"
+version = "1.11.4"
 requires-python = ">=3.11,<3.13"

 dependencies = [
--- a/api/services/annotation_service.py
+++ b/api/services/annotation_service.py
@@ -355,6 +355,7 @@ class AppAnnotationService:
    def batch_import_app_annotations(cls, app_id, file: FileStorage):
        """
        Batch import annotations from CSV file with enhanced security checks.
+
        Security features:
        - File size validation
        - Row count limits (min/max)
@@ -363,6 +364,7 @@ class AppAnnotationService:
        - Concurrency tracking
        """
        from configs import dify_config
+
        # get app info
        current_user, current_tenant_id = current_account_with_tenant()
        app = (
@@ -446,31 +448,27 @@ class AppAnnotationService:
                    f"The CSV file must contain at least {min_records} valid annotation record(s). "
                    f"Found {len(result)} valid record(s)."
                )
+
            # Check annotation quota limit
            features = FeatureService.get_features(current_tenant_id)
            if features.billing.enabled:
                annotation_quota_limit = features.annotation_quota_limit
                if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
-                    raise ValueError(
-                        f"The number of annotations ({len(result)}) would exceed your subscription limit. "
-                        f"Current usage: {annotation_quota_limit.size}/{annotation_quota_limit.limit}. "
-                        f"Available: {annotation_quota_limit.limit - annotation_quota_limit.size}."
-                    )
-
-            # Create async job
+                    raise ValueError("The number of annotations exceeds the limit of your subscription.")
+            # async job
            job_id = str(uuid.uuid4())
            indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"
+
            # Register job in active tasks list for concurrency tracking
            current_time = int(naive_utc_now().timestamp() * 1000)
            active_jobs_key = f"annotation_import_active:{current_tenant_id}"
            redis_client.zadd(active_jobs_key, {job_id: current_time})
            redis_client.expire(active_jobs_key, 7200)  # 2 hours TTL
+
            # Set job status
            redis_client.setnx(indexing_cache_key, "waiting")
-            redis_client.expire(indexing_cache_key, 3600)  # 1 hour TTL
-
-            # Send batch import task
            batch_import_annotations_task.delay(str(job_id), result, app_id, current_tenant_id, current_user.id)
+
        except ValueError as e:
            return {"error_msg": str(e)}
        except Exception as e:
--- a/api/services/billing_service.py
+++ b/api/services/billing_service.py
@@ -44,33 +44,6 @@ class BillingService:
        billing_info = cls._send_request("GET", "/subscription/info", params=params)
        return billing_info

-    @classmethod
-    def get_info_bulk(cls, tenant_ids: Sequence[str]) -> dict[str, str]:
-        """
-        Bulk billing info fetch via billing API.
-
-        Payload: {"tenant_ids": ["t1", "t2", ...]} (max 200 per request)
-
-        Returns:
-            Mapping of tenant_id -> plan
-        """
-        results: dict[str, str] = {}
-
-        chunk_size = 200
-        for i in range(0, len(tenant_ids), chunk_size):
-            chunk = tenant_ids[i : i + chunk_size]
-            try:
-                resp = cls._send_request("POST", "/subscription/plan/batch", json={"tenant_ids": chunk})
-                data = resp.get("data", {})
-                for tenant_id, plan in data.items():
-                    if isinstance(plan, str):
-                        results[tenant_id] = plan
-            except Exception:
-                logger.exception("Failed to fetch billing info batch for tenants: %s", chunk)
-                continue
-
-        return results
-
    @classmethod
    def get_tenant_feature_plan_usage_info(cls, tenant_id: str):
        params = {"tenant_id": tenant_id}
--- a/api/services/clear_free_plan_expired_workflow_run_logs.py
+++ b/api/services/clear_free_plan_expired_workflow_run_logs.py
@@ -1,171 +0,0 @@
-import datetime
-import logging
-from collections.abc import Iterable, Sequence
-
-import click
-from sqlalchemy.orm import Session, sessionmaker
-
-from configs import dify_config
-from enums.cloud_plan import CloudPlan
-from extensions.ext_database import db
-from models.workflow import WorkflowRun
-from repositories.api_workflow_run_repository import APIWorkflowRunRepository
-from repositories.sqlalchemy_api_workflow_node_execution_repository import (
-    DifyAPISQLAlchemyWorkflowNodeExecutionRepository,
-)
-from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository
-from services.billing_service import BillingService
-
-logger = logging.getLogger(__name__)
-
-
-class WorkflowRunCleanup:
-    def __init__(
-        self,
-        days: int,
-        batch_size: int,
-        start_after: datetime.datetime | None = None,
-        end_before: datetime.datetime | None = None,
-        workflow_run_repo: APIWorkflowRunRepository | None = None,
-    ):
-        if (start_after is None) ^ (end_before is None):
-            raise ValueError("start_after and end_before must be both set or both omitted.")
-
-        computed_cutoff = datetime.datetime.now() - datetime.timedelta(days=days)
-        self.window_start = start_after
-        self.window_end = end_before or computed_cutoff
-
-        if self.window_start and self.window_end <= self.window_start:
-            raise ValueError("end_before must be greater than start_after.")
-
-        self.batch_size = batch_size
-        self.billing_cache: dict[str, CloudPlan | None] = {}
-        self.workflow_run_repo: APIWorkflowRunRepository
-        if workflow_run_repo:
-            self.workflow_run_repo = workflow_run_repo
-        else:
-            # Lazy import to avoid circular dependencies during module import
-            from repositories.factory import DifyAPIRepositoryFactory
-
-            session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
-            self.workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
-
-    def run(self) -> None:
-        click.echo(
-            click.style(
-                f"Cleaning workflow runs "
-                f"{'between ' + self.window_start.isoformat() + ' and ' if self.window_start else 'before '}"
-                f"{self.window_end.isoformat()} (batch={self.batch_size})",
-                fg="white",
-            )
-        )
-
-        total_runs_deleted = 0
-        batch_index = 0
-        last_seen: tuple[datetime.datetime, str] | None = None
-
-        while True:
-            run_rows = self.workflow_run_repo.get_runs_batch_by_time_range(
-                start_after=self.window_start,
-                end_before=self.window_end,
-                last_seen=last_seen,
-                batch_size=self.batch_size,
-            )
-            if not run_rows:
-                break
-
-            batch_index += 1
-            last_seen = (run_rows[-1].created_at, run_rows[-1].id)
-            tenant_ids = {row.tenant_id for row in run_rows}
-            free_tenants = self._filter_free_tenants(tenant_ids)
-            free_runs = [row for row in run_rows if row.tenant_id in free_tenants]
-            paid_or_skipped = len(run_rows) - len(free_runs)
-
-            if not free_runs:
-                click.echo(
-                    click.style(
-                        f"[batch #{batch_index}] skipped (no sandbox runs in batch, {paid_or_skipped} paid/unknown)",
-                        fg="yellow",
-                    )
-                )
-                continue
-
-            try:
-                counts = self.workflow_run_repo.delete_runs_with_related(
-                    free_runs,
-                    delete_node_executions=self._delete_node_executions,
-                    delete_trigger_logs=self._delete_trigger_logs,
-                )
-            except Exception:
-                logger.exception("Failed to delete workflow runs batch ending at %s", last_seen[0])
-                raise
-
-            total_runs_deleted += counts["runs"]
-            click.echo(
-                click.style(
-                    f"[batch #{batch_index}] deleted runs: {counts['runs']} "
-                    f"(nodes {counts['node_executions']}, offloads {counts['offloads']}, "
-                    f"app_logs {counts['app_logs']}, trigger_logs {counts['trigger_logs']}, "
-                    f"pauses {counts['pauses']}, pause_reasons {counts['pause_reasons']}); "
-                    f"skipped {paid_or_skipped} paid/unknown",
-                    fg="green",
-                )
-            )
-
-        if self.window_start:
-            summary_message = (
-                f"Cleanup complete. Deleted {total_runs_deleted} workflow runs "
-                f"between {self.window_start.isoformat()} and {self.window_end.isoformat()}"
-            )
-        else:
-            summary_message = (
-                f"Cleanup complete. Deleted {total_runs_deleted} workflow runs before {self.window_end.isoformat()}"
-            )
-
-        click.echo(click.style(summary_message, fg="white"))
-
-    def _filter_free_tenants(self, tenant_ids: Iterable[str]) -> set[str]:
-        if not dify_config.BILLING_ENABLED:
-            return set(tenant_ids)
-
-        tenant_id_list = list(tenant_ids)
-        uncached_tenants = [tenant_id for tenant_id in tenant_id_list if tenant_id not in self.billing_cache]
-
-        if uncached_tenants:
-            try:
-                bulk_info = BillingService.get_info_bulk(uncached_tenants)
-            except Exception:
-                bulk_info = {}
-                logger.exception("Failed to fetch billing plans in bulk for tenants: %s", uncached_tenants)
-
-            for tenant_id in uncached_tenants:
-                plan: CloudPlan | None = None
-                info = bulk_info.get(tenant_id)
-                if info:
-                    try:
-                        plan = CloudPlan(info)
-                    except Exception:
-                        logger.exception("Failed to parse billing plan for tenant %s", tenant_id)
-                else:
-                    logger.warning("Missing billing info for tenant %s in bulk resp; treating as non-free", tenant_id)
-
-                self.billing_cache[tenant_id] = plan
-
-        return {tenant_id for tenant_id in tenant_id_list if self.billing_cache.get(tenant_id) == CloudPlan.SANDBOX}
-
-    def _delete_trigger_logs(self, session: Session, run_ids: Sequence[str]) -> int:
-        trigger_repo = SQLAlchemyWorkflowTriggerLogRepository(session)
-        return trigger_repo.delete_by_run_ids(run_ids)
-
-    def _delete_node_executions(self, session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
-        run_contexts: list[DifyAPISQLAlchemyWorkflowNodeExecutionRepository.RunContext] = [
-            {
-                "run_id": run.id,
-                "tenant_id": run.tenant_id,
-                "app_id": run.app_id,
-                "workflow_id": run.workflow_id,
-                "triggered_from": run.triggered_from,
-            }
-            for run in runs
-        ]
-        return DifyAPISQLAlchemyWorkflowNodeExecutionRepository.delete_by_runs(session, run_contexts)
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -89,7 +89,6 @@ from tasks.enable_segments_to_index_task import enable_segments_to_index_task
 from tasks.recover_document_indexing_task import recover_document_indexing_task
 from tasks.remove_document_from_index_task import remove_document_from_index_task
 from tasks.retry_document_indexing_task import retry_document_indexing_task
-from tasks.regenerate_summary_index_task import regenerate_summary_index_task
 from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task

 logger = logging.getLogger(__name__)
@@ -475,11 +474,6 @@ class DatasetService:
        if external_retrieval_model:
            dataset.retrieval_model = external_retrieval_model

-        # Update summary index setting if provided
-        summary_index_setting = data.get("summary_index_setting", None)
-        if summary_index_setting is not None:
-            dataset.summary_index_setting = summary_index_setting
-
        # Update basic dataset properties
        dataset.name = data.get("name", dataset.name)
        dataset.description = data.get("description", dataset.description)
@@ -562,20 +556,12 @@ class DatasetService:
        # Handle indexing technique changes and embedding model updates
        action = DatasetService._handle_indexing_technique_change(dataset, data, filtered_data)

-        # Check if summary_index_setting model changed (before updating database)
-        summary_model_changed = DatasetService._check_summary_index_setting_model_changed(
-            dataset, data
-        )
-
        # Add metadata fields
        filtered_data["updated_by"] = user.id
        filtered_data["updated_at"] = naive_utc_now()
        # update Retrieval model
        if data.get("retrieval_model"):
            filtered_data["retrieval_model"] = data["retrieval_model"]
-        # update summary index setting
-        if data.get("summary_index_setting"):
-            filtered_data["summary_index_setting"] = data.get("summary_index_setting")
        # update icon info
        if data.get("icon_info"):
            filtered_data["icon_info"] = data.get("icon_info")
@@ -584,30 +570,12 @@ class DatasetService:
        db.session.query(Dataset).filter_by(id=dataset.id).update(filtered_data)
        db.session.commit()

-        # Reload dataset to get updated values
-        db.session.refresh(dataset)
-
        # update pipeline knowledge base node data
        DatasetService._update_pipeline_knowledge_base_node_data(dataset, user.id)

        # Trigger vector index task if indexing technique changed
        if action:
            deal_dataset_vector_index_task.delay(dataset.id, action)
-            # If embedding_model changed, also regenerate summary vectors
-            if action == "update":
-                regenerate_summary_index_task.delay(
-                    dataset.id,
-                    regenerate_reason="embedding_model_changed",
-                    regenerate_vectors_only=True,
-                )
-
-        # Trigger summary index regeneration if summary model changed
-        if summary_model_changed:
-            regenerate_summary_index_task.delay(
-                dataset.id,
-                regenerate_reason="summary_model_changed",
-                regenerate_vectors_only=False,
-            )

        return dataset

@@ -646,7 +614,6 @@ class DatasetService:
                            knowledge_index_node_data["chunk_structure"] = dataset.chunk_structure
                            knowledge_index_node_data["indexing_technique"] = dataset.indexing_technique  # pyright: ignore[reportAttributeAccessIssue]
                            knowledge_index_node_data["keyword_number"] = dataset.keyword_number
-                            knowledge_index_node_data["summary_index_setting"] = dataset.summary_index_setting
                            node["data"] = knowledge_index_node_data
                            updated = True
                        except Exception:
@@ -885,49 +852,6 @@ class DatasetService:
        )
        filtered_data["collection_binding_id"] = dataset_collection_binding.id

-    @staticmethod
-    def _check_summary_index_setting_model_changed(dataset: Dataset, data: dict[str, Any]) -> bool:
-        """
-        Check if summary_index_setting model (model_name or model_provider_name) has changed.
-
-        Args:
-            dataset: Current dataset object
-            data: Update data dictionary
-
-        Returns:
-            bool: True if summary model changed, False otherwise
-        """
-        # Check if summary_index_setting is being updated
-        if "summary_index_setting" not in data or data.get("summary_index_setting") is None:
-            return False
-
-        new_summary_setting = data.get("summary_index_setting")
-        old_summary_setting = dataset.summary_index_setting
-
-        # If old setting doesn't exist or is disabled, no need to regenerate
-        if not old_summary_setting or not old_summary_setting.get("enable"):
-            return False
-
-        # If new setting is disabled, no need to regenerate
-        if not new_summary_setting or not new_summary_setting.get("enable"):
-            return False
-
-        # Compare model_name and model_provider_name
-        old_model_name = old_summary_setting.get("model_name")
-        old_model_provider = old_summary_setting.get("model_provider_name")
-        new_model_name = new_summary_setting.get("model_name")
-        new_model_provider = new_summary_setting.get("model_provider_name")
-
-        # Check if model changed
-        if old_model_name != new_model_name or old_model_provider != new_model_provider:
-            logger.info(
-                f"Summary index setting model changed for dataset {dataset.id}: "
-                f"old={old_model_provider}/{old_model_name}, new={new_model_provider}/{new_model_name}"
-            )
-            return True
-
-        return False
-
    @staticmethod
    def update_rag_pipeline_dataset_settings(
        session: Session, dataset: Dataset, knowledge_configuration: KnowledgeConfiguration, has_published: bool = False
@@ -1900,8 +1824,6 @@ class DocumentService:
                        DuplicateDocumentIndexingTaskProxy(
                            dataset.tenant_id, dataset.id, duplicate_document_ids
                        ).delay()
-                    # Note: Summary index generation is triggered in document_indexing_task after indexing completes
-                    # to ensure segments are available. See tasks/document_indexing_task.py
            except LockNotOwnedError:
                pass

@@ -2206,14 +2128,6 @@ class DocumentService:
        name: str,
        batch: str,
    ):
-        # Set need_summary based on dataset's summary_index_setting
-        need_summary = False
-        if (
-            dataset.summary_index_setting
-            and dataset.summary_index_setting.get("enable") is True
-        ):
-            need_summary = True
-        
        document = Document(
            tenant_id=dataset.tenant_id,
            dataset_id=dataset.id,
@@ -2227,7 +2141,6 @@ class DocumentService:
            created_by=account.id,
            doc_form=document_form,
            doc_language=document_language,
-            need_summary=need_summary,
        )
        doc_metadata = {}
        if dataset.built_in_field_enabled:
@@ -2452,7 +2365,6 @@ class DocumentService:
            embedding_model_provider=knowledge_config.embedding_model_provider,
            collection_binding_id=dataset_collection_binding_id,
            retrieval_model=retrieval_model.model_dump() if retrieval_model else None,
-            summary_index_setting=knowledge_config.summary_index_setting,
            is_multimodal=knowledge_config.is_multimodal,
        )

@@ -2634,14 +2546,6 @@ class DocumentService:
            if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int):
                raise ValueError("Process rule segmentation max_tokens is invalid")

-        # valid summary index setting
-        if args["process_rule"]["summary_index_setting"] and args["process_rule"]["summary_index_setting"]["enable"]:
-            summary_index_setting = args["process_rule"]["summary_index_setting"]
-            if "model_name" not in summary_index_setting or not summary_index_setting["model_name"]:
-                raise ValueError("Summary index model name is required")
-            if "model_provider_name" not in summary_index_setting or not summary_index_setting["model_provider_name"]:
-                raise ValueError("Summary index model provider name is required")
-
    @staticmethod
    def batch_update_document_status(
        dataset: Dataset, document_ids: list[str], action: Literal["enable", "disable", "archive", "un_archive"], user
@@ -3110,37 +3014,6 @@ class SegmentService:
                    if args.enabled or keyword_changed:
                        # update segment vector index
                        VectorService.update_segment_vector(args.keywords, segment, dataset)
-                # update summary index if summary is provided and has changed
-                if args.summary is not None:
-                    # Check if summary index is enabled
-                    has_summary_index = (
-                        dataset.indexing_technique == "high_quality"
-                        and dataset.summary_index_setting
-                        and dataset.summary_index_setting.get("enable") is True
-                    )
-                    
-                    if has_summary_index:
-                        # Query existing summary from database
-                        from models.dataset import DocumentSegmentSummary
-                        existing_summary = (
-                            db.session.query(DocumentSegmentSummary)
-                            .where(
-                                DocumentSegmentSummary.chunk_id == segment.id,
-                                DocumentSegmentSummary.dataset_id == dataset.id,
-                            )
-                            .first()
-                        )
-                        
-                        # Check if summary has changed
-                        existing_summary_content = existing_summary.summary_content if existing_summary else None
-                        if existing_summary_content != args.summary:
-                            # Summary has changed, update it
-                            from services.summary_index_service import SummaryIndexService
-                            try:
-                                SummaryIndexService.update_summary_for_segment(segment, dataset, args.summary)
-                            except Exception as e:
-                                logger.exception(f"Failed to update summary for segment {segment.id}: {str(e)}")
-                                # Don't fail the entire update if summary update fails
            else:
                segment_hash = helper.generate_text_hash(content)
                tokens = 0
@@ -3215,15 +3088,6 @@ class SegmentService:
                elif document.doc_form in (IndexStructureType.PARAGRAPH_INDEX, IndexStructureType.QA_INDEX):
                    # update segment vector index
                    VectorService.update_segment_vector(args.keywords, segment, dataset)
-            # update summary index if summary is provided
-            if args.summary is not None:
-                from services.summary_index_service import SummaryIndexService
-
-                try:
-                    SummaryIndexService.update_summary_for_segment(segment, dataset, args.summary)
-                except Exception as e:
-                    logger.exception(f"Failed to update summary for segment {segment.id}: {str(e)}")
-                    # Don't fail the entire update if summary update fails
            # update multimodel vector index
            VectorService.update_multimodel_vector(segment, args.attachment_ids or [], dataset)
        except Exception as e:
--- a/api/services/entities/knowledge_entities/knowledge_entities.py
+++ b/api/services/entities/knowledge_entities/knowledge_entities.py
@@ -119,7 +119,6 @@ class KnowledgeConfig(BaseModel):
    data_source: DataSource | None = None
    process_rule: ProcessRule | None = None
    retrieval_model: RetrievalModel | None = None
-    summary_index_setting: dict | None = None
    doc_form: str = "text_model"
    doc_language: str = "English"
    embedding_model: str | None = None
@@ -142,7 +141,6 @@ class SegmentUpdateArgs(BaseModel):
    regenerate_child_chunks: bool = False
    enabled: bool | None = None
    attachment_ids: list[str] | None = None
-    summary: str | None = None  # Summary content for summary index


 class ChildChunkUpdateArgs(BaseModel):
--- a/api/services/feature_service.py
+++ b/api/services/feature_service.py
@@ -170,8 +170,6 @@ class SystemFeatureModel(BaseModel):
    plugin_installation_permission: PluginInstallationPermissionModel = PluginInstallationPermissionModel()
    enable_change_email: bool = True
    plugin_manager: PluginManagerModel = PluginManagerModel()
-    enable_trial_app: bool = False
-    enable_explore_banner: bool = False


 class FeatureService:
@@ -227,8 +225,6 @@ class FeatureService:
        system_features.is_allow_register = dify_config.ALLOW_REGISTER
        system_features.is_allow_create_workspace = dify_config.ALLOW_CREATE_WORKSPACE
        system_features.is_email_setup = dify_config.MAIL_TYPE is not None and dify_config.MAIL_TYPE != ""
-        system_features.enable_trial_app = dify_config.ENABLE_TRIAL_APP
-        system_features.enable_explore_banner = dify_config.ENABLE_EXPLORE_BANNER

    @classmethod
    def _fulfill_params_from_env(cls, features: FeatureModel):
--- a/api/services/llm_generation_service.py
+++ b/api/services/llm_generation_service.py
@@ -1,37 +0,0 @@
-"""
-LLM Generation Detail Service.
-
-Provides methods to query and attach generation details to workflow node executions
-and messages, avoiding N+1 query problems.
-"""
-
-from sqlalchemy import select
-from sqlalchemy.orm import Session
-
-from core.app.entities.llm_generation_entities import LLMGenerationDetailData
-from models import LLMGenerationDetail
-
-
-class LLMGenerationService:
-    """Service for handling LLM generation details."""
-
-    def __init__(self, session: Session):
-        self._session = session
-
-    def get_generation_detail_for_message(self, message_id: str) -> LLMGenerationDetailData | None:
-        """Query generation detail for a specific message."""
-        stmt = select(LLMGenerationDetail).where(LLMGenerationDetail.message_id == message_id)
-        detail = self._session.scalars(stmt).first()
-        return detail.to_domain_model() if detail else None
-
-    def get_generation_details_for_messages(
-        self,
-        message_ids: list[str],
-    ) -> dict[str, LLMGenerationDetailData]:
-        """Batch query generation details for multiple messages."""
-        if not message_ids:
-            return {}
-
-        stmt = select(LLMGenerationDetail).where(LLMGenerationDetail.message_id.in_(message_ids))
-        details = self._session.scalars(stmt).all()
-        return {detail.message_id: detail.to_domain_model() for detail in details if detail.message_id}
--- a/api/services/recommended_app_service.py
+++ b/api/services/recommended_app_service.py
@@ -1,7 +1,4 @@
 from configs import dify_config
-from extensions.ext_database import db
-from models.model import AccountTrialAppRecord, TrialApp
-from services.feature_service import FeatureService
 from services.recommend_app.recommend_app_factory import RecommendAppRetrievalFactory


@@ -23,15 +20,6 @@ class RecommendedAppService:
                )
            )

-        if FeatureService.get_system_features().enable_trial_app:
-            apps = result["recommended_apps"]
-            for app in apps:
-                app_id = app["app_id"]
-                trial_app_model = db.session.query(TrialApp).where(TrialApp.app_id == app_id).first()
-                if trial_app_model:
-                    app["can_trial"] = True
-                else:
-                    app["can_trial"] = False
        return result

    @classmethod
@@ -44,30 +32,4 @@ class RecommendedAppService:
        mode = dify_config.HOSTED_FETCH_APP_TEMPLATES_MODE
        retrieval_instance = RecommendAppRetrievalFactory.get_recommend_app_factory(mode)()
        result: dict = retrieval_instance.get_recommend_app_detail(app_id)
-        if FeatureService.get_system_features().enable_trial_app:
-            app_id = result["id"]
-            trial_app_model = db.session.query(TrialApp).where(TrialApp.app_id == app_id).first()
-            if trial_app_model:
-                result["can_trial"] = True
-            else:
-                result["can_trial"] = False
        return result
-
-    @classmethod
-    def add_trial_app_record(cls, app_id: str, account_id: str):
-        """
-        Add trial app record.
-        :param app_id: app id
-        :return:
-        """
-        account_trial_app_record = (
-            db.session.query(AccountTrialAppRecord)
-            .where(AccountTrialAppRecord.app_id == app_id, AccountTrialAppRecord.account_id == account_id)
-            .first()
-        )
-        if account_trial_app_record:
-            account_trial_app_record.count += 1
-            db.session.commit()
-        else:
-            db.session.add(AccountTrialAppRecord(app_id=app_id, count=1, account_id=account_id))
-            db.session.commit()
--- a/api/services/summary_index_service.py
+++ b/api/services/summary_index_service.py
@@ -1,612 +0,0 @@
-"""Summary index service for generating and managing document segment summaries."""
-
-import logging
-import time
-import uuid
-from typing import Any
-
-from core.rag.datasource.vdb.vector_factory import Vector
-from core.rag.index_processor.constant.doc_type import DocType
-from core.rag.models.document import Document
-from extensions.ext_database import db
-from libs import helper
-from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
-from models.dataset import Document as DatasetDocument
-
-logger = logging.getLogger(__name__)
-
-
-class SummaryIndexService:
-    """Service for generating and managing summary indexes."""
-
-    @staticmethod
-    def generate_summary_for_segment(
-        segment: DocumentSegment,
-        dataset: Dataset,
-        summary_index_setting: dict,
-    ) -> str:
-        """
-        Generate summary for a single segment.
-
-        Args:
-            segment: DocumentSegment to generate summary for
-            dataset: Dataset containing the segment
-            summary_index_setting: Summary index configuration
-
-        Returns:
-            Generated summary text
-
-        Raises:
-            ValueError: If summary_index_setting is invalid or generation fails
-        """
-        # Reuse the existing generate_summary method from ParagraphIndexProcessor
-        # Use lazy import to avoid circular import
-        from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
-
-        summary_content = ParagraphIndexProcessor.generate_summary(
-            tenant_id=dataset.tenant_id,
-            text=segment.content,
-            summary_index_setting=summary_index_setting,
-        )
-
-        if not summary_content:
-            raise ValueError("Generated summary is empty")
-
-        return summary_content
-
-    @staticmethod
-    def create_summary_record(
-        segment: DocumentSegment,
-        dataset: Dataset,
-        summary_content: str,
-        status: str = "generating",
-    ) -> DocumentSegmentSummary:
-        """
-        Create or update a DocumentSegmentSummary record.
-        If a summary record already exists for this segment, it will be updated instead of creating a new one.
-
-        Args:
-            segment: DocumentSegment to create summary for
-            dataset: Dataset containing the segment
-            summary_content: Generated summary content
-            status: Summary status (default: "generating")
-
-        Returns:
-            Created or updated DocumentSegmentSummary instance
-        """
-        # Check if summary record already exists
-        existing_summary = (
-            db.session.query(DocumentSegmentSummary)
-            .filter_by(chunk_id=segment.id, dataset_id=dataset.id)
-            .first()
-        )
-        
-        if existing_summary:
-            # Update existing record
-            existing_summary.summary_content = summary_content
-            existing_summary.status = status
-            existing_summary.error = None  # Clear any previous errors
-            # Re-enable if it was disabled
-            if not existing_summary.enabled:
-                existing_summary.enabled = True
-                existing_summary.disabled_at = None
-                existing_summary.disabled_by = None
-            db.session.add(existing_summary)
-            db.session.flush()
-            return existing_summary
-        else:
-            # Create new record (enabled by default)
-            summary_record = DocumentSegmentSummary(
-                dataset_id=dataset.id,
-                document_id=segment.document_id,
-                chunk_id=segment.id,
-                summary_content=summary_content,
-                status=status,
-                enabled=True,  # Explicitly set enabled to True
-            )
-            db.session.add(summary_record)
-            db.session.flush()
-            return summary_record
-
-    @staticmethod
-    def vectorize_summary(
-        summary_record: DocumentSegmentSummary,
-        segment: DocumentSegment,
-        dataset: Dataset,
-    ) -> None:
-        """
-        Vectorize summary and store in vector database.
-
-        Args:
-            summary_record: DocumentSegmentSummary record
-            segment: Original DocumentSegment
-            dataset: Dataset containing the segment
-        """
-        if dataset.indexing_technique != "high_quality":
-            logger.warning(
-                f"Summary vectorization skipped for dataset {dataset.id}: "
-                "indexing_technique is not high_quality"
-            )
-            return
-
-        # Reuse existing index_node_id if available (like segment does), otherwise generate new one
-        old_summary_node_id = summary_record.summary_index_node_id
-        if old_summary_node_id:
-            # Reuse existing index_node_id (like segment behavior)
-            summary_index_node_id = old_summary_node_id
-        else:
-            # Generate new index node ID only for new summaries
-            summary_index_node_id = str(uuid.uuid4())
-        
-        # Always regenerate hash (in case summary content changed)
-        summary_hash = helper.generate_text_hash(summary_record.summary_content)
-        
-        # Delete old vector only if we're reusing the same index_node_id (to overwrite)
-        # If index_node_id changed, the old vector should have been deleted elsewhere
-        if old_summary_node_id and old_summary_node_id == summary_index_node_id:
-            try:
-                vector = Vector(dataset)
-                vector.delete_by_ids([old_summary_node_id])
-            except Exception as e:
-                logger.warning(
-                    f"Failed to delete old summary vector for segment {segment.id}: {str(e)}. "
-                    "Continuing with new vectorization."
-                )
-
-        # Create document with summary content and metadata
-        summary_document = Document(
-            page_content=summary_record.summary_content,
-            metadata={
-                "doc_id": summary_index_node_id,
-                "doc_hash": summary_hash,
-                "dataset_id": dataset.id,
-                "document_id": segment.document_id,
-                "original_chunk_id": segment.id,  # Key: link to original chunk
-                "doc_type": DocType.TEXT,
-                "is_summary": True,  # Identifier for summary documents
-            },
-        )
-
-        # Vectorize and store with retry mechanism for connection errors
-        max_retries = 3
-        retry_delay = 2.0
-        
-        for attempt in range(max_retries):
-            try:
-                vector = Vector(dataset)
-                vector.add_texts([summary_document], duplicate_check=True)
-                
-                # Success - update summary record with index node info
-                summary_record.summary_index_node_id = summary_index_node_id
-                summary_record.summary_index_node_hash = summary_hash
-                summary_record.status = "completed"
-                db.session.add(summary_record)
-                db.session.flush()
-                return  # Success, exit function
-                
-            except (ConnectionError, Exception) as e:
-                error_str = str(e).lower()
-                # Check if it's a connection-related error that might be transient
-                is_connection_error = any(keyword in error_str for keyword in [
-                    "connection", "disconnected", "timeout", "network", 
-                    "could not connect", "server disconnected", "weaviate"
-                ])
-                
-                if is_connection_error and attempt < max_retries - 1:
-                    # Retry for connection errors
-                    wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
-                    logger.warning(
-                        f"Vectorization attempt {attempt + 1}/{max_retries} failed for segment {segment.id}: {str(e)}. "
-                        f"Retrying in {wait_time:.1f} seconds..."
-                    )
-                    time.sleep(wait_time)
-                    continue
-                else:
-                    # Final attempt failed or non-connection error - log and update status
-                    logger.error(
-                        f"Failed to vectorize summary for segment {segment.id} after {attempt + 1} attempts: {str(e)}",
-                        exc_info=True
-                    )
-                    summary_record.status = "error"
-                    summary_record.error = f"Vectorization failed: {str(e)}"
-                    db.session.add(summary_record)
-                    db.session.flush()
-                    raise
-
-    @staticmethod
-    def generate_and_vectorize_summary(
-        segment: DocumentSegment,
-        dataset: Dataset,
-        summary_index_setting: dict,
-    ) -> DocumentSegmentSummary:
-        """
-        Generate summary for a segment and vectorize it.
-
-        Args:
-            segment: DocumentSegment to generate summary for
-            dataset: Dataset containing the segment
-            summary_index_setting: Summary index configuration
-
-        Returns:
-            Created DocumentSegmentSummary instance
-
-        Raises:
-            ValueError: If summary generation fails
-        """
-        try:
-            # Generate summary
-            summary_content = SummaryIndexService.generate_summary_for_segment(
-                segment, dataset, summary_index_setting
-            )
-
-            # Create or update summary record (will handle overwrite internally)
-            summary_record = SummaryIndexService.create_summary_record(
-                segment, dataset, summary_content, status="generating"
-            )
-
-            # Vectorize summary (will delete old vector if exists before creating new one)
-            SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
-
-            db.session.commit()
-            logger.info(f"Successfully generated and vectorized summary for segment {segment.id}")
-            return summary_record
-
-        except Exception as e:
-            logger.exception(f"Failed to generate summary for segment {segment.id}: {str(e)}")
-            # Update summary record with error status if it exists
-            summary_record = (
-                db.session.query(DocumentSegmentSummary)
-                .filter_by(chunk_id=segment.id, dataset_id=dataset.id)
-                .first()
-            )
-            if summary_record:
-                summary_record.status = "error"
-                summary_record.error = str(e)
-                db.session.add(summary_record)
-                db.session.commit()
-            raise
-
-    @staticmethod
-    def generate_summaries_for_document(
-        dataset: Dataset,
-        document: DatasetDocument,
-        summary_index_setting: dict,
-        segment_ids: list[str] | None = None,
-        only_parent_chunks: bool = False,
-    ) -> list[DocumentSegmentSummary]:
-        """
-        Generate summaries for all segments in a document including vectorization.
-
-        Args:
-            dataset: Dataset containing the document
-            document: DatasetDocument to generate summaries for
-            summary_index_setting: Summary index configuration
-            segment_ids: Optional list of specific segment IDs to process
-            only_parent_chunks: If True, only process parent chunks (for parent-child mode)
-
-        Returns:
-            List of created DocumentSegmentSummary instances
-        """
-        # Only generate summary index for high_quality indexing technique
-        if dataset.indexing_technique != "high_quality":
-            logger.info(
-                f"Skipping summary generation for dataset {dataset.id}: "
-                f"indexing_technique is {dataset.indexing_technique}, not 'high_quality'"
-            )
-            return []
-
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            logger.info(f"Summary index is disabled for dataset {dataset.id}")
-            return []
-
-        # Skip qa_model documents
-        if document.doc_form == "qa_model":
-            logger.info(f"Skipping summary generation for qa_model document {document.id}")
-            return []
-
-        logger.info(
-            f"Starting summary generation for document {document.id} in dataset {dataset.id}, "
-            f"segment_ids: {len(segment_ids) if segment_ids else 'all'}, "
-            f"only_parent_chunks: {only_parent_chunks}"
-        )
-
-        # Query segments (only enabled segments)
-        query = db.session.query(DocumentSegment).filter_by(
-            dataset_id=dataset.id,
-            document_id=document.id,
-            status="completed",
-            enabled=True,  # Only generate summaries for enabled segments
-        )
-
-        if segment_ids:
-            query = query.filter(DocumentSegment.id.in_(segment_ids))
-
-        segments = query.all()
-
-        if not segments:
-            logger.info(f"No segments found for document {document.id}")
-            return []
-
-        summary_records = []
-
-        for segment in segments:
-            # For parent-child mode, only process parent chunks
-            # In parent-child mode, all DocumentSegments are parent chunks,
-            # so we process all of them. Child chunks are stored in ChildChunk table
-            # and are not DocumentSegments, so they won't be in the segments list.
-            # This check is mainly for clarity and future-proofing.
-            if only_parent_chunks:
-                # In parent-child mode, all segments in the query are parent chunks
-                # Child chunks are not DocumentSegments, so they won't appear here
-                # We can process all segments
-                pass
-
-            try:
-                summary_record = SummaryIndexService.generate_and_vectorize_summary(
-                    segment, dataset, summary_index_setting
-                )
-                summary_records.append(summary_record)
-            except Exception as e:
-                logger.error(f"Failed to generate summary for segment {segment.id}: {str(e)}")
-                # Continue with other segments
-                continue
-
-        logger.info(
-            f"Completed summary generation for document {document.id}: "
-            f"{len(summary_records)} summaries generated and vectorized"
-        )
-        return summary_records
-
-    @staticmethod
-    def disable_summaries_for_segments(
-        dataset: Dataset,
-        segment_ids: list[str] | None = None,
-        disabled_by: str | None = None,
-    ) -> None:
-        """
-        Disable summary records and remove vectors from vector database for segments.
-        Unlike delete, this preserves the summary records but marks them as disabled.
-
-        Args:
-            dataset: Dataset containing the segments
-            segment_ids: List of segment IDs to disable summaries for. If None, disable all.
-            disabled_by: User ID who disabled the summaries
-        """
-        from libs.datetime_utils import naive_utc_now
-        
-        query = db.session.query(DocumentSegmentSummary).filter_by(
-            dataset_id=dataset.id,
-            enabled=True,  # Only disable enabled summaries
-        )
-
-        if segment_ids:
-            query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids))
-
-        summaries = query.all()
-
-        if not summaries:
-            return
-
-        logger.info(
-            f"Disabling {len(summaries)} summary records for dataset {dataset.id}, "
-            f"segment_ids: {len(segment_ids) if segment_ids else 'all'}"
-        )
-
-        # Remove from vector database (but keep records)
-        if dataset.indexing_technique == "high_quality":
-            summary_node_ids = [
-                s.summary_index_node_id for s in summaries if s.summary_index_node_id
-            ]
-            if summary_node_ids:
-                try:
-                    vector = Vector(dataset)
-                    vector.delete_by_ids(summary_node_ids)
-                except Exception as e:
-                    logger.warning(f"Failed to remove summary vectors: {str(e)}")
-
-        # Disable summary records (don't delete)
-        now = naive_utc_now()
-        for summary in summaries:
-            summary.enabled = False
-            summary.disabled_at = now
-            summary.disabled_by = disabled_by
-            db.session.add(summary)
-
-        db.session.commit()
-        logger.info(f"Disabled {len(summaries)} summary records for dataset {dataset.id}")
-
-    @staticmethod
-    def enable_summaries_for_segments(
-        dataset: Dataset,
-        segment_ids: list[str] | None = None,
-    ) -> None:
-        """
-        Enable summary records and re-add vectors to vector database for segments.
-
-        Args:
-            dataset: Dataset containing the segments
-            segment_ids: List of segment IDs to enable summaries for. If None, enable all.
-        """
-        # Only enable summary index for high_quality indexing technique
-        if dataset.indexing_technique != "high_quality":
-            return
-
-        # Check if summary index is enabled
-        summary_index_setting = dataset.summary_index_setting
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            return
-
-        query = db.session.query(DocumentSegmentSummary).filter_by(
-            dataset_id=dataset.id,
-            enabled=False,  # Only enable disabled summaries
-        )
-
-        if segment_ids:
-            query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids))
-
-        summaries = query.all()
-
-        if not summaries:
-            return
-
-        logger.info(
-            f"Enabling {len(summaries)} summary records for dataset {dataset.id}, "
-            f"segment_ids: {len(segment_ids) if segment_ids else 'all'}"
-        )
-
-        # Re-vectorize and re-add to vector database
-        enabled_count = 0
-        for summary in summaries:
-            # Get the original segment
-            segment = db.session.query(DocumentSegment).filter_by(
-                id=summary.chunk_id,
-                dataset_id=dataset.id,
-            ).first()
-            
-            if not segment or not segment.enabled or segment.status != "completed":
-                continue
-
-            if not summary.summary_content:
-                continue
-
-            try:
-                # Re-vectorize summary
-                SummaryIndexService.vectorize_summary(summary, segment, dataset)
-                
-                # Enable summary record
-                summary.enabled = True
-                summary.disabled_at = None
-                summary.disabled_by = None
-                db.session.add(summary)
-                enabled_count += 1
-            except Exception as e:
-                logger.error(f"Failed to re-vectorize summary {summary.id}: {str(e)}")
-                # Keep it disabled if vectorization fails
-                continue
-
-        db.session.commit()
-        logger.info(f"Enabled {enabled_count} summary records for dataset {dataset.id}")
-
-    @staticmethod
-    def delete_summaries_for_segments(
-        dataset: Dataset,
-        segment_ids: list[str] | None = None,
-    ) -> None:
-        """
-        Delete summary records and vectors for segments (used only for actual deletion scenarios).
-        For disable/enable operations, use disable_summaries_for_segments/enable_summaries_for_segments.
-
-        Args:
-            dataset: Dataset containing the segments
-            segment_ids: List of segment IDs to delete summaries for. If None, delete all.
-        """
-        query = db.session.query(DocumentSegmentSummary).filter_by(dataset_id=dataset.id)
-
-        if segment_ids:
-            query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids))
-
-        summaries = query.all()
-
-        if not summaries:
-            return
-
-        # Delete from vector database
-        if dataset.indexing_technique == "high_quality":
-            summary_node_ids = [
-                s.summary_index_node_id for s in summaries if s.summary_index_node_id
-            ]
-            if summary_node_ids:
-                vector = Vector(dataset)
-                vector.delete_by_ids(summary_node_ids)
-
-        # Delete summary records
-        for summary in summaries:
-            db.session.delete(summary)
-
-        db.session.commit()
-        logger.info(f"Deleted {len(summaries)} summary records for dataset {dataset.id}")
-
-    @staticmethod
-    def update_summary_for_segment(
-        segment: DocumentSegment,
-        dataset: Dataset,
-        summary_content: str,
-    ) -> DocumentSegmentSummary | None:
-        """
-        Update summary for a segment and re-vectorize it.
-
-        Args:
-            segment: DocumentSegment to update summary for
-            dataset: Dataset containing the segment
-            summary_content: New summary content
-
-        Returns:
-            Updated DocumentSegmentSummary instance, or None if summary index is not enabled
-        """
-        # Only update summary index for high_quality indexing technique
-        if dataset.indexing_technique != "high_quality":
-            return None
-
-        # Check if summary index is enabled
-        summary_index_setting = dataset.summary_index_setting
-        if not summary_index_setting or not summary_index_setting.get("enable"):
-            return None
-
-        # Skip qa_model documents
-        if segment.document and segment.document.doc_form == "qa_model":
-            return None
-
-        try:
-            # Find existing summary record
-            summary_record = (
-                db.session.query(DocumentSegmentSummary)
-                .filter_by(chunk_id=segment.id, dataset_id=dataset.id)
-                .first()
-            )
-
-            if summary_record:
-                # Update existing summary
-                old_summary_node_id = summary_record.summary_index_node_id
-
-                # Update summary content
-                summary_record.summary_content = summary_content
-                summary_record.status = "generating"
-                db.session.add(summary_record)
-                db.session.flush()
-
-                # Delete old vector if exists
-                if old_summary_node_id:
-                    vector = Vector(dataset)
-                    vector.delete_by_ids([old_summary_node_id])
-
-                # Re-vectorize summary
-                SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
-
-                db.session.commit()
-                logger.info(f"Successfully updated and re-vectorized summary for segment {segment.id}")
-                return summary_record
-            else:
-                # Create new summary record if doesn't exist
-                summary_record = SummaryIndexService.create_summary_record(
-                    segment, dataset, summary_content, status="generating"
-                )
-                SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
-                db.session.commit()
-                logger.info(f"Successfully created and vectorized summary for segment {segment.id}")
-                return summary_record
-
-        except Exception as e:
-            logger.exception(f"Failed to update summary for segment {segment.id}: {str(e)}")
-            # Update summary record with error status if it exists
-            summary_record = (
-                db.session.query(DocumentSegmentSummary)
-                .filter_by(chunk_id=segment.id, dataset_id=dataset.id)
-                .first()
-            )
-            if summary_record:
-                summary_record.status = "error"
-                summary_record.error = str(e)
-                db.session.add(summary_record)
-                db.session.commit()
-            raise
-
--- a/api/tasks/add_document_to_index_task.py
+++ b/api/tasks/add_document_to_index_task.py
@@ -117,18 +117,6 @@ def add_document_to_index_task(dataset_document_id: str):
        )
        db.session.commit()

-        # Enable summary indexes for all segments in this document
-        from services.summary_index_service import SummaryIndexService
-        segment_ids_list = [segment.id for segment in segments]
-        if segment_ids_list:
-            try:
-                SummaryIndexService.enable_summaries_for_segments(
-                    dataset=dataset,
-                    segment_ids=segment_ids_list,
-                )
-            except Exception as e:
-                logger.warning(f"Failed to enable summaries for document {dataset_document.id}: {str(e)}")
-
        end_at = time.perf_counter()
        logger.info(
            click.style(f"Document added to index: {dataset_document.id} latency: {end_at - start_at}", fg="green")
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
wangxiaolei	acfd34e876	fix: fix Cannot destructure property 'name' of 'value' as it is undef… (#30991 )	2026-01-15 13:25:30 +08:00
-LAN-	036a7cf839	chore: bump version to 1.11.4 (#30961 )	2026-01-15 11:40:33 +08:00
Stephen Zhou	86beacc64f	build: require node 24.13.0 (#30945 )	2026-01-15 11:40:27 +08:00
wangxiaolei	2c6bd90d6f	fix: fix missing id and message_id (#31008 )	2026-01-15 11:40:13 +08:00
Stephen Zhou	f5aaa8f97e	fix: redirect after login (#30985 )	2026-01-15 11:40:02 +08:00