refactor: select in console datasets document controller (#34029)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-04-05 09:49:25 +08:00 · 2026-03-25 04:47:25 +01:00
parent 4c32acf857
commit d87263f7c3
55 changed files with 233 additions and 195 deletions
--- a/api/tasks/annotation/add_annotation_to_index_task.py
+++ b/api/tasks/annotation/add_annotation_to_index_task.py
@@ -5,6 +5,7 @@ import click
 from celery import shared_task

 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.models.document import Document
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService
@@ -36,7 +37,7 @@ def add_annotation_to_index_task(
        dataset = Dataset(
            id=app_id,
            tenant_id=tenant_id,
-            indexing_technique="high_quality",
+            indexing_technique=IndexTechniqueType.HIGH_QUALITY,
            embedding_model_provider=dataset_collection_binding.provider_name,
            embedding_model=dataset_collection_binding.model_name,
            collection_binding_id=dataset_collection_binding.id,
--- a/api/tasks/annotation/batch_import_annotations_task.py
+++ b/api/tasks/annotation/batch_import_annotations_task.py
@@ -7,6 +7,7 @@ from werkzeug.exceptions import NotFound

 from core.db.session_factory import session_factory
 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.models.document import Document
 from extensions.ext_redis import redis_client
 from models.dataset import Dataset
@@ -67,7 +68,7 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
                    dataset = Dataset(
                        id=app_id,
                        tenant_id=tenant_id,
-                        indexing_technique="high_quality",
+                        indexing_technique=IndexTechniqueType.HIGH_QUALITY,
                        embedding_model_provider=dataset_collection_binding.provider_name,
                        embedding_model=dataset_collection_binding.model_name,
                        collection_binding_id=dataset_collection_binding.id,
--- a/api/tasks/annotation/delete_annotation_index_task.py
+++ b/api/tasks/annotation/delete_annotation_index_task.py
@@ -5,6 +5,7 @@ import click
 from celery import shared_task

 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService

@@ -26,7 +27,7 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str
        dataset = Dataset(
            id=app_id,
            tenant_id=tenant_id,
-            indexing_technique="high_quality",
+            indexing_technique=IndexTechniqueType.HIGH_QUALITY,
            collection_binding_id=dataset_collection_binding.id,
        )

--- a/api/tasks/annotation/disable_annotation_reply_task.py
+++ b/api/tasks/annotation/disable_annotation_reply_task.py
@@ -7,6 +7,7 @@ from sqlalchemy import exists, select

 from core.db.session_factory import session_factory
 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from extensions.ext_redis import redis_client
 from models.dataset import Dataset
 from models.model import App, AppAnnotationSetting, MessageAnnotation
@@ -44,7 +45,7 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
            dataset = Dataset(
                id=app_id,
                tenant_id=tenant_id,
-                indexing_technique="high_quality",
+                indexing_technique=IndexTechniqueType.HIGH_QUALITY,
                collection_binding_id=app_annotation_setting.collection_binding_id,
            )

--- a/api/tasks/annotation/enable_annotation_reply_task.py
+++ b/api/tasks/annotation/enable_annotation_reply_task.py
@@ -7,6 +7,7 @@ from sqlalchemy import select

 from core.db.session_factory import session_factory
 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.models.document import Document
 from extensions.ext_redis import redis_client
 from libs.datetime_utils import naive_utc_now
@@ -64,7 +65,7 @@ def enable_annotation_reply_task(
                        old_dataset = Dataset(
                            id=app_id,
                            tenant_id=tenant_id,
-                            indexing_technique="high_quality",
+                            indexing_technique=IndexTechniqueType.HIGH_QUALITY,
                            embedding_model_provider=old_dataset_collection_binding.provider_name,
                            embedding_model=old_dataset_collection_binding.model_name,
                            collection_binding_id=old_dataset_collection_binding.id,
@@ -93,7 +94,7 @@ def enable_annotation_reply_task(
            dataset = Dataset(
                id=app_id,
                tenant_id=tenant_id,
-                indexing_technique="high_quality",
+                indexing_technique=IndexTechniqueType.HIGH_QUALITY,
                embedding_model_provider=embedding_provider_name,
                embedding_model=embedding_model_name,
                collection_binding_id=dataset_collection_binding.id,
--- a/api/tasks/annotation/update_annotation_to_index_task.py
+++ b/api/tasks/annotation/update_annotation_to_index_task.py
@@ -5,6 +5,7 @@ import click
 from celery import shared_task

 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.models.document import Document
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService
@@ -37,7 +38,7 @@ def update_annotation_to_index_task(
        dataset = Dataset(
            id=app_id,
            tenant_id=tenant_id,
-            indexing_technique="high_quality",
+            indexing_technique=IndexTechniqueType.HIGH_QUALITY,
            embedding_model_provider=dataset_collection_binding.provider_name,
            embedding_model=dataset_collection_binding.model_name,
            collection_binding_id=dataset_collection_binding.id,
--- a/api/tasks/batch_create_segment_to_index_task.py
+++ b/api/tasks/batch_create_segment_to_index_task.py
@@ -11,7 +11,7 @@ from sqlalchemy import func

 from core.db.session_factory import session_factory
 from core.model_manager import ModelManager
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from extensions.ext_redis import redis_client
 from extensions.ext_storage import storage
@@ -120,7 +120,7 @@ def batch_create_segment_to_index_task(

    document_segments = []
    embedding_model = None
-    if dataset_config["indexing_technique"] == "high_quality":
+    if dataset_config["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY:
        model_manager = ModelManager()
        embedding_model = model_manager.get_model_instance(
            tenant_id=dataset_config["tenant_id"],
--- a/api/tasks/document_indexing_task.py
+++ b/api/tasks/document_indexing_task.py
@@ -10,7 +10,7 @@ from configs import dify_config
 from core.db.session_factory import session_factory
 from core.entities.document_task import DocumentTask
 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.pipeline.queue import TenantIsolatedTaskQueue
 from enums.cloud_plan import CloudPlan
 from libs.datetime_utils import naive_utc_now
@@ -127,7 +127,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
                logger.warning("Dataset %s not found after indexing", dataset_id)
                return

-            if dataset.indexing_technique == "high_quality":
+            if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
                summary_index_setting = dataset.summary_index_setting
                if summary_index_setting and summary_index_setting.get("enable"):
                    # expire all session to get latest document's indexing status
--- a/api/tasks/generate_summary_index_task.py
+++ b/api/tasks/generate_summary_index_task.py
@@ -7,6 +7,7 @@ import click
 from celery import shared_task

 from core.db.session_factory import session_factory
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from models.dataset import Dataset, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from services.summary_index_service import SummaryIndexService
@@ -59,7 +60,7 @@ def generate_summary_index_task(dataset_id: str, document_id: str, segment_ids:
                return

            # Only generate summary index for high_quality indexing technique
-            if dataset.indexing_technique != "high_quality":
+            if dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
                logger.info(
                    click.style(
                        f"Skipping summary generation for dataset {dataset_id}: "
--- a/api/tasks/regenerate_summary_index_task.py
+++ b/api/tasks/regenerate_summary_index_task.py
@@ -9,7 +9,7 @@ from celery import shared_task
 from sqlalchemy import or_, select

 from core.db.session_factory import session_factory
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
 from models.dataset import Document as DatasetDocument
 from services.summary_index_service import SummaryIndexService
@@ -53,7 +53,7 @@ def regenerate_summary_index_task(
                return

            # Only regenerate summary index for high_quality indexing technique
-            if dataset.indexing_technique != "high_quality":
+            if dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
                logger.info(
                    click.style(
                        f"Skipping summary regeneration for dataset {dataset_id}: "