refactor: select in console datasets document controller (#34029)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
tmimmanuel
2026-03-25 04:47:25 +01:00
committed by GitHub
parent 4c32acf857
commit d87263f7c3
55 changed files with 233 additions and 195 deletions

View File

@@ -5,6 +5,7 @@ import click
from celery import shared_task
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.models.document import Document
from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService
@@ -36,7 +37,7 @@ def add_annotation_to_index_task(
dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
embedding_model_provider=dataset_collection_binding.provider_name,
embedding_model=dataset_collection_binding.model_name,
collection_binding_id=dataset_collection_binding.id,

View File

@@ -7,6 +7,7 @@ from werkzeug.exceptions import NotFound
from core.db.session_factory import session_factory
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from models.dataset import Dataset
@@ -67,7 +68,7 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
embedding_model_provider=dataset_collection_binding.provider_name,
embedding_model=dataset_collection_binding.model_name,
collection_binding_id=dataset_collection_binding.id,

View File

@@ -5,6 +5,7 @@ import click
from celery import shared_task
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService
@@ -26,7 +27,7 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str
dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
collection_binding_id=dataset_collection_binding.id,
)

View File

@@ -7,6 +7,7 @@ from sqlalchemy import exists, select
from core.db.session_factory import session_factory
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from extensions.ext_redis import redis_client
from models.dataset import Dataset
from models.model import App, AppAnnotationSetting, MessageAnnotation
@@ -44,7 +45,7 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
collection_binding_id=app_annotation_setting.collection_binding_id,
)

View File

@@ -7,6 +7,7 @@ from sqlalchemy import select
from core.db.session_factory import session_factory
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
@@ -64,7 +65,7 @@ def enable_annotation_reply_task(
old_dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
embedding_model_provider=old_dataset_collection_binding.provider_name,
embedding_model=old_dataset_collection_binding.model_name,
collection_binding_id=old_dataset_collection_binding.id,
@@ -93,7 +94,7 @@ def enable_annotation_reply_task(
dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
embedding_model_provider=embedding_provider_name,
embedding_model=embedding_model_name,
collection_binding_id=dataset_collection_binding.id,

View File

@@ -5,6 +5,7 @@ import click
from celery import shared_task
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.models.document import Document
from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService
@@ -37,7 +38,7 @@ def update_annotation_to_index_task(
dataset = Dataset(
id=app_id,
tenant_id=tenant_id,
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
embedding_model_provider=dataset_collection_binding.provider_name,
embedding_model=dataset_collection_binding.model_name,
collection_binding_id=dataset_collection_binding.id,

View File

@@ -11,7 +11,7 @@ from sqlalchemy import func
from core.db.session_factory import session_factory
from core.model_manager import ModelManager
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from dify_graph.model_runtime.entities.model_entities import ModelType
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
@@ -120,7 +120,7 @@ def batch_create_segment_to_index_task(
document_segments = []
embedding_model = None
if dataset_config["indexing_technique"] == "high_quality":
if dataset_config["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=dataset_config["tenant_id"],

View File

@@ -10,7 +10,7 @@ from configs import dify_config
from core.db.session_factory import session_factory
from core.entities.document_task import DocumentTask
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from libs.datetime_utils import naive_utc_now
@@ -127,7 +127,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
logger.warning("Dataset %s not found after indexing", dataset_id)
return
if dataset.indexing_technique == "high_quality":
if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
summary_index_setting = dataset.summary_index_setting
if summary_index_setting and summary_index_setting.get("enable"):
# expire all session to get latest document's indexing status

View File

@@ -7,6 +7,7 @@ import click
from celery import shared_task
from core.db.session_factory import session_factory
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from models.dataset import Dataset, DocumentSegment
from models.dataset import Document as DatasetDocument
from services.summary_index_service import SummaryIndexService
@@ -59,7 +60,7 @@ def generate_summary_index_task(dataset_id: str, document_id: str, segment_ids:
return
# Only generate summary index for high_quality indexing technique
if dataset.indexing_technique != "high_quality":
if dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
logger.info(
click.style(
f"Skipping summary generation for dataset {dataset_id}: "

View File

@@ -9,7 +9,7 @@ from celery import shared_task
from sqlalchemy import or_, select
from core.db.session_factory import session_factory
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
from models.dataset import Document as DatasetDocument
from services.summary_index_service import SummaryIndexService
@@ -53,7 +53,7 @@ def regenerate_summary_index_task(
return
# Only regenerate summary index for high_quality indexing technique
if dataset.indexing_technique != "high_quality":
if dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
logger.info(
click.style(
f"Skipping summary regeneration for dataset {dataset_id}: "