refactor: use EnumText for dataset and replace string literals 4 (#33606)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
tmimmanuel
2026-03-18 00:18:08 +00:00
committed by GitHub
parent 0bc6c3a73e
commit 3870b2ad2d
69 changed files with 1027 additions and 849 deletions

View File

@@ -13,6 +13,7 @@ from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from models.dataset import DatasetAutoDisableLog, DocumentSegment
from models.dataset import Document as DatasetDocument
+from models.enums import IndexingStatus, SegmentStatus
logger = logging.getLogger(__name__)
@@ -34,7 +35,7 @@ def add_document_to_index_task(dataset_document_id: str):
logger.info(click.style(f"Document not found: {dataset_document_id}", fg="red"))
return
-if dataset_document.indexing_status != "completed":
+if dataset_document.indexing_status != IndexingStatus.COMPLETED:
return
indexing_cache_key = f"document_{dataset_document.id}_indexing"
@@ -48,7 +49,7 @@ def add_document_to_index_task(dataset_document_id: str):
session.query(DocumentSegment)
.where(
DocumentSegment.document_id == dataset_document.id,
-DocumentSegment.status == "completed",
+DocumentSegment.status == SegmentStatus.COMPLETED,
)
.order_by(DocumentSegment.position.asc())
.all()
@@ -139,7 +140,7 @@ def add_document_to_index_task(dataset_document_id: str):
logger.exception("add document to index failed")
dataset_document.enabled = False
dataset_document.disabled_at = naive_utc_now()
-dataset_document.indexing_status = "error"
+dataset_document.indexing_status = IndexingStatus.ERROR
dataset_document.error = str(e)
session.commit()
finally:

View File

@@ -11,6 +11,7 @@ from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset
+from models.enums import CollectionBindingType
from models.model import App, AppAnnotationSetting, MessageAnnotation
from services.dataset_service import DatasetCollectionBindingService
@@ -47,7 +48,7 @@ def enable_annotation_reply_task(
try:
documents = []
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
-embedding_provider_name, embedding_model_name, "annotation"
+embedding_provider_name, embedding_model_name, CollectionBindingType.ANNOTATION
)
annotation_setting = (
session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first()
@@ -56,7 +57,7 @@ def enable_annotation_reply_task(
if dataset_collection_binding.id != annotation_setting.collection_binding_id:
old_dataset_collection_binding = (
DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
-annotation_setting.collection_binding_id, "annotation"
+annotation_setting.collection_binding_id, CollectionBindingType.ANNOTATION
)
)
if old_dataset_collection_binding and annotations:

View File

@@ -10,6 +10,7 @@ from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from models.dataset import DocumentSegment
+from models.enums import IndexingStatus, SegmentStatus
logger = logging.getLogger(__name__)
@@ -31,7 +32,7 @@ def create_segment_to_index_task(segment_id: str, keywords: list[str] | None = N
logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
return
-if segment.status != "waiting":
+if segment.status != SegmentStatus.WAITING:
return
indexing_cache_key = f"segment_{segment.id}_indexing"
@@ -40,7 +41,7 @@ def create_segment_to_index_task(segment_id: str, keywords: list[str] | None = N
# update segment status to indexing
session.query(DocumentSegment).filter_by(id=segment.id).update(
{
-DocumentSegment.status: "indexing",
+DocumentSegment.status: SegmentStatus.INDEXING,
DocumentSegment.indexing_at: naive_utc_now(),
}
)
@@ -70,7 +71,7 @@ def create_segment_to_index_task(segment_id: str, keywords: list[str] | None = N
if (
not dataset_document.enabled
or dataset_document.archived
-or dataset_document.indexing_status != "completed"
+or dataset_document.indexing_status != IndexingStatus.COMPLETED
):
logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
return
@@ -82,7 +83,7 @@ def create_segment_to_index_task(segment_id: str, keywords: list[str] | None = N
# update segment to completed
session.query(DocumentSegment).filter_by(id=segment.id).update(
{
-DocumentSegment.status: "completed",
+DocumentSegment.status: SegmentStatus.COMPLETED,
DocumentSegment.completed_at: naive_utc_now(),
}
)
@@ -94,7 +95,7 @@ def create_segment_to_index_task(segment_id: str, keywords: list[str] | None = N
logger.exception("create segment to index failed")
segment.enabled = False
segment.disabled_at = naive_utc_now()
-segment.status = "error"
+segment.status = SegmentStatus.ERROR
segment.error = str(e)
session.commit()
finally:

View File

@@ -12,6 +12,7 @@ from core.rag.extractor.notion_extractor import NotionExtractor
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
+from models.enums import IndexingStatus
from services.datasource_provider_service import DatasourceProviderService
logger = logging.getLogger(__name__)
@@ -37,7 +38,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
logger.info(click.style(f"Document not found: {document_id}", fg="red"))
return
-if document.indexing_status == "parsing":
+if document.indexing_status == IndexingStatus.PARSING:
logger.info(click.style(f"Document {document_id} is already being processed, skipping", fg="yellow"))
return
@@ -88,7 +89,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
with session_factory.create_session() as session, session.begin():
document = session.query(Document).filter_by(id=document_id).first()
if document:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = "Datasource credential not found. Please reconnect your Notion workspace."
document.stopped_at = naive_utc_now()
return
@@ -128,7 +129,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
data_source_info["last_edited_time"] = last_edited_time
document.data_source_info = json.dumps(data_source_info)
-document.indexing_status = "parsing"
+document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
segment_delete_stmt = delete(DocumentSegment).where(DocumentSegment.document_id == document_id)
@@ -151,6 +152,6 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
with session_factory.create_session() as session, session.begin():
document = session.query(Document).filter_by(id=document_id).first()
if document:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(e)
document.stopped_at = naive_utc_now()

View File

@@ -14,6 +14,7 @@ from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document
+from models.enums import IndexingStatus
from services.feature_service import FeatureService
from tasks.generate_summary_index_task import generate_summary_index_task
@@ -81,7 +82,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
)
if document:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(e)
document.stopped_at = naive_utc_now()
session.add(document)
@@ -96,7 +97,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
for document in documents:
if document:
-document.indexing_status = "parsing"
+document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
session.add(document)
# Transaction committed and closed
@@ -148,7 +149,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
document.need_summary,
)
if (
-document.indexing_status == "completed"
+document.indexing_status == IndexingStatus.COMPLETED
and document.doc_form != "qa_model"
and document.need_summary is True
):

View File

@@ -10,6 +10,7 @@ from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
+from models.enums import IndexingStatus
logger = logging.getLogger(__name__)
@@ -33,7 +34,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
logger.info(click.style(f"Document not found: {document_id}", fg="red"))
return
-document.indexing_status = "parsing"
+document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
dataset = session.query(Dataset).where(Dataset.id == dataset_id).first()

View File

@@ -15,6 +15,7 @@ from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
+from models.enums import IndexingStatus
from services.feature_service import FeatureService
logger = logging.getLogger(__name__)
@@ -112,7 +113,7 @@ def _duplicate_document_indexing_task(dataset_id: str, document_ids: Sequence[st
)
for document in documents:
if document:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(e)
document.stopped_at = naive_utc_now()
session.add(document)
@@ -146,7 +147,7 @@ def _duplicate_document_indexing_task(dataset_id: str, document_ids: Sequence[st
session.execute(segment_delete_stmt)
session.commit()
-document.indexing_status = "parsing"
+document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
session.add(document)
session.commit()

View File

@@ -12,6 +12,7 @@ from core.rag.models.document import AttachmentDocument, ChildDocument, Document
from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from models.dataset import DocumentSegment
+from models.enums import IndexingStatus, SegmentStatus
logger = logging.getLogger(__name__)
@@ -33,7 +34,7 @@ def enable_segment_to_index_task(segment_id: str):
logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
return
-if segment.status != "completed":
+if segment.status != SegmentStatus.COMPLETED:
logger.info(click.style(f"Segment is not completed, enable is not allowed: {segment_id}", fg="red"))
return
@@ -65,7 +66,7 @@ def enable_segment_to_index_task(segment_id: str):
if (
not dataset_document.enabled
or dataset_document.archived
-or dataset_document.indexing_status != "completed"
+or dataset_document.indexing_status != IndexingStatus.COMPLETED
):
logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
return
@@ -123,7 +124,7 @@ def enable_segment_to_index_task(segment_id: str):
logger.exception("enable segment to index failed")
segment.enabled = False
segment.disabled_at = naive_utc_now()
-segment.status = "error"
+segment.status = SegmentStatus.ERROR
segment.error = str(e)
session.commit()
finally:

View File

@@ -12,6 +12,7 @@ from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from models import Account, Tenant
from models.dataset import Dataset, Document, DocumentSegment
+from models.enums import IndexingStatus
from services.feature_service import FeatureService
from services.rag_pipeline.rag_pipeline import RagPipelineService
@@ -63,7 +64,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
.first()
)
if document:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(e)
document.stopped_at = naive_utc_now()
session.add(document)
@@ -95,7 +96,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
session.execute(segment_delete_stmt)
session.commit()
-document.indexing_status = "parsing"
+document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
session.add(document)
session.commit()
@@ -108,7 +109,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
indexing_runner.run([document])
redis_client.delete(retry_indexing_cache_key)
except Exception as ex:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(ex)
document.stopped_at = naive_utc_now()
session.add(document)

View File

@@ -11,6 +11,7 @@ from core.rag.index_processor.index_processor_factory import IndexProcessorFacto
from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
+from models.enums import IndexingStatus
from services.feature_service import FeatureService
logger = logging.getLogger(__name__)
@@ -48,7 +49,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str):
session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
)
if document:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(e)
document.stopped_at = naive_utc_now()
session.add(document)
@@ -76,7 +77,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str):
session.execute(segment_delete_stmt)
session.commit()
-document.indexing_status = "parsing"
+document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
session.add(document)
session.commit()
@@ -85,7 +86,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str):
indexing_runner.run([document])
redis_client.delete(sync_indexing_cache_key)
except Exception as ex:
-document.indexing_status = "error"
+document.indexing_status = IndexingStatus.ERROR
document.error = str(ex)
document.stopped_at = naive_utc_now()
session.add(document)