refactor: reuse redis connection instead of create new one (#32678)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
wangxiaolei
2026-03-09 15:53:21 +08:00
committed by GitHub
parent cbb19cce39
commit 9970f4449a
10 changed files with 1360 additions and 112 deletions

View File

@@ -1,9 +1,10 @@
import logging
import time
from collections.abc import Callable, Sequence
from collections.abc import Sequence
from typing import Any, Protocol
import click
from celery import shared_task
from celery import current_app, shared_task
from configs import dify_config
from core.db.session_factory import session_factory
@@ -19,6 +20,12 @@ from tasks.generate_summary_index_task import generate_summary_index_task
logger = logging.getLogger(__name__)
class CeleryTaskLike(Protocol):
def delay(self, *args: Any, **kwargs: Any) -> Any: ...
def apply_async(self, *args: Any, **kwargs: Any) -> Any: ...
@shared_task(queue="dataset")
def document_indexing_task(dataset_id: str, document_ids: list):
"""
@@ -179,8 +186,8 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
def _document_indexing_with_tenant_queue(
tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: Callable[[str, str, Sequence[str]], None]
):
tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: CeleryTaskLike
) -> None:
try:
_document_indexing(dataset_id, document_ids)
except Exception:
@@ -201,16 +208,20 @@ def _document_indexing_with_tenant_queue(
logger.info("document indexing tenant isolation queue %s next tasks: %s", tenant_id, next_tasks)
if next_tasks:
for next_task in next_tasks:
document_task = DocumentTask(**next_task)
# Process the next waiting task
# Keep the flag set to indicate a task is running
tenant_isolated_task_queue.set_task_waiting_time()
task_func.delay( # type: ignore
tenant_id=document_task.tenant_id,
dataset_id=document_task.dataset_id,
document_ids=document_task.document_ids,
)
with current_app.producer_or_acquire() as producer: # type: ignore
for next_task in next_tasks:
document_task = DocumentTask(**next_task)
# Keep the flag set to indicate a task is running
tenant_isolated_task_queue.set_task_waiting_time()
task_func.apply_async(
kwargs={
"tenant_id": document_task.tenant_id,
"dataset_id": document_task.dataset_id,
"document_ids": document_task.document_ids,
},
producer=producer,
)
else:
# No more waiting tasks, clear the flag
tenant_isolated_task_queue.delete_task_key()

View File

@@ -3,12 +3,13 @@ import json
import logging
import time
import uuid
from collections.abc import Mapping
from collections.abc import Mapping, Sequence
from concurrent.futures import ThreadPoolExecutor
from itertools import islice
from typing import Any
import click
from celery import shared_task # type: ignore
from celery import group, shared_task
from flask import current_app, g
from sqlalchemy.orm import Session, sessionmaker
@@ -27,6 +28,11 @@ from services.file_service import FileService
logger = logging.getLogger(__name__)
def chunked(iterable: Sequence, size: int):
it = iter(iterable)
return iter(lambda: list(islice(it, size)), [])
@shared_task(queue="pipeline")
def rag_pipeline_run_task(
rag_pipeline_invoke_entities_file_id: str,
@@ -83,16 +89,24 @@ def rag_pipeline_run_task(
logger.info("rag pipeline tenant isolation queue %s next files: %s", tenant_id, next_file_ids)
if next_file_ids:
for next_file_id in next_file_ids:
# Process the next waiting task
# Keep the flag set to indicate a task is running
tenant_isolated_task_queue.set_task_waiting_time()
rag_pipeline_run_task.delay( # type: ignore
rag_pipeline_invoke_entities_file_id=next_file_id.decode("utf-8")
if isinstance(next_file_id, bytes)
else next_file_id,
tenant_id=tenant_id,
)
for batch in chunked(next_file_ids, 100):
jobs = []
for next_file_id in batch:
tenant_isolated_task_queue.set_task_waiting_time()
file_id = (
next_file_id.decode("utf-8") if isinstance(next_file_id, (bytes, bytearray)) else next_file_id
)
jobs.append(
rag_pipeline_run_task.s(
rag_pipeline_invoke_entities_file_id=file_id,
tenant_id=tenant_id,
)
)
if jobs:
group(jobs).apply_async()
else:
# No more waiting tasks, clear the flag
tenant_isolated_task_queue.delete_task_key()