feat: implement content-based deduplication for document segments

- Add database index on (dataset_id, index_node_hash) for efficient deduplication queries
- Add deduplication check in SegmentService.create_segment and multi_create_segment methods
- Add deduplication check in DatasetDocumentStore.add_documents method to prevent duplicate embedding processing
- Skip creating segments with identical content hashes across the entire dataset

This prevents duplicate content from being re-processed and re-embedded when uploading documents with repeated content, improving efficiency and reducing unnecessary compute costs.
fix
2026-04-12 23:09:23 +08:00 · 2025-09-20 06:28:14 +08:00 · 2025-09-20 05:41:25 +08:00 · 2025-09-20 05:30:39 +08:00
106 changed files with 2194 additions and 1596 deletions
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -8,7 +8,8 @@ on:
      - "deploy/enterprise"
      - "build/**"
      - "release/e-*"
-      - "hotfix/**"
+      - "deploy/rag-dev"
+      - "feat/rag-2"
    tags:
      - "*"

--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -4,7 +4,7 @@ on:
  workflow_run:
    workflows: ["Build and Push API & Web"]
    branches:
-      - "deploy/dev"
+      - "deploy/rag-dev"
    types:
      - completed

@@ -13,7 +13,7 @@ jobs:
    runs-on: ubuntu-latest
    if: |
      github.event.workflow_run.conclusion == 'success' &&
-      github.event.workflow_run.head_branch == 'deploy/dev'
+      github.event.workflow_run.head_branch == 'deploy/rag-dev'
    steps:
      - name: Deploy to server
        uses: appleboy/ssh-action@v0.1.8
--- a/.gitignore
+++ b/.gitignore
@@ -147,6 +147,7 @@ api/.idea

 api/.env
 api/storage/*
+api/Dockerfile.local

 docker-legacy/volumes/app/storage/*
 docker-legacy/volumes/db/data/*
--- a/api/.env.example
+++ b/api/.env.example
@@ -304,8 +304,6 @@ BAIDU_VECTOR_DB_API_KEY=dify
 BAIDU_VECTOR_DB_DATABASE=dify
 BAIDU_VECTOR_DB_SHARD=1
 BAIDU_VECTOR_DB_REPLICAS=3
-BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER=DEFAULT_ANALYZER
-BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE=COARSE_MODE

 # Upstash configuration
 UPSTASH_VECTOR_URL=your-server-url
--- a/api/celery_entrypoint.py
+++ b/api/celery_entrypoint.py
@@ -1,11 +1,20 @@
+import logging
+
 import psycogreen.gevent as pscycogreen_gevent  # type: ignore
 from grpc.experimental import gevent as grpc_gevent  # type: ignore

+_logger = logging.getLogger(__name__)
+
+
+def _log(message: str):
+    _logger.debug(message)
+
+
 # grpc gevent
 grpc_gevent.init_gevent()
-print("gRPC patched with gevent.", flush=True)  # noqa: T201
+_log("gRPC  patched with gevent.")
 pscycogreen_gevent.patch_psycopg()
-print("psycopg2 patched with gevent.", flush=True)  # noqa: T201
+_log("psycopg2 patched with gevent.")


 from app import app, celery
--- a/api/commands.py
+++ b/api/commands.py
@@ -10,7 +10,6 @@ from flask import current_app
 from pydantic import TypeAdapter
 from sqlalchemy import select
 from sqlalchemy.exc import SQLAlchemyError
-from sqlalchemy.orm import sessionmaker

 from configs import dify_config
 from constants.languages import languages
@@ -62,30 +61,31 @@ def reset_password(email, new_password, password_confirm):
    if str(new_password).strip() != str(password_confirm).strip():
        click.echo(click.style("Passwords do not match.", fg="red"))
        return
-    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-        account = session.query(Account).where(Account.email == email).one_or_none()

-        if not account:
-            click.echo(click.style(f"Account not found for email: {email}", fg="red"))
-            return
+    account = db.session.query(Account).where(Account.email == email).one_or_none()

-        try:
-            valid_password(new_password)
-        except:
-            click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red"))
-            return
+    if not account:
+        click.echo(click.style(f"Account not found for email: {email}", fg="red"))
+        return

-        # generate password salt
-        salt = secrets.token_bytes(16)
-        base64_salt = base64.b64encode(salt).decode()
+    try:
+        valid_password(new_password)
+    except:
+        click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red"))
+        return

-        # encrypt password with salt
-        password_hashed = hash_password(new_password, salt)
-        base64_password_hashed = base64.b64encode(password_hashed).decode()
-        account.password = base64_password_hashed
-        account.password_salt = base64_salt
-        AccountService.reset_login_error_rate_limit(email)
-        click.echo(click.style("Password reset successfully.", fg="green"))
+    # generate password salt
+    salt = secrets.token_bytes(16)
+    base64_salt = base64.b64encode(salt).decode()
+
+    # encrypt password with salt
+    password_hashed = hash_password(new_password, salt)
+    base64_password_hashed = base64.b64encode(password_hashed).decode()
+    account.password = base64_password_hashed
+    account.password_salt = base64_salt
+    db.session.commit()
+    AccountService.reset_login_error_rate_limit(email)
+    click.echo(click.style("Password reset successfully.", fg="green"))


@click.command("reset-email", help="Reset the account email.")
@@ -100,21 +100,22 @@ def reset_email(email, new_email, email_confirm):
    if str(new_email).strip() != str(email_confirm).strip():
        click.echo(click.style("New emails do not match.", fg="red"))
        return
-    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-        account = session.query(Account).where(Account.email == email).one_or_none()

-        if not account:
-            click.echo(click.style(f"Account not found for email: {email}", fg="red"))
-            return
+    account = db.session.query(Account).where(Account.email == email).one_or_none()

-        try:
-            email_validate(new_email)
-        except:
-            click.echo(click.style(f"Invalid email: {new_email}", fg="red"))
-            return
+    if not account:
+        click.echo(click.style(f"Account not found for email: {email}", fg="red"))
+        return

-        account.email = new_email
-        click.echo(click.style("Email updated successfully.", fg="green"))
+    try:
+        email_validate(new_email)
+    except:
+        click.echo(click.style(f"Invalid email: {new_email}", fg="red"))
+        return
+
+    account.email = new_email
+    db.session.commit()
+    click.echo(click.style("Email updated successfully.", fg="green"))


@click.command(
@@ -138,24 +139,25 @@ def reset_encrypt_key_pair():
    if dify_config.EDITION != "SELF_HOSTED":
        click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red"))
        return
-    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-        tenants = session.query(Tenant).all()
-        for tenant in tenants:
-            if not tenant:
-                click.echo(click.style("No workspaces found. Run /install first.", fg="red"))
-                return

-            tenant.encrypt_public_key = generate_key_pair(tenant.id)
+    tenants = db.session.query(Tenant).all()
+    for tenant in tenants:
+        if not tenant:
+            click.echo(click.style("No workspaces found. Run /install first.", fg="red"))
+            return

-            session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete()
-            session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete()
+        tenant.encrypt_public_key = generate_key_pair(tenant.id)

-            click.echo(
-                click.style(
-                    f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.",
-                    fg="green",
-                )
+        db.session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete()
+        db.session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete()
+        db.session.commit()
+
+        click.echo(
+            click.style(
+                f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.",
+                fg="green",
            )
+        )


@click.command("vdb-migrate", help="Migrate vector db.")
@@ -180,15 +182,14 @@ def migrate_annotation_vector_database():
        try:
            # get apps info
            per_page = 50
-            with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-                apps = (
-                    session.query(App)
-                    .where(App.status == "normal")
-                    .order_by(App.created_at.desc())
-                    .limit(per_page)
-                    .offset((page - 1) * per_page)
-                    .all()
-                )
+            apps = (
+                db.session.query(App)
+                .where(App.status == "normal")
+                .order_by(App.created_at.desc())
+                .limit(per_page)
+                .offset((page - 1) * per_page)
+                .all()
+            )
            if not apps:
                break
        except SQLAlchemyError:
@@ -202,27 +203,26 @@ def migrate_annotation_vector_database():
            )
            try:
                click.echo(f"Creating app annotation index: {app.id}")
-                with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-                    app_annotation_setting = (
-                        session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first()
-                    )
+                app_annotation_setting = (
+                    db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first()
+                )

-                    if not app_annotation_setting:
-                        skipped_count = skipped_count + 1
-                        click.echo(f"App annotation setting disabled: {app.id}")
-                        continue
-                    # get dataset_collection_binding info
-                    dataset_collection_binding = (
-                        session.query(DatasetCollectionBinding)
-                        .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id)
-                        .first()
-                    )
-                    if not dataset_collection_binding:
-                        click.echo(f"App annotation collection binding not found: {app.id}")
-                        continue
-                    annotations = session.scalars(
-                        select(MessageAnnotation).where(MessageAnnotation.app_id == app.id)
-                    ).all()
+                if not app_annotation_setting:
+                    skipped_count = skipped_count + 1
+                    click.echo(f"App annotation setting disabled: {app.id}")
+                    continue
+                # get dataset_collection_binding info
+                dataset_collection_binding = (
+                    db.session.query(DatasetCollectionBinding)
+                    .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id)
+                    .first()
+                )
+                if not dataset_collection_binding:
+                    click.echo(f"App annotation collection binding not found: {app.id}")
+                    continue
+                annotations = db.session.scalars(
+                    select(MessageAnnotation).where(MessageAnnotation.app_id == app.id)
+                ).all()
                dataset = Dataset(
                    id=app.id,
                    tenant_id=app.tenant_id,
@@ -1448,52 +1448,41 @@ def transform_datasource_credentials():
                    notion_credentials_tenant_mapping[tenant_id] = []
                notion_credentials_tenant_mapping[tenant_id].append(notion_credential)
            for tenant_id, notion_tenant_credentials in notion_credentials_tenant_mapping.items():
-                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
-                if not tenant:
-                    continue
-                try:
-                    # check notion plugin is installed
-                    installed_plugins = installer_manager.list_plugins(tenant_id)
-                    installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
-                    if notion_plugin_id not in installed_plugins_ids:
-                        if notion_plugin_unique_identifier:
-                            # install notion plugin
-                            PluginService.install_from_marketplace_pkg(tenant_id, [notion_plugin_unique_identifier])
-                    auth_count = 0
-                    for notion_tenant_credential in notion_tenant_credentials:
-                        auth_count += 1
-                        # get credential oauth params
-                        access_token = notion_tenant_credential.access_token
-                        # notion info
-                        notion_info = notion_tenant_credential.source_info
-                        workspace_id = notion_info.get("workspace_id")
-                        workspace_name = notion_info.get("workspace_name")
-                        workspace_icon = notion_info.get("workspace_icon")
-                        new_credentials = {
-                            "integration_secret": encrypter.encrypt_token(tenant_id, access_token),
-                            "workspace_id": workspace_id,
-                            "workspace_name": workspace_name,
-                            "workspace_icon": workspace_icon,
-                        }
-                        datasource_provider = DatasourceProvider(
-                            provider="notion_datasource",
-                            tenant_id=tenant_id,
-                            plugin_id=notion_plugin_id,
-                            auth_type=oauth_credential_type.value,
-                            encrypted_credentials=new_credentials,
-                            name=f"Auth {auth_count}",
-                            avatar_url=workspace_icon or "default",
-                            is_default=False,
-                        )
-                        db.session.add(datasource_provider)
-                        deal_notion_count += 1
-                except Exception as e:
-                    click.echo(
-                        click.style(
-                            f"Error transforming notion credentials: {str(e)}, tenant_id: {tenant_id}", fg="red"
-                        )
+                # check notion plugin is installed
+                installed_plugins = installer_manager.list_plugins(tenant_id)
+                installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
+                if notion_plugin_id not in installed_plugins_ids:
+                    if notion_plugin_unique_identifier:
+                        # install notion plugin
+                        PluginService.install_from_marketplace_pkg(tenant_id, [notion_plugin_unique_identifier])
+                auth_count = 0
+                for notion_tenant_credential in notion_tenant_credentials:
+                    auth_count += 1
+                    # get credential oauth params
+                    access_token = notion_tenant_credential.access_token
+                    # notion info
+                    notion_info = notion_tenant_credential.source_info
+                    workspace_id = notion_info.get("workspace_id")
+                    workspace_name = notion_info.get("workspace_name")
+                    workspace_icon = notion_info.get("workspace_icon")
+                    new_credentials = {
+                        "integration_secret": encrypter.encrypt_token(tenant_id, access_token),
+                        "workspace_id": workspace_id,
+                        "workspace_name": workspace_name,
+                        "workspace_icon": workspace_icon,
+                    }
+                    datasource_provider = DatasourceProvider(
+                        provider="notion_datasource",
+                        tenant_id=tenant_id,
+                        plugin_id=notion_plugin_id,
+                        auth_type=oauth_credential_type.value,
+                        encrypted_credentials=new_credentials,
+                        name=f"Auth {auth_count}",
+                        avatar_url=workspace_icon or "default",
+                        is_default=False,
                    )
-                    continue
+                    db.session.add(datasource_provider)
+                    deal_notion_count += 1
                db.session.commit()
        # deal firecrawl credentials
        deal_firecrawl_count = 0
@@ -1506,48 +1495,37 @@ def transform_datasource_credentials():
                    firecrawl_credentials_tenant_mapping[tenant_id] = []
                firecrawl_credentials_tenant_mapping[tenant_id].append(firecrawl_credential)
            for tenant_id, firecrawl_tenant_credentials in firecrawl_credentials_tenant_mapping.items():
-                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
-                if not tenant:
-                    continue
-                try:
-                    # check firecrawl plugin is installed
-                    installed_plugins = installer_manager.list_plugins(tenant_id)
-                    installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
-                    if firecrawl_plugin_id not in installed_plugins_ids:
-                        if firecrawl_plugin_unique_identifier:
-                            # install firecrawl plugin
-                            PluginService.install_from_marketplace_pkg(tenant_id, [firecrawl_plugin_unique_identifier])
+                # check firecrawl plugin is installed
+                installed_plugins = installer_manager.list_plugins(tenant_id)
+                installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
+                if firecrawl_plugin_id not in installed_plugins_ids:
+                    if firecrawl_plugin_unique_identifier:
+                        # install firecrawl plugin
+                        PluginService.install_from_marketplace_pkg(tenant_id, [firecrawl_plugin_unique_identifier])

-                    auth_count = 0
-                    for firecrawl_tenant_credential in firecrawl_tenant_credentials:
-                        auth_count += 1
-                        # get credential api key
-                        credentials_json = json.loads(firecrawl_tenant_credential.credentials)
-                        api_key = credentials_json.get("config", {}).get("api_key")
-                        base_url = credentials_json.get("config", {}).get("base_url")
-                        new_credentials = {
-                            "firecrawl_api_key": api_key,
-                            "base_url": base_url,
-                        }
-                        datasource_provider = DatasourceProvider(
-                            provider="firecrawl",
-                            tenant_id=tenant_id,
-                            plugin_id=firecrawl_plugin_id,
-                            auth_type=api_key_credential_type.value,
-                            encrypted_credentials=new_credentials,
-                            name=f"Auth {auth_count}",
-                            avatar_url="default",
-                            is_default=False,
-                        )
-                        db.session.add(datasource_provider)
-                        deal_firecrawl_count += 1
-                except Exception as e:
-                    click.echo(
-                        click.style(
-                            f"Error transforming firecrawl credentials: {str(e)}, tenant_id: {tenant_id}", fg="red"
-                        )
+                auth_count = 0
+                for firecrawl_tenant_credential in firecrawl_tenant_credentials:
+                    auth_count += 1
+                    # get credential api key
+                    credentials_json = json.loads(firecrawl_tenant_credential.credentials)
+                    api_key = credentials_json.get("config", {}).get("api_key")
+                    base_url = credentials_json.get("config", {}).get("base_url")
+                    new_credentials = {
+                        "firecrawl_api_key": api_key,
+                        "base_url": base_url,
+                    }
+                    datasource_provider = DatasourceProvider(
+                        provider="firecrawl",
+                        tenant_id=tenant_id,
+                        plugin_id=firecrawl_plugin_id,
+                        auth_type=api_key_credential_type.value,
+                        encrypted_credentials=new_credentials,
+                        name=f"Auth {auth_count}",
+                        avatar_url="default",
+                        is_default=False,
                    )
-                    continue
+                    db.session.add(datasource_provider)
+                    deal_firecrawl_count += 1
                db.session.commit()
        # deal jina credentials
        deal_jina_count = 0
@@ -1560,45 +1538,36 @@ def transform_datasource_credentials():
                    jina_credentials_tenant_mapping[tenant_id] = []
                jina_credentials_tenant_mapping[tenant_id].append(jina_credential)
            for tenant_id, jina_tenant_credentials in jina_credentials_tenant_mapping.items():
-                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
-                if not tenant:
-                    continue
-                try:
-                    # check jina plugin is installed
-                    installed_plugins = installer_manager.list_plugins(tenant_id)
-                    installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
-                    if jina_plugin_id not in installed_plugins_ids:
-                        if jina_plugin_unique_identifier:
-                            # install jina plugin
-                            logger.debug("Installing Jina plugin %s", jina_plugin_unique_identifier)
-                            PluginService.install_from_marketplace_pkg(tenant_id, [jina_plugin_unique_identifier])
+                # check jina plugin is installed
+                installed_plugins = installer_manager.list_plugins(tenant_id)
+                installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
+                if jina_plugin_id not in installed_plugins_ids:
+                    if jina_plugin_unique_identifier:
+                        # install jina plugin
+                        logger.debug("Installing Jina plugin %s", jina_plugin_unique_identifier)
+                        PluginService.install_from_marketplace_pkg(tenant_id, [jina_plugin_unique_identifier])

-                    auth_count = 0
-                    for jina_tenant_credential in jina_tenant_credentials:
-                        auth_count += 1
-                        # get credential api key
-                        credentials_json = json.loads(jina_tenant_credential.credentials)
-                        api_key = credentials_json.get("config", {}).get("api_key")
-                        new_credentials = {
-                            "integration_secret": api_key,
-                        }
-                        datasource_provider = DatasourceProvider(
-                            provider="jina",
-                            tenant_id=tenant_id,
-                            plugin_id=jina_plugin_id,
-                            auth_type=api_key_credential_type.value,
-                            encrypted_credentials=new_credentials,
-                            name=f"Auth {auth_count}",
-                            avatar_url="default",
-                            is_default=False,
-                        )
-                        db.session.add(datasource_provider)
-                        deal_jina_count += 1
-                except Exception as e:
-                    click.echo(
-                        click.style(f"Error transforming jina credentials: {str(e)}, tenant_id: {tenant_id}", fg="red")
+                auth_count = 0
+                for jina_tenant_credential in jina_tenant_credentials:
+                    auth_count += 1
+                    # get credential api key
+                    credentials_json = json.loads(jina_tenant_credential.credentials)
+                    api_key = credentials_json.get("config", {}).get("api_key")
+                    new_credentials = {
+                        "integration_secret": api_key,
+                    }
+                    datasource_provider = DatasourceProvider(
+                        provider="jina",
+                        tenant_id=tenant_id,
+                        plugin_id=jina_plugin_id,
+                        auth_type=api_key_credential_type.value,
+                        encrypted_credentials=new_credentials,
+                        name=f"Auth {auth_count}",
+                        avatar_url="default",
+                        is_default=False,
                    )
-                    continue
+                    db.session.add(datasource_provider)
+                    deal_jina_count += 1
                db.session.commit()
    except Exception as e:
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
--- a/api/configs/middleware/vdb/baidu_vector_config.py
+++ b/api/configs/middleware/vdb/baidu_vector_config.py
@@ -41,13 +41,3 @@ class BaiduVectorDBConfig(BaseSettings):
        description="Number of replicas for the Baidu Vector Database (default is 3)",
        default=3,
    )
-
-    BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER: str = Field(
-        description="Analyzer type for inverted index in Baidu Vector Database (default is DEFAULT_ANALYZER)",
-        default="DEFAULT_ANALYZER",
-    )
-
-    BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE: str = Field(
-        description="Parser mode for inverted index in Baidu Vector Database (default is COARSE_MODE)",
-        default="COARSE_MODE",
-    )
--- a/api/configs/middleware/vdb/oceanbase_config.py
+++ b/api/configs/middleware/vdb/oceanbase_config.py
@@ -37,15 +37,3 @@ class OceanBaseVectorConfig(BaseSettings):
        "with older versions",
        default=False,
    )
-
-    OCEANBASE_FULLTEXT_PARSER: str | None = Field(
-        description=(
-            "Fulltext parser to use for text indexing. "
-            "Built-in options: 'ngram' (N-gram tokenizer for English/numbers), "
-            "'beng' (Basic English tokenizer), 'space' (Space-based tokenizer), "
-            "'ngram2' (Improved N-gram tokenizer), 'ik' (Chinese tokenizer). "
-            "External plugins (require installation): 'japanese_ftparser' (Japanese tokenizer), "
-            "'thai_ftparser' (Thai tokenizer). Default is 'ik'"
-        ),
-        default="ik",
-    )
--- a/api/configs/remote_settings_sources/nacos/http_request.py
+++ b/api/configs/remote_settings_sources/nacos/http_request.py
@@ -5,7 +5,7 @@ import logging
 import os
 import time

-import httpx
+import requests

 logger = logging.getLogger(__name__)

@@ -30,10 +30,10 @@ class NacosHttpClient:
            params = {}
        try:
            self._inject_auth_info(headers, params)
-            response = httpx.request(method, url="http://" + self.server + url, headers=headers, params=params)
+            response = requests.request(method, url="http://" + self.server + url, headers=headers, params=params)
            response.raise_for_status()
            return response.text
-        except httpx.RequestError as e:
+        except requests.RequestException as e:
            return f"Request to Nacos failed: {e}"

    def _inject_auth_info(self, headers: dict[str, str], params: dict[str, str], module: str = "config") -> None:
@@ -78,7 +78,7 @@ class NacosHttpClient:
        params = {"username": self.username, "password": self.password}
        url = "http://" + self.server + "/nacos/v1/auth/login"
        try:
-            resp = httpx.request("POST", url, headers=None, params=params)
+            resp = requests.request("POST", url, headers=None, params=params)
            resp.raise_for_status()
            response_data = resp.json()
            self.token = response_data.get("accessToken")
--- a/api/controllers/console/app/conversation.py
+++ b/api/controllers/console/app/conversation.py
@@ -1,7 +1,6 @@
 from datetime import datetime

 import pytz  # pip install pytz
-import sqlalchemy as sa
 from flask_login import current_user
 from flask_restx import Resource, marshal_with, reqparse
 from flask_restx.inputs import int_range
@@ -71,7 +70,7 @@ class CompletionConversationApi(Resource):
        parser.add_argument("limit", type=int_range(1, 100), default=20, location="args")
        args = parser.parse_args()

-        query = sa.select(Conversation).where(
+        query = db.select(Conversation).where(
            Conversation.app_id == app_model.id, Conversation.mode == "completion", Conversation.is_deleted.is_(False)
        )

@@ -237,7 +236,7 @@ class ChatConversationApi(Resource):
            .subquery()
        )

-        query = sa.select(Conversation).where(Conversation.app_id == app_model.id, Conversation.is_deleted.is_(False))
+        query = db.select(Conversation).where(Conversation.app_id == app_model.id, Conversation.is_deleted.is_(False))

        if args["keyword"]:
            keyword_filter = f"%{args['keyword']}%"
--- a/api/controllers/console/app/statistic.py
+++ b/api/controllers/console/app/statistic.py
@@ -50,9 +50,8 @@ class DailyMessageStatistic(Resource):
 FROM
    messages
 WHERE
-    app_id = :app_id
-    AND invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+    app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
@@ -188,9 +187,8 @@ class DailyTerminalsStatistic(Resource):
 FROM
    messages
 WHERE
-    app_id = :app_id
-    AND invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+    app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
@@ -261,9 +259,8 @@ class DailyTokenCostStatistic(Resource):
 FROM
    messages
 WHERE
-    app_id = :app_id
-    AND invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+    app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
@@ -343,9 +340,8 @@ FROM
            messages m
            ON c.id = m.conversation_id
        WHERE
-            c.app_id = :app_id
-            AND m.invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+            c.app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
@@ -430,9 +426,8 @@ LEFT JOIN
    message_feedbacks mf
    ON mf.message_id=m.id AND mf.rating='like'
 WHERE
-    m.app_id = :app_id
-    AND m.invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+    m.app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
@@ -507,9 +502,8 @@ class AverageResponseTimeStatistic(Resource):
 FROM
    messages
 WHERE
-    app_id = :app_id
-    AND invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+    app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
@@ -582,9 +576,8 @@ class TokensPerSecondStatistic(Resource):
 FROM
    messages
 WHERE
-    app_id = :app_id
-    AND invoke_from != :invoke_from"""
-        arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER.value}
+    app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}

        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc
--- a/api/controllers/console/auth/data_source_oauth.py
+++ b/api/controllers/console/auth/data_source_oauth.py
@@ -1,6 +1,6 @@
 import logging

-import httpx
+import requests
 from flask import current_app, redirect, request
 from flask_login import current_user
 from flask_restx import Resource, fields
@@ -119,7 +119,7 @@ class OAuthDataSourceBinding(Resource):
                return {"error": "Invalid code"}, 400
            try:
                oauth_provider.get_access_token(code)
-            except httpx.HTTPStatusError as e:
+            except requests.HTTPError as e:
                logger.exception(
                    "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text
                )
@@ -152,7 +152,7 @@ class OAuthDataSourceSync(Resource):
            return {"error": "Invalid provider"}, 400
        try:
            oauth_provider.sync_data_source(binding_id)
-        except httpx.HTTPStatusError as e:
+        except requests.HTTPError as e:
            logger.exception(
                "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text
            )
--- a/api/controllers/console/auth/oauth.py
+++ b/api/controllers/console/auth/oauth.py
@@ -1,6 +1,6 @@
 import logging

-import httpx
+import requests
 from flask import current_app, redirect, request
 from flask_restx import Resource
 from sqlalchemy import select
@@ -101,10 +101,8 @@ class OAuthCallback(Resource):
        try:
            token = oauth_provider.get_access_token(code)
            user_info = oauth_provider.get_user_info(token)
-        except httpx.RequestError as e:
-            error_text = str(e)
-            if isinstance(e, httpx.HTTPStatusError):
-                error_text = e.response.text
+        except requests.RequestException as e:
+            error_text = e.response.text if e.response is not None else str(e)  # Response is falsy on 4xx/5xx
            logger.exception("An error occurred during the OAuth process with %s: %s", provider, error_text)
            return {"error": "OAuth process failed"}, 400

--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -782,6 +782,7 @@ class DatasetRetrievalSettingApi(Resource):
                | VectorType.TIDB_VECTOR
                | VectorType.CHROMA
                | VectorType.PGVECTO_RS
+                | VectorType.BAIDU
                | VectorType.VIKINGDB
                | VectorType.UPSTASH
            ):
@@ -808,7 +809,6 @@ class DatasetRetrievalSettingApi(Resource):
                | VectorType.TENCENT
                | VectorType.MATRIXONE
                | VectorType.CLICKZETTA
-                | VectorType.BAIDU
            ):
                return {
                    "retrieval_method": [
@@ -838,6 +838,7 @@ class DatasetRetrievalSettingMockApi(Resource):
                | VectorType.TIDB_VECTOR
                | VectorType.CHROMA
                | VectorType.PGVECTO_RS
+                | VectorType.BAIDU
                | VectorType.VIKINGDB
                | VectorType.UPSTASH
            ):
@@ -862,7 +863,6 @@ class DatasetRetrievalSettingMockApi(Resource):
                | VectorType.HUAWEI_CLOUD
                | VectorType.MATRIXONE
                | VectorType.CLICKZETTA
-                | VectorType.BAIDU
            ):
                return {
                    "retrieval_method": [
--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@@ -4,7 +4,6 @@ from argparse import ArgumentTypeError
 from collections.abc import Sequence
 from typing import Literal, cast

-import sqlalchemy as sa
 from flask import request
 from flask_login import current_user
 from flask_restx import Resource, fields, marshal, marshal_with, reqparse
@@ -212,13 +211,13 @@ class DatasetDocumentListApi(Resource):

        if sort == "hit_count":
            sub_query = (
-                sa.select(DocumentSegment.document_id, sa.func.sum(DocumentSegment.hit_count).label("total_hit_count"))
+                db.select(DocumentSegment.document_id, db.func.sum(DocumentSegment.hit_count).label("total_hit_count"))
                .group_by(DocumentSegment.document_id)
                .subquery()
            )

            query = query.outerjoin(sub_query, sub_query.c.document_id == Document.id).order_by(
-                sort_logic(sa.func.coalesce(sub_query.c.total_hit_count, 0)),
+                sort_logic(db.func.coalesce(sub_query.c.total_hit_count, 0)),
                sort_logic(Document.position),
            )
        elif sort == "created_at":
--- a/api/controllers/console/version.py
+++ b/api/controllers/console/version.py
@@ -1,7 +1,7 @@
 import json
 import logging

-import httpx
+import requests
 from flask_restx import Resource, fields, reqparse
 from packaging import version

@@ -57,11 +57,7 @@ class VersionApi(Resource):
            return result

        try:
-            response = httpx.get(
-                check_update_url,
-                params={"current_version": args["current_version"]},
-                timeout=httpx.Timeout(connect=3, read=10),
-            )
+            response = requests.get(check_update_url, {"current_version": args["current_version"]}, timeout=(3, 10))
        except Exception as error:
            logger.warning("Check update version error: %s.", str(error))
            result["version"] = args["current_version"]
--- a/api/core/app/apps/advanced_chat/app_runner.py
+++ b/api/core/app/apps/advanced_chat/app_runner.py
@@ -79,12 +79,29 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        if not app_record:
            raise ValueError("App not found")

-        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
-            # Handle single iteration or single loop run
-            graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
+        if self.application_generate_entity.single_iteration_run:
+            # if only single iteration run is requested
+            graph_runtime_state = GraphRuntimeState(
+                variable_pool=VariablePool.empty(),
+                start_at=time.time(),
+            )
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
                workflow=self._workflow,
-                single_iteration_run=self.application_generate_entity.single_iteration_run,
-                single_loop_run=self.application_generate_entity.single_loop_run,
+                node_id=self.application_generate_entity.single_iteration_run.node_id,
+                user_inputs=dict(self.application_generate_entity.single_iteration_run.inputs),
+                graph_runtime_state=graph_runtime_state,
+            )
+        elif self.application_generate_entity.single_loop_run:
+            # if only single loop run is requested
+            graph_runtime_state = GraphRuntimeState(
+                variable_pool=VariablePool.empty(),
+                start_at=time.time(),
+            )
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
+                workflow=self._workflow,
+                node_id=self.application_generate_entity.single_loop_run.node_id,
+                user_inputs=dict(self.application_generate_entity.single_loop_run.inputs),
+                graph_runtime_state=graph_runtime_state,
            )
        else:
            inputs = self.application_generate_entity.inputs
--- a/api/core/app/apps/pipeline/pipeline_generator.py
+++ b/api/core/app/apps/pipeline/pipeline_generator.py
@@ -427,9 +427,6 @@ class PipelineGenerator(BaseAppGenerator):
            invoke_from=InvokeFrom.DEBUGGER,
            call_depth=0,
            workflow_execution_id=str(uuid.uuid4()),
-            single_iteration_run=RagPipelineGenerateEntity.SingleIterationRunEntity(
-                node_id=node_id, inputs=args["inputs"]
-            ),
        )
        contexts.plugin_tool_providers.set({})
        contexts.plugin_tool_providers_lock.set(threading.Lock())
@@ -468,7 +465,6 @@ class PipelineGenerator(BaseAppGenerator):
            workflow_node_execution_repository=workflow_node_execution_repository,
            streaming=streaming,
            variable_loader=var_loader,
-            context=contextvars.copy_context(),
        )

    def single_loop_generate(
@@ -563,7 +559,6 @@ class PipelineGenerator(BaseAppGenerator):
            workflow_node_execution_repository=workflow_node_execution_repository,
            streaming=streaming,
            variable_loader=var_loader,
-            context=contextvars.copy_context(),
        )

    def _generate_worker(
--- a/api/core/app/apps/pipeline/pipeline_runner.py
+++ b/api/core/app/apps/pipeline/pipeline_runner.py
@@ -86,12 +86,29 @@ class PipelineRunner(WorkflowBasedAppRunner):
        db.session.close()

        # if only single iteration run is requested
-        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
-            # Handle single iteration or single loop run
-            graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
+        if self.application_generate_entity.single_iteration_run:
+            graph_runtime_state = GraphRuntimeState(
+                variable_pool=VariablePool.empty(),
+                start_at=time.time(),
+            )
+            # if only single iteration run is requested
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
                workflow=workflow,
-                single_iteration_run=self.application_generate_entity.single_iteration_run,
-                single_loop_run=self.application_generate_entity.single_loop_run,
+                node_id=self.application_generate_entity.single_iteration_run.node_id,
+                user_inputs=self.application_generate_entity.single_iteration_run.inputs,
+                graph_runtime_state=graph_runtime_state,
+            )
+        elif self.application_generate_entity.single_loop_run:
+            graph_runtime_state = GraphRuntimeState(
+                variable_pool=VariablePool.empty(),
+                start_at=time.time(),
+            )
+            # if only single loop run is requested
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
+                workflow=workflow,
+                node_id=self.application_generate_entity.single_loop_run.node_id,
+                user_inputs=self.application_generate_entity.single_loop_run.inputs,
+                graph_runtime_state=graph_runtime_state,
            )
        else:
            inputs = self.application_generate_entity.inputs
--- a/api/core/app/apps/workflow/app_runner.py
+++ b/api/core/app/apps/workflow/app_runner.py
@@ -51,12 +51,30 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
        app_config = self.application_generate_entity.app_config
        app_config = cast(WorkflowAppConfig, app_config)

-        # if only single iteration or single loop run is requested
-        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
-            graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
+        # if only single iteration run is requested
+        if self.application_generate_entity.single_iteration_run:
+            # if only single iteration run is requested
+            graph_runtime_state = GraphRuntimeState(
+                variable_pool=VariablePool.empty(),
+                start_at=time.time(),
+            )
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
                workflow=self._workflow,
-                single_iteration_run=self.application_generate_entity.single_iteration_run,
-                single_loop_run=self.application_generate_entity.single_loop_run,
+                node_id=self.application_generate_entity.single_iteration_run.node_id,
+                user_inputs=self.application_generate_entity.single_iteration_run.inputs,
+                graph_runtime_state=graph_runtime_state,
+            )
+        elif self.application_generate_entity.single_loop_run:
+            # if only single loop run is requested
+            graph_runtime_state = GraphRuntimeState(
+                variable_pool=VariablePool.empty(),
+                start_at=time.time(),
+            )
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
+                workflow=self._workflow,
+                node_id=self.application_generate_entity.single_loop_run.node_id,
+                user_inputs=self.application_generate_entity.single_loop_run.inputs,
+                graph_runtime_state=graph_runtime_state,
            )
        else:
            inputs = self.application_generate_entity.inputs
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@@ -1,4 +1,3 @@
-import time
 from collections.abc import Mapping
 from typing import Any, cast

@@ -120,81 +119,15 @@ class WorkflowBasedAppRunner:

        return graph

-    def _prepare_single_node_execution(
-        self,
-        workflow: Workflow,
-        single_iteration_run: Any | None = None,
-        single_loop_run: Any | None = None,
-    ) -> tuple[Graph, VariablePool, GraphRuntimeState]:
-        """
-        Prepare graph, variable pool, and runtime state for single node execution
-        (either single iteration or single loop).
-
-        Args:
-            workflow: The workflow instance
-            single_iteration_run: SingleIterationRunEntity if running single iteration, None otherwise
-            single_loop_run: SingleLoopRunEntity if running single loop, None otherwise
-
-        Returns:
-            A tuple containing (graph, variable_pool, graph_runtime_state)
-
-        Raises:
-            ValueError: If neither single_iteration_run nor single_loop_run is specified
-        """
-        # Create initial runtime state with variable pool containing environment variables
-        graph_runtime_state = GraphRuntimeState(
-            variable_pool=VariablePool(
-                system_variables=SystemVariable.empty(),
-                user_inputs={},
-                environment_variables=workflow.environment_variables,
-            ),
-            start_at=time.time(),
-        )
-
-        # Determine which type of single node execution and get graph/variable_pool
-        if single_iteration_run:
-            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
-                workflow=workflow,
-                node_id=single_iteration_run.node_id,
-                user_inputs=dict(single_iteration_run.inputs),
-                graph_runtime_state=graph_runtime_state,
-            )
-        elif single_loop_run:
-            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
-                workflow=workflow,
-                node_id=single_loop_run.node_id,
-                user_inputs=dict(single_loop_run.inputs),
-                graph_runtime_state=graph_runtime_state,
-            )
-        else:
-            raise ValueError("Neither single_iteration_run nor single_loop_run is specified")
-
-        # Return the graph, variable_pool, and the same graph_runtime_state used during graph creation
-        # This ensures all nodes in the graph reference the same GraphRuntimeState instance
-        return graph, variable_pool, graph_runtime_state
-
-    def _get_graph_and_variable_pool_for_single_node_run(
+    def _get_graph_and_variable_pool_of_single_iteration(
        self,
        workflow: Workflow,
        node_id: str,
-        user_inputs: dict[str, Any],
+        user_inputs: dict,
        graph_runtime_state: GraphRuntimeState,
-        node_type_filter_key: str,  # 'iteration_id' or 'loop_id'
-        node_type_label: str = "node",  # 'iteration' or 'loop' for error messages
    ) -> tuple[Graph, VariablePool]:
        """
-        Get graph and variable pool for single node execution (iteration or loop).
-
-        Args:
-            workflow: The workflow instance
-            node_id: The node ID to execute
-            user_inputs: User inputs for the node
-            graph_runtime_state: The graph runtime state
-            node_type_filter_key: The key to filter nodes ('iteration_id' or 'loop_id')
-            node_type_label: Label for error messages ('iteration' or 'loop')
-
-        Returns:
-            A tuple containing (graph, variable_pool)
+        Get variable pool of single iteration
        """
        # fetch workflow graph
        graph_config = workflow.graph_dict
@@ -212,22 +145,18 @@ class WorkflowBasedAppRunner:
        if not isinstance(graph_config.get("edges"), list):
            raise ValueError("edges in workflow graph must be a list")

-        # filter nodes only in the specified node type (iteration or loop)
-        main_node_config = next((n for n in graph_config.get("nodes", []) if n.get("id") == node_id), None)
-        start_node_id = main_node_config.get("data", {}).get("start_node_id") if main_node_config else None
+        # filter nodes only in iteration
        node_configs = [
            node
            for node in graph_config.get("nodes", [])
-            if node.get("id") == node_id
-            or node.get("data", {}).get(node_type_filter_key, "") == node_id
-            or (start_node_id and node.get("id") == start_node_id)
+            if node.get("id") == node_id or node.get("data", {}).get("iteration_id", "") == node_id
        ]

        graph_config["nodes"] = node_configs

        node_ids = [node.get("id") for node in node_configs]

-        # filter edges only in the specified node type
+        # filter edges only in iteration
        edge_configs = [
            edge
            for edge in graph_config.get("edges", [])
@@ -261,26 +190,30 @@ class WorkflowBasedAppRunner:
            raise ValueError("graph not found in workflow")

        # fetch node config from node id
-        target_node_config = None
+        iteration_node_config = None
        for node in node_configs:
            if node.get("id") == node_id:
-                target_node_config = node
+                iteration_node_config = node
                break

-        if not target_node_config:
-            raise ValueError(f"{node_type_label} node id not found in workflow graph")
+        if not iteration_node_config:
+            raise ValueError("iteration node id not found in workflow graph")

        # Get node class
-        node_type = NodeType(target_node_config.get("data", {}).get("type"))
-        node_version = target_node_config.get("data", {}).get("version", "1")
+        node_type = NodeType(iteration_node_config.get("data", {}).get("type"))
+        node_version = iteration_node_config.get("data", {}).get("version", "1")
        node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]

-        # Use the variable pool from graph_runtime_state instead of creating a new one
-        variable_pool = graph_runtime_state.variable_pool
+        # init variable pool
+        variable_pool = VariablePool(
+            system_variables=SystemVariable.empty(),
+            user_inputs={},
+            environment_variables=workflow.environment_variables,
+        )

        try:
            variable_mapping = node_cls.extract_variable_selector_to_variable_mapping(
-                graph_config=workflow.graph_dict, config=target_node_config
+                graph_config=workflow.graph_dict, config=iteration_node_config
            )
        except NotImplementedError:
            variable_mapping = {}
@@ -301,44 +234,120 @@ class WorkflowBasedAppRunner:

        return graph, variable_pool

-    def _get_graph_and_variable_pool_of_single_iteration(
-        self,
-        workflow: Workflow,
-        node_id: str,
-        user_inputs: dict[str, Any],
-        graph_runtime_state: GraphRuntimeState,
-    ) -> tuple[Graph, VariablePool]:
-        """
-        Get variable pool of single iteration
-        """
-        return self._get_graph_and_variable_pool_for_single_node_run(
-            workflow=workflow,
-            node_id=node_id,
-            user_inputs=user_inputs,
-            graph_runtime_state=graph_runtime_state,
-            node_type_filter_key="iteration_id",
-            node_type_label="iteration",
-        )
-
    def _get_graph_and_variable_pool_of_single_loop(
        self,
        workflow: Workflow,
        node_id: str,
-        user_inputs: dict[str, Any],
+        user_inputs: dict,
        graph_runtime_state: GraphRuntimeState,
    ) -> tuple[Graph, VariablePool]:
        """
        Get variable pool of single loop
        """
-        return self._get_graph_and_variable_pool_for_single_node_run(
-            workflow=workflow,
-            node_id=node_id,
-            user_inputs=user_inputs,
-            graph_runtime_state=graph_runtime_state,
-            node_type_filter_key="loop_id",
-            node_type_label="loop",
+        # fetch workflow graph
+        graph_config = workflow.graph_dict
+        if not graph_config:
+            raise ValueError("workflow graph not found")
+
+        graph_config = cast(dict[str, Any], graph_config)
+
+        if "nodes" not in graph_config or "edges" not in graph_config:
+            raise ValueError("nodes or edges not found in workflow graph")
+
+        if not isinstance(graph_config.get("nodes"), list):
+            raise ValueError("nodes in workflow graph must be a list")
+
+        if not isinstance(graph_config.get("edges"), list):
+            raise ValueError("edges in workflow graph must be a list")
+
+        # filter nodes only in loop
+        node_configs = [
+            node
+            for node in graph_config.get("nodes", [])
+            if node.get("id") == node_id or node.get("data", {}).get("loop_id", "") == node_id
+        ]
+
+        graph_config["nodes"] = node_configs
+
+        node_ids = [node.get("id") for node in node_configs]
+
+        # filter edges only in loop
+        edge_configs = [
+            edge
+            for edge in graph_config.get("edges", [])
+            if (edge.get("source") is None or edge.get("source") in node_ids)
+            and (edge.get("target") is None or edge.get("target") in node_ids)
+        ]
+
+        graph_config["edges"] = edge_configs
+
+        # Create required parameters for Graph.init
+        graph_init_params = GraphInitParams(
+            tenant_id=workflow.tenant_id,
+            app_id=self._app_id,
+            workflow_id=workflow.id,
+            graph_config=graph_config,
+            user_id="",
+            user_from=UserFrom.ACCOUNT.value,
+            invoke_from=InvokeFrom.SERVICE_API.value,
+            call_depth=0,
        )

+        node_factory = DifyNodeFactory(
+            graph_init_params=graph_init_params,
+            graph_runtime_state=graph_runtime_state,
+        )
+
+        # init graph
+        graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=node_id)
+
+        if not graph:
+            raise ValueError("graph not found in workflow")
+
+        # fetch node config from node id
+        loop_node_config = None
+        for node in node_configs:
+            if node.get("id") == node_id:
+                loop_node_config = node
+                break
+
+        if not loop_node_config:
+            raise ValueError("loop node id not found in workflow graph")
+
+        # Get node class
+        node_type = NodeType(loop_node_config.get("data", {}).get("type"))
+        node_version = loop_node_config.get("data", {}).get("version", "1")
+        node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
+
+        # init variable pool
+        variable_pool = VariablePool(
+            system_variables=SystemVariable.empty(),
+            user_inputs={},
+            environment_variables=workflow.environment_variables,
+        )
+
+        try:
+            variable_mapping = node_cls.extract_variable_selector_to_variable_mapping(
+                graph_config=workflow.graph_dict, config=loop_node_config
+            )
+        except NotImplementedError:
+            variable_mapping = {}
+        load_into_variable_pool(
+            self._variable_loader,
+            variable_pool=variable_pool,
+            variable_mapping=variable_mapping,
+            user_inputs=user_inputs,
+        )
+
+        WorkflowEntry.mapping_user_inputs_to_variable_pool(
+            variable_mapping=variable_mapping,
+            user_inputs=user_inputs,
+            variable_pool=variable_pool,
+            tenant_id=workflow.tenant_id,
+        )
+
+        return graph, variable_pool
+
    def _handle_event(self, workflow_entry: WorkflowEntry, event: GraphEngineEvent):
        """
        Handle event
--- a/api/core/datasource/utils/parser.py
+++ b/api/core/datasource/utils/parser.py
@@ -0,0 +1,388 @@
+import re
+import uuid
+from json import dumps as json_dumps
+from json import loads as json_loads
+from json.decoder import JSONDecodeError
+
+from flask import request
+from requests import get
+from yaml import YAMLError, safe_load  # type: ignore
+
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_bundle import ApiToolBundle
+from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
+from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
+
+
+class ApiBasedToolSchemaParser:
+    @staticmethod
+    def parse_openapi_to_tool_bundle(
+        openapi: dict, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openapi dict to tool bundle
+
+        :param openapi: the openapi document as a dict
+        :param extra_info: the extra info, receives the document description under "description"
+        :param warning: the warning message, receives "duplicated_parameter" when a parameter name repeats
+        :return: the tool bundle, one entry per path and http method
+        :raises ToolProviderNotFoundError: if the document declares no server
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        # set description to extra_info
+        extra_info["description"] = openapi["info"].get("description", "")
+
+        # a missing or empty "servers" section is reported the same way
+        servers = openapi.get("servers")
+        if not servers:
+            raise ToolProviderNotFoundError("No server found in the openapi yaml.")
+
+        # default to the first server; the X-Request-Env header may select another one
+        server_url = servers[0]["url"]
+        request_env = request.headers.get("X-Request-Env")
+        if request_env:
+            # server entries without an "env" key simply never match
+            matched_servers = [server["url"] for server in servers if server.get("env") == request_env]
+            server_url = matched_servers[0] if matched_servers else server_url
+
+        # list all interfaces
+        methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
+        interfaces = []
+        for path, path_item in openapi["paths"].items():
+            for method in methods:
+                if method in path_item:
+                    interfaces.append(
+                        {
+                            "path": path,
+                            "method": method,
+                            "operation": path_item[method],
+                        }
+                    )
+
+        # get all parameters
+        bundles = []
+        for interface in interfaces:
+            # convert parameters
+            parameters = []
+            if "parameters" in interface["operation"]:
+                for parameter in interface["operation"]["parameters"]:
+                    tool_parameter = ToolParameter(
+                        name=parameter["name"],
+                        label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
+                        human_description=I18nObject(
+                            en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
+                        ),
+                        type=ToolParameter.ToolParameterType.STRING,
+                        required=parameter.get("required", False),
+                        form=ToolParameter.ToolParameterForm.LLM,
+                        llm_description=parameter.get("description"),
+                        default=parameter["schema"]["default"]
+                        if "schema" in parameter and "default" in parameter["schema"]
+                        else None,
+                        placeholder=I18nObject(
+                            en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
+                        ),
+                    )
+
+                    # check if there is a type
+                    typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
+                    if typ:
+                        tool_parameter.type = typ
+
+                    parameters.append(tool_parameter)
+            # create tool bundle
+            # check if there is a request body
+            if "requestBody" in interface["operation"]:
+                request_body = interface["operation"]["requestBody"]
+                if "content" in request_body:
+                    # remember the last media type of THIS operation; it stays None when "content" is
+                    # empty, so a loop variable left over from a previous operation is never reused
+                    body_content_type = None
+                    for content_type, content in request_body["content"].items():
+                        body_content_type = content_type
+                        # if there is a reference, get the reference and overwrite the content
+                        if "schema" not in content:
+                            continue
+
+                        if "$ref" in content["schema"]:
+                            # get the reference
+                            root = openapi
+                            reference = content["schema"]["$ref"].split("/")[1:]
+                            for ref in reference:
+                                root = root[ref]
+                            # overwrite the content
+                            request_body["content"][content_type]["schema"] = root
+
+                    # parse body parameters from the last media type listed
+                    if body_content_type is not None and "schema" in request_body["content"][body_content_type]:
+                        body_schema = request_body["content"][body_content_type]["schema"]
+                        required = body_schema.get("required", [])
+                        properties = body_schema.get("properties", {})
+                        for name, prop in properties.items():
+                            tool = ToolParameter(
+                                name=name,
+                                label=I18nObject(en_US=name, zh_Hans=name),
+                                human_description=I18nObject(
+                                    en_US=prop.get("description", ""), zh_Hans=prop.get("description", "")
+                                ),
+                                type=ToolParameter.ToolParameterType.STRING,
+                                required=name in required,
+                                form=ToolParameter.ToolParameterForm.LLM,
+                                llm_description=prop.get("description", ""),
+                                default=prop.get("default", None),
+                                placeholder=I18nObject(
+                                    en_US=prop.get("description", ""), zh_Hans=prop.get("description", "")
+                                ),
+                            )
+
+                            # check if there is a type
+                            typ = ApiBasedToolSchemaParser._get_tool_parameter_type(prop)
+                            if typ:
+                                tool.type = typ
+
+                            parameters.append(tool)
+
+            # check if parameters is duplicated
+            parameters_count: dict[str, int] = {}
+            for parameter in parameters:
+                parameters_count[parameter.name] = parameters_count.get(parameter.name, 0) + 1
+            for name, count in parameters_count.items():
+                if count > 1:
+                    warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
+
+            # check if there is an operation id, use $path_$method as operation id if not
+            if "operationId" not in interface["operation"]:
+                # drop the leading "/" and every character outside [a-zA-Z0-9_-]
+                path = interface["path"].removeprefix("/")
+                path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
+                if not path:
+                    # nothing usable is left of the path, fall back to a random id
+                    path = str(uuid.uuid4())
+
+                interface["operation"]["operationId"] = f"{path}_{interface['method']}"
+
+            bundles.append(
+                ApiToolBundle(
+                    server_url=server_url + interface["path"],
+                    method=interface["method"],
+                    summary=interface["operation"]["description"]
+                    if "description" in interface["operation"]
+                    else interface["operation"].get("summary", None),
+                    operation_id=interface["operation"]["operationId"],
+                    parameters=parameters,
+                    author="",
+                    icon=None,
+                    openapi=interface["operation"],
+                )
+            )
+
+        return bundles
+
+    @staticmethod
+    def _get_tool_parameter_type(parameter: dict) -> ToolParameter.ToolParameterType | None:
+        """
+        resolve the tool parameter type declared by an openapi parameter or schema property
+
+        :param parameter: the parameter (or property) dict, a falsy value is treated as empty
+        :return: the matching tool parameter type, None if no supported type is declared
+        """
+        spec = parameter or {}
+
+        # a binary format means a single file, whatever "type" says
+        if spec.get("format") == "binary":
+            return ToolParameter.ToolParameterType.FILE
+
+        # the type is declared either directly or inside a nested "schema"
+        declared: str | None = None
+        if "type" in spec:
+            declared = spec["type"]
+        elif "schema" in spec and "type" in spec["schema"]:
+            declared = spec["schema"]["type"]
+
+        # an array only maps to a type when its items are binary files
+        if declared == "array":
+            items = spec.get("items") or spec.get("schema", {}).get("items")
+            if items and items.get("format") == "binary":
+                return ToolParameter.ToolParameterType.FILES
+            return None
+
+        scalar_types = {
+            "integer": ToolParameter.ToolParameterType.NUMBER,
+            "number": ToolParameter.ToolParameterType.NUMBER,
+            "boolean": ToolParameter.ToolParameterType.BOOLEAN,
+            "string": ToolParameter.ToolParameterType.STRING,
+        }
+        return scalar_types.get(declared)
+
+    @staticmethod
+    def parse_openapi_yaml_to_tool_bundle(
+        yaml: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openapi yaml to tool bundle
+
+        :param yaml: the yaml string
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: the tool bundle
+        :raises ToolApiSchemaError: if the yaml is empty or its top level is not a mapping
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        openapi = safe_load(yaml)
+        # safe_load gives None for empty input and a scalar or list for non-mapping yaml;
+        # neither can be an openapi document
+        if not isinstance(openapi, dict):
+            raise ToolApiSchemaError("Invalid openapi yaml.")
+        return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
+
+    @staticmethod
+    def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict:
+        """
+        parse swagger to openapi
+
+        :param swagger: the swagger dict
+        :param extra_info: the extra info, not used by the conversion
+        :param warning: the warning message, receives "missing_summary" for operations without summary/description
+        :return: the openapi dict
+        :raises ToolApiSchemaError: if there is no server, no path, or an operation without operationId
+        """
+        # keep the caller's dict even when it is still empty, so warnings recorded below reach the caller
+        warning = warning if warning is not None else {}
+
+        # convert swagger to openapi
+        info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
+
+        servers = swagger.get("servers") or []
+        if not servers:
+            raise ToolApiSchemaError("No server found in the swagger yaml.")
+
+        # check paths
+        swagger_paths = swagger.get("paths") or {}
+        if not swagger_paths:
+            raise ToolApiSchemaError("No paths found in the swagger yaml.")
+
+        # convert paths
+        paths: dict = {}
+        for path, path_item in swagger_paths.items():
+            paths[path] = {}
+            for method, operation in path_item.items():
+                if "operationId" not in operation:
+                    raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
+
+                if not operation.get("summary") and not operation.get("description"):
+                    warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
+
+                converted = {
+                    "operationId": operation["operationId"],
+                    "summary": operation.get("summary", ""),
+                    "description": operation.get("description", ""),
+                    "parameters": operation.get("parameters", []),
+                    "responses": operation.get("responses", {}),
+                }
+
+                if "requestBody" in operation:
+                    converted["requestBody"] = operation["requestBody"]
+
+                paths[path][method] = converted
+
+        # convert definitions; the section is optional, without it there are simply no schemas
+        schemas = dict(swagger.get("definitions") or {})
+
+        return {
+            "openapi": "3.0.0",
+            "info": {
+                "title": info.get("title", "Swagger"),
+                "description": info.get("description", "Swagger"),
+                "version": info.get("version", "1.0.0"),
+            },
+            "servers": servers,
+            "paths": paths,
+            "components": {"schemas": schemas},
+        }
+
+    @staticmethod
+    def parse_openai_plugin_json_to_tool_bundle(
+        json: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openai plugin json to tool bundle
+
+        :param json: the json string
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: the tool bundle
+        :raises ToolProviderNotFoundError: if the manifest is not valid json, lacks api.url or api.type,
+            or the referenced schema cannot be downloaded
+        :raises ToolNotSupportedError: if api.type is not "openapi"
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        try:
+            openai_plugin = json_loads(json)
+            api = openai_plugin["api"]
+            api_url = api["url"]
+            api_type = api["type"]
+        except (JSONDecodeError, KeyError, TypeError) as e:
+            # KeyError: a required field is missing; TypeError: the manifest is not a json object
+            raise ToolProviderNotFoundError("Invalid openai plugin json.") from e
+
+        if api_type != "openapi":
+            raise ToolNotSupportedError("Only openapi is supported now.")
+
+        # get openapi yaml
+        # NOTE(review): api_url is taken from the supplied manifest and fetched as-is; confirm that
+        # callers restrict which hosts may be requested
+        response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
+
+        if response.status_code != 200:
+            raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
+
+        return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
+            response.text, extra_info=extra_info, warning=warning
+        )
+
+    @staticmethod
+    def auto_parse_to_tool_bundle(
+        content: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> tuple[list[ApiToolBundle], str]:
+        """
+        detect the schema format of the content and parse it to tool bundle
+
+        The content is decoded as json first and as yaml second, then interpreted as openapi, swagger and
+        openai plugin in that order; the first interpretation that succeeds is returned.
+
+        :param content: the content
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: tools bundle, schema_type
+        :raises ToolApiSchemaError: if the content is neither json nor yaml, or if the openapi and swagger
+            attempts fail with ToolApiSchemaError and the plugin attempt with ToolNotSupportedError
+        """
+        warning = {} if warning is None else warning
+        extra_info = {} if extra_info is None else extra_info
+        parser = ApiBasedToolSchemaParser
+
+        text = content.strip()
+        document = None
+        json_failure = None
+        yaml_failure = None
+
+        # decode the text: json first, yaml as the fallback
+        try:
+            document = json_loads(text)
+        except JSONDecodeError as exc:
+            json_failure = exc
+
+        if document is None:
+            try:
+                document = safe_load(text)
+            except YAMLError as exc:
+                yaml_failure = exc
+
+        if document is None:
+            raise ToolApiSchemaError(
+                f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_failure)},"
+                f" yaml error: {str(yaml_failure)}"
+            )
+
+        openapi_failure = None
+        swagger_failure = None
+        plugin_failure = None
+
+        # first attempt: the document is already openapi
+        try:
+            bundles = parser.parse_openapi_to_tool_bundle(document, extra_info=extra_info, warning=warning)
+            return bundles, ApiProviderSchemaType.OPENAPI.value
+        except ToolApiSchemaError as exc:
+            openapi_failure = exc
+
+        # openapi parse error, fallback to swagger: convert it, then parse the result as openapi
+        try:
+            converted = parser.parse_swagger_to_openapi(document, extra_info=extra_info, warning=warning)
+            bundles = parser.parse_openapi_to_tool_bundle(converted, extra_info=extra_info, warning=warning)
+            return bundles, ApiProviderSchemaType.SWAGGER.value
+        except ToolApiSchemaError as exc:
+            swagger_failure = exc
+
+        # swagger parse error, fallback to openai plugin
+        try:
+            bundles = parser.parse_openai_plugin_json_to_tool_bundle(
+                json_dumps(document), extra_info=extra_info, warning=warning
+            )
+            return bundles, ApiProviderSchemaType.OPENAI_PLUGIN.value
+        except ToolNotSupportedError as exc:
+            # maybe it's not plugin at all
+            plugin_failure = exc
+
+        raise ToolApiSchemaError(
+            f"Invalid api schema, openapi error: {str(openapi_failure)}, swagger error: {str(swagger_failure)},"
+            f" openapi plugin error: {str(plugin_failure)}"
+        )
--- a/api/core/datasource/utils/text_processing_utils.py
+++ b/api/core/datasource/utils/text_processing_utils.py
@@ -0,0 +1,17 @@
+import re
+
+
+def remove_leading_symbols(text: str) -> str:
+    """
+    Strip the run of punctuation or symbol characters at the start of the given text.
+
+    Args:
+        text (str): The string to clean.
+
+    Returns:
+        str: The text without its leading punctuation/symbol run, unchanged if there is none.
+    """
+    # Unicode punctuation ranges plus a hand-picked set of ASCII symbols
+    # FIXME: this pattern is a quick fix for #11868, it may need a refactor later
+    leading_symbols = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"
+    found = re.match(leading_symbols, text)
+    if found is None:
+        return text
+    return text[found.end() :]
--- a/api/core/datasource/utils/uuid_utils.py
+++ b/api/core/datasource/utils/uuid_utils.py
@@ -0,0 +1,9 @@
+import uuid
+
+
+def is_valid_uuid(uuid_str: str) -> bool:
+    """Report whether ``uuid_str`` is accepted by the ``uuid.UUID`` constructor."""
+    try:
+        uuid.UUID(uuid_str)
+    except Exception:
+        # any failure to construct the UUID counts as "not valid"
+        return False
+    else:
+        return True
--- a/api/core/datasource/utils/workflow_configuration_sync.py
+++ b/api/core/datasource/utils/workflow_configuration_sync.py
@@ -0,0 +1,43 @@
+from collections.abc import Mapping, Sequence
+from typing import Any
+
+from core.app.app_config.entities import VariableEntity
+from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration
+
+
+class WorkflowToolConfigurationUtils:
+    """Helpers that check workflow tool parameter configurations against a workflow graph."""
+
+    @classmethod
+    def check_parameter_configurations(cls, configurations: list[Mapping[str, Any]]):
+        """
+        validate every configuration with WorkflowToolParameterConfiguration
+
+        the validated models are discarded, only an error raised by model_validate matters
+        """
+        for raw_configuration in configurations:
+            WorkflowToolParameterConfiguration.model_validate(raw_configuration)
+
+    @classmethod
+    def get_workflow_graph_variables(cls, graph: Mapping[str, Any]) -> Sequence[VariableEntity]:
+        """
+        get the variables declared on the first node of type "start" in the workflow graph
+
+        :param graph: the workflow graph
+        :return: the validated variables, an empty list if there is no start node
+        """
+        start_node = None
+        for node in graph.get("nodes", []):
+            if node.get("data", {}).get("type") == "start":
+                start_node = node
+                break
+
+        if not start_node:
+            return []
+
+        raw_variables = start_node.get("data", {}).get("variables", [])
+        return [VariableEntity.model_validate(raw_variable) for raw_variable in raw_variables]
+
+    @classmethod
+    def check_is_synced(
+        cls, variables: list[VariableEntity], tool_configurations: list[WorkflowToolParameterConfiguration]
+    ):
+        """
+        check that the tool configurations still match the workflow variables
+
+        they match when both lists have the same length and every configuration name is a variable name
+
+        raise ValueError if not synced
+        """
+        known_names = [variable.variable for variable in variables]
+
+        if len(tool_configurations) != len(variables):
+            raise ValueError("parameter configuration mismatch, please republish the tool to update")
+
+        if any(parameter.name not in known_names for parameter in tool_configurations):
+            raise ValueError("parameter configuration mismatch, please republish the tool to update")
--- a/api/core/datasource/utils/yaml_utils.py
+++ b/api/core/datasource/utils/yaml_utils.py
@@ -0,0 +1,35 @@
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml  # type: ignore
+from yaml import YAMLError
+
+logger = logging.getLogger(__name__)
+
+
+def load_yaml_file(file_path: str, ignore_error: bool = True, default_value: Any = {}) -> Any:
+    """
+    Safe loading a YAML file
+    :param file_path: the path of the YAML file
+    :param ignore_error:
+        if True, return default_value if error occurs and the error will be logged in debug level
+        if False, raise error if error occurs
+    :param default_value: the value returned when errors ignored, also returned when the content is empty
+    :return: an object of the YAML content
+    :raises FileNotFoundError: if the file does not exist and ignore_error is False
+    :raises OSError: if the file cannot be opened and ignore_error is False
+    :raises YAMLError: if the content cannot be loaded and ignore_error is False
+    """
+    # NOTE(review): the `{}` default is a single shared object and is handed back to callers as-is;
+    # a caller that mutates the result would affect later calls - confirm none does.
+    if not file_path or not Path(file_path).exists():
+        if ignore_error:
+            logger.debug("YAML file not found: %s", file_path)
+            return default_value
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    try:
+        yaml_file = open(file_path, encoding="utf-8")
+    except OSError as e:
+        # the path exists but cannot be opened (e.g. it is a directory); honour ignore_error here too
+        if ignore_error:
+            logger.debug("Failed to open YAML file %s: %s", file_path, e)
+            return default_value
+        raise
+
+    with yaml_file:
+        try:
+            yaml_content = yaml.safe_load(yaml_file)
+        except Exception as e:
+            if ignore_error:
+                logger.debug("Failed to load YAML file %s: %s", file_path, e)
+                return default_value
+            raise YAMLError(f"Failed to load YAML file {file_path}: {e}") from e
+
+    # an empty document (or any falsy content) falls back to the default
+    return yaml_content or default_value
--- a/api/core/entities/provider_configuration.py
+++ b/api/core/entities/provider_configuration.py
@@ -205,10 +205,16 @@ class ProviderConfiguration(BaseModel):
        """
        Get custom provider record.
        """
+        # get provider
+        model_provider_id = ModelProviderID(self.provider.provider)
+        provider_names = [self.provider.provider]
+        if model_provider_id.is_langgenius():
+            provider_names.append(model_provider_id.provider_name)
+
        stmt = select(Provider).where(
            Provider.tenant_id == self.tenant_id,
            Provider.provider_type == ProviderType.CUSTOM.value,
-            Provider.provider_name.in_(self._get_provider_names()),
+            Provider.provider_name.in_(provider_names),
        )

        return session.execute(stmt).scalar_one_or_none()
@@ -270,7 +276,7 @@ class ProviderConfiguration(BaseModel):
        """
        stmt = select(ProviderCredential.id).where(
            ProviderCredential.tenant_id == self.tenant_id,
-            ProviderCredential.provider_name.in_(self._get_provider_names()),
+            ProviderCredential.provider_name == self.provider.provider,
            ProviderCredential.credential_name == credential_name,
        )
        if exclude_id:
@@ -318,7 +324,7 @@ class ProviderConfiguration(BaseModel):
                try:
                    stmt = select(ProviderCredential).where(
                        ProviderCredential.tenant_id == self.tenant_id,
-                        ProviderCredential.provider_name.in_(self._get_provider_names()),
+                        ProviderCredential.provider_name == self.provider.provider,
                        ProviderCredential.id == credential_id,
                    )
                    credential_record = s.execute(stmt).scalar_one_or_none()
@@ -368,7 +374,7 @@ class ProviderConfiguration(BaseModel):
            session=session,
            query_factory=lambda: select(ProviderCredential).where(
                ProviderCredential.tenant_id == self.tenant_id,
-                ProviderCredential.provider_name.in_(self._get_provider_names()),
+                ProviderCredential.provider_name == self.provider.provider,
            ),
        )

@@ -381,7 +387,7 @@ class ProviderConfiguration(BaseModel):
            session=session,
            query_factory=lambda: select(ProviderModelCredential).where(
                ProviderModelCredential.tenant_id == self.tenant_id,
-                ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                ProviderModelCredential.provider_name == self.provider.provider,
                ProviderModelCredential.model_name == model,
                ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            ),
@@ -417,16 +423,6 @@ class ProviderConfiguration(BaseModel):
            logger.warning("Error generating next credential name: %s", str(e))
            return "API KEY 1"

-    def _get_provider_names(self):
-        """
-        The provider name might be stored in the database as either `openai` or `langgenius/openai/openai`.
-        """
-        model_provider_id = ModelProviderID(self.provider.provider)
-        provider_names = [self.provider.provider]
-        if model_provider_id.is_langgenius():
-            provider_names.append(model_provider_id.provider_name)
-        return provider_names
-
    def create_provider_credential(self, credentials: dict, credential_name: str | None):
        """
        Add custom provider credentials.
@@ -505,7 +501,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderCredential).where(
                ProviderCredential.id == credential_id,
                ProviderCredential.tenant_id == self.tenant_id,
-                ProviderCredential.provider_name.in_(self._get_provider_names()),
+                ProviderCredential.provider_name == self.provider.provider,
            )

            # Get the credential record to update
@@ -558,7 +554,7 @@ class ProviderConfiguration(BaseModel):
        # Find all load balancing configs that use this credential_id
        stmt = select(LoadBalancingModelConfig).where(
            LoadBalancingModelConfig.tenant_id == self.tenant_id,
-            LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
+            LoadBalancingModelConfig.provider_name == self.provider.provider,
            LoadBalancingModelConfig.credential_id == credential_id,
            LoadBalancingModelConfig.credential_source_type == credential_source,
        )
@@ -595,7 +591,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderCredential).where(
                ProviderCredential.id == credential_id,
                ProviderCredential.tenant_id == self.tenant_id,
-                ProviderCredential.provider_name.in_(self._get_provider_names()),
+                ProviderCredential.provider_name == self.provider.provider,
            )

            # Get the credential record to update
@@ -606,7 +602,7 @@ class ProviderConfiguration(BaseModel):
            # Check if this credential is used in load balancing configs
            lb_stmt = select(LoadBalancingModelConfig).where(
                LoadBalancingModelConfig.tenant_id == self.tenant_id,
-                LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
+                LoadBalancingModelConfig.provider_name == self.provider.provider,
                LoadBalancingModelConfig.credential_id == credential_id,
                LoadBalancingModelConfig.credential_source_type == "provider",
            )
@@ -628,7 +624,7 @@ class ProviderConfiguration(BaseModel):
                # if this is the last credential, we need to delete the provider record
                count_stmt = select(func.count(ProviderCredential.id)).where(
                    ProviderCredential.tenant_id == self.tenant_id,
-                    ProviderCredential.provider_name.in_(self._get_provider_names()),
+                    ProviderCredential.provider_name == self.provider.provider,
                )
                available_credentials_count = session.execute(count_stmt).scalar() or 0
                session.delete(credential_record)
@@ -672,7 +668,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderCredential).where(
                ProviderCredential.id == credential_id,
                ProviderCredential.tenant_id == self.tenant_id,
-                ProviderCredential.provider_name.in_(self._get_provider_names()),
+                ProviderCredential.provider_name == self.provider.provider,
            )
            credential_record = session.execute(stmt).scalar_one_or_none()
            if not credential_record:
@@ -741,7 +737,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderModelCredential).where(
                ProviderModelCredential.id == credential_id,
                ProviderModelCredential.tenant_id == self.tenant_id,
-                ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                ProviderModelCredential.provider_name == self.provider.provider,
                ProviderModelCredential.model_name == model,
                ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            )
@@ -788,7 +784,7 @@ class ProviderConfiguration(BaseModel):
        """
        stmt = select(ProviderModelCredential).where(
            ProviderModelCredential.tenant_id == self.tenant_id,
-            ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+            ProviderModelCredential.provider_name == self.provider.provider,
            ProviderModelCredential.model_name == model,
            ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            ProviderModelCredential.credential_name == credential_name,
@@ -864,7 +860,7 @@ class ProviderConfiguration(BaseModel):
                    stmt = select(ProviderModelCredential).where(
                        ProviderModelCredential.id == credential_id,
                        ProviderModelCredential.tenant_id == self.tenant_id,
-                        ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                        ProviderModelCredential.provider_name == self.provider.provider,
                        ProviderModelCredential.model_name == model,
                        ProviderModelCredential.model_type == model_type.to_origin_model_type(),
                    )
@@ -1001,7 +997,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderModelCredential).where(
                ProviderModelCredential.id == credential_id,
                ProviderModelCredential.tenant_id == self.tenant_id,
-                ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                ProviderModelCredential.provider_name == self.provider.provider,
                ProviderModelCredential.model_name == model,
                ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            )
@@ -1046,7 +1042,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderModelCredential).where(
                ProviderModelCredential.id == credential_id,
                ProviderModelCredential.tenant_id == self.tenant_id,
-                ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                ProviderModelCredential.provider_name == self.provider.provider,
                ProviderModelCredential.model_name == model,
                ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            )
@@ -1056,7 +1052,7 @@ class ProviderConfiguration(BaseModel):

            lb_stmt = select(LoadBalancingModelConfig).where(
                LoadBalancingModelConfig.tenant_id == self.tenant_id,
-                LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
+                LoadBalancingModelConfig.provider_name == self.provider.provider,
                LoadBalancingModelConfig.credential_id == credential_id,
                LoadBalancingModelConfig.credential_source_type == "custom_model",
            )
@@ -1079,7 +1075,7 @@ class ProviderConfiguration(BaseModel):
                # if this is the last credential, we need to delete the custom model record
                count_stmt = select(func.count(ProviderModelCredential.id)).where(
                    ProviderModelCredential.tenant_id == self.tenant_id,
-                    ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                    ProviderModelCredential.provider_name == self.provider.provider,
                    ProviderModelCredential.model_name == model,
                    ProviderModelCredential.model_type == model_type.to_origin_model_type(),
                )
@@ -1119,7 +1115,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderModelCredential).where(
                ProviderModelCredential.id == credential_id,
                ProviderModelCredential.tenant_id == self.tenant_id,
-                ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                ProviderModelCredential.provider_name == self.provider.provider,
                ProviderModelCredential.model_name == model,
                ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            )
@@ -1161,7 +1157,7 @@ class ProviderConfiguration(BaseModel):
            stmt = select(ProviderModelCredential).where(
                ProviderModelCredential.id == credential_id,
                ProviderModelCredential.tenant_id == self.tenant_id,
-                ProviderModelCredential.provider_name.in_(self._get_provider_names()),
+                ProviderModelCredential.provider_name == self.provider.provider,
                ProviderModelCredential.model_name == model,
                ProviderModelCredential.model_type == model_type.to_origin_model_type(),
            )
@@ -1208,9 +1204,15 @@ class ProviderConfiguration(BaseModel):
        """
        Get provider model setting.
        """
+
+        model_provider_id = ModelProviderID(self.provider.provider)
+        provider_names = [self.provider.provider]
+        if model_provider_id.is_langgenius():
+            provider_names.append(model_provider_id.provider_name)
+
        stmt = select(ProviderModelSetting).where(
            ProviderModelSetting.tenant_id == self.tenant_id,
-            ProviderModelSetting.provider_name.in_(self._get_provider_names()),
+            ProviderModelSetting.provider_name.in_(provider_names),
            ProviderModelSetting.model_type == model_type.to_origin_model_type(),
            ProviderModelSetting.model_name == model,
        )
@@ -1382,9 +1384,15 @@ class ProviderConfiguration(BaseModel):
            return

        def _switch(s: Session):
+            # get preferred provider
+            model_provider_id = ModelProviderID(self.provider.provider)
+            provider_names = [self.provider.provider]
+            if model_provider_id.is_langgenius():
+                provider_names.append(model_provider_id.provider_name)
+
            stmt = select(TenantPreferredModelProvider).where(
                TenantPreferredModelProvider.tenant_id == self.tenant_id,
-                TenantPreferredModelProvider.provider_name.in_(self._get_provider_names()),
+                TenantPreferredModelProvider.provider_name.in_(provider_names),
            )
            preferred_model_provider = s.execute(stmt).scalars().first()

--- a/api/core/ops/aliyun_trace/data_exporter/traceclient.py
+++ b/api/core/ops/aliyun_trace/data_exporter/traceclient.py
@@ -8,7 +8,7 @@ from collections import deque
 from collections.abc import Sequence
 from datetime import datetime

-import httpx
+import requests
 from opentelemetry import trace as trace_api
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
 from opentelemetry.sdk.resources import Resource
@@ -65,13 +65,13 @@ class TraceClient:

    def api_check(self):
        try:
-            response = httpx.head(self.endpoint, timeout=5)
+            response = requests.head(self.endpoint, timeout=5)
            if response.status_code == 405:
                return True
            else:
                logger.debug("AliyunTrace API check failed: Unexpected status code: %s", response.status_code)
                return False
-        except httpx.RequestError as e:
+        except requests.RequestException as e:
            logger.debug("AliyunTrace API check failed: %s", str(e))
            raise ValueError(f"AliyunTrace API check failed: {str(e)}")

--- a/api/core/provider_manager.py
+++ b/api/core/provider_manager.py
@@ -513,21 +513,6 @@ class ProviderManager:

        return provider_name_to_provider_load_balancing_model_configs_dict

-    @staticmethod
-    def _get_provider_names(provider_name: str) -> list[str]:
-        """
-        provider_name: `openai` or `langgenius/openai/openai`
-        return: [`openai`, `langgenius/openai/openai`]
-        """
-        provider_names = [provider_name]
-        model_provider_id = ModelProviderID(provider_name)
-        if model_provider_id.is_langgenius():
-            if "/" in provider_name:
-                provider_names.append(model_provider_id.provider_name)
-            else:
-                provider_names.append(str(model_provider_id))
-        return provider_names
-
    @staticmethod
    def get_provider_available_credentials(tenant_id: str, provider_name: str) -> list[CredentialConfiguration]:
        """
@@ -540,10 +525,7 @@ class ProviderManager:
        with Session(db.engine, expire_on_commit=False) as session:
            stmt = (
                select(ProviderCredential)
-                .where(
-                    ProviderCredential.tenant_id == tenant_id,
-                    ProviderCredential.provider_name.in_(ProviderManager._get_provider_names(provider_name)),
-                )
+                .where(ProviderCredential.tenant_id == tenant_id, ProviderCredential.provider_name == provider_name)
                .order_by(ProviderCredential.created_at.desc())
            )

@@ -572,7 +554,7 @@ class ProviderManager:
                select(ProviderModelCredential)
                .where(
                    ProviderModelCredential.tenant_id == tenant_id,
-                    ProviderModelCredential.provider_name.in_(ProviderManager._get_provider_names(provider_name)),
+                    ProviderModelCredential.provider_name == provider_name,
                    ProviderModelCredential.model_name == model_name,
                    ProviderModelCredential.model_type == model_type,
                )
--- a/api/core/rag/datasource/vdb/baidu/baidu_vector.py
+++ b/api/core/rag/datasource/vdb/baidu/baidu_vector.py
@@ -1,5 +1,4 @@
 import json
-import logging
 import time
 import uuid
 from typing import Any
@@ -10,24 +9,11 @@ from pymochow import MochowClient  # type: ignore
 from pymochow.auth.bce_credentials import BceCredentials  # type: ignore
 from pymochow.configuration import Configuration  # type: ignore
 from pymochow.exception import ServerError  # type: ignore
-from pymochow.model.database import Database
 from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, ServerErrCode, TableState  # type: ignore
-from pymochow.model.schema import (
-    Field,
-    FilteringIndex,
-    HNSWParams,
-    InvertedIndex,
-    InvertedIndexAnalyzer,
-    InvertedIndexFieldAttribute,
-    InvertedIndexParams,
-    InvertedIndexParseMode,
-    Schema,
-    VectorIndex,
-)  # type: ignore
-from pymochow.model.table import AnnSearch, BM25SearchRequest, HNSWSearchParams, Partition, Row  # type: ignore
+from pymochow.model.schema import Field, HNSWParams, Schema, VectorIndex  # type: ignore
+from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row  # type: ignore

 from configs import dify_config
-from core.rag.datasource.vdb.field import Field as VDBField
 from core.rag.datasource.vdb.vector_base import BaseVector
 from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
 from core.rag.datasource.vdb.vector_type import VectorType
@@ -36,8 +22,6 @@ from core.rag.models.document import Document
 from extensions.ext_redis import redis_client
 from models.dataset import Dataset

-logger = logging.getLogger(__name__)
-

 class BaiduConfig(BaseModel):
    endpoint: str
@@ -46,11 +30,9 @@ class BaiduConfig(BaseModel):
    api_key: str
    database: str
    index_type: str = "HNSW"
-    metric_type: str = "IP"
+    metric_type: str = "L2"
    shard: int = 1
    replicas: int = 3
-    inverted_index_analyzer: str = "DEFAULT_ANALYZER"
-    inverted_index_parser_mode: str = "COARSE_MODE"

    @model_validator(mode="before")
    @classmethod
@@ -67,9 +49,13 @@ class BaiduConfig(BaseModel):


 class BaiduVector(BaseVector):
-    vector_index: str = "vector_idx"
-    filtering_index: str = "filtering_idx"
-    inverted_index: str = "content_inverted_idx"
+    field_id: str = "id"
+    field_vector: str = "vector"
+    field_text: str = "text"
+    field_metadata: str = "metadata"
+    field_app_id: str = "app_id"
+    field_annotation_id: str = "annotation_id"
+    index_vector: str = "vector_idx"

    def __init__(self, collection_name: str, config: BaiduConfig):
        super().__init__(collection_name)
@@ -88,6 +74,8 @@ class BaiduVector(BaseVector):
        self.add_texts(texts, embeddings)

    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
+        texts = [doc.page_content for doc in documents]
+        metadatas = [doc.metadata for doc in documents if doc.metadata is not None]
        total_count = len(documents)
        batch_size = 1000

@@ -96,31 +84,29 @@ class BaiduVector(BaseVector):
        for start in range(0, total_count, batch_size):
            end = min(start + batch_size, total_count)
            rows = []
+            assert len(metadatas) == total_count, "metadatas length should be equal to total_count"
            for i in range(start, end, 1):
-                metadata = documents[i].metadata
                row = Row(
-                    id=metadata.get("doc_id", str(uuid.uuid4())),
-                    page_content=documents[i].page_content,
-                    metadata=metadata,
+                    id=metadatas[i].get("doc_id", str(uuid.uuid4())),
                    vector=embeddings[i],
+                    text=texts[i],
+                    metadata=json.dumps(metadatas[i]),
+                    app_id=metadatas[i].get("app_id", ""),
+                    annotation_id=metadatas[i].get("annotation_id", ""),
                )
                rows.append(row)
            table.upsert(rows=rows)

        # rebuild vector index after upsert finished
-        table.rebuild_index(self.vector_index)
-        timeout = 3600  # 1 hour timeout
-        start_time = time.time()
+        table.rebuild_index(self.index_vector)
        while True:
            time.sleep(1)
-            index = table.describe_index(self.vector_index)
+            index = table.describe_index(self.index_vector)
            if index.state == IndexState.NORMAL:
                break
-            if time.time() - start_time > timeout:
-                raise TimeoutError(f"Index rebuild timeout after {timeout} seconds")

    def text_exists(self, id: str) -> bool:
-        res = self._db.table(self._collection_name).query(primary_key={VDBField.PRIMARY_KEY: id})
+        res = self._db.table(self._collection_name).query(primary_key={self.field_id: id})
        if res and res.code == 0:
            return True
        return False
@@ -129,73 +115,53 @@ class BaiduVector(BaseVector):
        if not ids:
            return
        quoted_ids = [f"'{id}'" for id in ids]
-        self._db.table(self._collection_name).delete(filter=f"{VDBField.PRIMARY_KEY} IN({', '.join(quoted_ids)})")
+        self._db.table(self._collection_name).delete(filter=f"id IN({', '.join(quoted_ids)})")

    def delete_by_metadata_field(self, key: str, value: str):
-        # Escape double quotes in value to prevent injection
-        escaped_value = value.replace('"', '\\"')
-        self._db.table(self._collection_name).delete(filter=f'metadata["{key}"] = "{escaped_value}"')
+        self._db.table(self._collection_name).delete(filter=f"{key} = '{value}'")

    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
        query_vector = [float(val) if isinstance(val, np.float64) else val for val in query_vector]
        document_ids_filter = kwargs.get("document_ids_filter")
-        filter = ""
        if document_ids_filter:
            document_ids = ", ".join(f"'{id}'" for id in document_ids_filter)
-            filter = f'metadata["document_id"] IN({document_ids})'
-        anns = AnnSearch(
-            vector_field=VDBField.VECTOR,
-            vector_floats=query_vector,
-            params=HNSWSearchParams(ef=kwargs.get("ef", 20), limit=kwargs.get("top_k", 4)),
-            filter=filter,
-        )
+            anns = AnnSearch(
+                vector_field=self.field_vector,
+                vector_floats=query_vector,
+                params=HNSWSearchParams(ef=kwargs.get("ef", 10), limit=kwargs.get("top_k", 4)),
+                filter=f"document_id IN ({document_ids})",
+            )
+        else:
+            anns = AnnSearch(
+                vector_field=self.field_vector,
+                vector_floats=query_vector,
+                params=HNSWSearchParams(ef=kwargs.get("ef", 10), limit=kwargs.get("top_k", 4)),
+            )
        res = self._db.table(self._collection_name).search(
            anns=anns,
-            projections=[VDBField.CONTENT_KEY, VDBField.METADATA_KEY],
-            retrieve_vector=False,
+            projections=[self.field_id, self.field_text, self.field_metadata],
+            retrieve_vector=True,
        )
        score_threshold = float(kwargs.get("score_threshold") or 0.0)
        return self._get_search_res(res, score_threshold)

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        # document ids filter
-        document_ids_filter = kwargs.get("document_ids_filter")
-        filter = ""
-        if document_ids_filter:
-            document_ids = ", ".join(f"'{id}'" for id in document_ids_filter)
-            filter = f'metadata["document_id"] IN({document_ids})'
-
-        request = BM25SearchRequest(
-            index_name=self.inverted_index, search_text=query, limit=kwargs.get("top_k", 4), filter=filter
-        )
-        res = self._db.table(self._collection_name).bm25_search(
-            request=request, projections=[VDBField.CONTENT_KEY, VDBField.METADATA_KEY]
-        )
-        score_threshold = float(kwargs.get("score_threshold") or 0.0)
-        return self._get_search_res(res, score_threshold)
+        # baidu vector database doesn't support bm25 search on current version
+        return []

    def _get_search_res(self, res, score_threshold) -> list[Document]:
        docs = []
        for row in res.rows:
            row_data = row.get("row", {})
+            meta = row_data.get(self.field_metadata)
+            if meta is not None:
+                meta = json.loads(meta)
            score = row.get("score", 0.0)
-            meta = row_data.get(VDBField.METADATA_KEY, {})
-
-            # Handle both JSON string and dict formats for backward compatibility
-            if isinstance(meta, str):
-                try:
-                    import json
-
-                    meta = json.loads(meta)
-                except (json.JSONDecodeError, TypeError):
-                    meta = {}
-            elif not isinstance(meta, dict):
-                meta = {}
-
            if score >= score_threshold:
                meta["score"] = score
-                doc = Document(page_content=row_data.get(VDBField.CONTENT_KEY), metadata=meta)
+                doc = Document(page_content=row_data.get(self.field_text), metadata=meta)
                docs.append(doc)
+
        return docs

    def delete(self):
@@ -212,7 +178,7 @@ class BaiduVector(BaseVector):
        client = MochowClient(config)
        return client

-    def _init_database(self) -> Database:
+    def _init_database(self):
        exists = False
        for db in self._client.list_databases():
            if db.database_name == self._client_config.database:
@@ -226,10 +192,10 @@ class BaiduVector(BaseVector):
                self._client.create_database(database_name=self._client_config.database)
            except ServerError as e:
                if e.code == ServerErrCode.DB_ALREADY_EXIST:
-                    return self._client.database(self._client_config.database)
+                    pass
                else:
                    raise
-            return self._client.database(self._client_config.database)
+            return

    def _table_existed(self) -> bool:
        tables = self._db.list_table()
@@ -266,7 +232,7 @@ class BaiduVector(BaseVector):
            fields = []
            fields.append(
                Field(
-                    VDBField.PRIMARY_KEY,
+                    self.field_id,
                    FieldType.STRING,
                    primary_key=True,
                    partition_key=True,
@@ -274,57 +240,24 @@ class BaiduVector(BaseVector):
                    not_null=True,
                )
            )
-            fields.append(Field(VDBField.CONTENT_KEY, FieldType.TEXT, not_null=False))
-            fields.append(Field(VDBField.METADATA_KEY, FieldType.JSON, not_null=False))
-            fields.append(Field(VDBField.VECTOR, FieldType.FLOAT_VECTOR, not_null=True, dimension=dimension))
+            fields.append(Field(self.field_metadata, FieldType.STRING, not_null=True))
+            fields.append(Field(self.field_app_id, FieldType.STRING))
+            fields.append(Field(self.field_annotation_id, FieldType.STRING))
+            fields.append(Field(self.field_text, FieldType.TEXT, not_null=True))
+            fields.append(Field(self.field_vector, FieldType.FLOAT_VECTOR, not_null=True, dimension=dimension))

            # Construct vector index params
            indexes = []
            indexes.append(
                VectorIndex(
-                    index_name=self.vector_index,
+                    index_name="vector_idx",
                    index_type=index_type,
-                    field=VDBField.VECTOR,
+                    field="vector",
                    metric_type=metric_type,
                    params=HNSWParams(m=16, efconstruction=200),
                )
            )

-            # Filtering index
-            indexes.append(
-                FilteringIndex(
-                    index_name=self.filtering_index,
-                    fields=[VDBField.METADATA_KEY],
-                )
-            )
-
-            # Get analyzer and parse_mode from config
-            analyzer = getattr(
-                InvertedIndexAnalyzer,
-                self._client_config.inverted_index_analyzer,
-                InvertedIndexAnalyzer.DEFAULT_ANALYZER,
-            )
-
-            parse_mode = getattr(
-                InvertedIndexParseMode,
-                self._client_config.inverted_index_parser_mode,
-                InvertedIndexParseMode.COARSE_MODE,
-            )
-
-            # Inverted index
-            indexes.append(
-                InvertedIndex(
-                    index_name=self.inverted_index,
-                    fields=[VDBField.CONTENT_KEY],
-                    params=InvertedIndexParams(
-                        analyzer=analyzer,
-                        parse_mode=parse_mode,
-                        case_sensitive=True,
-                    ),
-                    field_attributes=[InvertedIndexFieldAttribute.ANALYZED],
-                )
-            )
-
            # Create table
            self._db.create_table(
                table_name=self._collection_name,
@@ -335,15 +268,11 @@ class BaiduVector(BaseVector):
            )

            # Wait for table created
-            timeout = 300  # 5 minutes timeout
-            start_time = time.time()
            while True:
                time.sleep(1)
                table = self._db.describe_table(self._collection_name)
                if table.state == TableState.NORMAL:
                    break
-                if time.time() - start_time > timeout:
-                    raise TimeoutError(f"Table creation timeout after {timeout} seconds")
            redis_client.set(table_exist_cache_key, 1, ex=3600)


@@ -367,7 +296,5 @@ class BaiduVectorFactory(AbstractVectorFactory):
                database=dify_config.BAIDU_VECTOR_DB_DATABASE or "",
                shard=dify_config.BAIDU_VECTOR_DB_SHARD,
                replicas=dify_config.BAIDU_VECTOR_DB_REPLICAS,
-                inverted_index_analyzer=dify_config.BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER,
-                inverted_index_parser_mode=dify_config.BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE,
            ),
        )
--- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py
+++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py
@@ -4,7 +4,7 @@ import math
 from typing import Any

 from pydantic import BaseModel, model_validator
-from pyobvector import VECTOR, ObVecClient, l2_distance  # type: ignore
+from pyobvector import VECTOR, FtsIndexParam, FtsParser, ObVecClient, l2_distance  # type: ignore
 from sqlalchemy import JSON, Column, String
 from sqlalchemy.dialects.mysql import LONGTEXT

@@ -117,39 +117,22 @@ class OceanBaseVector(BaseVector):
                columns=cols,
                vidxs=vidx_params,
            )
-            logger.debug("DEBUG: Table '%s' created successfully", self._collection_name)
-
-            if self._hybrid_search_enabled:
-                # Get parser from config or use default ik parser
-                parser_name = dify_config.OCEANBASE_FULLTEXT_PARSER or "ik"
-
-                allowed_parsers = ["ngram", "beng", "space", "ngram2", "ik", "japanese_ftparser", "thai_ftparser"]
-                if parser_name not in allowed_parsers:
-                    raise ValueError(
-                        f"Invalid OceanBase full-text parser: {parser_name}. "
-                        f"Allowed values are: {', '.join(allowed_parsers)}"
+            try:
+                if self._hybrid_search_enabled:
+                    self._client.create_fts_idx_with_fts_index_param(
+                        table_name=self._collection_name,
+                        fts_idx_param=FtsIndexParam(
+                            index_name="fulltext_index_for_col_text",
+                            field_names=["text"],
+                            parser_type=FtsParser.IK,
+                        ),
                    )
-                logger.debug("Hybrid search is enabled, parser_name='%s'", parser_name)
-                logger.debug(
-                    "About to create fulltext index for collection '%s' using parser '%s'",
-                    self._collection_name,
-                    parser_name,
+            except Exception as e:
+                raise Exception(
+                    "Failed to add fulltext index to the target table, your OceanBase version must be 4.3.5.1 or above "
+                    + "to support fulltext index and vector index in the same table",
+                    e,
                )
-                try:
-                    sql_command = f"""ALTER TABLE {self._collection_name}
-                    ADD FULLTEXT INDEX fulltext_index_for_col_text (text) WITH PARSER {parser_name}"""
-                    logger.debug("DEBUG: Executing SQL: %s", sql_command)
-                    self._client.perform_raw_text_sql(sql_command)
-                    logger.debug("DEBUG: Fulltext index created successfully for '%s'", self._collection_name)
-                except Exception as e:
-                    logger.exception("Exception occurred while creating fulltext index")
-                    raise Exception(
-                        "Failed to add fulltext index to the target table, your OceanBase version must be "
-                        "4.3.5.1 or above to support fulltext index and vector index in the same table"
-                    ) from e
-            else:
-                logger.debug("DEBUG: Hybrid search is NOT enabled for '%s'", self._collection_name)
-
            self._client.refresh_metadata([self._collection_name])
            redis_client.set(collection_exist_cache_key, 1, ex=3600)

--- a/api/core/rag/docstore/dataset_docstore.py
+++ b/api/core/rag/docstore/dataset_docstore.py
@@ -93,6 +93,17 @@ class DatasetDocumentStore:

            segment_document = self.get_document_segment(doc_id=doc.metadata["doc_id"])

+            # Check if a segment with the same content hash already exists in the dataset
+            existing_segment_by_hash = db.session.query(DocumentSegment).filter_by(
+                dataset_id=self._dataset.id,
+                index_node_hash=doc.metadata["doc_hash"],
+                enabled=True
+            ).first()
+
+            if existing_segment_by_hash:
+                # Skip creating duplicate segment with same content hash
+                continue
+
            # NOTE: doc could already exist in the store, but we overwrite it
            if not allow_update and segment_document:
                raise ValueError(
--- a/api/core/workflow/graph_engine/domain/graph_execution.py
+++ b/api/core/workflow/graph_engine/domain/graph_execution.py
@@ -41,8 +41,7 @@ class GraphExecutionState(BaseModel):
    completed: bool = Field(default=False)
    aborted: bool = Field(default=False)
    error: GraphExecutionErrorState | None = Field(default=None)
-    exceptions_count: int = Field(default=0)
-    node_executions: list[NodeExecutionState] = Field(default_factory=list[NodeExecutionState])
+    node_executions: list[NodeExecutionState] = Field(default_factory=list)


 def _serialize_error(error: Exception | None) -> GraphExecutionErrorState | None:
@@ -104,8 +103,7 @@ class GraphExecution:
    completed: bool = False
    aborted: bool = False
    error: Exception | None = None
-    node_executions: dict[str, NodeExecution] = field(default_factory=dict[str, NodeExecution])
-    exceptions_count: int = 0
+    node_executions: dict[str, NodeExecution] = field(default_factory=dict)

    def start(self) -> None:
        """Mark the graph execution as started."""
@@ -174,7 +172,6 @@ class GraphExecution:
            completed=self.completed,
            aborted=self.aborted,
            error=_serialize_error(self.error),
-            exceptions_count=self.exceptions_count,
            node_executions=node_states,
        )

@@ -198,7 +195,6 @@ class GraphExecution:
        self.completed = state.completed
        self.aborted = state.aborted
        self.error = _deserialize_error(state.error)
-        self.exceptions_count = state.exceptions_count
        self.node_executions = {
            item.node_id: NodeExecution(
                node_id=item.node_id,
@@ -209,7 +205,3 @@ class GraphExecution:
            )
            for item in state.node_executions
        }
-
-    def record_node_failure(self) -> None:
-        """Increment the count of node failures encountered during execution."""
-        self.exceptions_count += 1
--- a/api/core/workflow/graph_engine/event_management/event_handlers.py
+++ b/api/core/workflow/graph_engine/event_management/event_handlers.py
@@ -3,12 +3,11 @@ Event handler implementations for different event types.
 """

 import logging
-from collections.abc import Mapping
 from functools import singledispatchmethod
 from typing import TYPE_CHECKING, final

 from core.workflow.entities import GraphRuntimeState
-from core.workflow.enums import ErrorStrategy, NodeExecutionType
+from core.workflow.enums import NodeExecutionType
 from core.workflow.graph import Graph
 from core.workflow.graph_events import (
    GraphNodeEventBase,
@@ -123,15 +122,13 @@ class EventHandler:
        """
        # Track execution in domain model
        node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
-        is_initial_attempt = node_execution.retry_count == 0
        node_execution.mark_started(event.id)

        # Track in response coordinator for stream ordering
        self._response_coordinator.track_node_execution(event.node_id, event.id)

-        # Collect the event only for the first attempt; retries remain silent
-        if is_initial_attempt:
-            self._event_collector.collect(event)
+        # Collect the event
+        self._event_collector.collect(event)

    @_dispatch.register
    def _(self, event: NodeRunStreamChunkEvent) -> None:
@@ -164,7 +161,7 @@ class EventHandler:
        node_execution.mark_taken()

        # Store outputs in variable pool
-        self._store_node_outputs(event.node_id, event.node_run_result.outputs)
+        self._store_node_outputs(event)

        # Forward to response coordinator and emit streaming events
        streaming_events = self._response_coordinator.intercept_event(event)
@@ -194,7 +191,7 @@ class EventHandler:

        # Handle response node outputs
        if node.execution_type == NodeExecutionType.RESPONSE:
-            self._update_response_outputs(event.node_run_result.outputs)
+            self._update_response_outputs(event)

        # Collect the event
        self._event_collector.collect(event)
@@ -210,7 +207,6 @@ class EventHandler:
        # Update domain model
        node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
        node_execution.mark_failed(event.error)
-        self._graph_execution.record_node_failure()

        result = self._error_handler.handle_node_failure(event)

@@ -231,40 +227,10 @@ class EventHandler:
        Args:
            event: The node exception event
        """
-        # Node continues via fail-branch/default-value, treat as completion
+        # Node continues via fail-branch, so it's technically "succeeded"
        node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
        node_execution.mark_taken()

-        # Persist outputs produced by the exception strategy (e.g. default values)
-        self._store_node_outputs(event.node_id, event.node_run_result.outputs)
-
-        node = self._graph.nodes[event.node_id]
-
-        if node.error_strategy == ErrorStrategy.DEFAULT_VALUE:
-            ready_nodes, edge_streaming_events = self._edge_processor.process_node_success(event.node_id)
-        elif node.error_strategy == ErrorStrategy.FAIL_BRANCH:
-            ready_nodes, edge_streaming_events = self._edge_processor.handle_branch_completion(
-                event.node_id, event.node_run_result.edge_source_handle
-            )
-        else:
-            raise NotImplementedError(f"Unsupported error strategy: {node.error_strategy}")
-
-        for edge_event in edge_streaming_events:
-            self._event_collector.collect(edge_event)
-
-        for node_id in ready_nodes:
-            self._state_manager.enqueue_node(node_id)
-            self._state_manager.start_execution(node_id)
-
-        # Update response outputs if applicable
-        if node.execution_type == NodeExecutionType.RESPONSE:
-            self._update_response_outputs(event.node_run_result.outputs)
-
-        self._state_manager.finish_execution(event.node_id)
-
-        # Collect the exception event for observers
-        self._event_collector.collect(event)
-
    @_dispatch.register
    def _(self, event: NodeRunRetryEvent) -> None:
        """
@@ -276,31 +242,21 @@ class EventHandler:
        node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
        node_execution.increment_retry()

-        # Finish the previous attempt before re-queuing the node
-        self._state_manager.finish_execution(event.node_id)
-
-        # Emit retry event for observers
-        self._event_collector.collect(event)
-
-        # Re-queue node for execution
-        self._state_manager.enqueue_node(event.node_id)
-        self._state_manager.start_execution(event.node_id)
-
-    def _store_node_outputs(self, node_id: str, outputs: Mapping[str, object]) -> None:
+    def _store_node_outputs(self, event: NodeRunSucceededEvent) -> None:
        """
        Store node outputs in the variable pool.

        Args:
            event: The node succeeded event containing outputs
        """
-        for variable_name, variable_value in outputs.items():
-            self._graph_runtime_state.variable_pool.add((node_id, variable_name), variable_value)
+        for variable_name, variable_value in event.node_run_result.outputs.items():
+            self._graph_runtime_state.variable_pool.add((event.node_id, variable_name), variable_value)

-    def _update_response_outputs(self, outputs: Mapping[str, object]) -> None:
+    def _update_response_outputs(self, event: NodeRunSucceededEvent) -> None:
        """Update response outputs for response nodes."""
        # TODO: Design a mechanism for nodes to notify the engine about how to update outputs
        # in runtime state, rather than allowing nodes to directly access runtime state.
-        for key, value in outputs.items():
+        for key, value in event.node_run_result.outputs.items():
            if key == "answer":
                existing = self._graph_runtime_state.get_output("answer", "")
                if existing:
--- a/api/core/workflow/graph_engine/event_management/event_manager.py
+++ b/api/core/workflow/graph_engine/event_management/event_manager.py
@@ -5,7 +5,6 @@ Unified event manager for collecting and emitting events.
 import threading
 import time
 from collections.abc import Generator
-from contextlib import contextmanager
 from typing import final

 from core.workflow.graph_events import GraphEngineEvent
@@ -52,23 +51,43 @@ class ReadWriteLock:
        """Release a write lock."""
        self._read_ready.release()

-    @contextmanager
-    def read_lock(self):
+    def read_lock(self) -> "ReadLockContext":
        """Return a context manager for read locking."""
-        self.acquire_read()
-        try:
-            yield
-        finally:
-            self.release_read()
+        return ReadLockContext(self)

-    @contextmanager
-    def write_lock(self):
+    def write_lock(self) -> "WriteLockContext":
        """Return a context manager for write locking."""
-        self.acquire_write()
-        try:
-            yield
-        finally:
-            self.release_write()
+        return WriteLockContext(self)
+
+
+@final
+class ReadLockContext:
+    """Context manager for read locks."""
+
+    def __init__(self, lock: ReadWriteLock) -> None:
+        self._lock = lock
+
+    def __enter__(self) -> "ReadLockContext":
+        self._lock.acquire_read()
+        return self
+
+    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object) -> None:
+        self._lock.release_read()
+
+
+@final
+class WriteLockContext:
+    """Context manager for write locks."""
+
+    def __init__(self, lock: ReadWriteLock) -> None:
+        self._lock = lock
+
+    def __enter__(self) -> "WriteLockContext":
+        self._lock.acquire_write()
+        return self
+
+    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object) -> None:
+        self._lock.release_write()


@final
--- a/api/core/workflow/graph_engine/graph_engine.py
+++ b/api/core/workflow/graph_engine/graph_engine.py
@@ -23,7 +23,6 @@ from core.workflow.graph_events import (
    GraphNodeEventBase,
    GraphRunAbortedEvent,
    GraphRunFailedEvent,
-    GraphRunPartialSucceededEvent,
    GraphRunStartedEvent,
    GraphRunSucceededEvent,
 )
@@ -261,23 +260,12 @@ class GraphEngine:
                if self._graph_execution.error:
                    raise self._graph_execution.error
            else:
-                outputs = self._graph_runtime_state.outputs
-                exceptions_count = self._graph_execution.exceptions_count
-                if exceptions_count > 0:
-                    yield GraphRunPartialSucceededEvent(
-                        exceptions_count=exceptions_count,
-                        outputs=outputs,
-                    )
-                else:
-                    yield GraphRunSucceededEvent(
-                        outputs=outputs,
-                    )
+                yield GraphRunSucceededEvent(
+                    outputs=self._graph_runtime_state.outputs,
+                )

        except Exception as e:
-            yield GraphRunFailedEvent(
-                error=str(e),
-                exceptions_count=self._graph_execution.exceptions_count,
-            )
+            yield GraphRunFailedEvent(error=str(e))
            raise

        finally:
--- a/api/core/workflow/graph_engine/layers/debug_logging.py
+++ b/api/core/workflow/graph_engine/layers/debug_logging.py
@@ -15,7 +15,6 @@ from core.workflow.graph_events import (
    GraphEngineEvent,
    GraphRunAbortedEvent,
    GraphRunFailedEvent,
-    GraphRunPartialSucceededEvent,
    GraphRunStartedEvent,
    GraphRunSucceededEvent,
    NodeRunExceptionEvent,
@@ -128,13 +127,6 @@ class DebugLoggingLayer(GraphEngineLayer):
            if self.include_outputs and event.outputs:
                self.logger.info("  Final outputs: %s", self._format_dict(event.outputs))

-        elif isinstance(event, GraphRunPartialSucceededEvent):
-            self.logger.warning("⚠️ Graph run partially succeeded")
-            if event.exceptions_count > 0:
-                self.logger.warning("  Total exceptions: %s", event.exceptions_count)
-            if self.include_outputs and event.outputs:
-                self.logger.info("  Final outputs: %s", self._format_dict(event.outputs))
-
        elif isinstance(event, GraphRunFailedEvent):
            self.logger.error("❌ Graph run failed: %s", event.error)
            if event.exceptions_count > 0:
@@ -146,12 +138,6 @@ class DebugLoggingLayer(GraphEngineLayer):
                self.logger.info("  Partial outputs: %s", self._format_dict(event.outputs))

        # Node-level events
-        # Retry before Started because Retry subclasses Started;
-        elif isinstance(event, NodeRunRetryEvent):
-            self.retry_count += 1
-            self.logger.warning("🔄 Node retry: %s (attempt %s)", event.node_id, event.retry_index)
-            self.logger.warning("  Previous error: %s", event.error)
-
        elif isinstance(event, NodeRunStartedEvent):
            self.node_count += 1
            self.logger.info('▶️ Node started: %s - "%s" (type: %s)', event.node_id, event.node_title, event.node_type)
@@ -181,6 +167,11 @@ class DebugLoggingLayer(GraphEngineLayer):
            self.logger.warning("⚠️ Node exception handled: %s", event.node_id)
            self.logger.warning("  Error: %s", event.error)

+        elif isinstance(event, NodeRunRetryEvent):
+            self.retry_count += 1
+            self.logger.warning("🔄 Node retry: %s (attempt %s)", event.node_id, event.retry_index)
+            self.logger.warning("  Previous error: %s", event.error)
+
        elif isinstance(event, NodeRunStreamChunkEvent):
            # Log stream chunks at debug level to avoid spam
            final_indicator = " (FINAL)" if event.is_final else ""
--- a/api/core/workflow/nodes/iteration/iteration_node.py
+++ b/api/core/workflow/nodes/iteration/iteration_node.py
@@ -19,7 +19,6 @@ from core.workflow.enums import (
 from core.workflow.graph_events import (
    GraphNodeEventBase,
    GraphRunFailedEvent,
-    GraphRunPartialSucceededEvent,
    GraphRunSucceededEvent,
 )
 from core.workflow.node_events import (
@@ -373,16 +372,43 @@ class IterationNode(Node):
        variable_mapping: dict[str, Sequence[str]] = {
            f"{node_id}.input_selector": typed_node_data.iterator_selector,
        }
-        iteration_node_ids = set()

-        # Find all nodes that belong to this loop
-        nodes = graph_config.get("nodes", [])
-        for node in nodes:
-            node_data = node.get("data", {})
-            if node_data.get("iteration_id") == node_id:
-                in_iteration_node_id = node.get("id")
-                if in_iteration_node_id:
-                    iteration_node_ids.add(in_iteration_node_id)
+        # Build the iteration sub-graph from start_node_id; its node_ids are used below to drop internal selectors
+        from core.workflow.entities import GraphInitParams, GraphRuntimeState
+        from core.workflow.graph import Graph
+        from core.workflow.nodes.node_factory import DifyNodeFactory
+
+        # Create minimal GraphInitParams for static analysis
+        graph_init_params = GraphInitParams(
+            tenant_id="",
+            app_id="",
+            workflow_id="",
+            graph_config=graph_config,
+            user_id="",
+            user_from="",
+            invoke_from="",
+            call_depth=0,
+        )
+
+        # Create minimal GraphRuntimeState for static analysis
+        from core.workflow.entities import VariablePool
+
+        graph_runtime_state = GraphRuntimeState(
+            variable_pool=VariablePool(),
+            start_at=0,
+        )
+
+        # Create node factory for static analysis
+        node_factory = DifyNodeFactory(graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state)
+
+        iteration_graph = Graph.init(
+            graph_config=graph_config,
+            node_factory=node_factory,
+            root_node_id=typed_node_data.start_node_id,
+        )
+
+        if not iteration_graph:
+            raise IterationGraphNotFoundError("iteration graph not found")

        # Get node configs from graph_config instead of non-existent node_id_config_mapping
        node_configs = {node["id"]: node for node in graph_config.get("nodes", []) if "id" in node}
@@ -418,7 +444,9 @@ class IterationNode(Node):
            variable_mapping.update(sub_node_variable_mapping)

        # remove variable out from iteration
-        variable_mapping = {key: value for key, value in variable_mapping.items() if value[0] not in iteration_node_ids}
+        variable_mapping = {
+            key: value for key, value in variable_mapping.items() if value[0] not in iteration_graph.node_ids
+        }

        return variable_mapping

@@ -457,7 +485,7 @@ class IterationNode(Node):
            if isinstance(event, GraphNodeEventBase):
                self._append_iteration_info_to_event(event=event, iter_run_index=current_index)
                yield event
-            elif isinstance(event, (GraphRunSucceededEvent, GraphRunPartialSucceededEvent)):
+            elif isinstance(event, GraphRunSucceededEvent):
                result = variable_pool.get(self._node_data.output_selector)
                if result is None:
                    outputs.append(None)
--- a/api/core/workflow/nodes/knowledge_index/entities.py
+++ b/api/core/workflow/nodes/knowledge_index/entities.py
@@ -63,7 +63,7 @@ class RetrievalSetting(BaseModel):
    Retrieval Setting.
    """

-    search_method: Literal["semantic_search", "keyword_search", "full_text_search", "hybrid_search"]
+    search_method: Literal["semantic_search", "keyword_search", "fulltext_search", "hybrid_search"]
    top_k: int
    score_threshold: float | None = 0.5
    score_threshold_enabled: bool = False
--- a/api/core/workflow/nodes/loop/loop_node.py
+++ b/api/core/workflow/nodes/loop/loop_node.py
@@ -1,4 +1,3 @@
-import contextlib
 import json
 import logging
 from collections.abc import Callable, Generator, Mapping, Sequence
@@ -128,13 +127,11 @@ class LoopNode(Node):
        try:
            reach_break_condition = False
            if break_conditions:
-                with contextlib.suppress(ValueError):
-                    _, _, reach_break_condition = condition_processor.process_conditions(
-                        variable_pool=self.graph_runtime_state.variable_pool,
-                        conditions=break_conditions,
-                        operator=logical_operator,
-                    )
-
+                _, _, reach_break_condition = condition_processor.process_conditions(
+                    variable_pool=self.graph_runtime_state.variable_pool,
+                    conditions=break_conditions,
+                    operator=logical_operator,
+                )
            if reach_break_condition:
                loop_count = 0
            cost_tokens = 0
@@ -298,11 +295,42 @@ class LoopNode(Node):

        variable_mapping = {}

-        # Extract loop node IDs statically from graph_config
+        # Build the loop sub-graph from start_node_id; its node_ids are used below to drop internal selectors
+        from core.workflow.entities import GraphInitParams, GraphRuntimeState, VariablePool
+        from core.workflow.graph import Graph
+        from core.workflow.nodes.node_factory import DifyNodeFactory

-        loop_node_ids = cls._extract_loop_node_ids_from_config(graph_config, node_id)
+        # Create minimal GraphInitParams for static analysis
+        graph_init_params = GraphInitParams(
+            tenant_id="",
+            app_id="",
+            workflow_id="",
+            graph_config=graph_config,
+            user_id="",
+            user_from="",
+            invoke_from="",
+            call_depth=0,
+        )

-        # Get node configs from graph_config
+        # Create minimal GraphRuntimeState for static analysis
+        graph_runtime_state = GraphRuntimeState(
+            variable_pool=VariablePool(),
+            start_at=0,
+        )
+
+        # Create node factory for static analysis
+        node_factory = DifyNodeFactory(graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state)
+
+        loop_graph = Graph.init(
+            graph_config=graph_config,
+            node_factory=node_factory,
+            root_node_id=typed_node_data.start_node_id,
+        )
+
+        if not loop_graph:
+            raise ValueError("loop graph not found")
+
+        # Get node configs from graph_config instead of non-existent node_id_config_mapping
        node_configs = {node["id"]: node for node in graph_config.get("nodes", []) if "id" in node}
        for sub_node_id, sub_node_config in node_configs.items():
            if sub_node_config.get("data", {}).get("loop_id") != node_id:
@@ -343,35 +371,12 @@ class LoopNode(Node):
                variable_mapping[f"{node_id}.{loop_variable.label}"] = selector

        # remove variable out from loop
-        variable_mapping = {key: value for key, value in variable_mapping.items() if value[0] not in loop_node_ids}
+        variable_mapping = {
+            key: value for key, value in variable_mapping.items() if value[0] not in loop_graph.node_ids
+        }

        return variable_mapping

-    @classmethod
-    def _extract_loop_node_ids_from_config(cls, graph_config: Mapping[str, Any], loop_node_id: str) -> set[str]:
-        """
-        Extract node IDs that belong to a specific loop from graph configuration.
-
-        This method statically analyzes the graph configuration to find all nodes
-        that are part of the specified loop, without creating actual node instances.
-
-        :param graph_config: the complete graph configuration
-        :param loop_node_id: the ID of the loop node
-        :return: set of node IDs that belong to the loop
-        """
-        loop_node_ids = set()
-
-        # Find all nodes that belong to this loop
-        nodes = graph_config.get("nodes", [])
-        for node in nodes:
-            node_data = node.get("data", {})
-            if node_data.get("loop_id") == loop_node_id:
-                node_id = node.get("id")
-                if node_id:
-                    loop_node_ids.add(node_id)
-
-        return loop_node_ids
-
    @staticmethod
    def _get_segment_for_constant(var_type: SegmentType, original_value: Any) -> Segment:
        """Get the appropriate segment type for a constant value."""
--- a/api/fields/file_fields.py
+++ b/api/fields/file_fields.py
@@ -33,7 +33,6 @@ file_fields = {
    "created_by": fields.String,
    "created_at": TimestampField,
    "preview_url": fields.String,
-    "source_url": fields.String,
 }


--- a/api/gunicorn.conf.py
+++ b/api/gunicorn.conf.py
@@ -1,32 +1,10 @@
 import psycogreen.gevent as pscycogreen_gevent  # type: ignore
-from gevent import events as gevent_events
 from grpc.experimental import gevent as grpc_gevent  # type: ignore

-# NOTE(QuantumGhost): here we cannot use post_fork to patch gRPC, as
-# grpc_gevent.init_gevent must be called after patching stdlib.
-# Gunicorn calls `post_init` before applying monkey patch.
-# Use `post_init` to setup gRPC gevent support would cause deadlock and
-# some other weird issues.
-#
-# ref:
-# - https://github.com/grpc/grpc/blob/62533ea13879d6ee95c6fda11ec0826ca822c9dd/src/python/grpcio/grpc/experimental/gevent.py
-# - https://github.com/gevent/gevent/issues/2060#issuecomment-3016768668
-# - https://github.com/benoitc/gunicorn/blob/master/gunicorn/arbiter.py#L607-L613

-
-def post_patch(event):
-    # this function is only called for gevent worker.
-    # from gevent docs (https://www.gevent.org/api/gevent.monkey.html):
-    # You can also subscribe to the events to provide additional patching beyond what gevent distributes, either for
-    # additional standard library modules, or for third-party packages. The suggested time to do this patching is in
-    # the subscriber for gevent.events.GeventDidPatchBuiltinModulesEvent.
-    if not isinstance(event, gevent_events.GeventDidPatchBuiltinModulesEvent):
-        return
+def post_fork(server, worker):
    # grpc gevent
    grpc_gevent.init_gevent()
-    print("gRPC patched with gevent.", flush=True)  # noqa: T201
+    server.log.info("gRPC  patched with gevent.")
    pscycogreen_gevent.patch_psycopg()
-    print("psycopg2 patched with gevent.", flush=True)  # noqa: T201
-
-
-gevent_events.subscribers.append(post_patch)
+    server.log.info("psycopg2 patched with gevent.")
--- a/api/libs/oauth.py
+++ b/api/libs/oauth.py
@@ -1,7 +1,7 @@
 import urllib.parse
 from dataclasses import dataclass

-import httpx
+import requests


@dataclass
@@ -58,7 +58,7 @@ class GitHubOAuth(OAuth):
            "redirect_uri": self.redirect_uri,
        }
        headers = {"Accept": "application/json"}
-        response = httpx.post(self._TOKEN_URL, data=data, headers=headers)
+        response = requests.post(self._TOKEN_URL, data=data, headers=headers)

        response_json = response.json()
        access_token = response_json.get("access_token")
@@ -70,11 +70,11 @@ class GitHubOAuth(OAuth):

    def get_raw_user_info(self, token: str):
        headers = {"Authorization": f"token {token}"}
-        response = httpx.get(self._USER_INFO_URL, headers=headers)
+        response = requests.get(self._USER_INFO_URL, headers=headers)
        response.raise_for_status()
        user_info = response.json()

-        email_response = httpx.get(self._EMAIL_INFO_URL, headers=headers)
+        email_response = requests.get(self._EMAIL_INFO_URL, headers=headers)
        email_info = email_response.json()
        primary_email: dict = next((email for email in email_info if email["primary"] == True), {})

@@ -112,7 +112,7 @@ class GoogleOAuth(OAuth):
            "redirect_uri": self.redirect_uri,
        }
        headers = {"Accept": "application/json"}
-        response = httpx.post(self._TOKEN_URL, data=data, headers=headers)
+        response = requests.post(self._TOKEN_URL, data=data, headers=headers)

        response_json = response.json()
        access_token = response_json.get("access_token")
@@ -124,7 +124,7 @@ class GoogleOAuth(OAuth):

    def get_raw_user_info(self, token: str):
        headers = {"Authorization": f"Bearer {token}"}
-        response = httpx.get(self._USER_INFO_URL, headers=headers)
+        response = requests.get(self._USER_INFO_URL, headers=headers)
        response.raise_for_status()
        return response.json()

--- a/api/libs/oauth_data_source.py
+++ b/api/libs/oauth_data_source.py
@@ -1,7 +1,7 @@
 import urllib.parse
 from typing import Any

-import httpx
+import requests
 from flask_login import current_user
 from sqlalchemy import select

@@ -43,7 +43,7 @@ class NotionOAuth(OAuthDataSource):
        data = {"code": code, "grant_type": "authorization_code", "redirect_uri": self.redirect_uri}
        headers = {"Accept": "application/json"}
        auth = (self.client_id, self.client_secret)
-        response = httpx.post(self._TOKEN_URL, data=data, auth=auth, headers=headers)
+        response = requests.post(self._TOKEN_URL, data=data, auth=auth, headers=headers)

        response_json = response.json()
        access_token = response_json.get("access_token")
@@ -239,7 +239,7 @@ class NotionOAuth(OAuthDataSource):
                "Notion-Version": "2022-06-28",
            }

-            response = httpx.post(url=self._NOTION_PAGE_SEARCH, json=data, headers=headers)
+            response = requests.post(url=self._NOTION_PAGE_SEARCH, json=data, headers=headers)
            response_json = response.json()

            results.extend(response_json.get("results", []))
@@ -254,7 +254,7 @@ class NotionOAuth(OAuthDataSource):
            "Authorization": f"Bearer {access_token}",
            "Notion-Version": "2022-06-28",
        }
-        response = httpx.get(url=f"{self._NOTION_BLOCK_SEARCH}/{block_id}", headers=headers)
+        response = requests.get(url=f"{self._NOTION_BLOCK_SEARCH}/{block_id}", headers=headers)
        response_json = response.json()
        if response.status_code != 200:
            message = response_json.get("message", "unknown error")
@@ -270,7 +270,7 @@ class NotionOAuth(OAuthDataSource):
            "Authorization": f"Bearer {access_token}",
            "Notion-Version": "2022-06-28",
        }
-        response = httpx.get(url=self._NOTION_BOT_USER, headers=headers)
+        response = requests.get(url=self._NOTION_BOT_USER, headers=headers)
        response_json = response.json()
        if "object" in response_json and response_json["object"] == "user":
            user_type = response_json["type"]
@@ -294,7 +294,7 @@ class NotionOAuth(OAuthDataSource):
                "Authorization": f"Bearer {access_token}",
                "Notion-Version": "2022-06-28",
            }
-            response = httpx.post(url=self._NOTION_PAGE_SEARCH, json=data, headers=headers)
+            response = requests.post(url=self._NOTION_PAGE_SEARCH, json=data, headers=headers)
            response_json = response.json()

            results.extend(response_json.get("results", []))
--- a/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py
+++ b/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py
@@ -47,7 +47,7 @@ def upgrade():
    sa.Column('plugin_id', sa.String(length=255), nullable=False),
    sa.Column('auth_type', sa.String(length=255), nullable=False),
    sa.Column('encrypted_credentials', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
-    sa.Column('avatar_url', sa.Text(), nullable=True),
+    sa.Column('avatar_url', sa.String(length=255), nullable=True),
    sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False),
    sa.Column('expires_at', sa.Integer(), server_default='-1', nullable=False),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -689,6 +689,7 @@ class DocumentSegment(Base):
        sa.Index("document_segment_tenant_document_idx", "document_id", "tenant_id"),
        sa.Index("document_segment_node_dataset_idx", "index_node_id", "dataset_id"),
        sa.Index("document_segment_tenant_idx", "tenant_id"),
+        sa.Index("document_segment_dataset_hash_idx", "dataset_id", "index_node_hash"),
    )

    # initial fields
@@ -910,7 +911,7 @@ class AppDatasetJoin(Base):
    id = mapped_column(StringUUID, primary_key=True, nullable=False, server_default=sa.text("uuid_generate_v4()"))
    app_id = mapped_column(StringUUID, nullable=False)
    dataset_id = mapped_column(StringUUID, nullable=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp())
+    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=db.func.current_timestamp())

    @property
    def app(self):
@@ -931,7 +932,7 @@ class DatasetQuery(Base):
    source_app_id = mapped_column(StringUUID, nullable=True)
    created_by_role = mapped_column(String, nullable=False)
    created_by = mapped_column(StringUUID, nullable=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp())
+    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=db.func.current_timestamp())


 class DatasetKeywordTable(Base):
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -1044,7 +1044,7 @@ class Message(Base):
                sign_url = sign_tool_file(tool_file_id=tool_file_id, extension=extension)
            elif "file-preview" in url:
                # get upload file id
-                upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview\?timestamp="
+                upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview?\?timestamp="
                result = re.search(upload_file_id_pattern, url)
                if not result:
                    continue
@@ -1055,7 +1055,7 @@ class Message(Base):
                sign_url = file_helpers.get_signed_file_url(upload_file_id)
            elif "image-preview" in url:
                # image-preview is deprecated, use file-preview instead
-                upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview\?timestamp="
+                upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview?\?timestamp="
                result = re.search(upload_file_id_pattern, url)
                if not result:
                    continue
@@ -1731,7 +1731,7 @@ class MessageChain(Base):
    type: Mapped[str] = mapped_column(String(255), nullable=False)
    input = mapped_column(sa.Text, nullable=True)
    output = mapped_column(sa.Text, nullable=True)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp())
+    created_at = mapped_column(sa.DateTime, nullable=False, server_default=db.func.current_timestamp())


 class MessageAgentThought(Base):
@@ -1769,7 +1769,7 @@ class MessageAgentThought(Base):
    latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True)
    created_by_role = mapped_column(String, nullable=False)
    created_by = mapped_column(StringUUID, nullable=False)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp())
+    created_at = mapped_column(sa.DateTime, nullable=False, server_default=db.func.current_timestamp())

    @property
    def files(self) -> list[Any]:
@@ -1872,7 +1872,7 @@ class DatasetRetrieverResource(Base):
    index_node_hash = mapped_column(sa.Text, nullable=True)
    retriever_from = mapped_column(sa.Text, nullable=False)
    created_by = mapped_column(StringUUID, nullable=False)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp())
+    created_at = mapped_column(sa.DateTime, nullable=False, server_default=db.func.current_timestamp())


 class Tag(Base):
--- a/api/models/oauth.py
+++ b/api/models/oauth.py
@@ -35,7 +35,7 @@ class DatasourceProvider(Base):
    plugin_id: Mapped[str] = db.Column(db.String(255), nullable=False)
    auth_type: Mapped[str] = db.Column(db.String(255), nullable=False)
    encrypted_credentials: Mapped[dict] = db.Column(JSONB, nullable=False)
-    avatar_url: Mapped[str] = db.Column(db.Text, nullable=True, default="default")
+    avatar_url: Mapped[str] = db.Column(db.String(255), nullable=True, default="default")
    is_default: Mapped[bool] = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
    expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default="-1")

--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,11 +1,11 @@
 [project]
 name = "dify-api"
-version = "1.9.0"
+version = "2.0.0-beta2"
 requires-python = ">=3.11,<3.13"

 dependencies = [
    "arize-phoenix-otel~=0.9.2",
-    "authlib==1.6.4",
+    "authlib==1.3.1",
    "azure-identity==1.16.1",
    "beautifulsoup4==4.12.2",
    "boto3==1.35.99",
@@ -20,7 +20,7 @@ dependencies = [
    "flask-migrate~=4.0.7",
    "flask-orjson~=2.0.0",
    "flask-sqlalchemy~=3.1.1",
-    "gevent~=25.9.1",
+    "gevent~=24.11.1",
    "gmpy2~=2.2.1",
    "google-api-core==2.18.0",
    "google-api-python-client==2.90.0",
@@ -169,7 +169,7 @@ dev = [
    "types-redis>=4.6.0.20241004",
    "celery-types>=0.23.0",
    "mypy~=1.17.1",
-    # "locust>=2.40.4",  # Temporarily removed due to compatibility issues. Uncomment when resolved.
+    "locust>=2.40.4",
    "sseclient-py>=1.8.0",
 ]

@@ -211,7 +211,7 @@ vdb = [
    "pgvecto-rs[sqlalchemy]~=0.2.1",
    "pgvector==0.2.5",
    "pymilvus~=2.5.0",
-    "pymochow==2.2.9",
+    "pymochow==1.3.1",
    "pyobvector~=0.2.15",
    "qdrant-client==1.9.0",
    "tablestore==6.2.0",
--- a/api/services/app_service.py
+++ b/api/services/app_service.py
@@ -2,7 +2,6 @@ import json
 import logging
 from typing import TypedDict, cast

-import sqlalchemy as sa
 from flask_sqlalchemy.pagination import Pagination

 from configs import dify_config
@@ -66,7 +65,7 @@ class AppService:
                return None

        app_models = db.paginate(
-            sa.select(App).where(*filters).order_by(App.created_at.desc()),
+            db.select(App).where(*filters).order_by(App.created_at.desc()),
            page=args["page"],
            per_page=args["limit"],
            error_out=False,
--- a/api/services/auth/firecrawl/firecrawl.py
+++ b/api/services/auth/firecrawl/firecrawl.py
@@ -1,6 +1,6 @@
 import json

-import httpx
+import requests

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@@ -36,7 +36,7 @@ class FirecrawlAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _post_request(self, url, data, headers):
-        return httpx.post(url, headers=headers, json=data)
+        return requests.post(url, headers=headers, json=data)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/auth/jina.py
+++ b/api/services/auth/jina.py
@@ -1,6 +1,6 @@
 import json

-import httpx
+import requests

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@@ -31,7 +31,7 @@ class JinaAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _post_request(self, url, data, headers):
-        return httpx.post(url, headers=headers, json=data)
+        return requests.post(url, headers=headers, json=data)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/auth/jina/jina.py
+++ b/api/services/auth/jina/jina.py
@@ -1,6 +1,6 @@
 import json

-import httpx
+import requests

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@@ -31,7 +31,7 @@ class JinaAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _post_request(self, url, data, headers):
-        return httpx.post(url, headers=headers, json=data)
+        return requests.post(url, headers=headers, json=data)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/auth/watercrawl/watercrawl.py
+++ b/api/services/auth/watercrawl/watercrawl.py
@@ -1,7 +1,7 @@
 import json
 from urllib.parse import urljoin

-import httpx
+import requests

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@@ -31,7 +31,7 @@ class WatercrawlAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "X-API-KEY": self.api_key}

    def _get_request(self, url, headers):
-        return httpx.get(url, headers=headers)
+        return requests.get(url, headers=headers)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -115,12 +115,12 @@ class DatasetService:
                    # Check if permitted_dataset_ids is not empty to avoid WHERE false condition
                    if permitted_dataset_ids and len(permitted_dataset_ids) > 0:
                        query = query.where(
-                            sa.or_(
+                            db.or_(
                                Dataset.permission == DatasetPermissionEnum.ALL_TEAM,
-                                sa.and_(
+                                db.and_(
                                    Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id
                                ),
-                                sa.and_(
+                                db.and_(
                                    Dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM,
                                    Dataset.id.in_(permitted_dataset_ids),
                                ),
@@ -128,9 +128,9 @@ class DatasetService:
                        )
                    else:
                        query = query.where(
-                            sa.or_(
+                            db.or_(
                                Dataset.permission == DatasetPermissionEnum.ALL_TEAM,
-                                sa.and_(
+                                db.and_(
                                    Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id
                                ),
                            )
@@ -1879,7 +1879,7 @@ class DocumentService:
    #                 for notion_info in notion_info_list:
    #                     workspace_id = notion_info.workspace_id
    #                     data_source_binding = DataSourceOauthBinding.query.filter(
-    #                         sa.and_(
+    #                         db.and_(
    #                             DataSourceOauthBinding.tenant_id == current_user.current_tenant_id,
    #                             DataSourceOauthBinding.provider == "notion",
    #                             DataSourceOauthBinding.disabled == False,
@@ -2623,6 +2623,17 @@ class SegmentService:
            tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
        lock_name = f"add_segment_lock_document_id_{document.id}"
        with redis_client.lock(lock_name, timeout=600):
+            # Check if a segment with the same content hash already exists
+            existing_segment = db.session.query(DocumentSegment).filter_by(
+                dataset_id=document.dataset_id,
+                index_node_hash=segment_hash,
+                enabled=True
+            ).first()
+
+            if existing_segment:
+                logger.info(f"Segment with same content hash already exists: {segment_hash}")
+                return existing_segment
+
            max_position = (
                db.session.query(func.max(DocumentSegment.position))
                .where(DocumentSegment.document_id == document.id)
@@ -2689,6 +2700,15 @@ class SegmentService:
                .where(DocumentSegment.document_id == document.id)
                .scalar()
            )
+            # Batch query existing hashes before the loop
+            segment_hashes = [helper.generate_text_hash(seg["content"]) for seg in segments]
+            existing_segments = db.session.query(DocumentSegment.index_node_hash).filter(
+                DocumentSegment.dataset_id == document.dataset_id,
+                DocumentSegment.index_node_hash.in_(segment_hashes),
+                DocumentSegment.enabled == True
+            ).all()
+            existing_hashes = {seg.index_node_hash for seg in existing_segments}
+
            pre_segment_data_list = []
            segment_data_list = []
            keywords_list = []
@@ -2697,6 +2717,12 @@ class SegmentService:
                content = segment_item["content"]
                doc_id = str(uuid.uuid4())
                segment_hash = helper.generate_text_hash(content)
+
+                # Skip existing segments
+                if segment_hash in existing_hashes:
+                    logger.info(f"Skipping duplicate segment with hash: {segment_hash}")
+                    continue
+
                tokens = 0
                if dataset.indexing_technique == "high_quality" and embedding_model:
                    # calc embedding use tokens
--- a/api/services/entities/knowledge_entities/rag_pipeline_entities.py
+++ b/api/services/entities/knowledge_entities/rag_pipeline_entities.py
@@ -83,7 +83,7 @@ class RetrievalSetting(BaseModel):
    Retrieval Setting.
    """

-    search_method: Literal["semantic_search", "full_text_search", "keyword_search", "hybrid_search"]
+    search_method: Literal["semantic_search", "fulltext_search", "keyword_search", "hybrid_search"]
    top_k: int
    score_threshold: float | None = 0.5
    score_threshold_enabled: bool = False
--- a/api/services/operation_service.py
+++ b/api/services/operation_service.py
@@ -1,6 +1,6 @@
 import os

-import httpx
+import requests


 class OperationService:
@@ -12,7 +12,7 @@ class OperationService:
        headers = {"Content-Type": "application/json", "Billing-Api-Secret-Key": cls.secret_key}

        url = f"{cls.base_url}{endpoint}"
-        response = httpx.request(method, url, json=json, params=params, headers=headers)
+        response = requests.request(method, url, json=json, params=params, headers=headers)

        return response.json()

--- a/api/services/plugin/plugin_migration.py
+++ b/api/services/plugin/plugin_migration.py
@@ -471,7 +471,7 @@ class PluginMigration:
        total_failed_tenant = 0
        while True:
            # paginate
-            tenants = db.paginate(sa.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100)
+            tenants = db.paginate(db.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100)
            if tenants.items is None or len(tenants.items) == 0:
                break

--- a/api/services/rag_pipeline/rag_pipeline.py
+++ b/api/services/rag_pipeline/rag_pipeline.py
@@ -1381,8 +1381,8 @@ class RagPipelineService:
        datasource_nodes = workflow.graph_dict.get("nodes", [])
        datasource_plugins = []
        for datasource_node in datasource_nodes:
-            if datasource_node.get("data", {}).get("type") == "datasource":
-                datasource_node_data = datasource_node["data"]
+            if datasource_node.get("type") == "datasource":
+                datasource_node_data = datasource_node.get("data", {})
                if not datasource_node_data:
                    continue

--- a/api/services/tag_service.py
+++ b/api/services/tag_service.py
@@ -1,6 +1,5 @@
 import uuid

-import sqlalchemy as sa
 from flask_login import current_user
 from sqlalchemy import func, select
 from werkzeug.exceptions import NotFound
@@ -19,7 +18,7 @@ class TagService:
            .where(Tag.type == tag_type, Tag.tenant_id == current_tenant_id)
        )
        if keyword:
-            query = query.where(sa.and_(Tag.name.ilike(f"%{keyword}%")))
+            query = query.where(db.and_(Tag.name.ilike(f"%{keyword}%")))
        query = query.group_by(Tag.id, Tag.type, Tag.name, Tag.created_at)
        results: list = query.order_by(Tag.created_at.desc()).all()
        return results
--- a/api/services/variable_truncator.py
+++ b/api/services/variable_truncator.py
@@ -262,14 +262,6 @@ class VariableTruncator:
        target_length = self._array_element_limit

        for i, item in enumerate(value):
-            # Dirty fix:
-            # The output of `Start` node may contain list of `File` elements,
-            # causing `AssertionError` while invoking `_truncate_json_primitives`.
-            #
-            # This check ensures that `list[File]` are handled separately
-            if isinstance(item, File):
-                truncated_value.append(item)
-                continue
            if i >= target_length:
                return _PartResult(truncated_value, used_size, True)
            if i > 0:
--- a/api/services/website_service.py
+++ b/api/services/website_service.py
@@ -3,7 +3,7 @@ import json
 from dataclasses import dataclass
 from typing import Any

-import httpx
+import requests
 from flask_login import current_user

 from core.helper import encrypter
@@ -216,7 +216,7 @@ class WebsiteService:
    @classmethod
    def _crawl_with_jinareader(cls, request: CrawlRequest, api_key: str) -> dict[str, Any]:
        if not request.options.crawl_sub_pages:
-            response = httpx.get(
+            response = requests.get(
                f"https://r.jina.ai/{request.url}",
                headers={"Accept": "application/json", "Authorization": f"Bearer {api_key}"},
            )
@@ -224,7 +224,7 @@ class WebsiteService:
                raise ValueError("Failed to crawl:")
            return {"status": "active", "data": response.json().get("data")}
        else:
-            response = httpx.post(
+            response = requests.post(
                "https://adaptivecrawl-kir3wx7b3a-uc.a.run.app",
                json={
                    "url": request.url,
@@ -287,7 +287,7 @@ class WebsiteService:

    @classmethod
    def _get_jinareader_status(cls, job_id: str, api_key: str) -> dict[str, Any]:
-        response = httpx.post(
+        response = requests.post(
            "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
            headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
            json={"taskId": job_id},
@@ -303,7 +303,7 @@ class WebsiteService:
        }

        if crawl_status_data["status"] == "completed":
-            response = httpx.post(
+            response = requests.post(
                "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                json={"taskId": job_id, "urls": list(data.get("processed", {}).keys())},
@@ -362,7 +362,7 @@ class WebsiteService:
    @classmethod
    def _get_jinareader_url_data(cls, job_id: str, url: str, api_key: str) -> dict[str, Any] | None:
        if not job_id:
-            response = httpx.get(
+            response = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "application/json", "Authorization": f"Bearer {api_key}"},
            )
@@ -371,7 +371,7 @@ class WebsiteService:
            return dict(response.json().get("data", {}))
        else:
            # Get crawl status first
-            status_response = httpx.post(
+            status_response = requests.post(
                "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                json={"taskId": job_id},
@@ -381,7 +381,7 @@ class WebsiteService:
                raise ValueError("Crawl job is not completed")

            # Get processed data
-            data_response = httpx.post(
+            data_response = requests.post(
                "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                json={"taskId": job_id, "urls": list(status_data.get("processed", {}).keys())},
--- a/api/tasks/document_indexing_sync_task.py
+++ b/api/tasks/document_indexing_sync_task.py
@@ -2,7 +2,6 @@ import logging
 import time

 import click
-import sqlalchemy as sa
 from celery import shared_task
 from sqlalchemy import select

@@ -52,7 +51,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
        data_source_binding = (
            db.session.query(DataSourceOauthBinding)
            .where(
-                sa.and_(
+                db.and_(
                    DataSourceOauthBinding.tenant_id == document.tenant_id,
                    DataSourceOauthBinding.provider == "notion",
                    DataSourceOauthBinding.disabled == False,
--- a/api/tests/integration_tests/plugin/__mock/http.py
+++ b/api/tests/integration_tests/plugin/__mock/http.py
@@ -1,8 +1,8 @@
 import os
 from typing import Literal

-import httpx
 import pytest
+import requests

 from core.plugin.entities.plugin_daemon import PluginDaemonBasicResponse
 from core.tools.entities.common_entities import I18nObject
@@ -27,11 +27,13 @@ class MockedHttp:
    @classmethod
    def requests_request(
        cls, method: Literal["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD"], url: str, **kwargs
-    ) -> httpx.Response:
+    ) -> requests.Response:
        """
-        Mocked httpx.request
+        Mocked requests.request
        """
-        request = httpx.Request(method, url)
+        request = requests.PreparedRequest()
+        request.method = method
+        request.url = url
        if url.endswith("/tools"):
            content = PluginDaemonBasicResponse[list[ToolProviderEntity]](
                code=0, message="success", data=cls.list_tools()
@@ -39,7 +41,8 @@ class MockedHttp:
        else:
            raise ValueError("")

-        response = httpx.Response(status_code=200)
+        response = requests.Response()
+        response.status_code = 200
        response.request = request
        response._content = content.encode("utf-8")
        return response
@@ -51,7 +54,7 @@ MOCK_SWITCH = os.getenv("MOCK_SWITCH", "false").lower() == "true"
@pytest.fixture
 def setup_http_mock(request, monkeypatch: pytest.MonkeyPatch):
    if MOCK_SWITCH:
-        monkeypatch.setattr(httpx, "request", MockedHttp.requests_request)
+        monkeypatch.setattr(requests, "request", MockedHttp.requests_request)

        def unpatch():
            monkeypatch.undo()
--- a/api/tests/integration_tests/vdb/__mock/baiduvectordb.py
+++ b/api/tests/integration_tests/vdb/__mock/baiduvectordb.py
@@ -100,8 +100,8 @@ class MockBaiduVectorDBClass:
                "row": {
                    "id": primary_key.get("id"),
                    "vector": [0.23432432, 0.8923744, 0.89238432],
-                    "page_content": "text",
-                    "metadata": {"doc_id": "doc_id_001"},
+                    "text": "text",
+                    "metadata": '{"doc_id": "doc_id_001"}',
                },
                "code": 0,
                "msg": "Success",
@@ -127,8 +127,8 @@ class MockBaiduVectorDBClass:
                        "row": {
                            "id": "doc_id_001",
                            "vector": [0.23432432, 0.8923744, 0.89238432],
-                            "page_content": "text",
-                            "metadata": {"doc_id": "doc_id_001"},
+                            "text": "text",
+                            "metadata": '{"doc_id": "doc_id_001"}',
                        },
                        "distance": 0.1,
                        "score": 0.5,
--- a/api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py
+++ b/api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py
@@ -6,7 +6,7 @@ Test Clickzetta integration in Docker environment
 import os
 import time

-import httpx
+import requests
 from clickzetta import connect


@@ -66,7 +66,7 @@ def test_dify_api():
    max_retries = 30
    for i in range(max_retries):
        try:
-            response = httpx.get(f"{base_url}/console/api/health")
+            response = requests.get(f"{base_url}/console/api/health")
            if response.status_code == 200:
                print("✓ Dify API is ready")
                break
--- a/api/tests/test_containers_integration_tests/conftest.py
+++ b/api/tests/test_containers_integration_tests/conftest.py
@@ -173,7 +173,7 @@ class DifyTestContainers:
        # Start Dify Plugin Daemon container for plugin management
        # Dify Plugin Daemon provides plugin lifecycle management and execution
        logger.info("Initializing Dify Plugin Daemon container...")
-        self.dify_plugin_daemon = DockerContainer(image="langgenius/dify-plugin-daemon:0.3.0-local")
+        self.dify_plugin_daemon = DockerContainer(image="langgenius/dify-plugin-daemon:0.2.0-local")
        self.dify_plugin_daemon.with_exposed_ports(5002)
        self.dify_plugin_daemon.env = {
            "DB_HOST": db_host,
--- a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py
+++ b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py
@@ -13,7 +13,6 @@ import pytest
 from faker import Faker

 from extensions.ext_database import db
-from libs.datetime_utils import naive_utc_now
 from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
 from models.dataset import Dataset, Document, DocumentSegment
 from models.model import UploadFile
@@ -203,6 +202,7 @@ class TestBatchCleanDocumentTask:
            UploadFile: Created upload file instance
        """
        fake = Faker()
+        from datetime import datetime

        from models.enums import CreatorUserRole

@@ -216,7 +216,7 @@ class TestBatchCleanDocumentTask:
            mime_type="text/plain",
            created_by_role=CreatorUserRole.ACCOUNT,
            created_by=account.id,
-            created_at=naive_utc_now(),
+            created_at=datetime.utcnow(),
            used=False,
        )

--- a/api/tests/unit_tests/controllers/console/auth/test_oauth.py
+++ b/api/tests/unit_tests/controllers/console/auth/test_oauth.py
@@ -201,9 +201,9 @@ class TestOAuthCallback:
        mock_db.session.rollback = MagicMock()

        # Import the real requests module to create a proper exception
-        import httpx
+        import requests

-        request_exception = httpx.RequestError("OAuth error")
+        request_exception = requests.exceptions.RequestException("OAuth error")
        request_exception.response = MagicMock()
        request_exception.response.text = str(exception)

--- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py
+++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py
@@ -1,5 +1,6 @@
 """Unit tests for workflow node execution conflict handling."""

+from datetime import datetime
 from unittest.mock import MagicMock, Mock

 import psycopg2.errors
@@ -15,7 +16,6 @@ from core.workflow.entities.workflow_node_execution import (
    WorkflowNodeExecutionStatus,
 )
 from core.workflow.enums import NodeType
-from libs.datetime_utils import naive_utc_now
 from models import Account, WorkflowNodeExecutionTriggeredFrom


@@ -74,7 +74,7 @@ class TestWorkflowNodeExecutionConflictHandling:
            title="Test Node",
            index=1,
            status=WorkflowNodeExecutionStatus.RUNNING,
-            created_at=naive_utc_now(),
+            created_at=datetime.utcnow(),
        )

        original_id = execution.id
@@ -112,7 +112,7 @@ class TestWorkflowNodeExecutionConflictHandling:
            title="Test Node",
            index=1,
            status=WorkflowNodeExecutionStatus.SUCCEEDED,
-            created_at=naive_utc_now(),
+            created_at=datetime.utcnow(),
        )

        # Save should update existing record
@@ -157,7 +157,7 @@ class TestWorkflowNodeExecutionConflictHandling:
            title="Test Node",
            index=1,
            status=WorkflowNodeExecutionStatus.RUNNING,
-            created_at=naive_utc_now(),
+            created_at=datetime.utcnow(),
        )

        # Save should raise IntegrityError after max retries
@@ -199,7 +199,7 @@ class TestWorkflowNodeExecutionConflictHandling:
            title="Test Node",
            index=1,
            status=WorkflowNodeExecutionStatus.RUNNING,
-            created_at=naive_utc_now(),
+            created_at=datetime.utcnow(),
        )

        # Save should raise error immediately
--- a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py
@@ -1,120 +0,0 @@
-"""Tests for graph engine event handlers."""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from core.workflow.entities import GraphRuntimeState, VariablePool
-from core.workflow.enums import NodeExecutionType, NodeState, NodeType, WorkflowNodeExecutionStatus
-from core.workflow.graph import Graph
-from core.workflow.graph_engine.domain.graph_execution import GraphExecution
-from core.workflow.graph_engine.event_management.event_handlers import EventHandler
-from core.workflow.graph_engine.event_management.event_manager import EventManager
-from core.workflow.graph_engine.graph_state_manager import GraphStateManager
-from core.workflow.graph_engine.ready_queue.in_memory import InMemoryReadyQueue
-from core.workflow.graph_engine.response_coordinator.coordinator import ResponseStreamCoordinator
-from core.workflow.graph_events import NodeRunRetryEvent, NodeRunStartedEvent
-from core.workflow.node_events import NodeRunResult
-from core.workflow.nodes.base.entities import RetryConfig
-
-
-class _StubEdgeProcessor:
-    """Minimal edge processor stub for tests."""
-
-
-class _StubErrorHandler:
-    """Minimal error handler stub for tests."""
-
-
-class _StubNode:
-    """Simple node stub exposing the attributes needed by the state manager."""
-
-    def __init__(self, node_id: str) -> None:
-        self.id = node_id
-        self.state = NodeState.UNKNOWN
-        self.title = "Stub Node"
-        self.execution_type = NodeExecutionType.EXECUTABLE
-        self.error_strategy = None
-        self.retry_config = RetryConfig()
-        self.retry = False
-
-
-def _build_event_handler(node_id: str) -> tuple[EventHandler, EventManager, GraphExecution]:
-    """Construct an EventHandler with in-memory dependencies for testing."""
-
-    node = _StubNode(node_id)
-    graph = Graph(nodes={node_id: node}, edges={}, in_edges={}, out_edges={}, root_node=node)
-
-    variable_pool = VariablePool()
-    runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=0.0)
-    graph_execution = GraphExecution(workflow_id="test-workflow")
-
-    event_manager = EventManager()
-    state_manager = GraphStateManager(graph=graph, ready_queue=InMemoryReadyQueue())
-    response_coordinator = ResponseStreamCoordinator(variable_pool=variable_pool, graph=graph)
-
-    handler = EventHandler(
-        graph=graph,
-        graph_runtime_state=runtime_state,
-        graph_execution=graph_execution,
-        response_coordinator=response_coordinator,
-        event_collector=event_manager,
-        edge_processor=_StubEdgeProcessor(),
-        state_manager=state_manager,
-        error_handler=_StubErrorHandler(),
-    )
-
-    return handler, event_manager, graph_execution
-
-
-def test_retry_does_not_emit_additional_start_event() -> None:
-    """Ensure retry attempts do not produce duplicate start events."""
-
-    node_id = "test-node"
-    handler, event_manager, graph_execution = _build_event_handler(node_id)
-
-    execution_id = "exec-1"
-    node_type = NodeType.CODE
-    start_time = datetime.utcnow()
-
-    start_event = NodeRunStartedEvent(
-        id=execution_id,
-        node_id=node_id,
-        node_type=node_type,
-        node_title="Stub Node",
-        start_at=start_time,
-    )
-    handler.dispatch(start_event)
-
-    retry_event = NodeRunRetryEvent(
-        id=execution_id,
-        node_id=node_id,
-        node_type=node_type,
-        node_title="Stub Node",
-        start_at=start_time,
-        error="boom",
-        retry_index=1,
-        node_run_result=NodeRunResult(
-            status=WorkflowNodeExecutionStatus.FAILED,
-            error="boom",
-            error_type="TestError",
-        ),
-    )
-    handler.dispatch(retry_event)
-
-    # Simulate the node starting execution again after retry
-    second_start_event = NodeRunStartedEvent(
-        id=execution_id,
-        node_id=node_id,
-        node_type=node_type,
-        node_title="Stub Node",
-        start_at=start_time,
-    )
-    handler.dispatch(second_start_event)
-
-    collected_types = [type(event) for event in event_manager._events]  # type: ignore[attr-defined]
-
-    assert collected_types == [NodeRunStartedEvent, NodeRunRetryEvent]
-
-    node_execution = graph_execution.get_or_create_node_execution(node_id)
-    assert node_execution.retry_count == 1
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py
@@ -10,18 +10,11 @@ import time
 from hypothesis import HealthCheck, given, settings
 from hypothesis import strategies as st

-from core.workflow.enums import ErrorStrategy
 from core.workflow.graph_engine import GraphEngine
 from core.workflow.graph_engine.command_channels import InMemoryChannel
-from core.workflow.graph_events import (
-    GraphRunPartialSucceededEvent,
-    GraphRunStartedEvent,
-    GraphRunSucceededEvent,
-)
-from core.workflow.nodes.base.entities import DefaultValue, DefaultValueType
+from core.workflow.graph_events import GraphRunStartedEvent, GraphRunSucceededEvent

 # Import the test framework from the new module
-from .test_mock_config import MockConfigBuilder
 from .test_table_runner import TableTestRunner, WorkflowRunner, WorkflowTestCase


@@ -728,39 +721,3 @@ def test_event_sequence_validation_with_table_tests():
        else:
            assert result.event_sequence_match is True
        assert result.success, f"Test {i + 1} failed: {result.event_mismatch_details or result.error}"
-
-
-def test_graph_run_emits_partial_success_when_node_failure_recovered():
-    runner = TableTestRunner()
-
-    fixture_data = runner.workflow_runner.load_fixture("basic_chatflow")
-    mock_config = MockConfigBuilder().with_node_error("llm", "mock llm failure").build()
-
-    graph, graph_runtime_state = runner.workflow_runner.create_graph_from_fixture(
-        fixture_data=fixture_data,
-        query="hello",
-        use_mock_factory=True,
-        mock_config=mock_config,
-    )
-
-    llm_node = graph.nodes["llm"]
-    base_node_data = llm_node.get_base_node_data()
-    base_node_data.error_strategy = ErrorStrategy.DEFAULT_VALUE
-    base_node_data.default_value = [DefaultValue(key="text", value="fallback response", type=DefaultValueType.STRING)]
-
-    engine = GraphEngine(
-        workflow_id="test_workflow",
-        graph=graph,
-        graph_runtime_state=graph_runtime_state,
-        command_channel=InMemoryChannel(),
-    )
-
-    events = list(engine.run())
-
-    assert isinstance(events[-1], GraphRunPartialSucceededEvent)
-
-    partial_event = next(event for event in events if isinstance(event, GraphRunPartialSucceededEvent))
-    assert partial_event.exceptions_count == 1
-    assert partial_event.outputs.get("answer") == "fallback response"
-
-    assert not any(isinstance(event, GraphRunSucceededEvent) for event in events)
--- a/api/tests/unit_tests/core/workflow/nodes/test_retry.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_retry.py
@@ -0,0 +1,65 @@
+import pytest
+
+pytest.skip(
+    "Retry functionality is part of Phase 2 enhanced error handling - not implemented in MVP of queue-based engine",
+    allow_module_level=True,
+)
+
+DEFAULT_VALUE_EDGE = [
+    {
+        "id": "start-source-node-target",
+        "source": "start",
+        "target": "node",
+        "sourceHandle": "source",
+    },
+    {
+        "id": "node-source-answer-target",
+        "source": "node",
+        "target": "answer",
+        "sourceHandle": "source",
+    },
+]
+
+
+def test_retry_default_value_partial_success():
+    """retry default value node with partial success status"""
+    graph_config = {
+        "edges": DEFAULT_VALUE_EDGE,
+        "nodes": [
+            {"data": {"title": "start", "type": "start", "variables": []}, "id": "start"},
+            {"data": {"title": "answer", "type": "answer", "answer": "{{#node.result#}}"}, "id": "answer"},
+            ContinueOnErrorTestHelper.get_http_node(
+                "default-value",
+                [{"key": "result", "type": "string", "value": "http node got error response"}],
+                retry_config={"retry_config": {"max_retries": 2, "retry_interval": 1000, "retry_enabled": True}},
+            ),
+        ],
+    }
+
+    graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
+    events = list(graph_engine.run())
+    assert sum(1 for e in events if isinstance(e, NodeRunRetryEvent)) == 2
+    assert events[-1].outputs == {"answer": "http node got error response"}
+    assert any(isinstance(e, GraphRunPartialSucceededEvent) for e in events)
+    assert len(events) == 11
+
+
+def test_retry_failed():
+    """retry failed with failed status"""
+    graph_config = {
+        "edges": DEFAULT_VALUE_EDGE,
+        "nodes": [
+            {"data": {"title": "start", "type": "start", "variables": []}, "id": "start"},
+            {"data": {"title": "answer", "type": "answer", "answer": "{{#node.result#}}"}, "id": "answer"},
+            ContinueOnErrorTestHelper.get_http_node(
+                None,
+                None,
+                retry_config={"retry_config": {"max_retries": 2, "retry_interval": 1000, "retry_enabled": True}},
+            ),
+        ],
+    }
+    graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
+    events = list(graph_engine.run())
+    assert sum(1 for e in events if isinstance(e, NodeRunRetryEvent)) == 2
+    assert any(isinstance(e, GraphRunFailedEvent) for e in events)
+    assert len(events) == 8
--- a/api/tests/unit_tests/libs/test_oauth_clients.py
+++ b/api/tests/unit_tests/libs/test_oauth_clients.py
@@ -1,8 +1,8 @@
 import urllib.parse
 from unittest.mock import MagicMock, patch

-import httpx
 import pytest
+import requests

 from libs.oauth import GitHubOAuth, GoogleOAuth, OAuthUserInfo

@@ -68,7 +68,7 @@ class TestGitHubOAuth(BaseOAuthTest):
            ({}, None, True),
        ],
    )
-    @patch("httpx.post")
+    @patch("requests.post")
    def test_should_retrieve_access_token(
        self, mock_post, oauth, mock_response, response_data, expected_token, should_raise
    ):
@@ -105,7 +105,7 @@ class TestGitHubOAuth(BaseOAuthTest):
            ),
        ],
    )
-    @patch("httpx.get")
+    @patch("requests.get")
    def test_should_retrieve_user_info_correctly(self, mock_get, oauth, user_data, email_data, expected_email):
        user_response = MagicMock()
        user_response.json.return_value = user_data
@@ -121,11 +121,11 @@ class TestGitHubOAuth(BaseOAuthTest):
        assert user_info.name == user_data["name"]
        assert user_info.email == expected_email

-    @patch("httpx.get")
+    @patch("requests.get")
    def test_should_handle_network_errors(self, mock_get, oauth):
-        mock_get.side_effect = httpx.RequestError("Network error")
+        mock_get.side_effect = requests.exceptions.RequestException("Network error")

-        with pytest.raises(httpx.RequestError):
+        with pytest.raises(requests.exceptions.RequestException):
            oauth.get_raw_user_info("test_token")


@@ -167,7 +167,7 @@ class TestGoogleOAuth(BaseOAuthTest):
            ({}, None, True),
        ],
    )
-    @patch("httpx.post")
+    @patch("requests.post")
    def test_should_retrieve_access_token(
        self, mock_post, oauth, oauth_config, mock_response, response_data, expected_token, should_raise
    ):
@@ -201,7 +201,7 @@ class TestGoogleOAuth(BaseOAuthTest):
            ({"sub": "123", "email": "test@example.com", "name": "Test User"}, ""),  # Always returns empty string
        ],
    )
-    @patch("httpx.get")
+    @patch("requests.get")
    def test_should_retrieve_user_info_correctly(self, mock_get, oauth, mock_response, user_data, expected_name):
        mock_response.json.return_value = user_data
        mock_get.return_value = mock_response
@@ -217,12 +217,12 @@ class TestGoogleOAuth(BaseOAuthTest):
    @pytest.mark.parametrize(
        "exception_type",
        [
-            httpx.HTTPError,
-            httpx.ConnectError,
-            httpx.TimeoutException,
+            requests.exceptions.HTTPError,
+            requests.exceptions.ConnectionError,
+            requests.exceptions.Timeout,
        ],
    )
-    @patch("httpx.get")
+    @patch("requests.get")
    def test_should_handle_http_errors(self, mock_get, oauth, exception_type):
        mock_response = MagicMock()
        mock_response.raise_for_status.side_effect = exception_type("Error")
--- a/api/tests/unit_tests/models/test_model.py
+++ b/api/tests/unit_tests/models/test_model.py
@@ -1,83 +0,0 @@
-import importlib
-import types
-
-import pytest
-
-from models.model import Message
-
-
-@pytest.fixture(autouse=True)
-def patch_file_helpers(monkeypatch: pytest.MonkeyPatch):
-    """
-    Patch file_helpers.get_signed_file_url to a deterministic stub.
-    """
-    model_module = importlib.import_module("models.model")
-    dummy = types.SimpleNamespace(get_signed_file_url=lambda fid: f"https://signed.example/{fid}")
-    # Inject/override file_helpers on models.model
-    monkeypatch.setattr(model_module, "file_helpers", dummy, raising=False)
-
-
-def _wrap_md(url: str) -> str:
-    """
-    Wrap a raw URL into the markdown that re_sign_file_url_answer expects:
-    [link](<url>)
-    """
-    return f"please click [file]({url}) to download."
-
-
-def test_file_preview_valid_replaced():
-    """
-    Valid file-preview URL must be re-signed:
-    - Extract upload_file_id correctly
-    - Replace the original URL with the signed URL
-    """
-    upload_id = "abc-123"
-    url = f"/files/{upload_id}/file-preview?timestamp=111&nonce=222&sign=333"
-    msg = Message(answer=_wrap_md(url))
-
-    out = msg.re_sign_file_url_answer
-    assert f"https://signed.example/{upload_id}" in out
-    assert url not in out
-
-
-def test_file_preview_misspelled_not_replaced():
-    """
-    Misspelled endpoint 'file-previe?timestamp=' should NOT be rewritten.
-    """
-    upload_id = "zzz-001"
-    # path deliberately misspelled: file-previe? (missing 'w')
-    # and we append &note=file-preview to trick the old `"file-preview" in url` check.
-    url = f"/files/{upload_id}/file-previe?timestamp=111&nonce=222&sign=333&note=file-preview"
-    original = _wrap_md(url)
-    msg = Message(answer=original)
-
-    out = msg.re_sign_file_url_answer
-    # Expect NO replacement, should not rewrite misspelled file-previe URL
-    assert out == original
-
-
-def test_image_preview_valid_replaced():
-    """
-    Valid image-preview URL must be re-signed.
-    """
-    upload_id = "img-789"
-    url = f"/files/{upload_id}/image-preview?timestamp=123&nonce=456&sign=789"
-    msg = Message(answer=_wrap_md(url))
-
-    out = msg.re_sign_file_url_answer
-    assert f"https://signed.example/{upload_id}" in out
-    assert url not in out
-
-
-def test_image_preview_misspelled_not_replaced():
-    """
-    Misspelled endpoint 'image-previe?timestamp=' should NOT be rewritten.
-    """
-    upload_id = "img-err-42"
-    url = f"/files/{upload_id}/image-previe?timestamp=1&nonce=2&sign=3&note=image-preview"
-    original = _wrap_md(url)
-    msg = Message(answer=original)
-
-    out = msg.re_sign_file_url_answer
-    # Expect NO replacement, should not rewrite misspelled image-previe URL
-    assert out == original
--- a/api/tests/unit_tests/services/auth/test_auth_integration.py
+++ b/api/tests/unit_tests/services/auth/test_auth_integration.py
@@ -6,8 +6,8 @@ import json
 from concurrent.futures import ThreadPoolExecutor
 from unittest.mock import Mock, patch

-import httpx
 import pytest
+import requests

 from services.auth.api_key_auth_factory import ApiKeyAuthFactory
 from services.auth.api_key_auth_service import ApiKeyAuthService
@@ -26,7 +26,7 @@ class TestAuthIntegration:
        self.watercrawl_credentials = {"auth_type": "x-api-key", "config": {"api_key": "wc_test_key_789"}}

    @patch("services.auth.api_key_auth_service.db.session")
-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    @patch("services.auth.api_key_auth_service.encrypter.encrypt_token")
    def test_end_to_end_auth_flow(self, mock_encrypt, mock_http, mock_session):
        """Test complete authentication flow: request → validation → encryption → storage"""
@@ -47,7 +47,7 @@ class TestAuthIntegration:
        mock_session.add.assert_called_once()
        mock_session.commit.assert_called_once()

-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_cross_component_integration(self, mock_http):
        """Test factory → provider → HTTP call integration"""
        mock_http.return_value = self._create_success_response()
@@ -97,7 +97,7 @@ class TestAuthIntegration:
        assert "another_secret" not in factory_str

    @patch("services.auth.api_key_auth_service.db.session")
-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    @patch("services.auth.api_key_auth_service.encrypter.encrypt_token")
    def test_concurrent_creation_safety(self, mock_encrypt, mock_http, mock_session):
        """Test concurrent authentication creation safety"""
@@ -142,31 +142,31 @@ class TestAuthIntegration:
        with pytest.raises((ValueError, KeyError, TypeError, AttributeError)):
            ApiKeyAuthFactory(AuthType.FIRECRAWL, invalid_input)

-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_http_error_handling(self, mock_http):
        """Test proper HTTP error handling"""
        mock_response = Mock()
        mock_response.status_code = 401
        mock_response.text = '{"error": "Unauthorized"}'
-        mock_response.raise_for_status.side_effect = httpx.HTTPError("Unauthorized")
+        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("Unauthorized")
        mock_http.return_value = mock_response

        # PT012: Split into single statement for pytest.raises
        factory = ApiKeyAuthFactory(AuthType.FIRECRAWL, self.firecrawl_credentials)
-        with pytest.raises((httpx.HTTPError, Exception)):
+        with pytest.raises((requests.exceptions.HTTPError, Exception)):
            factory.validate_credentials()

    @patch("services.auth.api_key_auth_service.db.session")
-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_network_failure_recovery(self, mock_http, mock_session):
        """Test system recovery from network failures"""
-        mock_http.side_effect = httpx.RequestError("Network timeout")
+        mock_http.side_effect = requests.exceptions.RequestException("Network timeout")
        mock_session.add = Mock()
        mock_session.commit = Mock()

        args = {"category": self.category, "provider": AuthType.FIRECRAWL, "credentials": self.firecrawl_credentials}

-        with pytest.raises(httpx.RequestError):
+        with pytest.raises(requests.exceptions.RequestException):
            ApiKeyAuthService.create_provider_auth(self.tenant_id_1, args)

        mock_session.commit.assert_not_called()
--- a/api/tests/unit_tests/services/auth/test_firecrawl_auth.py
+++ b/api/tests/unit_tests/services/auth/test_firecrawl_auth.py
@@ -1,7 +1,7 @@
 from unittest.mock import MagicMock, patch

-import httpx
 import pytest
+import requests

 from services.auth.firecrawl.firecrawl import FirecrawlAuth

@@ -64,7 +64,7 @@ class TestFirecrawlAuth:
            FirecrawlAuth(credentials)
        assert str(exc_info.value) == expected_error

-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_should_validate_valid_credentials_successfully(self, mock_post, auth_instance):
        """Test successful credential validation"""
        mock_response = MagicMock()
@@ -95,7 +95,7 @@ class TestFirecrawlAuth:
            (500, "Internal server error"),
        ],
    )
-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_should_handle_http_errors(self, mock_post, status_code, error_message, auth_instance):
        """Test handling of various HTTP error codes"""
        mock_response = MagicMock()
@@ -115,7 +115,7 @@ class TestFirecrawlAuth:
            (401, "Not JSON", True, "Expecting value"),  # JSON decode error
        ],
    )
-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_should_handle_unexpected_errors(
        self, mock_post, status_code, response_text, has_json_error, expected_error_contains, auth_instance
    ):
@@ -134,13 +134,13 @@ class TestFirecrawlAuth:
    @pytest.mark.parametrize(
        ("exception_type", "exception_message"),
        [
-            (httpx.ConnectError, "Network error"),
-            (httpx.TimeoutException, "Request timeout"),
-            (httpx.ReadTimeout, "Read timeout"),
-            (httpx.ConnectTimeout, "Connection timeout"),
+            (requests.ConnectionError, "Network error"),
+            (requests.Timeout, "Request timeout"),
+            (requests.ReadTimeout, "Read timeout"),
+            (requests.ConnectTimeout, "Connection timeout"),
        ],
    )
-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_should_handle_network_errors(self, mock_post, exception_type, exception_message, auth_instance):
        """Test handling of various network-related errors including timeouts"""
        mock_post.side_effect = exception_type(exception_message)
@@ -162,7 +162,7 @@ class TestFirecrawlAuth:
            FirecrawlAuth({"auth_type": "basic", "config": {"api_key": "super_secret_key_12345"}})
        assert "super_secret_key_12345" not in str(exc_info.value)

-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_should_use_custom_base_url_in_validation(self, mock_post):
        """Test that custom base URL is used in validation"""
        mock_response = MagicMock()
@@ -179,12 +179,12 @@ class TestFirecrawlAuth:
        assert result is True
        assert mock_post.call_args[0][0] == "https://custom.firecrawl.dev/v1/crawl"

-    @patch("services.auth.firecrawl.firecrawl.httpx.post")
+    @patch("services.auth.firecrawl.firecrawl.requests.post")
    def test_should_handle_timeout_with_retry_suggestion(self, mock_post, auth_instance):
        """Test that timeout errors are handled gracefully with appropriate error message"""
-        mock_post.side_effect = httpx.TimeoutException("The request timed out after 30 seconds")
+        mock_post.side_effect = requests.Timeout("The request timed out after 30 seconds")

-        with pytest.raises(httpx.TimeoutException) as exc_info:
+        with pytest.raises(requests.Timeout) as exc_info:
            auth_instance.validate_credentials()

        # Verify the timeout exception is raised with original message
--- a/api/tests/unit_tests/services/auth/test_jina_auth.py
+++ b/api/tests/unit_tests/services/auth/test_jina_auth.py
@@ -1,7 +1,7 @@
 from unittest.mock import MagicMock, patch

-import httpx
 import pytest
+import requests

 from services.auth.jina.jina import JinaAuth

@@ -35,7 +35,7 @@ class TestJinaAuth:
            JinaAuth(credentials)
        assert str(exc_info.value) == "No API key provided"

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_validate_valid_credentials_successfully(self, mock_post):
        """Test successful credential validation"""
        mock_response = MagicMock()
@@ -53,7 +53,7 @@ class TestJinaAuth:
            json={"url": "https://example.com"},
        )

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_handle_http_402_error(self, mock_post):
        """Test handling of 402 Payment Required error"""
        mock_response = MagicMock()
@@ -68,7 +68,7 @@ class TestJinaAuth:
            auth.validate_credentials()
        assert str(exc_info.value) == "Failed to authorize. Status code: 402. Error: Payment required"

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_handle_http_409_error(self, mock_post):
        """Test handling of 409 Conflict error"""
        mock_response = MagicMock()
@@ -83,7 +83,7 @@ class TestJinaAuth:
            auth.validate_credentials()
        assert str(exc_info.value) == "Failed to authorize. Status code: 409. Error: Conflict error"

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_handle_http_500_error(self, mock_post):
        """Test handling of 500 Internal Server Error"""
        mock_response = MagicMock()
@@ -98,7 +98,7 @@ class TestJinaAuth:
            auth.validate_credentials()
        assert str(exc_info.value) == "Failed to authorize. Status code: 500. Error: Internal server error"

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_handle_unexpected_error_with_text_response(self, mock_post):
        """Test handling of unexpected errors with text response"""
        mock_response = MagicMock()
@@ -114,7 +114,7 @@ class TestJinaAuth:
            auth.validate_credentials()
        assert str(exc_info.value) == "Failed to authorize. Status code: 403. Error: Forbidden"

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_handle_unexpected_error_without_text(self, mock_post):
        """Test handling of unexpected errors without text response"""
        mock_response = MagicMock()
@@ -130,15 +130,15 @@ class TestJinaAuth:
            auth.validate_credentials()
        assert str(exc_info.value) == "Unexpected error occurred while trying to authorize. Status code: 404"

-    @patch("services.auth.jina.jina.httpx.post")
+    @patch("services.auth.jina.jina.requests.post")
    def test_should_handle_network_errors(self, mock_post):
        """Test handling of network connection errors"""
-        mock_post.side_effect = httpx.ConnectError("Network error")
+        mock_post.side_effect = requests.ConnectionError("Network error")

        credentials = {"auth_type": "bearer", "config": {"api_key": "test_api_key_123"}}
        auth = JinaAuth(credentials)

-        with pytest.raises(httpx.ConnectError):
+        with pytest.raises(requests.ConnectionError):
            auth.validate_credentials()

    def test_should_not_expose_api_key_in_error_messages(self):
--- a/api/tests/unit_tests/services/auth/test_watercrawl_auth.py
+++ b/api/tests/unit_tests/services/auth/test_watercrawl_auth.py
@@ -1,7 +1,7 @@
 from unittest.mock import MagicMock, patch

-import httpx
 import pytest
+import requests

 from services.auth.watercrawl.watercrawl import WatercrawlAuth

@@ -64,7 +64,7 @@ class TestWatercrawlAuth:
            WatercrawlAuth(credentials)
        assert str(exc_info.value) == expected_error

-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_validate_valid_credentials_successfully(self, mock_get, auth_instance):
        """Test successful credential validation"""
        mock_response = MagicMock()
@@ -87,7 +87,7 @@ class TestWatercrawlAuth:
            (500, "Internal server error"),
        ],
    )
-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_handle_http_errors(self, mock_get, status_code, error_message, auth_instance):
        """Test handling of various HTTP error codes"""
        mock_response = MagicMock()
@@ -107,7 +107,7 @@ class TestWatercrawlAuth:
            (401, "Not JSON", True, "Expecting value"),  # JSON decode error
        ],
    )
-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_handle_unexpected_errors(
        self, mock_get, status_code, response_text, has_json_error, expected_error_contains, auth_instance
    ):
@@ -126,13 +126,13 @@ class TestWatercrawlAuth:
    @pytest.mark.parametrize(
        ("exception_type", "exception_message"),
        [
-            (httpx.ConnectError, "Network error"),
-            (httpx.TimeoutException, "Request timeout"),
-            (httpx.ReadTimeout, "Read timeout"),
-            (httpx.ConnectTimeout, "Connection timeout"),
+            (requests.ConnectionError, "Network error"),
+            (requests.Timeout, "Request timeout"),
+            (requests.ReadTimeout, "Read timeout"),
+            (requests.ConnectTimeout, "Connection timeout"),
        ],
    )
-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_handle_network_errors(self, mock_get, exception_type, exception_message, auth_instance):
        """Test handling of various network-related errors including timeouts"""
        mock_get.side_effect = exception_type(exception_message)
@@ -154,7 +154,7 @@ class TestWatercrawlAuth:
            WatercrawlAuth({"auth_type": "bearer", "config": {"api_key": "super_secret_key_12345"}})
        assert "super_secret_key_12345" not in str(exc_info.value)

-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_use_custom_base_url_in_validation(self, mock_get):
        """Test that custom base URL is used in validation"""
        mock_response = MagicMock()
@@ -179,7 +179,7 @@ class TestWatercrawlAuth:
            ("https://app.watercrawl.dev//", "https://app.watercrawl.dev/api/v1/core/crawl-requests/"),
        ],
    )
-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_use_urljoin_for_url_construction(self, mock_get, base_url, expected_url):
        """Test that urljoin is used correctly for URL construction with various base URLs"""
        mock_response = MagicMock()
@@ -193,12 +193,12 @@ class TestWatercrawlAuth:
        # Verify the correct URL was called
        assert mock_get.call_args[0][0] == expected_url

-    @patch("services.auth.watercrawl.watercrawl.httpx.get")
+    @patch("services.auth.watercrawl.watercrawl.requests.get")
    def test_should_handle_timeout_with_retry_suggestion(self, mock_get, auth_instance):
        """Test that timeout errors are handled gracefully with appropriate error message"""
-        mock_get.side_effect = httpx.TimeoutException("The request timed out after 30 seconds")
+        mock_get.side_effect = requests.Timeout("The request timed out after 30 seconds")

-        with pytest.raises(httpx.TimeoutException) as exc_info:
+        with pytest.raises(requests.Timeout) as exc_info:
            auth_instance.validate_credentials()

        # Verify the timeout exception is raised with original message
--- a/api/tests/unit_tests/services/test_variable_truncator.py
+++ b/api/tests/unit_tests/services/test_variable_truncator.py
@@ -588,11 +588,3 @@ class TestIntegrationScenarios:
            if isinstance(result.result, ObjectSegment):
                result_size = truncator.calculate_json_size(result.result.value)
                assert result_size <= original_size
-
-    def test_file_and_array_file_variable_mapping(self, file):
-        truncator = VariableTruncator(string_length_limit=30, array_element_limit=3, max_size_bytes=300)
-
-        mapping = {"array_file": [file]}
-        truncated_mapping, truncated = truncator.truncate_variable_mapping(mapping)
-        assert truncated is False
-        assert truncated_mapping == mapping
--- a/api/uv.lock
+++ b/api/uv.lock
@@ -404,14 +404,14 @@ wheels = [

 [[package]]
 name = "authlib"
-version = "1.6.4"
+version = "1.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "cryptography" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ce/bb/73a1f1c64ee527877f64122422dafe5b87a846ccf4ac933fe21bcbb8fee8/authlib-1.6.4.tar.gz", hash = "sha256:104b0442a43061dc8bc23b133d1d06a2b0a9c2e3e33f34c4338929e816287649", size = 164046, upload-time = "2025-09-17T09:59:23.897Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/09/47/df70ecd34fbf86d69833fe4e25bb9ecbaab995c8e49df726dd416f6bb822/authlib-1.3.1.tar.gz", hash = "sha256:7ae843f03c06c5c0debd63c9db91f9fda64fa62a42a77419fa15fbb7e7a58917", size = 146074, upload-time = "2024-06-04T14:15:32.06Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0e/aa/91355b5f539caf1b94f0e66ff1e4ee39373b757fce08204981f7829ede51/authlib-1.6.4-py2.py3-none-any.whl", hash = "sha256:39313d2a2caac3ecf6d8f95fbebdfd30ae6ea6ae6a6db794d976405fdd9aa796", size = 243076, upload-time = "2025-09-17T09:59:22.259Z" },
+    { url = "https://files.pythonhosted.org/packages/87/1f/bc95e43ffb57c05b8efcc376dd55a0240bf58f47ddf5a0f92452b6457b75/Authlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:d35800b973099bbadc49b42b256ecb80041ad56b7fe1216a362c7943c088f377", size = 223827, upload-time = "2024-06-04T14:15:29.218Z" },
 ]

 [[package]]
@@ -544,6 +544,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/57/f4/a69c20ee4f660081a7dedb1ac57f29be9378e04edfcb90c526b923d4bebc/beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a", size = 142979, upload-time = "2023-04-07T15:02:50.77Z" },
 ]

+[[package]]
+name = "bidict"
+version = "0.23.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093, upload-time = "2024-02-18T19:09:05.748Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764, upload-time = "2024-02-18T19:09:04.156Z" },
+]
+
 [[package]]
 name = "billiard"
 version = "4.2.1"
@@ -578,16 +587,16 @@ wheels = [

 [[package]]
 name = "boto3-stubs"
-version = "1.40.35"
+version = "1.40.29"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "botocore-stubs" },
    { name = "types-s3transfer" },
    { name = "typing-extensions", marker = "python_full_version < '3.12'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/24/18/6a64ff9603845d635f6167b6d9a3f9a6e658d8a28eef36f8423eb5a99ae1/boto3_stubs-1.40.35.tar.gz", hash = "sha256:2d6f2dbe6e9b42deb7b8fbeed051461e7906903f26e99634d00be45cc40db41a", size = 100819, upload-time = "2025-09-19T19:42:36.372Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/dd/35/0cdc62641577e8a0a6d4191ecc803fee16adf18de1e81280eb3d87c7d9e8/boto3_stubs-1.40.29.tar.gz", hash = "sha256:9fc7d24dcbcc786093daf42487a9ed4a58a6be7f1ccf28f5be0b2bad4a3edb11", size = 100996, upload-time = "2025-09-11T19:48:28.487Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7a/d4/d744260908ad55903baefa086a3c9cabc50bfafd63c3f2d0e05688378013/boto3_stubs-1.40.35-py3-none-any.whl", hash = "sha256:2bb44e6c17831650a28e3e00bf5be0a6ba771fce08724ba978ffcd06a7bca7e3", size = 69689, upload-time = "2025-09-19T19:42:30.08Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/a2/e47bf7595fadc6154ff2941e9ab9bb68173fba95f5ccdb24e5c13d16e5e5/boto3_stubs-1.40.29-py3-none-any.whl", hash = "sha256:1ad373b68b1c9a5e8e5deb243ef3a4c5b1d2c25c3477559eba1089ed4a0ee94e", size = 69769, upload-time = "2025-09-11T19:48:20.453Z" },
 ]

 [package.optional-dependencies]
@@ -1074,6 +1083,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" },
 ]

+[[package]]
+name = "configargparse"
+version = "1.7.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/85/4d/6c9ef746dfcc2a32e26f3860bb4a011c008c392b83eabdfb598d1a8bbe5d/configargparse-1.7.1.tar.gz", hash = "sha256:79c2ddae836a1e5914b71d58e4b9adbd9f7779d4e6351a637b7d2d9b6c46d3d9", size = 43958, upload-time = "2025-05-23T14:26:17.369Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/31/28/d28211d29bcc3620b1fece85a65ce5bb22f18670a03cd28ea4b75ede270c/configargparse-1.7.1-py3-none-any.whl", hash = "sha256:8b586a31f9d873abd1ca527ffbe58863c99f36d896e2829779803125e83be4b6", size = 25607, upload-time = "2025-05-23T14:26:15.923Z" },
+]
+
 [[package]]
 name = "cos-python-sdk-v5"
 version = "1.9.30"
@@ -1273,7 +1291,7 @@ wheels = [

 [[package]]
 name = "dify-api"
-version = "1.9.0"
+version = "2.0.0-beta2"
 source = { virtual = "." }
 dependencies = [
    { name = "arize-phoenix-otel" },
@@ -1371,6 +1389,7 @@ dev = [
    { name = "faker" },
    { name = "hypothesis" },
    { name = "import-linter" },
+    { name = "locust" },
    { name = "lxml-stubs" },
    { name = "mypy" },
    { name = "pandas-stubs" },
@@ -1471,7 +1490,7 @@ vdb = [
 [package.metadata]
 requires-dist = [
    { name = "arize-phoenix-otel", specifier = "~=0.9.2" },
-    { name = "authlib", specifier = "==1.6.4" },
+    { name = "authlib", specifier = "==1.3.1" },
    { name = "azure-identity", specifier = "==1.16.1" },
    { name = "beautifulsoup4", specifier = "==4.12.2" },
    { name = "boto3", specifier = "==1.35.99" },
@@ -1487,7 +1506,7 @@ requires-dist = [
    { name = "flask-orjson", specifier = "~=2.0.0" },
    { name = "flask-restx", specifier = "~=1.3.0" },
    { name = "flask-sqlalchemy", specifier = "~=3.1.1" },
-    { name = "gevent", specifier = "~=25.9.1" },
+    { name = "gevent", specifier = "~=24.11.1" },
    { name = "gmpy2", specifier = "~=2.2.1" },
    { name = "google-api-core", specifier = "==2.18.0" },
    { name = "google-api-python-client", specifier = "==2.90.0" },
@@ -1565,6 +1584,7 @@ dev = [
    { name = "faker", specifier = "~=32.1.0" },
    { name = "hypothesis", specifier = ">=6.131.15" },
    { name = "import-linter", specifier = ">=2.3" },
+    { name = "locust", specifier = ">=2.40.4" },
    { name = "lxml-stubs", specifier = "~=0.5.1" },
    { name = "mypy", specifier = "~=1.17.1" },
    { name = "pandas-stubs", specifier = "~=2.2.3" },
@@ -1650,7 +1670,7 @@ vdb = [
    { name = "pgvecto-rs", extras = ["sqlalchemy"], specifier = "~=0.2.1" },
    { name = "pgvector", specifier = "==0.2.5" },
    { name = "pymilvus", specifier = "~=2.5.0" },
-    { name = "pymochow", specifier = "==2.2.9" },
+    { name = "pymochow", specifier = "==1.3.1" },
    { name = "pyobvector", specifier = "~=0.2.15" },
    { name = "qdrant-client", specifier = "==1.9.0" },
    { name = "tablestore", specifier = "==6.2.0" },
@@ -2045,7 +2065,7 @@ wheels = [

 [[package]]
 name = "gevent"
-version = "25.9.1"
+version = "24.11.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "cffi", marker = "platform_python_implementation == 'CPython' and sys_platform == 'win32'" },
@@ -2053,23 +2073,76 @@ dependencies = [
    { name = "zope-event" },
    { name = "zope-interface" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/9e/48/b3ef2673ffb940f980966694e40d6d32560f3ffa284ecaeb5ea3a90a6d3f/gevent-25.9.1.tar.gz", hash = "sha256:adf9cd552de44a4e6754c51ff2e78d9193b7fa6eab123db9578a210e657235dd", size = 5059025, upload-time = "2025-09-17T16:15:34.528Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ab/75/a53f1cb732420f5e5d79b2563fc3504d22115e7ecfe7966e5cf9b3582ae7/gevent-24.11.1.tar.gz", hash = "sha256:8bd1419114e9e4a3ed33a5bad766afff9a3cf765cb440a582a1b3a9bc80c1aca", size = 5976624, upload-time = "2024-11-11T15:36:45.991Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/81/86/03f8db0704fed41b0fa830425845f1eb4e20c92efa3f18751ee17809e9c6/gevent-25.9.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5aff9e8342dc954adb9c9c524db56c2f3557999463445ba3d9cbe3dada7b7", size = 1792418, upload-time = "2025-09-17T15:41:24.384Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/35/f6b3a31f0849a62cfa2c64574bcc68a781d5499c3195e296e892a121a3cf/gevent-25.9.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1cdf6db28f050ee103441caa8b0448ace545364f775059d5e2de089da975c457", size = 1875700, upload-time = "2025-09-17T15:48:59.652Z" },
-    { url = "https://files.pythonhosted.org/packages/66/1e/75055950aa9b48f553e061afa9e3728061b5ccecca358cef19166e4ab74a/gevent-25.9.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:812debe235a8295be3b2a63b136c2474241fa5c58af55e6a0f8cfc29d4936235", size = 1831365, upload-time = "2025-09-17T15:49:19.426Z" },
-    { url = "https://files.pythonhosted.org/packages/31/e8/5c1f6968e5547e501cfa03dcb0239dff55e44c3660a37ec534e32a0c008f/gevent-25.9.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b28b61ff9216a3d73fe8f35669eefcafa957f143ac534faf77e8a19eb9e6883a", size = 2122087, upload-time = "2025-09-17T15:15:12.329Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/2c/ebc5d38a7542af9fb7657bfe10932a558bb98c8a94e4748e827d3823fced/gevent-25.9.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5e4b6278b37373306fc6b1e5f0f1cf56339a1377f67c35972775143d8d7776ff", size = 1808776, upload-time = "2025-09-17T15:52:40.16Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/26/e1d7d6c8ffbf76fe1fbb4e77bdb7f47d419206adc391ec40a8ace6ebbbf0/gevent-25.9.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d99f0cb2ce43c2e8305bf75bee61a8bde06619d21b9d0316ea190fc7a0620a56", size = 2179141, upload-time = "2025-09-17T15:24:09.895Z" },
-    { url = "https://files.pythonhosted.org/packages/1d/6c/bb21fd9c095506aeeaa616579a356aa50935165cc0f1e250e1e0575620a7/gevent-25.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:72152517ecf548e2f838c61b4be76637d99279dbaa7e01b3924df040aa996586", size = 1677941, upload-time = "2025-09-17T19:59:50.185Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/49/e55930ba5259629eb28ac7ee1abbca971996a9165f902f0249b561602f24/gevent-25.9.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:46b188248c84ffdec18a686fcac5dbb32365d76912e14fda350db5dc0bfd4f86", size = 2955991, upload-time = "2025-09-17T14:52:30.568Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/88/63dc9e903980e1da1e16541ec5c70f2b224ec0a8e34088cb42794f1c7f52/gevent-25.9.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f2b54ea3ca6f0c763281cd3f96010ac7e98c2e267feb1221b5a26e2ca0b9a692", size = 1808503, upload-time = "2025-09-17T15:41:25.59Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/8d/7236c3a8f6ef7e94c22e658397009596fa90f24c7d19da11ad7ab3a9248e/gevent-25.9.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7a834804ac00ed8a92a69d3826342c677be651b1c3cd66cc35df8bc711057aa2", size = 1890001, upload-time = "2025-09-17T15:49:01.227Z" },
-    { url = "https://files.pythonhosted.org/packages/4f/63/0d7f38c4a2085ecce26b50492fc6161aa67250d381e26d6a7322c309b00f/gevent-25.9.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:323a27192ec4da6b22a9e51c3d9d896ff20bc53fdc9e45e56eaab76d1c39dd74", size = 1855335, upload-time = "2025-09-17T15:49:20.582Z" },
-    { url = "https://files.pythonhosted.org/packages/95/18/da5211dfc54c7a57e7432fd9a6ffeae1ce36fe5a313fa782b1c96529ea3d/gevent-25.9.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6ea78b39a2c51d47ff0f130f4c755a9a4bbb2dd9721149420ad4712743911a51", size = 2109046, upload-time = "2025-09-17T15:15:13.817Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/5a/7bb5ec8e43a2c6444853c4a9f955f3e72f479d7c24ea86c95fb264a2de65/gevent-25.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:dc45cd3e1cc07514a419960af932a62eb8515552ed004e56755e4bf20bad30c5", size = 1827099, upload-time = "2025-09-17T15:52:41.384Z" },
-    { url = "https://files.pythonhosted.org/packages/ca/d4/b63a0a60635470d7d986ef19897e893c15326dd69e8fb342c76a4f07fe9e/gevent-25.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34e01e50c71eaf67e92c186ee0196a039d6e4f4b35670396baed4a2d8f1b347f", size = 2172623, upload-time = "2025-09-17T15:24:12.03Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/98/caf06d5d22a7c129c1fb2fc1477306902a2c8ddfd399cd26bbbd4caf2141/gevent-25.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:4acd6bcd5feabf22c7c5174bd3b9535ee9f088d2bbce789f740ad8d6554b18f3", size = 1682837, upload-time = "2025-09-17T19:48:47.318Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/fd/86a170f77ef51a15297573c50dbec4cc67ddc98b677cc2d03cc7f2927f4c/gevent-24.11.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:351d1c0e4ef2b618ace74c91b9b28b3eaa0dd45141878a964e03c7873af09f62", size = 2951424, upload-time = "2024-11-11T14:32:36.451Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/0a/987268c9d446f61883bc627c77c5ed4a97869c0f541f76661a62b2c411f6/gevent-24.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5efe72e99b7243e222ba0c2c2ce9618d7d36644c166d63373af239da1036bab", size = 4878504, upload-time = "2024-11-11T15:20:03.521Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/d4/2f77ddd837c0e21b4a4460bcb79318b6754d95ef138b7a29f3221c7e9993/gevent-24.11.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d3b249e4e1f40c598ab8393fc01ae6a3b4d51fc1adae56d9ba5b315f6b2d758", size = 5007668, upload-time = "2024-11-11T15:21:00.422Z" },
+    { url = "https://files.pythonhosted.org/packages/80/a0/829e0399a1f9b84c344b72d2be9aa60fe2a64e993cac221edcc14f069679/gevent-24.11.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81d918e952954675f93fb39001da02113ec4d5f4921bf5a0cc29719af6824e5d", size = 5067055, upload-time = "2024-11-11T15:22:44.279Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/67/0e693f9ddb7909c2414f8fcfc2409aa4157884c147bc83dab979e9cf717c/gevent-24.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9c935b83d40c748b6421625465b7308d87c7b3717275acd587eef2bd1c39546", size = 6761883, upload-time = "2024-11-11T14:57:09.359Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/b6/b69883fc069d7148dd23c5dda20826044e54e7197f3c8e72b8cc2cd4035a/gevent-24.11.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff96c5739834c9a594db0e12bf59cb3fa0e5102fc7b893972118a3166733d61c", size = 5440802, upload-time = "2024-11-11T15:37:04.983Z" },
+    { url = "https://files.pythonhosted.org/packages/32/4e/b00094d995ff01fd88b3cf6b9d1d794f935c31c645c431e65cd82d808c9c/gevent-24.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d6c0a065e31ef04658f799215dddae8752d636de2bed61365c358f9c91e7af61", size = 6866992, upload-time = "2024-11-11T15:03:44.208Z" },
+    { url = "https://files.pythonhosted.org/packages/37/ed/58dbe9fb09d36f6477ff8db0459ebd3be9a77dc05ae5d96dc91ad657610d/gevent-24.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:97e2f3999a5c0656f42065d02939d64fffaf55861f7d62b0107a08f52c984897", size = 1543736, upload-time = "2024-11-11T15:03:06.121Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/32/301676f67ffa996ff1c4175092fb0c48c83271cc95e5c67650b87156b6cf/gevent-24.11.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:a3d75fa387b69c751a3d7c5c3ce7092a171555126e136c1d21ecd8b50c7a6e46", size = 2956467, upload-time = "2024-11-11T14:32:33.238Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/84/aef1a598123cef2375b6e2bf9d17606b961040f8a10e3dcc3c3dd2a99f05/gevent-24.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:beede1d1cff0c6fafae3ab58a0c470d7526196ef4cd6cc18e7769f207f2ea4eb", size = 5136486, upload-time = "2024-11-11T15:20:04.972Z" },
+    { url = "https://files.pythonhosted.org/packages/92/7b/04f61187ee1df7a913b3fca63b0a1206c29141ab4d2a57e7645237b6feb5/gevent-24.11.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85329d556aaedced90a993226d7d1186a539c843100d393f2349b28c55131c85", size = 5299718, upload-time = "2024-11-11T15:21:03.354Z" },
+    { url = "https://files.pythonhosted.org/packages/36/2a/ebd12183ac25eece91d084be2111e582b061f4d15ead32239b43ed47e9ba/gevent-24.11.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:816b3883fa6842c1cf9d2786722014a0fd31b6312cca1f749890b9803000bad6", size = 5400118, upload-time = "2024-11-11T15:22:45.897Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/c9/f006c0cd59f0720fbb62ee11da0ad4c4c0fd12799afd957dd491137e80d9/gevent-24.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b24d800328c39456534e3bc3e1684a28747729082684634789c2f5a8febe7671", size = 6775163, upload-time = "2024-11-11T14:57:11.991Z" },
+    { url = "https://files.pythonhosted.org/packages/49/f1/5edf00b674b10d67e3b967c2d46b8a124c2bc8cfd59d4722704392206444/gevent-24.11.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a5f1701ce0f7832f333dd2faf624484cbac99e60656bfbb72504decd42970f0f", size = 5479886, upload-time = "2024-11-11T15:37:06.558Z" },
+    { url = "https://files.pythonhosted.org/packages/22/11/c48e62744a32c0d48984268ae62b99edb81eaf0e03b42de52e2f09855509/gevent-24.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:d740206e69dfdfdcd34510c20adcb9777ce2cc18973b3441ab9767cd8948ca8a", size = 6891452, upload-time = "2024-11-11T15:03:46.892Z" },
+    { url = "https://files.pythonhosted.org/packages/11/b2/5d20664ef6a077bec9f27f7a7ee761edc64946d0b1e293726a3d074a9a18/gevent-24.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:68bee86b6e1c041a187347ef84cf03a792f0b6c7238378bf6ba4118af11feaae", size = 1541631, upload-time = "2024-11-11T14:55:34.977Z" },
+]
+
+[[package]]
+name = "geventhttpclient"
+version = "2.3.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "brotli" },
+    { name = "certifi" },
+    { name = "gevent" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/89/19/1ca8de73dcc0596d3df01be299e940d7fc3bccbeb6f62bb8dd2d427a3a50/geventhttpclient-2.3.4.tar.gz", hash = "sha256:1749f75810435a001fc6d4d7526c92cf02b39b30ab6217a886102f941c874222", size = 83545, upload-time = "2025-06-11T13:18:14.144Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/c7/c4c31bd92b08c4e34073c722152b05c48c026bc6978cf04f52be7e9050d5/geventhttpclient-2.3.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fb8f6a18f1b5e37724111abbd3edf25f8f00e43dc261b11b10686e17688d2405", size = 71919, upload-time = "2025-06-11T13:16:49.796Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/8a/4565e6e768181ecb06677861d949b3679ed29123b6f14333e38767a17b5a/geventhttpclient-2.3.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dbb28455bb5d82ca3024f9eb7d65c8ff6707394b584519def497b5eb9e5b1222", size = 52577, upload-time = "2025-06-11T13:16:50.657Z" },
+    { url = "https://files.pythonhosted.org/packages/02/a1/fb623cf478799c08f95774bc41edb8ae4c2f1317ae986b52f233d0f3fa05/geventhttpclient-2.3.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96578fc4a5707b5535d1c25a89e72583e02aafe64d14f3b4d78f9c512c6d613c", size = 51981, upload-time = "2025-06-11T13:16:52.586Z" },
+    { url = "https://files.pythonhosted.org/packages/18/b2/a4ddd3d24c8aa064b19b9f180eb5e1517248518289d38af70500569ebedf/geventhttpclient-2.3.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:19721357db976149ccf54ac279eab8139da8cdf7a11343fd02212891b6f39677", size = 114287, upload-time = "2025-08-24T12:16:47.101Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/cc/caac4d4bd2c72d53836dbf50018aed3747c0d0c6f1d08175a785083d9d36/geventhttpclient-2.3.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecf830cdcd1d4d28463c8e0c48f7f5fb06f3c952fff875da279385554d1d4d65", size = 115208, upload-time = "2025-08-24T12:16:48.108Z" },
+    { url = "https://files.pythonhosted.org/packages/04/a2/8278bd4d16b9df88bd538824595b7b84efd6f03c7b56b2087d09be838e02/geventhttpclient-2.3.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:47dbf8a163a07f83b38b0f8a35b85e5d193d3af4522ab8a5bbecffff1a4cd462", size = 121101, upload-time = "2025-08-24T12:16:49.417Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/0e/a9ebb216140bd0854007ff953094b2af983cdf6d4aec49796572fcbf2606/geventhttpclient-2.3.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e39ad577b33a5be33b47bff7c2dda9b19ced4773d169d6555777cd8445c13c0", size = 118494, upload-time = "2025-06-11T13:16:54.172Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/95/6d45dead27e4f5db7a6d277354b0e2877c58efb3cd1687d90a02d5c7b9cd/geventhttpclient-2.3.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:110d863baf7f0a369b6c22be547c5582e87eea70ddda41894715c870b2e82eb0", size = 123860, upload-time = "2025-06-11T13:16:55.824Z" },
+    { url = "https://files.pythonhosted.org/packages/70/a1/4baa8dca3d2df94e6ccca889947bb5929aca5b64b59136bbf1779b5777ba/geventhttpclient-2.3.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d9fca98469bd770e3efd88326854296d1aa68016f285bd1a2fb6cd21e17ee", size = 114969, upload-time = "2025-06-11T13:16:58.02Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/48/123fa67f6fca14c557332a168011565abd9cbdccc5c8b7ed76d9a736aeb2/geventhttpclient-2.3.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71dbc6d4004017ef88c70229809df4ad2317aad4876870c0b6bcd4d6695b7a8d", size = 113311, upload-time = "2025-06-11T13:16:59.423Z" },
+    { url = "https://files.pythonhosted.org/packages/93/e4/8a467991127ca6c53dd79a8aecb26a48207e7e7976c578fb6eb31378792c/geventhttpclient-2.3.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ed35391ad697d6cda43c94087f59310f028c3e9fb229e435281a92509469c627", size = 111154, upload-time = "2025-06-11T13:17:01.139Z" },
+    { url = "https://files.pythonhosted.org/packages/11/e7/cca0663d90bc8e68592a62d7b28148eb9fd976f739bb107e4c93f9ae6d81/geventhttpclient-2.3.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:97cd2ab03d303fd57dea4f6d9c2ab23b7193846f1b3bbb4c80b315ebb5fc8527", size = 112532, upload-time = "2025-06-11T13:17:03.729Z" },
+    { url = "https://files.pythonhosted.org/packages/02/98/625cee18a3be5f7ca74c612d4032b0c013b911eb73c7e72e06fa56a44ba2/geventhttpclient-2.3.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ec4d1aa08569b7eb075942caeacabefee469a0e283c96c7aac0226d5e7598fe8", size = 117806, upload-time = "2025-06-11T13:17:05.138Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/5e/e561a5f8c9d98b7258685355aacb9cca8a3c714190cf92438a6e91da09d5/geventhttpclient-2.3.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:93926aacdb0f4289b558f213bc32c03578f3432a18b09e4b6d73a716839d7a74", size = 111392, upload-time = "2025-06-11T13:17:06.053Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/37/42d09ad90fd1da960ff68facaa3b79418ccf66297f202ba5361038fc3182/geventhttpclient-2.3.4-cp311-cp311-win32.whl", hash = "sha256:ea87c25e933991366049a42c88e91ad20c2b72e11c7bd38ef68f80486ab63cb2", size = 48332, upload-time = "2025-06-11T13:17:06.965Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/0b/55e2a9ed4b1aed7c97e857dc9649a7e804609a105e1ef3cb01da857fbce7/geventhttpclient-2.3.4-cp311-cp311-win_amd64.whl", hash = "sha256:e02e0e9ef2e45475cf33816c8fb2e24595650bcf259e7b15b515a7b49cae1ccf", size = 48969, upload-time = "2025-06-11T13:17:08.239Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/72/dcbc6dbf838549b7b0c2c18c1365d2580eb7456939e4b608c3ab213fce78/geventhttpclient-2.3.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9ac30c38d86d888b42bb2ab2738ab9881199609e9fa9a153eb0c66fc9188c6cb", size = 71984, upload-time = "2025-06-11T13:17:09.126Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/f9/74aa8c556364ad39b238919c954a0da01a6154ad5e85a1d1ab5f9f5ac186/geventhttpclient-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b802000a4fad80fa57e895009671d6e8af56777e3adf0d8aee0807e96188fd9", size = 52631, upload-time = "2025-06-11T13:17:10.061Z" },
+    { url = "https://files.pythonhosted.org/packages/11/1a/bc4b70cba8b46be8b2c6ca5b8067c4f086f8c90915eb68086ab40ff6243d/geventhttpclient-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:461e4d9f4caee481788ec95ac64e0a4a087c1964ddbfae9b6f2dc51715ba706c", size = 51991, upload-time = "2025-06-11T13:17:11.049Z" },
+    { url = "https://files.pythonhosted.org/packages/03/3f/5ce6e003b3b24f7caf3207285831afd1a4f857ce98ac45e1fb7a6815bd58/geventhttpclient-2.3.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b7e41687c74e8fbe6a665458bbaea0c5a75342a95e2583738364a73bcbf1671b", size = 114982, upload-time = "2025-08-24T12:16:50.76Z" },
+    { url = "https://files.pythonhosted.org/packages/60/16/6f9dad141b7c6dd7ee831fbcd72dd02535c57bc1ec3c3282f07e72c31344/geventhttpclient-2.3.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ea5da20f4023cf40207ce15f5f4028377ffffdba3adfb60b4c8f34925fce79", size = 115654, upload-time = "2025-08-24T12:16:52.072Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/52/9b516a2ff423d8bd64c319e1950a165ceebb552781c5a88c1e94e93e8713/geventhttpclient-2.3.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:91f19a8a6899c27867dbdace9500f337d3e891a610708e86078915f1d779bf53", size = 121672, upload-time = "2025-08-24T12:16:53.361Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/f5/8d0f1e998f6d933c251b51ef92d11f7eb5211e3cd579018973a2b455f7c5/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41f2dcc0805551ea9d49f9392c3b9296505a89b9387417b148655d0d8251b36e", size = 119012, upload-time = "2025-06-11T13:17:11.956Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/0e/59e4ab506b3c19fc72e88ca344d150a9028a00c400b1099637100bec26fc/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62f3a29bf242ecca6360d497304900683fd8f42cbf1de8d0546c871819251dad", size = 124565, upload-time = "2025-06-11T13:17:12.896Z" },
+    { url = "https://files.pythonhosted.org/packages/39/5d/dcbd34dfcda0c016b4970bd583cb260cc5ebfc35b33d0ec9ccdb2293587a/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8714a3f2c093aeda3ffdb14c03571d349cb3ed1b8b461d9f321890659f4a5dbf", size = 115573, upload-time = "2025-06-11T13:17:13.937Z" },
+    { url = "https://files.pythonhosted.org/packages/03/51/89af99e4805e9ce7f95562dfbd23c0b0391830831e43d58f940ec74489ac/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b11f38b74bab75282db66226197024a731250dcbe25542fd4e85ac5313547332", size = 114260, upload-time = "2025-06-11T13:17:14.913Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/ec/3a3000bda432953abcc6f51d008166fa7abc1eeddd1f0246933d83854f73/geventhttpclient-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fccc2023a89dfbce2e1b1409b967011e45d41808df81b7fa0259397db79ba647", size = 111592, upload-time = "2025-06-11T13:17:15.879Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/a3/88fd71fe6bbe1315a2d161cbe2cc7810c357d99bced113bea1668ede8bcf/geventhttpclient-2.3.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9d54b8e9a44890159ae36ba4ae44efd8bb79ff519055137a340d357538a68aa3", size = 113216, upload-time = "2025-06-11T13:17:16.883Z" },
+    { url = "https://files.pythonhosted.org/packages/52/eb/20435585a6911b26e65f901a827ef13551c053133926f8c28a7cca0fb08e/geventhttpclient-2.3.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:407cb68a3c3a2c4f5d503930298f2b26ae68137d520e8846d8e230a9981d9334", size = 118450, upload-time = "2025-06-11T13:17:17.968Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/79/82782283d613570373990b676a0966c1062a38ca8f41a0f20843c5808e01/geventhttpclient-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:54fbbcca2dcf06f12a337dd8f98417a09a49aa9d9706aa530fc93acb59b7d83c", size = 112226, upload-time = "2025-06-11T13:17:18.942Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/c4/417d12fc2a31ad93172b03309c7f8c3a8bbd0cf25b95eb7835de26b24453/geventhttpclient-2.3.4-cp312-cp312-win32.whl", hash = "sha256:83143b41bde2eb010c7056f142cb764cfbf77f16bf78bda2323a160767455cf5", size = 48365, upload-time = "2025-06-11T13:17:20.096Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/f4/7e5ee2f460bbbd09cb5d90ff63a1cf80d60f1c60c29dac20326324242377/geventhttpclient-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:46eda9a9137b0ca7886369b40995d2a43a5dff033d0a839a54241015d1845d41", size = 48961, upload-time = "2025-06-11T13:17:21.111Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/a7/de506f91a1ec67d3c4a53f2aa7475e7ffb869a17b71b94ba370a027a69ac/geventhttpclient-2.3.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:707a66cd1e3bf06e2c4f8f21d3b4e6290c9e092456f489c560345a8663cdd93e", size = 50828, upload-time = "2025-06-11T13:17:57.589Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/43/86479c278e96cd3e190932b0003d5b8e415660d9e519d59094728ae249da/geventhttpclient-2.3.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0129ce7ef50e67d66ea5de44d89a3998ab778a4db98093d943d6855323646fa5", size = 50086, upload-time = "2025-06-11T13:17:58.567Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/f7/d3e04f95de14db3ca4fe126eb0e3ec24356125c5ca1f471a9b28b1d7714d/geventhttpclient-2.3.4-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fac2635f68b3b6752c2a576833d9d18f0af50bdd4bd7dd2d2ca753e3b8add84c", size = 54523, upload-time = "2025-06-11T13:17:59.536Z" },
+    { url = "https://files.pythonhosted.org/packages/45/a7/d80c9ec1663f70f4bd976978bf86b3d0d123a220c4ae636c66d02d3accdb/geventhttpclient-2.3.4-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71206ab89abdd0bd5fee21e04a3995ec1f7d8ae1478ee5868f9e16e85a831653", size = 58866, upload-time = "2025-06-11T13:18:03.719Z" },
+    { url = "https://files.pythonhosted.org/packages/55/92/d874ff7e52803cef3850bf8875816a9f32e0a154b079a74e6663534bef30/geventhttpclient-2.3.4-pp311-pypy311_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8bde667d0ce46065fe57f8ff24b2e94f620a5747378c97314dcfc8fbab35b73", size = 54766, upload-time = "2025-06-11T13:18:04.724Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/73/2e03125170485193fcc99ef23b52749543d6c6711706d58713fe315869c4/geventhttpclient-2.3.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:5f71c75fc138331cbbe668a08951d36b641d2c26fb3677d7e497afb8419538db", size = 49011, upload-time = "2025-06-11T13:18:05.702Z" },
 ]

 [[package]]
@@ -3087,6 +3160,51 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380, upload-time = "2025-01-20T11:14:02.442Z" },
 ]

+[[package]]
+name = "locust"
+version = "2.40.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "configargparse" },
+    { name = "flask" },
+    { name = "flask-cors" },
+    { name = "flask-login" },
+    { name = "gevent" },
+    { name = "geventhttpclient" },
+    { name = "locust-cloud" },
+    { name = "msgpack" },
+    { name = "psutil" },
+    { name = "pytest" },
+    { name = "python-engineio" },
+    { name = "python-socketio", extra = ["client"] },
+    { name = "pywin32", marker = "sys_platform == 'win32'" },
+    { name = "pyzmq" },
+    { name = "requests" },
+    { name = "setuptools" },
+    { name = "typing-extensions", marker = "python_full_version < '3.12'" },
+    { name = "werkzeug" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c8/40/31ff56ab6f46c7c77e61bbbd23f87fdf6a4aaf674dc961a3c573320caedc/locust-2.40.4.tar.gz", hash = "sha256:3a3a470459edc4ba1349229bf1aca4c0cb651c4e2e3f85d3bc28fe8118f5a18f", size = 1412529, upload-time = "2025-09-11T09:26:13.713Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/7e/db1d969caf45ce711e81cd4f3e7c4554c3925a02383a1dcadb442eae3802/locust-2.40.4-py3-none-any.whl", hash = "sha256:50e647a73c5a4e7a775c6e4311979472fce8b00ed783837a2ce9bb36786f7d1a", size = 1430961, upload-time = "2025-09-11T09:26:11.623Z" },
+]
+
+[[package]]
+name = "locust-cloud"
+version = "1.26.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "configargparse" },
+    { name = "gevent" },
+    { name = "platformdirs" },
+    { name = "python-engineio" },
+    { name = "python-socketio", extra = ["client"] },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/84/ad/10b299b134068a4250a9156e6832a717406abe1dfea2482a07ae7bdca8f3/locust_cloud-1.26.3.tar.gz", hash = "sha256:587acfd4d2dee715fb5f0c3c2d922770babf0b7cff7b2927afbb693a9cd193cc", size = 456042, upload-time = "2025-07-15T19:51:53.791Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/50/6a/276fc50a9d170e7cbb6715735480cb037abb526639bca85491576e6eee4a/locust_cloud-1.26.3-py3-none-any.whl", hash = "sha256:8cb4b8bb9adcd5b99327bc8ed1d98cf67a29d9d29512651e6e94869de6f1faa8", size = 410023, upload-time = "2025-07-15T19:51:52.056Z" },
+]
+
 [[package]]
 name = "lxml"
 version = "6.0.1"
@@ -3366,6 +3484,34 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" },
 ]

+[[package]]
+name = "msgpack"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555, upload-time = "2025-06-13T06:52:51.324Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7f/83/97f24bf9848af23fe2ba04380388216defc49a8af6da0c28cc636d722502/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558", size = 82728, upload-time = "2025-06-13T06:51:50.68Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/7f/2eaa388267a78401f6e182662b08a588ef4f3de6f0eab1ec09736a7aaa2b/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d", size = 79279, upload-time = "2025-06-13T06:51:51.72Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/46/31eb60f4452c96161e4dfd26dbca562b4ec68c72e4ad07d9566d7ea35e8a/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0", size = 423859, upload-time = "2025-06-13T06:51:52.749Z" },
+    { url = "https://files.pythonhosted.org/packages/45/16/a20fa8c32825cc7ae8457fab45670c7a8996d7746ce80ce41cc51e3b2bd7/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f", size = 429975, upload-time = "2025-06-13T06:51:53.97Z" },
+    { url = "https://files.pythonhosted.org/packages/86/ea/6c958e07692367feeb1a1594d35e22b62f7f476f3c568b002a5ea09d443d/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704", size = 413528, upload-time = "2025-06-13T06:51:55.507Z" },
+    { url = "https://files.pythonhosted.org/packages/75/05/ac84063c5dae79722bda9f68b878dc31fc3059adb8633c79f1e82c2cd946/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2", size = 413338, upload-time = "2025-06-13T06:51:57.023Z" },
+    { url = "https://files.pythonhosted.org/packages/69/e8/fe86b082c781d3e1c09ca0f4dacd457ede60a13119b6ce939efe2ea77b76/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2", size = 422658, upload-time = "2025-06-13T06:51:58.419Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/2b/bafc9924df52d8f3bb7c00d24e57be477f4d0f967c0a31ef5e2225e035c7/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752", size = 427124, upload-time = "2025-06-13T06:51:59.969Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/3b/1f717e17e53e0ed0b68fa59e9188f3f610c79d7151f0e52ff3cd8eb6b2dc/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295", size = 65016, upload-time = "2025-06-13T06:52:01.294Z" },
+    { url = "https://files.pythonhosted.org/packages/48/45/9d1780768d3b249accecc5a38c725eb1e203d44a191f7b7ff1941f7df60c/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458", size = 72267, upload-time = "2025-06-13T06:52:02.568Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359, upload-time = "2025-06-13T06:52:03.909Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172, upload-time = "2025-06-13T06:52:05.246Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013, upload-time = "2025-06-13T06:52:06.341Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905, upload-time = "2025-06-13T06:52:07.501Z" },
+    { url = "https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336, upload-time = "2025-06-13T06:52:09.047Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485, upload-time = "2025-06-13T06:52:10.382Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182, upload-time = "2025-06-13T06:52:11.644Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883, upload-time = "2025-06-13T06:52:12.806Z" },
+    { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406, upload-time = "2025-06-13T06:52:14.271Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558, upload-time = "2025-06-13T06:52:15.252Z" },
+]
+
 [[package]]
 name = "msrest"
 version = "0.7.1"
@@ -4789,16 +4935,16 @@ wheels = [

 [[package]]
 name = "pymochow"
-version = "2.2.9"
+version = "1.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "future" },
    { name = "orjson" },
    { name = "requests" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/b5/29/d9b112684ce490057b90bddede3fb6a69cf2787a3fd7736bdce203e77388/pymochow-2.2.9.tar.gz", hash = "sha256:5a28058edc8861deb67524410e786814571ed9fe0700c8c9fc0bc2ad5835b06c", size = 50079, upload-time = "2025-06-05T08:33:19.59Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/cc/da/3027eeeaf7a7db9b0ca761079de4e676a002e1cc2c4260dab0ce812972b8/pymochow-1.3.1.tar.gz", hash = "sha256:1693d10cd0bb7bce45327890a90adafb503155922ccc029acb257699a73a20ba", size = 30800, upload-time = "2024-09-11T12:06:37.88Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bf/9b/be18f9709dfd8187ff233be5acb253a9f4f1b07f1db0e7b09d84197c28e2/pymochow-2.2.9-py3-none-any.whl", hash = "sha256:639192b97f143d4a22fc163872be12aee19523c46f12e22416e8f289f1354d15", size = 77899, upload-time = "2025-06-05T08:33:17.424Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/74/4b6227717f6baa37e7288f53e0fd55764939abc4119342eed4924a98f477/pymochow-1.3.1-py3-none-any.whl", hash = "sha256:a7f3b34fd6ea5d1d8413650bb6678365aa148fc396ae945e4ccb4f2365a52327", size = 42697, upload-time = "2024-09-11T12:06:36.114Z" },
 ]

 [[package]]
@@ -5041,6 +5187,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload-time = "2024-01-23T06:32:58.246Z" },
 ]

+[[package]]
+name = "python-engineio"
+version = "4.12.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "simple-websocket" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ba/0b/67295279b66835f9fa7a491650efcd78b20321c127036eef62c11a31e028/python_engineio-4.12.2.tar.gz", hash = "sha256:e7e712ffe1be1f6a05ee5f951e72d434854a32fcfc7f6e4d9d3cae24ec70defa", size = 91677, upload-time = "2025-06-04T19:22:18.789Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/fa/df59acedf7bbb937f69174d00f921a7b93aa5a5f5c17d05296c814fff6fc/python_engineio-4.12.2-py3-none-any.whl", hash = "sha256:8218ab66950e179dfec4b4bbb30aecf3f5d86f5e58e6fc1aa7fde2c698b2804f", size = 59536, upload-time = "2025-06-04T19:22:16.916Z" },
+]
+
 [[package]]
 name = "python-http-client"
 version = "3.3.7"
@@ -5097,6 +5255,25 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" },
 ]

+[[package]]
+name = "python-socketio"
+version = "5.13.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "bidict" },
+    { name = "python-engineio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/21/1a/396d50ccf06ee539fa758ce5623b59a9cb27637fc4b2dc07ed08bf495e77/python_socketio-5.13.0.tar.gz", hash = "sha256:ac4e19a0302ae812e23b712ec8b6427ca0521f7c582d6abb096e36e24a263029", size = 121125, upload-time = "2025-04-12T15:46:59.933Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/32/b4fb8585d1be0f68bde7e110dffbcf354915f77ad8c778563f0ad9655c02/python_socketio-5.13.0-py3-none-any.whl", hash = "sha256:51f68d6499f2df8524668c24bcec13ba1414117cfb3a90115c559b601ab10caf", size = 77800, upload-time = "2025-04-12T15:46:58.412Z" },
+]
+
+[package.optional-dependencies]
+client = [
+    { name = "requests" },
+    { name = "websocket-client" },
+]
+
 [[package]]
 name = "pytz"
 version = "2025.2"
@@ -5154,6 +5331,42 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" },
 ]

+[[package]]
+name = "pyzmq"
+version = "27.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cffi", marker = "implementation_name == 'pypy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/04/0b/3c9baedbdf613ecaa7aa07027780b8867f57b6293b6ee50de316c9f3222b/pyzmq-27.1.0.tar.gz", hash = "sha256:ac0765e3d44455adb6ddbf4417dcce460fc40a05978c08efdf2948072f6db540", size = 281750, upload-time = "2025-09-08T23:10:18.157Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/06/5d/305323ba86b284e6fcb0d842d6adaa2999035f70f8c38a9b6d21ad28c3d4/pyzmq-27.1.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:226b091818d461a3bef763805e75685e478ac17e9008f49fce2d3e52b3d58b86", size = 1333328, upload-time = "2025-09-08T23:07:45.946Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/a0/fc7e78a23748ad5443ac3275943457e8452da67fda347e05260261108cbc/pyzmq-27.1.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0790a0161c281ca9723f804871b4027f2e8b5a528d357c8952d08cd1a9c15581", size = 908803, upload-time = "2025-09-08T23:07:47.551Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/22/37d15eb05f3bdfa4abea6f6d96eb3bb58585fbd3e4e0ded4e743bc650c97/pyzmq-27.1.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c895a6f35476b0c3a54e3eb6ccf41bf3018de937016e6e18748317f25d4e925f", size = 668836, upload-time = "2025-09-08T23:07:49.436Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/c4/2a6fe5111a01005fc7af3878259ce17684fabb8852815eda6225620f3c59/pyzmq-27.1.0-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bbf8d3630bf96550b3be8e1fc0fea5cbdc8d5466c1192887bd94869da17a63e", size = 857038, upload-time = "2025-09-08T23:07:51.234Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/eb/bfdcb41d0db9cd233d6fb22dc131583774135505ada800ebf14dfb0a7c40/pyzmq-27.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15c8bd0fe0dabf808e2d7a681398c4e5ded70a551ab47482067a572c054c8e2e", size = 1657531, upload-time = "2025-09-08T23:07:52.795Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/21/e3180ca269ed4a0de5c34417dfe71a8ae80421198be83ee619a8a485b0c7/pyzmq-27.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:bafcb3dd171b4ae9f19ee6380dfc71ce0390fefaf26b504c0e5f628d7c8c54f2", size = 2034786, upload-time = "2025-09-08T23:07:55.047Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/b1/5e21d0b517434b7f33588ff76c177c5a167858cc38ef740608898cd329f2/pyzmq-27.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e829529fcaa09937189178115c49c504e69289abd39967cd8a4c215761373394", size = 1894220, upload-time = "2025-09-08T23:07:57.172Z" },
+    { url = "https://files.pythonhosted.org/packages/03/f2/44913a6ff6941905efc24a1acf3d3cb6146b636c546c7406c38c49c403d4/pyzmq-27.1.0-cp311-cp311-win32.whl", hash = "sha256:6df079c47d5902af6db298ec92151db82ecb557af663098b92f2508c398bb54f", size = 567155, upload-time = "2025-09-08T23:07:59.05Z" },
+    { url = "https://files.pythonhosted.org/packages/23/6d/d8d92a0eb270a925c9b4dd039c0b4dc10abc2fcbc48331788824ef113935/pyzmq-27.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:190cbf120fbc0fc4957b56866830def56628934a9d112aec0e2507aa6a032b97", size = 633428, upload-time = "2025-09-08T23:08:00.663Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/14/01afebc96c5abbbd713ecfc7469cfb1bc801c819a74ed5c9fad9a48801cb/pyzmq-27.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:eca6b47df11a132d1745eb3b5b5e557a7dae2c303277aa0e69c6ba91b8736e07", size = 559497, upload-time = "2025-09-08T23:08:02.15Z" },
+    { url = "https://files.pythonhosted.org/packages/92/e7/038aab64a946d535901103da16b953c8c9cc9c961dadcbf3609ed6428d23/pyzmq-27.1.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:452631b640340c928fa343801b0d07eb0c3789a5ffa843f6e1a9cee0ba4eb4fc", size = 1306279, upload-time = "2025-09-08T23:08:03.807Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/5e/c3c49fdd0f535ef45eefcc16934648e9e59dace4a37ee88fc53f6cd8e641/pyzmq-27.1.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1c179799b118e554b66da67d88ed66cd37a169f1f23b5d9f0a231b4e8d44a113", size = 895645, upload-time = "2025-09-08T23:08:05.301Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/e5/b0b2504cb4e903a74dcf1ebae157f9e20ebb6ea76095f6cfffea28c42ecd/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3837439b7f99e60312f0c926a6ad437b067356dc2bc2ec96eb395fd0fe804233", size = 652574, upload-time = "2025-09-08T23:08:06.828Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/9b/c108cdb55560eaf253f0cbdb61b29971e9fb34d9c3499b0e96e4e60ed8a5/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43ad9a73e3da1fab5b0e7e13402f0b2fb934ae1c876c51d0afff0e7c052eca31", size = 840995, upload-time = "2025-09-08T23:08:08.396Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/bb/b79798ca177b9eb0825b4c9998c6af8cd2a7f15a6a1a4272c1d1a21d382f/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0de3028d69d4cdc475bfe47a6128eb38d8bc0e8f4d69646adfbcd840facbac28", size = 1642070, upload-time = "2025-09-08T23:08:09.989Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/80/2df2e7977c4ede24c79ae39dcef3899bfc5f34d1ca7a5b24f182c9b7a9ca/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:cf44a7763aea9298c0aa7dbf859f87ed7012de8bda0f3977b6fb1d96745df856", size = 2021121, upload-time = "2025-09-08T23:08:11.907Z" },
+    { url = "https://files.pythonhosted.org/packages/46/bd/2d45ad24f5f5ae7e8d01525eb76786fa7557136555cac7d929880519e33a/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f30f395a9e6fbca195400ce833c731e7b64c3919aa481af4d88c3759e0cb7496", size = 1878550, upload-time = "2025-09-08T23:08:13.513Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/2f/104c0a3c778d7c2ab8190e9db4f62f0b6957b53c9d87db77c284b69f33ea/pyzmq-27.1.0-cp312-abi3-win32.whl", hash = "sha256:250e5436a4ba13885494412b3da5d518cd0d3a278a1ae640e113c073a5f88edd", size = 559184, upload-time = "2025-09-08T23:08:15.163Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/7f/a21b20d577e4100c6a41795842028235998a643b1ad406a6d4163ea8f53e/pyzmq-27.1.0-cp312-abi3-win_amd64.whl", hash = "sha256:9ce490cf1d2ca2ad84733aa1d69ce6855372cb5ce9223802450c9b2a7cba0ccf", size = 619480, upload-time = "2025-09-08T23:08:17.192Z" },
+    { url = "https://files.pythonhosted.org/packages/78/c2/c012beae5f76b72f007a9e91ee9401cb88c51d0f83c6257a03e785c81cc2/pyzmq-27.1.0-cp312-abi3-win_arm64.whl", hash = "sha256:75a2f36223f0d535a0c919e23615fc85a1e23b71f40c7eb43d7b1dedb4d8f15f", size = 552993, upload-time = "2025-09-08T23:08:18.926Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/c6/c4dcdecdbaa70969ee1fdced6d7b8f60cfabe64d25361f27ac4665a70620/pyzmq-27.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:18770c8d3563715387139060d37859c02ce40718d1faf299abddcdcc6a649066", size = 836265, upload-time = "2025-09-08T23:09:49.376Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/79/f38c92eeaeb03a2ccc2ba9866f0439593bb08c5e3b714ac1d553e5c96e25/pyzmq-27.1.0-pp311-pypy311_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:ac25465d42f92e990f8d8b0546b01c391ad431c3bf447683fdc40565941d0604", size = 800208, upload-time = "2025-09-08T23:09:51.073Z" },
+    { url = "https://files.pythonhosted.org/packages/49/0e/3f0d0d335c6b3abb9b7b723776d0b21fa7f3a6c819a0db6097059aada160/pyzmq-27.1.0-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53b40f8ae006f2734ee7608d59ed661419f087521edbfc2149c3932e9c14808c", size = 567747, upload-time = "2025-09-08T23:09:52.698Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/cf/f2b3784d536250ffd4be70e049f3b60981235d70c6e8ce7e3ef21e1adb25/pyzmq-27.1.0-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f605d884e7c8be8fe1aa94e0a783bf3f591b84c24e4bc4f3e7564c82ac25e271", size = 747371, upload-time = "2025-09-08T23:09:54.563Z" },
+    { url = "https://files.pythonhosted.org/packages/01/1b/5dbe84eefc86f48473947e2f41711aded97eecef1231f4558f1f02713c12/pyzmq-27.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c9f7f6e13dff2e44a6afeaf2cf54cee5929ad64afaf4d40b50f93c58fc687355", size = 544862, upload-time = "2025-09-08T23:09:56.509Z" },
+]
+
 [[package]]
 name = "pyzstd"
 version = "0.17.0"
@@ -5648,6 +5861,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
 ]

+[[package]]
+name = "simple-websocket"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "wsproto" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300, upload-time = "2024-10-10T22:39:31.412Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842, upload-time = "2024-10-10T22:39:29.645Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
@@ -5973,27 +6198,27 @@ wheels = [

 [[package]]
 name = "tokenizers"
-version = "0.22.1"
+version = "0.21.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "huggingface-hub" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/1c/46/fb6854cec3278fbfa4a75b50232c77622bc517ac886156e6afbfa4d8fc6e/tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9", size = 363123, upload-time = "2025-09-19T09:49:23.424Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c2/2f/402986d0823f8d7ca139d969af2917fefaa9b947d1fb32f6168c509f2492/tokenizers-0.21.4.tar.gz", hash = "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880", size = 351253, upload-time = "2025-07-28T15:48:54.325Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bf/33/f4b2d94ada7ab297328fc671fed209368ddb82f965ec2224eb1892674c3a/tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73", size = 3069318, upload-time = "2025-09-19T09:49:11.848Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/58/2aa8c874d02b974990e89ff95826a4852a8b2a273c7d1b4411cdd45a4565/tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc", size = 2926478, upload-time = "2025-09-19T09:49:09.759Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/3b/55e64befa1e7bfea963cf4b787b2cea1011362c4193f5477047532ce127e/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a", size = 3256994, upload-time = "2025-09-19T09:48:56.701Z" },
-    { url = "https://files.pythonhosted.org/packages/71/0b/fbfecf42f67d9b7b80fde4aabb2b3110a97fac6585c9470b5bff103a80cb/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7", size = 3153141, upload-time = "2025-09-19T09:48:59.749Z" },
-    { url = "https://files.pythonhosted.org/packages/17/a9/b38f4e74e0817af8f8ef925507c63c6ae8171e3c4cb2d5d4624bf58fca69/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21", size = 3508049, upload-time = "2025-09-19T09:49:05.868Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/48/dd2b3dac46bb9134a88e35d72e1aa4869579eacc1a27238f1577270773ff/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214", size = 3710730, upload-time = "2025-09-19T09:49:01.832Z" },
-    { url = "https://files.pythonhosted.org/packages/93/0e/ccabc8d16ae4ba84a55d41345207c1e2ea88784651a5a487547d80851398/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f", size = 3412560, upload-time = "2025-09-19T09:49:03.867Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/c6/dc3a0db5a6766416c32c034286d7c2d406da1f498e4de04ab1b8959edd00/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4", size = 3250221, upload-time = "2025-09-19T09:49:07.664Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/a6/2c8486eef79671601ff57b093889a345dd3d576713ef047776015dc66de7/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879", size = 9345569, upload-time = "2025-09-19T09:49:14.214Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/16/32ce667f14c35537f5f605fe9bea3e415ea1b0a646389d2295ec348d5657/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446", size = 9271599, upload-time = "2025-09-19T09:49:16.639Z" },
-    { url = "https://files.pythonhosted.org/packages/51/7c/a5f7898a3f6baa3fc2685c705e04c98c1094c523051c805cdd9306b8f87e/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a", size = 9533862, upload-time = "2025-09-19T09:49:19.146Z" },
-    { url = "https://files.pythonhosted.org/packages/36/65/7e75caea90bc73c1dd8d40438adf1a7bc26af3b8d0a6705ea190462506e1/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390", size = 9681250, upload-time = "2025-09-19T09:49:21.501Z" },
-    { url = "https://files.pythonhosted.org/packages/30/2c/959dddef581b46e6209da82df3b78471e96260e2bc463f89d23b1bf0e52a/tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82", size = 2472003, upload-time = "2025-09-19T09:49:27.089Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/46/e33a8c93907b631a99377ef4c5f817ab453d0b34f93529421f42ff559671/tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138", size = 2674684, upload-time = "2025-09-19T09:49:24.953Z" },
+    { url = "https://files.pythonhosted.org/packages/98/c6/fdb6f72bf6454f52eb4a2510be7fb0f614e541a2554d6210e370d85efff4/tokenizers-0.21.4-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133", size = 2863987, upload-time = "2025-07-28T15:48:44.877Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/a6/28975479e35ddc751dc1ddc97b9b69bf7fcf074db31548aab37f8116674c/tokenizers-0.21.4-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60", size = 2732457, upload-time = "2025-07-28T15:48:43.265Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/8f/24f39d7b5c726b7b0be95dca04f344df278a3fe3a4deb15a975d194cbb32/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5", size = 3012624, upload-time = "2025-07-28T13:22:43.895Z" },
+    { url = "https://files.pythonhosted.org/packages/58/47/26358925717687a58cb74d7a508de96649544fad5778f0cd9827398dc499/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6", size = 2939681, upload-time = "2025-07-28T13:22:47.499Z" },
+    { url = "https://files.pythonhosted.org/packages/99/6f/cc300fea5db2ab5ddc2c8aea5757a27b89c84469899710c3aeddc1d39801/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9", size = 3247445, upload-time = "2025-07-28T15:48:39.711Z" },
+    { url = "https://files.pythonhosted.org/packages/be/bf/98cb4b9c3c4afd8be89cfa6423704337dc20b73eb4180397a6e0d456c334/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732", size = 3428014, upload-time = "2025-07-28T13:22:49.569Z" },
+    { url = "https://files.pythonhosted.org/packages/75/c7/96c1cc780e6ca7f01a57c13235dd05b7bc1c0f3588512ebe9d1331b5f5ae/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2", size = 3193197, upload-time = "2025-07-28T13:22:51.471Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/90/273b6c7ec78af547694eddeea9e05de771278bd20476525ab930cecaf7d8/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff", size = 3115426, upload-time = "2025-07-28T15:48:41.439Z" },
+    { url = "https://files.pythonhosted.org/packages/91/43/c640d5a07e95f1cf9d2c92501f20a25f179ac53a4f71e1489a3dcfcc67ee/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2", size = 9089127, upload-time = "2025-07-28T15:48:46.472Z" },
+    { url = "https://files.pythonhosted.org/packages/44/a1/dd23edd6271d4dca788e5200a807b49ec3e6987815cd9d0a07ad9c96c7c2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78", size = 9055243, upload-time = "2025-07-28T15:48:48.539Z" },
+    { url = "https://files.pythonhosted.org/packages/21/2b/b410d6e9021c4b7ddb57248304dc817c4d4970b73b6ee343674914701197/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b", size = 9298237, upload-time = "2025-07-28T15:48:50.443Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/0a/42348c995c67e2e6e5c89ffb9cfd68507cbaeb84ff39c49ee6e0a6dd0fd2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24", size = 9461980, upload-time = "2025-07-28T15:48:52.325Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/d3/dacccd834404cd71b5c334882f3ba40331ad2120e69ded32cf5fda9a7436/tokenizers-0.21.4-cp39-abi3-win32.whl", hash = "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0", size = 2329871, upload-time = "2025-07-28T15:48:56.841Z" },
+    { url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" },
 ]

 [[package]]
@@ -7059,6 +7284,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" },
 ]

+[[package]]
+name = "wsproto"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425, upload-time = "2022-08-23T19:58:21.447Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226, upload-time = "2022-08-23T19:58:19.96Z" },
+]
+
 [[package]]
 name = "xinference-client"
 version = "1.2.2"
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -635,8 +635,6 @@ BAIDU_VECTOR_DB_API_KEY=dify
 BAIDU_VECTOR_DB_DATABASE=dify
 BAIDU_VECTOR_DB_SHARD=1
 BAIDU_VECTOR_DB_REPLICAS=3
-BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER=DEFAULT_ANALYZER
-BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE=COARSE_MODE

 # VikingDB configurations, only available when VECTOR_STORE is `vikingdb`
 VIKINGDB_ACCESS_KEY=your-ak
@@ -655,8 +653,6 @@ LINDORM_USING_UGC=True
 LINDORM_QUERY_TIMEOUT=1

 # OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase`
-# Built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik`
-# External fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser`
 OCEANBASE_VECTOR_HOST=oceanbase
 OCEANBASE_VECTOR_PORT=2881
 OCEANBASE_VECTOR_USER=root@test
@@ -665,7 +661,6 @@ OCEANBASE_VECTOR_DATABASE=test
 OCEANBASE_CLUSTER_NAME=difyai
 OCEANBASE_MEMORY_LIMIT=6G
 OCEANBASE_ENABLE_HYBRID_SEARCH=false
-OCEANBASE_FULLTEXT_PARSER=ik

 # opengauss configurations, only available when VECTOR_STORE is `opengauss`
 OPENGAUSS_HOST=opengauss
--- a/docker/README-local-test.md
+++ b/docker/README-local-test.md
@@ -0,0 +1,218 @@
+# 本地测试环境设置指南
+
+本文档说明如何创建和使用本地的 Docker Compose 测试环境。其中的本地配置文件（`.env`、`docker-compose.override.yaml`、`Dockerfile.local`）不会被提交到版本控制。
+
+## 📁 文件结构
+
+```
+docker/
+├── .env                        # 本地环境配置
+├── docker-compose.override.yaml # 本地覆盖配置
+├── start-local-test.bat         # Windows启动脚本
+└── README-local-test.md         # 本文档
+```
+
+## 🚀 快速开始
+
+### 1. 准备环境配置文件
+
+**使用 `.env`**
+```bash
+cd docker
+cp .env.example .env   # Windows cmd 下使用: copy .env.example .env
+```
+
+**注意**: 在执行第 3 步的启动脚本之前，请先确保 Docker Desktop 正在运行。
+
+### 2. 修改配置（可选）
+
+编辑上一步创建的 `.env` 文件，调整为适合本地测试的配置：
+
+```bash
+# 开发环境
+DEPLOY_ENV=DEVELOPMENT
+
+# 启用调试
+DEBUG=true
+FLASK_DEBUG=true
+LOG_LEVEL=DEBUG
+
+# 数据库配置（保持默认即可）
+DB_USERNAME=postgres
+DB_PASSWORD=difyai123456
+
+# 向量存储（本地测试推荐Weaviate）
+VECTOR_STORE=weaviate
+```
+
+### 3. 启动测试环境
+
+**Windows用户**：
+```cmd
+cd docker
+start-local-test.bat
+```
+
+**脚本会自动**：
+- 检查 Docker Desktop 是否运行
+- 验证 `.env` 配置文件存在
+- 构建 worker 镜像（使用本地 Dockerfile）
+- 启动所有服务
+
+或者手动启动：
+
+```bash
+# 启动中间件（数据库、Redis、向量存储）
+docker compose -f docker-compose.middleware.yaml --profile weaviate up -d
+
+# 启动应用服务
+docker compose up -d
+```
+
+## 🎯 服务说明
+
+### 中间件服务（docker-compose.middleware.yaml）
+- **PostgreSQL**: 主数据库
+- **Redis**: 缓存和消息队列
+- **Weaviate**: 向量数据库（默认）
+- **其他**: 可根据需要启用不同的向量存储
+
+### 应用服务（docker-compose.yaml + override）
+- **API**: 后端服务（开发模式，支持热重载）
+- **Web**: 前端服务（开发模式）
+- **Nginx**: 反向代理
+- **Worker**: 后台任务处理
+
+## 📝 本地开发特性
+
+### 热重载
+- API服务会自动检测代码变化并重启
+- Web服务支持前端热重载
+
+### 数据持久化
+数据存储在 `docker/volumes/` 目录下，会在容器重启后保留。
+
+### 调试支持
+- 启用Flask调试模式
+- 详细的日志输出
+- API文档自动生成
+
+## 🛠️ 常用命令
+
+```bash
+# 查看服务状态
+docker compose ps
+
+# 查看日志
+docker compose logs -f [service_name]
+
+# 重启特定服务
+docker compose restart api
+
+# 进入容器调试
+docker compose exec api bash
+
+# 停止所有服务
+docker compose down
+
+# 停止并清理数据卷
+docker compose -f docker-compose.middleware.yaml down -v
+```
+
+## 🔧 自定义配置
+
+### 修改端口
+在环境文件中修改：
+```bash
+DIFY_PORT=5002  # API端口
+EXPOSE_NGINX_PORT=8080  # Web端口
+```
+
+### 切换向量存储
+在环境文件中修改：
+```bash
+VECTOR_STORE=qdrant  # 或 milvus, chroma 等
+```
+
+然后重新启动中间件：
+```bash
+docker compose -f docker-compose.middleware.yaml --profile qdrant up -d
+```
+
+### 使用本地 Dockerfile
+
+如果需要使用自定义的 Dockerfile（比如使用国内镜像加速）：
+
+1. **创建本地 Dockerfile**：
+   ```bash
+   # 复制原文件
+   cp api/Dockerfile api/Dockerfile.local
+
+   # 编辑本地文件（比如取消阿里云镜像注释）
+   # 第15行取消注释：RUN sed -i 's@deb.debian.org@mirrors.aliyun.com@g' /etc/apt/sources.list.d/debian.sources
+   ```
+
+2. **配置 override 使用本地 Dockerfile**：
+   `docker-compose.override.yaml` 已经配置好了使用 `Dockerfile.local`
+
+3. **构建时会自动使用**：
+   ```bash
+   docker compose --env-file .env build worker
+   ```
+
+### 添加自定义服务
+编辑 `docker-compose.override.yaml` 添加新服务。
+
+## 📚 最佳实践
+
+1. **不要修改官方文件**: 不要直接修改 `docker-compose.yaml`，所有本地改动都放在 `docker-compose.override.yaml` 中。
+
+2. **使用独立的环境文件**: 本地配置统一写入 `.env` 文件。
+
+3. **定期清理**: 测试完成后清理不需要的数据卷。
+
+4. **版本控制**: 这些本地文件（`.env`, `docker-compose.override.yaml`, `Dockerfile.local`）会被 `.gitignore` 忽略，不会提交到仓库。
+
+## 🐛 故障排除
+
+### 服务启动失败
+```bash
+# 检查端口占用（Linux；Windows 下使用: netstat -ano | findstr :5001）
+netstat -tulpn | grep :5001
+
+# 检查Docker资源
+docker system df
+
+# 查看详细日志
+docker compose logs
+```
+
+### 数据库连接问题
+```bash
+# 检查数据库状态
+docker compose exec db pg_isready
+
+# 重置数据库
+docker compose down
+docker volume rm dify_db_data
+docker compose up -d db
+```
+
+### 内存不足
+减少服务资源使用：
+```yaml
+# 在 docker-compose.override.yaml 中添加
+services:
+  db:
+    environment:
+      POSTGRES_SHARED_BUFFERS: 64MB
+  redis:
+    command: redis-server --maxmemory 64mb
+```
+
+## 📞 获取帮助
+
+如果遇到问题，请：
+1. 检查本文档
+2. 查看 [官方文档](https://docs.dify.ai)
+3. 在GitHub Issues中搜索类似问题
--- a/docker/docker-compose-template.yaml
+++ b/docker/docker-compose-template.yaml
@@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
 services:
  # API service
  api:
-    image: langgenius/dify-api:1.9.0
+    image: langgenius/dify-api:2.0.0-beta.2
    restart: always
    environment:
      # Use the shared environment variables.
@@ -31,7 +31,7 @@ services:
  # worker service
  # The Celery worker for processing the queue.
  worker:
-    image: langgenius/dify-api:1.9.0
+    image: langgenius/dify-api:2.0.0-beta.2
    restart: always
    environment:
      # Use the shared environment variables.
@@ -58,7 +58,7 @@ services:
  # worker_beat service
  # Celery beat for scheduling periodic tasks.
  worker_beat:
-    image: langgenius/dify-api:1.9.0
+    image: langgenius/dify-api:2.0.0-beta.2
    restart: always
    environment:
      # Use the shared environment variables.
@@ -76,7 +76,7 @@ services:

  # Frontend web application.
  web:
-    image: langgenius/dify-web:1.9.0
+    image: langgenius/dify-web:2.0.0-beta.2
    restart: always
    environment:
      CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -177,7 +177,7 @@ services:

  # plugin daemon
  plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.3.0-local
+    image: langgenius/dify-plugin-daemon:0.3.0b1-local
    restart: always
    environment:
      # Use the shared environment variables.
@@ -504,7 +504,6 @@ services:
      OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
      OB_SERVER_IP: 127.0.0.1
      MODE: mini
-      LANG: en_US.UTF-8
    ports:
      - "${OCEANBASE_VECTOR_PORT:-2881}:2881"
    healthcheck:
--- a/docker/docker-compose.middleware.yaml
+++ b/docker/docker-compose.middleware.yaml
@@ -20,17 +20,7 @@ services:
    ports:
      - "${EXPOSE_POSTGRES_PORT:-5432}:5432"
    healthcheck:
-      test:
-        [
-          "CMD",
-          "pg_isready",
-          "-h",
-          "db",
-          "-U",
-          "${PGUSER:-postgres}",
-          "-d",
-          "${POSTGRES_DB:-dify}",
-        ]
+      test: [ 'CMD', 'pg_isready', '-h', 'db', '-U', '${PGUSER:-postgres}', '-d', '${POSTGRES_DB:-dify}' ]
      interval: 1s
      timeout: 3s
      retries: 30
@@ -51,11 +41,7 @@ services:
    ports:
      - "${EXPOSE_REDIS_PORT:-6379}:6379"
    healthcheck:
-      test:
-        [
-          "CMD-SHELL",
-          "redis-cli -a ${REDIS_PASSWORD:-difyai123456} ping | grep -q PONG",
-        ]
+      test: [ 'CMD-SHELL', 'redis-cli -a ${REDIS_PASSWORD:-difyai123456} ping | grep -q PONG' ]

  # The DifySandbox
  sandbox:
@@ -79,13 +65,13 @@ services:
      - ./volumes/sandbox/dependencies:/dependencies
      - ./volumes/sandbox/conf:/conf
    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8194/health"]
+      test: [ "CMD", "curl", "-f", "http://localhost:8194/health" ]
    networks:
      - ssrf_proxy_network

  # plugin daemon
  plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.3.0-local
+    image: langgenius/dify-plugin-daemon:0.3.0b1-local
    restart: always
    env_file:
      - ./middleware.env
@@ -157,12 +143,7 @@ services:
    volumes:
      - ./ssrf_proxy/squid.conf.template:/etc/squid/squid.conf.template
      - ./ssrf_proxy/docker-entrypoint.sh:/docker-entrypoint-mount.sh
-    entrypoint:
-      [
-        "sh",
-        "-c",
-        "cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh",
-      ]
+    entrypoint: [ "sh", "-c", "cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh" ]
    env_file:
      - ./middleware.env
    environment:
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -286,8 +286,6 @@ x-shared-env: &shared-api-worker-env
  BAIDU_VECTOR_DB_DATABASE: ${BAIDU_VECTOR_DB_DATABASE:-dify}
  BAIDU_VECTOR_DB_SHARD: ${BAIDU_VECTOR_DB_SHARD:-1}
  BAIDU_VECTOR_DB_REPLICAS: ${BAIDU_VECTOR_DB_REPLICAS:-3}
-  BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER: ${BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER:-DEFAULT_ANALYZER}
-  BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE: ${BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE:-COARSE_MODE}
  VIKINGDB_ACCESS_KEY: ${VIKINGDB_ACCESS_KEY:-your-ak}
  VIKINGDB_SECRET_KEY: ${VIKINGDB_SECRET_KEY:-your-sk}
  VIKINGDB_REGION: ${VIKINGDB_REGION:-cn-shanghai}
@@ -308,7 +306,6 @@ x-shared-env: &shared-api-worker-env
  OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
  OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
  OCEANBASE_ENABLE_HYBRID_SEARCH: ${OCEANBASE_ENABLE_HYBRID_SEARCH:-false}
-  OCEANBASE_FULLTEXT_PARSER: ${OCEANBASE_FULLTEXT_PARSER:-ik}
  OPENGAUSS_HOST: ${OPENGAUSS_HOST:-opengauss}
  OPENGAUSS_PORT: ${OPENGAUSS_PORT:-6600}
  OPENGAUSS_USER: ${OPENGAUSS_USER:-postgres}
@@ -593,7 +590,7 @@ x-shared-env: &shared-api-worker-env
 services:
  # API service
  api:
-    image: langgenius/dify-api:1.9.0
+    image: langgenius/dify-api:2.0.0-beta.2
    restart: always
    environment:
      # Use the shared environment variables.
@@ -622,7 +619,7 @@ services:
  # worker service
  # The Celery worker for processing the queue.
  worker:
-    image: langgenius/dify-api:1.9.0
+    image: langgenius/dify-api:2.0.0-beta.2
    restart: always
    environment:
      # Use the shared environment variables.
@@ -649,7 +646,7 @@ services:
  # worker_beat service
  # Celery beat for scheduling periodic tasks.
  worker_beat:
-    image: langgenius/dify-api:1.9.0
+    image: langgenius/dify-api:2.0.0-beta.2
    restart: always
    environment:
      # Use the shared environment variables.
@@ -667,7 +664,7 @@ services:

  # Frontend web application.
  web:
-    image: langgenius/dify-web:1.9.0
+    image: langgenius/dify-web:2.0.0-beta.2
    restart: always
    environment:
      CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -768,7 +765,7 @@ services:

  # plugin daemon
  plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.3.0-local
+    image: langgenius/dify-plugin-daemon:0.3.0b1-local
    restart: always
    environment:
      # Use the shared environment variables.
@@ -1095,7 +1092,6 @@ services:
      OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
      OB_SERVER_IP: 127.0.0.1
      MODE: mini
-      LANG: en_US.UTF-8
    ports:
      - "${OCEANBASE_VECTOR_PORT:-2881}:2881"
    healthcheck:
--- a/docker/start-local-test.bat
+++ b/docker/start-local-test.bat
@@ -0,0 +1,63 @@
+@echo off
+chcp 65001 >nul
+REM Dify local test environment startup script (Windows).
+REM Verifies Docker and .env, builds the worker image, then starts all services.
+
+echo [INFO] Starting Dify local test environment...
+
+REM Change to this script's own directory (the docker directory)
+cd /d "%~dp0"
+
+REM Check if Docker is running
+docker info >nul 2>&1
+if errorlevel 1 (
+    echo [ERROR] Docker is not running. Please start Docker Desktop first.
+    pause
+    exit /b 1
+)
+
+REM Check if .env file exists
+if not exist ".env" (
+    echo [ERROR] .env configuration file not found
+    echo         Please create first: copy .env.example .env
+    pause
+    exit /b 1
+)
+
+echo [INFO] Using config file: .env
+
+REM Build worker image
+echo [INFO] Building worker image...
+docker compose --env-file .env build worker
+if errorlevel 1 (
+    echo [ERROR] Failed to build worker image
+    pause
+    exit /b 1
+)
+
+REM Start all services
+echo [INFO] Starting all services...
+docker compose --env-file .env up -d
+if errorlevel 1 (
+    echo [ERROR] Failed to start services
+    pause
+    exit /b 1
+)
+
+echo [SUCCESS] Local test environment started successfully!
+echo.
+echo [SERVICES] Service URLs:
+echo    - Web UI: http://localhost
+echo    - API Docs: http://localhost/swagger-ui.html
+echo    - API Service: http://localhost:5001
+echo.
+echo [COMMANDS] Available commands:
+echo    - View logs: docker compose logs -f
+echo    - Stop services: docker compose down
+echo    - Clean data: docker compose -f docker-compose.middleware.yaml down -v
+echo    - Restart services: docker compose restart
+echo.
+echo [TIP] If first run, wait a few minutes for services to fully start
+echo       Use 'docker compose ps' to check service status
+
+pause
--- a/spec.http
+++ b/spec.http
@@ -0,0 +1,4 @@
+GET /console/api/spec/schema-definitions
+Host: cloud-rag.dify.dev
+authorization: Bearer {{console_api_token}}
+###
--- a/web/app/components/app/log/list.tsx
+++ b/web/app/components/app/log/list.tsx
@@ -35,7 +35,7 @@ import { useStore as useAppStore } from '@/app/components/app/store'
 import { useAppContext } from '@/context/app-context'
 import useTimestamp from '@/hooks/use-timestamp'
 import Tooltip from '@/app/components/base/tooltip'
-import CopyIcon from '@/app/components/base/copy-icon'
+import { CopyIcon } from '@/app/components/base/copy-icon'
 import { buildChatItemTree, getThreadMessages } from '@/app/components/base/chat/utils'
 import { getProcessedFilesFromResponse } from '@/app/components/base/file-uploader/utils'
 import cn from '@/utils/classnames'
--- a/web/app/components/base/copy-icon/index.tsx
+++ b/web/app/components/base/copy-icon/index.tsx
@@ -15,7 +15,7 @@ type Props = {

 const prefixEmbedded = 'appOverview.overview.appInfo.embedded'

-const CopyIcon = ({ content }: Props) => {
+export const CopyIcon = ({ content }: Props) => {
  const { t } = useTranslation()
  const [isCopied, setIsCopied] = useState<boolean>(false)

--- a/web/app/components/base/markdown-blocks/think-block.tsx
+++ b/web/app/components/base/markdown-blocks/think-block.tsx
@@ -63,7 +63,7 @@ const useThinkTimer = (children: any) => {
  return { elapsedTime, isComplete }
 }

-const ThinkBlock = ({ children, ...props }: React.ComponentProps<'details'>) => {
+export const ThinkBlock = ({ children, ...props }: any) => {
  const { elapsedTime, isComplete } = useThinkTimer(children)
  const displayContent = removeEndThink(children)
  const { t } = useTranslation()
--- a/web/app/components/base/svg-gallery/index.tsx
+++ b/web/app/components/base/svg-gallery/index.tsx
@@ -3,7 +3,7 @@ import { SVG } from '@svgdotjs/svg.js'
 import DOMPurify from 'dompurify'
 import ImagePreview from '@/app/components/base/image-uploader/image-preview'

-const SVGRenderer = ({ content }: { content: string }) => {
+export const SVGRenderer = ({ content }: { content: string }) => {
  const svgRef = useRef<HTMLDivElement>(null)
  const [imagePreview, setImagePreview] = useState('')
  const [windowSize, setWindowSize] = useState({
--- a/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx
+++ b/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx
@@ -1,18 +1,9 @@
 import { usePipelineTemplateList } from '@/service/use-pipeline'
 import TemplateCard from './template-card'
 import CreateCard from './create-card'
-import { useI18N } from '@/context/i18n'
-import { useMemo } from 'react'
-import { LanguagesSupported } from '@/i18n-config/language'

 const BuiltInPipelineList = () => {
-  const { locale } = useI18N()
-  const language = useMemo(() => {
-    if (['zh-Hans', 'ja-JP'].includes(locale))
-      return locale
-    return LanguagesSupported[0]
-  }, [locale])
-  const { data: pipelineList, isLoading } = usePipelineTemplateList({ type: 'built-in', language })
+  const { data: pipelineList, isLoading } = usePipelineTemplateList({ type: 'built-in' })
  const list = pipelineList?.pipeline_templates || []

  return (
--- a/web/app/components/datasets/documents/create-from-pipeline/processing/embedding-process/rule-detail.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/processing/embedding-process/rule-detail.tsx
@@ -20,8 +20,35 @@ const RuleDetail = ({
 }: RuleDetailProps) => {
  const { t } = useTranslation()

+  const segmentationRuleMap = {
+    mode: t('datasetDocuments.embedding.mode'),
+    segmentLength: t('datasetDocuments.embedding.segmentLength'),
+    textCleaning: t('datasetDocuments.embedding.textCleaning'),
+  }
+
+  const getRuleName = useCallback((key: string) => {
+    if (key === 'remove_extra_spaces')
+      return t('datasetCreation.stepTwo.removeExtraSpaces')
+
+    if (key === 'remove_urls_emails')
+      return t('datasetCreation.stepTwo.removeUrlEmails')
+
+    if (key === 'remove_stopwords')
+      return t('datasetCreation.stepTwo.removeStopwords')
+  }, [t])
+
+  const isNumber = useCallback((value: unknown) => {
+    return typeof value === 'number'
+  }, [])
+
  const getValue = useCallback((field: string) => {
-    let value = '-'
+    let value: string | number | undefined = '-'
+    const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens)
+      ? sourceData.rules.segmentation.max_tokens
+      : value
+    const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens)
+      ? sourceData.rules.subchunk_segmentation.max_tokens
+      : value
    switch (field) {
      case 'mode':
        value = !sourceData?.mode
@@ -34,16 +61,33 @@ const RuleDetail = ({
              ? t('dataset.parentMode.paragraph')
              : t('dataset.parentMode.fullDoc')}`
        break
+      case 'segmentLength':
+        value = !sourceData?.mode
+          ? value
+          // eslint-disable-next-line sonarjs/no-nested-conditional
+          : sourceData.mode === ProcessMode.general
+            ? maxTokens
+            : `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}`
+        break
+      default:
+        value = !sourceData?.mode
+          ? value
+          : sourceData?.rules?.pre_processing_rules?.filter(rule =>
+            rule.enabled).map(rule => getRuleName(rule.id)).join(',')
+        break
    }
    return value
-  }, [sourceData, t])
+  }, [getRuleName, isNumber, sourceData, t])

  return (
    <div className='flex flex-col gap-1'>
-      <FieldInfo
-        label={t('datasetDocuments.embedding.mode')}
-        displayedValue={getValue('mode')}
-      />
+      {Object.keys(segmentationRuleMap).map((field) => {
+        return <FieldInfo
+          key={field}
+          label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
+          displayedValue={String(getValue(field))}
+        />
+      })}
      <FieldInfo
        label={t('datasetCreation.stepTwo.indexMode')}
        displayedValue={t(`datasetCreation.stepTwo.${indexingType === IndexingType.ECONOMICAL ? 'economical' : 'qualified'}`) as string}
--- a/web/app/components/datasets/documents/status-item/index.tsx
+++ b/web/app/components/datasets/documents/status-item/index.tsx
@@ -72,7 +72,7 @@ const StatusItem = ({
    const [e] = await asyncRunSafe<CommonResponse>(opApi({ datasetId, documentId: id }) as Promise<CommonResponse>)
    if (!e) {
      notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
-      onUpdate?.(operationName)
+      onUpdate?.()
    }
    else { notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) }
  }
--- a/web/app/components/plugins/plugin-item/index.tsx
+++ b/web/app/components/plugins/plugin-item/index.tsx
@@ -146,6 +146,7 @@ const PluginItem: FC<Props> = ({
        {/* Organization & Name */}
        <div className='flex grow items-center overflow-hidden'>
          <OrgInfo
+            className='mt-0.5'
            orgName={orgName}
            packageName={name}
            packageNameClassName='w-auto max-w-[150px]'
@@ -153,8 +154,8 @@ const PluginItem: FC<Props> = ({
          {category === PluginType.extension && (
            <>
              <div className='system-xs-regular mx-2 text-text-quaternary'>·</div>
-              <div className='system-xs-regular flex items-center gap-x-1 overflow-hidden text-text-tertiary'>
-                <RiLoginCircleLine className='size-3 shrink-0' />
+              <div className='system-xs-regular flex space-x-1 overflow-hidden text-text-tertiary'>
+                <RiLoginCircleLine className='h-4 w-4 shrink-0' />
                <span
                  className='truncate'
                  title={t('plugin.endpointsEnabled', { num: endpoints_active })}
@@ -183,7 +184,7 @@ const PluginItem: FC<Props> = ({
            && <>
              <a href={getMarketplaceUrl(`/plugins/${author}/${name}`, { theme })} target='_blank' className='flex items-center gap-0.5'>
                <div className='system-2xs-medium-uppercase text-text-tertiary'>{t('plugin.from')} <span className='text-text-secondary'>marketplace</span></div>
-                <RiArrowRightUpLine className='h-3 w-3 text-text-secondary' />
+                <RiArrowRightUpLine className='h-3 w-3 text-text-tertiary' />
              </a>
            </>
          }
--- a/web/app/components/tools/provider/detail.tsx
+++ b/web/app/components/tools/provider/detail.tsx
@@ -244,8 +244,9 @@ const ProviderDetail = ({
              <div className="flex h-5 items-center">
                <Title title={collection.label[language]} />
              </div>
-              <div className='mb-1 mt-0.5 flex h-4 items-center justify-between'>
+              <div className='mb-1 flex h-4 items-center justify-between'>
                <OrgInfo
+                  className="mt-0.5"
                  packageNameClassName='w-auto'
                  orgName={collection.author}
                  packageName={collection.name}
--- a/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/json-schema-generator/index.tsx
+++ b/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/json-schema-generator/index.tsx
@@ -30,7 +30,7 @@ enum GeneratorView {
  result = 'result',
 }

-const JsonSchemaGenerator: FC<JsonSchemaGeneratorProps> = ({
+export const JsonSchemaGenerator: FC<JsonSchemaGeneratorProps> = ({
  onApply,
  crossAxisOffset,
 }) => {
--- a/web/app/components/workflow/nodes/variable-assigner/use-config.ts
+++ b/web/app/components/workflow/nodes/variable-assigner/use-config.ts
@@ -124,7 +124,7 @@ const useConfig = (id: string, payload: VariableAssignerNodeType) => {
  const handleAddGroup = useCallback(() => {
    let maxInGroupName = 1
    inputs.advanced_settings.groups.forEach((item) => {
-      const match = /(\d+)$/.exec(item.group_name)
+      const match = item.group_name.match(/(\d+)$/)
      if (match) {
        const num = Number.parseInt(match[1], 10)
        if (num > maxInGroupName)
--- a/web/app/styles/globals.css
+++ b/web/app/styles/globals.css
@@ -1,18 +1,12 @@
@import "preflight.css";
-
+@tailwind base;
+@tailwind components;

@import '../../themes/light.css';
@import '../../themes/dark.css';
@import "../../themes/manual-light.css";
@import "../../themes/manual-dark.css";

-@import "../components/base/button/index.css";
-@import "../components/base/action-button/index.css";
-@import "../components/base/modal/index.css";
-
-@tailwind base;
-@tailwind components;
-
 html {
  color-scheme: light;
 }
@@ -686,6 +680,10 @@ button:focus-within {
  display: none;
 }

+@import "../components/base/button/index.css";
+@import "../components/base/action-button/index.css";
+@import "../components/base/modal/index.css";
+
@tailwind utilities;

@layer utilities {
--- a/Show More
+++ b/Show More