chore: normalize frozenset literals and myscale typing (#34327)

This commit is contained in:
99
2026-03-31 16:21:22 +08:00
committed by GitHub
parent fcf04629d3
commit f27d669f87
17 changed files with 1536 additions and 1511 deletions

View File

@@ -7,15 +7,16 @@ UUID_NIL = "00000000-0000-0000-0000-000000000000"
DEFAULT_FILE_NUMBER_LIMITS = 3
IMAGE_EXTENSIONS = convert_to_lower_and_upper_set({"jpg", "jpeg", "png", "webp", "gif", "svg"})
_IMAGE_EXTENSION_BASE: frozenset[str] = frozenset(("jpg", "jpeg", "png", "webp", "gif", "svg"))
_VIDEO_EXTENSION_BASE: frozenset[str] = frozenset(("mp4", "mov", "mpeg", "webm"))
_AUDIO_EXTENSION_BASE: frozenset[str] = frozenset(("mp3", "m4a", "wav", "amr", "mpga"))
VIDEO_EXTENSIONS = convert_to_lower_and_upper_set({"mp4", "mov", "mpeg", "webm"})
IMAGE_EXTENSIONS: frozenset[str] = frozenset(convert_to_lower_and_upper_set(_IMAGE_EXTENSION_BASE))
VIDEO_EXTENSIONS: frozenset[str] = frozenset(convert_to_lower_and_upper_set(_VIDEO_EXTENSION_BASE))
AUDIO_EXTENSIONS: frozenset[str] = frozenset(convert_to_lower_and_upper_set(_AUDIO_EXTENSION_BASE))
AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "m4a", "wav", "amr", "mpga"})
_doc_extensions: set[str]
if dify_config.ETL_TYPE == "Unstructured":
_doc_extensions = {
_UNSTRUCTURED_DOCUMENT_EXTENSION_BASE: frozenset[str] = frozenset(
(
"txt",
"markdown",
"md",
@@ -35,11 +36,10 @@ if dify_config.ETL_TYPE == "Unstructured":
"pptx",
"xml",
"epub",
}
if dify_config.UNSTRUCTURED_API_URL:
_doc_extensions.add("ppt")
else:
_doc_extensions = {
)
)
_DEFAULT_DOCUMENT_EXTENSION_BASE: frozenset[str] = frozenset(
(
"txt",
"markdown",
"md",
@@ -53,8 +53,17 @@ else:
"csv",
"vtt",
"properties",
}
DOCUMENT_EXTENSIONS: set[str] = convert_to_lower_and_upper_set(_doc_extensions)
)
)
_doc_extensions: set[str]
if dify_config.ETL_TYPE == "Unstructured":
_doc_extensions = set(_UNSTRUCTURED_DOCUMENT_EXTENSION_BASE)
if dify_config.UNSTRUCTURED_API_URL:
_doc_extensions.add("ppt")
else:
_doc_extensions = set(_DEFAULT_DOCUMENT_EXTENSION_BASE)
DOCUMENT_EXTENSIONS: frozenset[str] = frozenset(convert_to_lower_and_upper_set(_doc_extensions))
# console
COOKIE_NAME_ACCESS_TOKEN = "access_token"

View File

@@ -4,8 +4,8 @@ from urllib.parse import quote
from flask import Response
HTML_MIME_TYPES = frozenset({"text/html", "application/xhtml+xml"})
HTML_EXTENSIONS = frozenset({"html", "htm"})
HTML_MIME_TYPES: frozenset[str] = frozenset(("text/html", "application/xhtml+xml"))
HTML_EXTENSIONS: frozenset[str] = frozenset(("html", "htm"))
def _normalize_mime_type(mime_type: str | None) -> str:

View File

@@ -17,7 +17,7 @@ class CSVSanitizer:
"""
# Characters that can start a formula in Excel/LibreOffice/Google Sheets
FORMULA_CHARS = frozenset({"=", "+", "-", "@", "\t", "\r"})
FORMULA_CHARS = frozenset(("=", "+", "-", "@", "\t", "\r"))
@classmethod
def sanitize_value(cls, value: Any) -> str:

View File

@@ -122,6 +122,6 @@ class JiebaKeywordTableHandler:
results.add(token)
sub_tokens = re.findall(r"\w+", token)
if len(sub_tokens) > 1:
results.update({w for w in sub_tokens if w not in list(STOPWORDS)})
results.update({w for w in sub_tokens if w not in STOPWORDS})
return results

View File

@@ -1,4 +1,5 @@
STOPWORDS = {
STOPWORDS: frozenset[str] = frozenset(
(
"during",
"when",
"but",
@@ -1367,4 +1368,5 @@ STOPWORDS = {
"",
"",
"\n",
}
)
)

View File

@@ -4,7 +4,7 @@ import uuid
from enum import StrEnum
from typing import Any
from clickhouse_connect import get_client
from clickhouse_connect import get_client # type: ignore[import-untyped]
from pydantic import BaseModel
from configs import dify_config

View File

@@ -35,7 +35,7 @@ class PdfExtractor(BaseExtractor):
"""
# Magic bytes for image format detection: (magic_bytes, extension, mime_type)
IMAGE_FORMATS = [
IMAGE_FORMATS: tuple[tuple[bytes, str, str], ...] = (
(b"\xff\xd8\xff", "jpg", "image/jpeg"),
(b"\x89PNG\r\n\x1a\n", "png", "image/png"),
(b"\x00\x00\x00\x0c\x6a\x50\x20\x20\x0d\x0a\x87\x0a", "jp2", "image/jp2"),
@@ -45,7 +45,7 @@ class PdfExtractor(BaseExtractor):
(b"MM\x00*", "tiff", "image/tiff"),
(b"II+\x00", "tiff", "image/tiff"),
(b"MM\x00+", "tiff", "image/tiff"),
]
)
MAX_MAGIC_LEN = max(len(m) for m, _, _ in IMAGE_FORMATS)
def __init__(self, file_path: str, tenant_id: str, user_id: str, file_cache_key: str | None = None):

View File

@@ -5,11 +5,11 @@ TRIGGER_SCHEDULE_NODE_TYPE: Final[str] = "trigger-schedule"
TRIGGER_PLUGIN_NODE_TYPE: Final[str] = "trigger-plugin"
TRIGGER_NODE_TYPES: Final[frozenset[str]] = frozenset(
{
(
TRIGGER_WEBHOOK_NODE_TYPE,
TRIGGER_SCHEDULE_NODE_TYPE,
TRIGGER_PLUGIN_NODE_TYPE,
}
)
)

View File

@@ -8,24 +8,20 @@ from pydantic import BaseModel, Field, field_validator
from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE
_WEBHOOK_HEADER_ALLOWED_TYPES = frozenset(
{
SegmentType.STRING,
}
)
_WEBHOOK_HEADER_ALLOWED_TYPES: frozenset[SegmentType] = frozenset((SegmentType.STRING,))
_WEBHOOK_QUERY_PARAMETER_ALLOWED_TYPES = frozenset(
{
_WEBHOOK_QUERY_PARAMETER_ALLOWED_TYPES: frozenset[SegmentType] = frozenset(
(
SegmentType.STRING,
SegmentType.NUMBER,
SegmentType.BOOLEAN,
}
)
)
_WEBHOOK_PARAMETER_ALLOWED_TYPES = _WEBHOOK_HEADER_ALLOWED_TYPES | _WEBHOOK_QUERY_PARAMETER_ALLOWED_TYPES
_WEBHOOK_BODY_ALLOWED_TYPES = frozenset(
{
_WEBHOOK_BODY_ALLOWED_TYPES: frozenset[SegmentType] = frozenset(
(
SegmentType.STRING,
SegmentType.NUMBER,
SegmentType.BOOLEAN,
@@ -35,7 +31,7 @@ _WEBHOOK_BODY_ALLOWED_TYPES = frozenset(
SegmentType.ARRAY_BOOLEAN,
SegmentType.ARRAY_OBJECT,
SegmentType.FILE,
}
)
)

View File

@@ -1,9 +1,12 @@
def convert_to_lower_and_upper_set(inputs: list[str] | set[str]) -> set[str]:
from collections.abc import Collection
def convert_to_lower_and_upper_set(inputs: Collection[str]) -> set[str]:
"""
Convert a list or set of strings to a set containing both lower and upper case versions of each string.
Convert a collection of strings to a set containing both lower and upper case versions of each string.
Args:
inputs (list[str] | set[str]): A list or set of strings to be converted.
inputs (Collection[str]): A collection of strings to be converted.
Returns:
set[str]: A set containing both lower and upper case versions of each string.

View File

@@ -1386,7 +1386,7 @@ class ConversationVariable(TypeBase):
# Only `sys.query` and `sys.files` can be modified.
_EDITABLE_SYSTEM_VARIABLE = frozenset(["query", "files"])
_EDITABLE_SYSTEM_VARIABLE = frozenset(("query", "files"))
class WorkflowDraftVariable(Base):

View File

@@ -800,8 +800,8 @@ class DraftVariableSaver:
# technical variables from being exposed in the draft environment, particularly those
# that aren't meant to be directly edited or viewed by users.
_EXCLUDE_VARIABLE_NAMES_MAPPING: dict[NodeType, frozenset[str]] = {
BuiltinNodeTypes.LLM: frozenset(["finish_reason"]),
BuiltinNodeTypes.LOOP: frozenset(["loop_round"]),
BuiltinNodeTypes.LLM: frozenset(("finish_reason",)),
BuiltinNodeTypes.LOOP: frozenset(("loop_round",)),
}
# Database session used for persisting draft variables.

View File

@@ -1249,9 +1249,9 @@ class TestFileConstants:
"""
def test_image_extensions_set_properties(self):
"""Test that IMAGE_EXTENSIONS set has expected properties."""
# Assert - Should be a set
assert isinstance(IMAGE_EXTENSIONS, set)
"""Test that IMAGE_EXTENSIONS frozenset has expected properties."""
# Assert - Should be immutable
assert isinstance(IMAGE_EXTENSIONS, frozenset)
# Should not be empty
assert len(IMAGE_EXTENSIONS) > 0
# Should contain common image formats
@@ -1260,9 +1260,9 @@ class TestFileConstants:
assert ext in IMAGE_EXTENSIONS or ext.upper() in IMAGE_EXTENSIONS
def test_video_extensions_set_properties(self):
"""Test that VIDEO_EXTENSIONS set has expected properties."""
# Assert - Should be a set
assert isinstance(VIDEO_EXTENSIONS, set)
"""Test that VIDEO_EXTENSIONS frozenset has expected properties."""
# Assert - Should be immutable
assert isinstance(VIDEO_EXTENSIONS, frozenset)
# Should not be empty
assert len(VIDEO_EXTENSIONS) > 0
# Should contain common video formats
@@ -1271,9 +1271,9 @@ class TestFileConstants:
assert ext in VIDEO_EXTENSIONS or ext.upper() in VIDEO_EXTENSIONS
def test_audio_extensions_set_properties(self):
"""Test that AUDIO_EXTENSIONS set has expected properties."""
# Assert - Should be a set
assert isinstance(AUDIO_EXTENSIONS, set)
"""Test that AUDIO_EXTENSIONS frozenset has expected properties."""
# Assert - Should be immutable
assert isinstance(AUDIO_EXTENSIONS, frozenset)
# Should not be empty
assert len(AUDIO_EXTENSIONS) > 0
# Should contain common audio formats
@@ -1282,9 +1282,9 @@ class TestFileConstants:
assert ext in AUDIO_EXTENSIONS or ext.upper() in AUDIO_EXTENSIONS
def test_document_extensions_set_properties(self):
"""Test that DOCUMENT_EXTENSIONS set has expected properties."""
# Assert - Should be a set
assert isinstance(DOCUMENT_EXTENSIONS, set)
"""Test that DOCUMENT_EXTENSIONS frozenset has expected properties."""
# Assert - Should be immutable
assert isinstance(DOCUMENT_EXTENSIONS, frozenset)
# Should not be empty
assert len(DOCUMENT_EXTENSIONS) > 0
# Should contain common document formats

View File

@@ -2,5 +2,6 @@ from core.rag.datasource.keyword.jieba.stopwords import STOPWORDS
def test_stopwords_loaded():
assert isinstance(STOPWORDS, frozenset)
assert "during" in STOPWORDS
assert "the" in STOPWORDS

View File

@@ -1,4 +1,5 @@
import base64
import logging
import uuid
from collections.abc import Sequence
from unittest import mock
@@ -1261,6 +1262,10 @@ def test_llm_node_image_file_to_markdown(llm_node: LLMNode):
class TestSaveMultimodalOutputAndConvertResultToMarkdown:
class _UnknownItem:
def __str__(self) -> str:
return "<unknown-item>"
def test_str_content(self, llm_node_for_multimodal):
llm_node, mock_file_saver = llm_node_for_multimodal
gen = llm_node._save_multimodal_output_and_convert_result_to_markdown(
@@ -1330,18 +1335,23 @@ class TestSaveMultimodalOutputAndConvertResultToMarkdown:
def test_unknown_content_type(self, llm_node_for_multimodal):
llm_node, mock_file_saver = llm_node_for_multimodal
gen = llm_node._save_multimodal_output_and_convert_result_to_markdown(
contents=frozenset(["hello world"]), file_saver=mock_file_saver, file_outputs=[]
contents=frozenset(("hello world",)), file_saver=mock_file_saver, file_outputs=[]
)
assert list(gen) == ["hello world"]
mock_file_saver.save_binary_string.assert_not_called()
mock_file_saver.save_remote_url.assert_not_called()
def test_unknown_item_type(self, llm_node_for_multimodal):
def test_unknown_item_type(self, llm_node_for_multimodal, caplog):
llm_node, mock_file_saver = llm_node_for_multimodal
unknown_item = self._UnknownItem()
with caplog.at_level(logging.WARNING, logger="graphon.nodes.llm.node"):
gen = llm_node._save_multimodal_output_and_convert_result_to_markdown(
contents=[frozenset(["hello world"])], file_saver=mock_file_saver, file_outputs=[]
contents=[unknown_item], file_saver=mock_file_saver, file_outputs=[]
)
assert list(gen) == ["frozenset({'hello world'})"]
assert list(gen) == [str(unknown_item)]
assert "unknown item type encountered" in caplog.text
mock_file_saver.save_binary_string.assert_not_called()
mock_file_saver.save_remote_url.assert_not_called()

View File

@@ -837,7 +837,7 @@ class TestBuildSegmentValueErrors:
self.ValueErrorTestCase(
name="frozenset_type",
description="frozenset (unsupported type)",
test_value=frozenset([1, 2, 3]),
test_value=frozenset((1, 2, 3)),
),
self.ValueErrorTestCase(
name="memoryview_type",

View File

@@ -3,7 +3,8 @@ from pathlib import Path
import yaml # type: ignore
from dotenv import dotenv_values
BASE_API_AND_DOCKER_CONFIG_SET_DIFF = {
BASE_API_AND_DOCKER_CONFIG_SET_DIFF: frozenset[str] = frozenset(
(
"APP_MAX_EXECUTION_TIME",
"BATCH_UPLOAD_LIMIT",
"CELERY_BEAT_SCHEDULER_TIME",
@@ -38,9 +39,11 @@ BASE_API_AND_DOCKER_CONFIG_SET_DIFF = {
"UPSTASH_VECTOR_URL",
"USING_UGC_INDEX",
"WEAVIATE_BATCH_SIZE",
}
)
)
BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF = {
BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF: frozenset[str] = frozenset(
(
"BATCH_UPLOAD_LIMIT",
"CELERY_BEAT_SCHEDULER_TIME",
"HTTP_REQUEST_MAX_CONNECT_TIMEOUT",
@@ -85,7 +88,8 @@ BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF = {
"VIKINGDB_CONNECTION_TIMEOUT",
"VIKINGDB_SOCKET_TIMEOUT",
"WEAVIATE_BATCH_SIZE",
}
)
)
API_CONFIG_SET = set(dotenv_values(Path("api") / Path(".env.example")).keys())
DOCKER_CONFIG_SET = set(dotenv_values(Path("docker") / Path(".env.example")).keys())