refactor(api): tighten phase 1 shared type contracts (#33453)

This commit is contained in:
盐粒 Yanli
2026-03-17 17:50:51 +08:00
committed by GitHub
parent a592c53573
commit a717519822
14 changed files with 313 additions and 196 deletions

View File

@@ -32,6 +32,11 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
def _stream_with_request_context(response: object) -> Any:
"""Bridge Flask's loosely-typed streaming helper without leaking casts into callers."""
return cast(Any, stream_with_context)(response)
def escape_like_pattern(pattern: str) -> str:
"""
Escape special characters in a string for safe use in SQL LIKE patterns.
@@ -286,22 +291,32 @@ def generate_text_hash(text: str) -> str:
return sha256(hash_text.encode()).hexdigest()
def compact_generate_response(response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
if isinstance(response, dict):
def compact_generate_response(
response: Mapping[str, Any] | Generator[str, None, None] | RateLimitGenerator,
) -> Response:
if isinstance(response, Mapping):
return Response(
response=json.dumps(jsonable_encoder(response)),
status=200,
content_type="application/json; charset=utf-8",
)
else:
stream_response = response
def generate() -> Generator:
yield from response
def generate() -> Generator[str, None, None]:
yield from stream_response
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
return Response(
_stream_with_request_context(generate()),
status=200,
mimetype="text/event-stream",
)
def length_prefixed_response(magic_number: int, response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
def length_prefixed_response(
magic_number: int,
response: Mapping[str, Any] | BaseModel | Generator[str | bytes, None, None] | RateLimitGenerator,
) -> Response:
"""
This function is used to return a response with a length prefix.
Magic number is a one byte number that indicates the type of the response.
@@ -332,7 +347,7 @@ def length_prefixed_response(magic_number: int, response: Union[Mapping, Generat
# | Magic Number 1byte | Reserved 1byte | Header Length 2bytes | Data Length 4bytes | Reserved 6bytes | Data
return struct.pack("<BBHI", magic_number, 0, header_length, data_length) + b"\x00" * 6 + response
if isinstance(response, dict):
if isinstance(response, Mapping):
return Response(
response=pack_response_with_length_prefix(json.dumps(jsonable_encoder(response)).encode("utf-8")),
status=200,
@@ -345,14 +360,20 @@ def length_prefixed_response(magic_number: int, response: Union[Mapping, Generat
mimetype="application/json",
)
def generate() -> Generator:
for chunk in response:
stream_response = response
def generate() -> Generator[bytes, None, None]:
for chunk in stream_response:
if isinstance(chunk, str):
yield pack_response_with_length_prefix(chunk.encode("utf-8"))
else:
yield pack_response_with_length_prefix(chunk)
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
return Response(
_stream_with_request_context(generate()),
status=200,
mimetype="text/event-stream",
)
class TokenManager:

View File

@@ -77,12 +77,14 @@ def login_required(func: Callable[P, R]) -> Callable[P, R | ResponseReturnValue]
@wraps(func)
def decorated_view(*args: P.args, **kwargs: P.kwargs) -> R | ResponseReturnValue:
if request.method in EXEMPT_METHODS or dify_config.LOGIN_DISABLED:
pass
elif current_user is not None and not current_user.is_authenticated:
return current_app.ensure_sync(func)(*args, **kwargs)
user = _get_user()
if user is None or not user.is_authenticated:
return current_app.login_manager.unauthorized() # type: ignore
# we put csrf validation here for less conflicts
# TODO: maybe find a better place for it.
check_csrf_token(request, current_user.id)
check_csrf_token(request, user.id)
return current_app.ensure_sync(func)(*args, **kwargs)
return decorated_view

View File

@@ -7,9 +7,10 @@ https://github.com/django/django/blob/main/django/utils/module_loading.py
import sys
from importlib import import_module
from typing import Any
def cached_import(module_path: str, class_name: str):
def cached_import(module_path: str, class_name: str) -> Any:
"""
Import a module and return the named attribute/class from it, with caching.
@@ -20,16 +21,14 @@ def cached_import(module_path: str, class_name: str):
Returns:
The imported attribute/class
"""
if not (
(module := sys.modules.get(module_path))
and (spec := getattr(module, "__spec__", None))
and getattr(spec, "_initializing", False) is False
):
module = sys.modules.get(module_path)
spec = getattr(module, "__spec__", None) if module is not None else None
if module is None or getattr(spec, "_initializing", False):
module = import_module(module_path)
return getattr(module, class_name)
def import_string(dotted_path: str):
def import_string(dotted_path: str) -> Any:
"""
Import a dotted module path and return the attribute/class designated by
the last name in the path. Raise ImportError if the import failed.

View File

@@ -1,7 +1,48 @@
import sys
import urllib.parse
from dataclasses import dataclass
from typing import NotRequired
import httpx
from pydantic import TypeAdapter
if sys.version_info >= (3, 12):
from typing import TypedDict
else:
from typing_extensions import TypedDict
JsonObject = dict[str, object]
JsonObjectList = list[JsonObject]
JSON_OBJECT_ADAPTER = TypeAdapter(JsonObject)
JSON_OBJECT_LIST_ADAPTER = TypeAdapter(JsonObjectList)
class AccessTokenResponse(TypedDict, total=False):
access_token: str
class GitHubEmailRecord(TypedDict, total=False):
email: str
primary: bool
class GitHubRawUserInfo(TypedDict):
id: int | str
login: str
name: NotRequired[str]
email: NotRequired[str]
class GoogleRawUserInfo(TypedDict):
sub: str
email: str
ACCESS_TOKEN_RESPONSE_ADAPTER = TypeAdapter(AccessTokenResponse)
GITHUB_RAW_USER_INFO_ADAPTER = TypeAdapter(GitHubRawUserInfo)
GITHUB_EMAIL_RECORDS_ADAPTER = TypeAdapter(list[GitHubEmailRecord])
GOOGLE_RAW_USER_INFO_ADAPTER = TypeAdapter(GoogleRawUserInfo)
@dataclass
@@ -11,26 +52,38 @@ class OAuthUserInfo:
email: str
def _json_object(response: httpx.Response) -> JsonObject:
return JSON_OBJECT_ADAPTER.validate_python(response.json())
def _json_list(response: httpx.Response) -> JsonObjectList:
return JSON_OBJECT_LIST_ADAPTER.validate_python(response.json())
class OAuth:
client_id: str
client_secret: str
redirect_uri: str
def __init__(self, client_id: str, client_secret: str, redirect_uri: str):
self.client_id = client_id
self.client_secret = client_secret
self.redirect_uri = redirect_uri
def get_authorization_url(self):
def get_authorization_url(self, invite_token: str | None = None) -> str:
raise NotImplementedError()
def get_access_token(self, code: str):
def get_access_token(self, code: str) -> str:
raise NotImplementedError()
def get_raw_user_info(self, token: str):
def get_raw_user_info(self, token: str) -> JsonObject:
raise NotImplementedError()
def get_user_info(self, token: str) -> OAuthUserInfo:
raw_info = self.get_raw_user_info(token)
return self._transform_user_info(raw_info)
def _transform_user_info(self, raw_info: dict) -> OAuthUserInfo:
def _transform_user_info(self, raw_info: JsonObject) -> OAuthUserInfo:
raise NotImplementedError()
@@ -40,7 +93,7 @@ class GitHubOAuth(OAuth):
_USER_INFO_URL = "https://api.github.com/user"
_EMAIL_INFO_URL = "https://api.github.com/user/emails"
def get_authorization_url(self, invite_token: str | None = None):
def get_authorization_url(self, invite_token: str | None = None) -> str:
params = {
"client_id": self.client_id,
"redirect_uri": self.redirect_uri,
@@ -50,7 +103,7 @@ class GitHubOAuth(OAuth):
params["state"] = invite_token
return f"{self._AUTH_URL}?{urllib.parse.urlencode(params)}"
def get_access_token(self, code: str):
def get_access_token(self, code: str) -> str:
data = {
"client_id": self.client_id,
"client_secret": self.client_secret,
@@ -60,7 +113,7 @@ class GitHubOAuth(OAuth):
headers = {"Accept": "application/json"}
response = httpx.post(self._TOKEN_URL, data=data, headers=headers)
response_json = response.json()
response_json = ACCESS_TOKEN_RESPONSE_ADAPTER.validate_python(_json_object(response))
access_token = response_json.get("access_token")
if not access_token:
@@ -68,23 +121,24 @@ class GitHubOAuth(OAuth):
return access_token
def get_raw_user_info(self, token: str):
def get_raw_user_info(self, token: str) -> JsonObject:
headers = {"Authorization": f"token {token}"}
response = httpx.get(self._USER_INFO_URL, headers=headers)
response.raise_for_status()
user_info = response.json()
user_info = GITHUB_RAW_USER_INFO_ADAPTER.validate_python(_json_object(response))
email_response = httpx.get(self._EMAIL_INFO_URL, headers=headers)
email_info = email_response.json()
primary_email: dict = next((email for email in email_info if email["primary"] == True), {})
email_info = GITHUB_EMAIL_RECORDS_ADAPTER.validate_python(_json_list(email_response))
primary_email = next((email for email in email_info if email.get("primary") is True), None)
return {**user_info, "email": primary_email.get("email", "")}
return {**user_info, "email": primary_email.get("email", "") if primary_email else ""}
def _transform_user_info(self, raw_info: dict) -> OAuthUserInfo:
email = raw_info.get("email")
def _transform_user_info(self, raw_info: JsonObject) -> OAuthUserInfo:
payload = GITHUB_RAW_USER_INFO_ADAPTER.validate_python(raw_info)
email = payload.get("email")
if not email:
email = f"{raw_info['id']}+{raw_info['login']}@users.noreply.github.com"
return OAuthUserInfo(id=str(raw_info["id"]), name=raw_info["name"], email=email)
email = f"{payload['id']}+{payload['login']}@users.noreply.github.com"
return OAuthUserInfo(id=str(payload["id"]), name=str(payload.get("name", "")), email=email)
class GoogleOAuth(OAuth):
@@ -92,7 +146,7 @@ class GoogleOAuth(OAuth):
_TOKEN_URL = "https://oauth2.googleapis.com/token"
_USER_INFO_URL = "https://www.googleapis.com/oauth2/v3/userinfo"
def get_authorization_url(self, invite_token: str | None = None):
def get_authorization_url(self, invite_token: str | None = None) -> str:
params = {
"client_id": self.client_id,
"response_type": "code",
@@ -103,7 +157,7 @@ class GoogleOAuth(OAuth):
params["state"] = invite_token
return f"{self._AUTH_URL}?{urllib.parse.urlencode(params)}"
def get_access_token(self, code: str):
def get_access_token(self, code: str) -> str:
data = {
"client_id": self.client_id,
"client_secret": self.client_secret,
@@ -114,7 +168,7 @@ class GoogleOAuth(OAuth):
headers = {"Accept": "application/json"}
response = httpx.post(self._TOKEN_URL, data=data, headers=headers)
response_json = response.json()
response_json = ACCESS_TOKEN_RESPONSE_ADAPTER.validate_python(_json_object(response))
access_token = response_json.get("access_token")
if not access_token:
@@ -122,11 +176,12 @@ class GoogleOAuth(OAuth):
return access_token
def get_raw_user_info(self, token: str):
def get_raw_user_info(self, token: str) -> JsonObject:
headers = {"Authorization": f"Bearer {token}"}
response = httpx.get(self._USER_INFO_URL, headers=headers)
response.raise_for_status()
return response.json()
return _json_object(response)
def _transform_user_info(self, raw_info: dict) -> OAuthUserInfo:
return OAuthUserInfo(id=str(raw_info["sub"]), name="", email=raw_info["email"])
def _transform_user_info(self, raw_info: JsonObject) -> OAuthUserInfo:
payload = GOOGLE_RAW_USER_INFO_ADAPTER.validate_python(raw_info)
return OAuthUserInfo(id=str(payload["sub"]), name="", email=payload["email"])

View File

@@ -1,25 +1,57 @@
import sys
import urllib.parse
from typing import Any
from typing import Any, Literal
import httpx
from flask_login import current_user
from pydantic import TypeAdapter
from sqlalchemy import select
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.source import DataSourceOauthBinding
if sys.version_info >= (3, 12):
from typing import TypedDict
else:
from typing_extensions import TypedDict
class NotionPageSummary(TypedDict):
page_id: str
page_name: str
page_icon: dict[str, str] | None
parent_id: str
type: Literal["page", "database"]
class NotionSourceInfo(TypedDict):
workspace_name: str | None
workspace_icon: str | None
workspace_id: str | None
pages: list[NotionPageSummary]
total: int
SOURCE_INFO_STORAGE_ADAPTER = TypeAdapter(dict[str, object])
NOTION_SOURCE_INFO_ADAPTER = TypeAdapter(NotionSourceInfo)
NOTION_PAGE_SUMMARY_ADAPTER = TypeAdapter(NotionPageSummary)
class OAuthDataSource:
client_id: str
client_secret: str
redirect_uri: str
def __init__(self, client_id: str, client_secret: str, redirect_uri: str):
self.client_id = client_id
self.client_secret = client_secret
self.redirect_uri = redirect_uri
def get_authorization_url(self):
def get_authorization_url(self) -> str:
raise NotImplementedError()
def get_access_token(self, code: str):
def get_access_token(self, code: str) -> None:
raise NotImplementedError()
@@ -30,7 +62,7 @@ class NotionOAuth(OAuthDataSource):
_NOTION_BLOCK_SEARCH = "https://api.notion.com/v1/blocks"
_NOTION_BOT_USER = "https://api.notion.com/v1/users/me"
def get_authorization_url(self):
def get_authorization_url(self) -> str:
params = {
"client_id": self.client_id,
"response_type": "code",
@@ -39,7 +71,7 @@ class NotionOAuth(OAuthDataSource):
}
return f"{self._AUTH_URL}?{urllib.parse.urlencode(params)}"
def get_access_token(self, code: str):
def get_access_token(self, code: str) -> None:
data = {"code": code, "grant_type": "authorization_code", "redirect_uri": self.redirect_uri}
headers = {"Accept": "application/json"}
auth = (self.client_id, self.client_secret)
@@ -54,13 +86,12 @@ class NotionOAuth(OAuthDataSource):
workspace_id = response_json.get("workspace_id")
# get all authorized pages
pages = self.get_authorized_pages(access_token)
source_info = {
"workspace_name": workspace_name,
"workspace_icon": workspace_icon,
"workspace_id": workspace_id,
"pages": pages,
"total": len(pages),
}
source_info = self._build_source_info(
workspace_name=workspace_name,
workspace_icon=workspace_icon,
workspace_id=workspace_id,
pages=pages,
)
# save data source binding
data_source_binding = db.session.scalar(
select(DataSourceOauthBinding).where(
@@ -70,7 +101,7 @@ class NotionOAuth(OAuthDataSource):
)
)
if data_source_binding:
data_source_binding.source_info = source_info
data_source_binding.source_info = SOURCE_INFO_STORAGE_ADAPTER.validate_python(source_info)
data_source_binding.disabled = False
data_source_binding.updated_at = naive_utc_now()
db.session.commit()
@@ -78,25 +109,24 @@ class NotionOAuth(OAuthDataSource):
new_data_source_binding = DataSourceOauthBinding(
tenant_id=current_user.current_tenant_id,
access_token=access_token,
source_info=source_info,
source_info=SOURCE_INFO_STORAGE_ADAPTER.validate_python(source_info),
provider="notion",
)
db.session.add(new_data_source_binding)
db.session.commit()
def save_internal_access_token(self, access_token: str):
def save_internal_access_token(self, access_token: str) -> None:
workspace_name = self.notion_workspace_name(access_token)
workspace_icon = None
workspace_id = current_user.current_tenant_id
# get all authorized pages
pages = self.get_authorized_pages(access_token)
source_info = {
"workspace_name": workspace_name,
"workspace_icon": workspace_icon,
"workspace_id": workspace_id,
"pages": pages,
"total": len(pages),
}
source_info = self._build_source_info(
workspace_name=workspace_name,
workspace_icon=workspace_icon,
workspace_id=workspace_id,
pages=pages,
)
# save data source binding
data_source_binding = db.session.scalar(
select(DataSourceOauthBinding).where(
@@ -106,7 +136,7 @@ class NotionOAuth(OAuthDataSource):
)
)
if data_source_binding:
data_source_binding.source_info = source_info
data_source_binding.source_info = SOURCE_INFO_STORAGE_ADAPTER.validate_python(source_info)
data_source_binding.disabled = False
data_source_binding.updated_at = naive_utc_now()
db.session.commit()
@@ -114,13 +144,13 @@ class NotionOAuth(OAuthDataSource):
new_data_source_binding = DataSourceOauthBinding(
tenant_id=current_user.current_tenant_id,
access_token=access_token,
source_info=source_info,
source_info=SOURCE_INFO_STORAGE_ADAPTER.validate_python(source_info),
provider="notion",
)
db.session.add(new_data_source_binding)
db.session.commit()
def sync_data_source(self, binding_id: str):
def sync_data_source(self, binding_id: str) -> None:
# save data source binding
data_source_binding = db.session.scalar(
select(DataSourceOauthBinding).where(
@@ -134,23 +164,22 @@ class NotionOAuth(OAuthDataSource):
if data_source_binding:
# get all authorized pages
pages = self.get_authorized_pages(data_source_binding.access_token)
source_info = data_source_binding.source_info
new_source_info = {
"workspace_name": source_info["workspace_name"],
"workspace_icon": source_info["workspace_icon"],
"workspace_id": source_info["workspace_id"],
"pages": pages,
"total": len(pages),
}
data_source_binding.source_info = new_source_info
source_info = NOTION_SOURCE_INFO_ADAPTER.validate_python(data_source_binding.source_info)
new_source_info = self._build_source_info(
workspace_name=source_info["workspace_name"],
workspace_icon=source_info["workspace_icon"],
workspace_id=source_info["workspace_id"],
pages=pages,
)
data_source_binding.source_info = SOURCE_INFO_STORAGE_ADAPTER.validate_python(new_source_info)
data_source_binding.disabled = False
data_source_binding.updated_at = naive_utc_now()
db.session.commit()
else:
raise ValueError("Data source binding not found")
def get_authorized_pages(self, access_token: str):
pages = []
def get_authorized_pages(self, access_token: str) -> list[NotionPageSummary]:
pages: list[NotionPageSummary] = []
page_results = self.notion_page_search(access_token)
database_results = self.notion_database_search(access_token)
# get page detail
@@ -187,7 +216,7 @@ class NotionOAuth(OAuthDataSource):
"parent_id": parent_id,
"type": "page",
}
pages.append(page)
pages.append(NOTION_PAGE_SUMMARY_ADAPTER.validate_python(page))
# get database detail
for database_result in database_results:
page_id = database_result["id"]
@@ -220,11 +249,11 @@ class NotionOAuth(OAuthDataSource):
"parent_id": parent_id,
"type": "database",
}
pages.append(page)
pages.append(NOTION_PAGE_SUMMARY_ADAPTER.validate_python(page))
return pages
def notion_page_search(self, access_token: str):
results = []
def notion_page_search(self, access_token: str) -> list[dict[str, Any]]:
results: list[dict[str, Any]] = []
next_cursor = None
has_more = True
@@ -249,7 +278,7 @@ class NotionOAuth(OAuthDataSource):
return results
def notion_block_parent_page_id(self, access_token: str, block_id: str):
def notion_block_parent_page_id(self, access_token: str, block_id: str) -> str:
headers = {
"Authorization": f"Bearer {access_token}",
"Notion-Version": "2022-06-28",
@@ -265,7 +294,7 @@ class NotionOAuth(OAuthDataSource):
return self.notion_block_parent_page_id(access_token, parent[parent_type])
return parent[parent_type]
def notion_workspace_name(self, access_token: str):
def notion_workspace_name(self, access_token: str) -> str:
headers = {
"Authorization": f"Bearer {access_token}",
"Notion-Version": "2022-06-28",
@@ -279,8 +308,8 @@ class NotionOAuth(OAuthDataSource):
return user_info["workspace_name"]
return "workspace"
def notion_database_search(self, access_token: str):
results = []
def notion_database_search(self, access_token: str) -> list[dict[str, Any]]:
results: list[dict[str, Any]] = []
next_cursor = None
has_more = True
@@ -303,3 +332,19 @@ class NotionOAuth(OAuthDataSource):
next_cursor = response_json.get("next_cursor", None)
return results
@staticmethod
def _build_source_info(
*,
workspace_name: str | None,
workspace_icon: str | None,
workspace_id: str | None,
pages: list[NotionPageSummary],
) -> NotionSourceInfo:
return {
"workspace_name": workspace_name,
"workspace_icon": workspace_icon,
"workspace_id": workspace_id,
"pages": pages,
"total": len(pages),
}