fix: handle null email in GitHub OAuth sign-in (#34043)

When a GitHub user's profile email is null (hidden/private), the OAuth callback fails with HTTP 400 because `GitHubRawUserInfo` validates `email` as a required non-null string. Even after the type was relaxed to `NotRequired[str | None]` in #33882, the flow still raises a `ValueError` when no email can be resolved, blocking sign-in entirely.

This PR improves the email resolution strategy so that users with private GitHub emails can still sign in.
This commit is contained in:
Krishna Chaitanya
2026-03-25 09:41:18 -07:00
committed by GitHub
parent a9336b74fd
commit 7c0d2e1d98
2 changed files with 78 additions and 21 deletions

View File

@@ -28,6 +28,7 @@ class AccessTokenResponse(TypedDict, total=False):
class GitHubEmailRecord(TypedDict, total=False):
email: str
primary: bool
verified: bool
class GitHubRawUserInfo(TypedDict):
@@ -130,25 +131,51 @@ class GitHubOAuth(OAuth):
response.raise_for_status()
user_info = GITHUB_RAW_USER_INFO_ADAPTER.validate_python(_json_object(response))
# Only call the /user/emails endpoint when the profile email is absent,
# i.e. the user has "Keep my email addresses private" enabled.
resolved_email = user_info.get("email") or ""
if not resolved_email:
resolved_email = self._get_email_from_emails_endpoint(headers)
return {**user_info, "email": resolved_email}
@staticmethod
def _get_email_from_emails_endpoint(headers: dict[str, str]) -> str:
"""Fetch the best available email from GitHub's /user/emails endpoint.
Prefers the primary email, then falls back to any verified email.
Returns an empty string when no usable email is found.
"""
try:
email_response = httpx.get(self._EMAIL_INFO_URL, headers=headers)
email_response = httpx.get(GitHubOAuth._EMAIL_INFO_URL, headers=headers)
email_response.raise_for_status()
email_info = GITHUB_EMAIL_RECORDS_ADAPTER.validate_python(_json_list(email_response))
primary_email = next((email for email in email_info if email.get("primary") is True), None)
email_records = GITHUB_EMAIL_RECORDS_ADAPTER.validate_python(_json_list(email_response))
except (httpx.HTTPStatusError, ValidationError):
logger.warning("Failed to retrieve email from GitHub /user/emails endpoint", exc_info=True)
primary_email = None
return ""
return {**user_info, "email": primary_email.get("email", "") if primary_email else ""}
primary = next((r for r in email_records if r.get("primary") is True), None)
if primary:
return primary.get("email", "")
# No primary email; try any verified email as a fallback.
verified = next((r for r in email_records if r.get("verified") is True), None)
if verified:
return verified.get("email", "")
return ""
def _transform_user_info(self, raw_info: JsonObject) -> OAuthUserInfo:
payload = GITHUB_RAW_USER_INFO_ADAPTER.validate_python(raw_info)
email = payload.get("email")
email = payload.get("email") or ""
if not email:
raise ValueError(
'Dify currently not supports the "Keep my email addresses private" feature,'
" please disable it and login again"
)
# When no email is available from the profile or /user/emails endpoint,
# fall back to GitHub's noreply address so sign-in can still proceed.
# Use only the numeric ID (not the login) so the address stays stable
# even if the user renames their GitHub account.
github_id = payload["id"]
email = f"{github_id}@users.noreply.github.com"
logger.info("GitHub user %s has no public email; using noreply address", payload["login"])
return OAuthUserInfo(id=str(payload["id"]), name=str(payload.get("name") or ""), email=email)

View File

@@ -86,7 +86,7 @@ class TestGitHubOAuth(BaseOAuthTest):
@pytest.mark.parametrize(
("user_data", "email_data", "expected_email"),
[
# User with primary email
# User with primary email from /user/emails (no email in profile)
(
{"id": 12345, "login": "testuser", "name": "Test User"},
[
@@ -101,6 +101,12 @@ class TestGitHubOAuth(BaseOAuthTest):
[{"email": "primary@example.com", "primary": True}],
"primary@example.com",
),
# User with only verified (non-primary) email
(
{"id": 12345, "login": "testuser", "name": "Test User"},
[{"email": "verified@example.com", "primary": False, "verified": True}],
"verified@example.com",
),
],
)
@patch("httpx.get", autospec=True)
@@ -118,18 +124,38 @@ class TestGitHubOAuth(BaseOAuthTest):
assert user_info.id == str(user_data["id"])
assert user_info.name == (user_data["name"] or "")
assert user_info.email == expected_email
# The profile email is absent/null, so /user/emails should be called
assert mock_get.call_count == 2
@patch("httpx.get", autospec=True)
def test_should_skip_email_endpoint_when_profile_email_present(self, mock_get, oauth):
"""When the /user profile already contains an email, do not call /user/emails."""
user_response = MagicMock()
user_response.json.return_value = {
"id": 12345,
"login": "testuser",
"name": "Test User",
"email": "profile@example.com",
}
mock_get.return_value = user_response
user_info = oauth.get_user_info("test_token")
assert user_info.email == "profile@example.com"
# Only /user should be called; /user/emails should be skipped
mock_get.assert_called_once()
@pytest.mark.parametrize(
("user_data", "email_data"),
[
# User with no emails
# User with no emails at all
({"id": 12345, "login": "testuser", "name": "Test User"}, []),
# User with only secondary email
# User with only unverified secondary email
(
{"id": 12345, "login": "testuser", "name": "Test User"},
[{"email": "secondary@example.com", "primary": False}],
[{"email": "secondary@example.com", "primary": False, "verified": False}],
),
# User with private email and no primary in emails endpoint
# User with private email and no entries in emails endpoint
(
{"id": 12345, "login": "testuser", "name": None, "email": None},
[],
@@ -137,7 +163,7 @@ class TestGitHubOAuth(BaseOAuthTest):
],
)
@patch("httpx.get", autospec=True)
def test_should_raise_error_when_no_primary_email(self, mock_get, oauth, user_data, email_data):
def test_should_use_noreply_email_when_no_usable_email(self, mock_get, oauth, user_data, email_data):
user_response = MagicMock()
user_response.json.return_value = user_data
@@ -146,11 +172,13 @@ class TestGitHubOAuth(BaseOAuthTest):
mock_get.side_effect = [user_response, email_response]
with pytest.raises(ValueError, match="Keep my email addresses private"):
oauth.get_user_info("test_token")
user_info = oauth.get_user_info("test_token")
assert user_info.id == str(user_data["id"])
assert user_info.email == "12345@users.noreply.github.com"
@patch("httpx.get", autospec=True)
def test_should_raise_error_when_email_endpoint_fails(self, mock_get, oauth):
def test_should_use_noreply_email_when_email_endpoint_fails(self, mock_get, oauth):
user_response = MagicMock()
user_response.json.return_value = {"id": 12345, "login": "testuser", "name": "Test User"}
@@ -161,8 +189,10 @@ class TestGitHubOAuth(BaseOAuthTest):
mock_get.side_effect = [user_response, email_response]
with pytest.raises(ValueError, match="Keep my email addresses private"):
oauth.get_user_info("test_token")
user_info = oauth.get_user_info("test_token")
assert user_info.id == "12345"
assert user_info.email == "12345@users.noreply.github.com"
@patch("httpx.get", autospec=True)
def test_should_handle_network_errors(self, mock_get, oauth):