diff --git a/api/libs/oauth.py b/api/libs/oauth.py index 1afb42304d9..76e741301cd 100644 --- a/api/libs/oauth.py +++ b/api/libs/oauth.py @@ -28,6 +28,7 @@ class AccessTokenResponse(TypedDict, total=False): class GitHubEmailRecord(TypedDict, total=False): email: str primary: bool + verified: bool class GitHubRawUserInfo(TypedDict): @@ -130,25 +131,51 @@ class GitHubOAuth(OAuth): response.raise_for_status() user_info = GITHUB_RAW_USER_INFO_ADAPTER.validate_python(_json_object(response)) + # Only call the /user/emails endpoint when the profile email is absent, + # i.e. the user has "Keep my email addresses private" enabled. + resolved_email = user_info.get("email") or "" + if not resolved_email: + resolved_email = self._get_email_from_emails_endpoint(headers) + + return {**user_info, "email": resolved_email} + + @staticmethod + def _get_email_from_emails_endpoint(headers: dict[str, str]) -> str: + """Fetch the best available email from GitHub's /user/emails endpoint. + + Prefers the primary email, then falls back to any verified email. + Returns an empty string when no usable email is found. + """ try: - email_response = httpx.get(self._EMAIL_INFO_URL, headers=headers) + email_response = httpx.get(GitHubOAuth._EMAIL_INFO_URL, headers=headers) email_response.raise_for_status() - email_info = GITHUB_EMAIL_RECORDS_ADAPTER.validate_python(_json_list(email_response)) - primary_email = next((email for email in email_info if email.get("primary") is True), None) + email_records = GITHUB_EMAIL_RECORDS_ADAPTER.validate_python(_json_list(email_response)) except (httpx.HTTPStatusError, ValidationError): logger.warning("Failed to retrieve email from GitHub /user/emails endpoint", exc_info=True) - primary_email = None + return "" - return {**user_info, "email": primary_email.get("email", "") if primary_email else ""} + primary = next((r for r in email_records if r.get("primary") is True), None) + if primary: + return primary.get("email", "") + + # No primary email; try any verified email as a fallback. + verified = next((r for r in email_records if r.get("verified") is True), None) + if verified: + return verified.get("email", "") + + return "" def _transform_user_info(self, raw_info: JsonObject) -> OAuthUserInfo: payload = GITHUB_RAW_USER_INFO_ADAPTER.validate_python(raw_info) - email = payload.get("email") + email = payload.get("email") or "" if not email: - raise ValueError( - 'Dify currently not supports the "Keep my email addresses private" feature,' - " please disable it and login again" - ) + # When no email is available from the profile or /user/emails endpoint, + # fall back to GitHub's noreply address so sign-in can still proceed. + # Use only the numeric ID (not the login) so the address stays stable + # even if the user renames their GitHub account. + github_id = payload["id"] + email = f"{github_id}@users.noreply.github.com" + logger.info("GitHub user %s has no public email; using noreply address", payload["login"]) return OAuthUserInfo(id=str(payload["id"]), name=str(payload.get("name") or ""), email=email) diff --git a/api/tests/unit_tests/libs/test_oauth_clients.py b/api/tests/unit_tests/libs/test_oauth_clients.py index 3918e8ee4b8..ab468c86874 100644 --- a/api/tests/unit_tests/libs/test_oauth_clients.py +++ b/api/tests/unit_tests/libs/test_oauth_clients.py @@ -86,7 +86,7 @@ class TestGitHubOAuth(BaseOAuthTest): @pytest.mark.parametrize( ("user_data", "email_data", "expected_email"), [ - # User with primary email + # User with primary email from /user/emails (no email in profile) ( {"id": 12345, "login": "testuser", "name": "Test User"}, [ @@ -101,6 +101,12 @@ class TestGitHubOAuth(BaseOAuthTest): [{"email": "primary@example.com", "primary": True}], "primary@example.com", ), + # User with only verified (non-primary) email + ( + {"id": 12345, "login": "testuser", "name": "Test User"}, + [{"email": "verified@example.com", "primary": False, "verified": True}], + "verified@example.com", + ), ], ) @patch("httpx.get", autospec=True) @@ -118,18 +124,38 @@ class TestGitHubOAuth(BaseOAuthTest): assert user_info.id == str(user_data["id"]) assert user_info.name == (user_data["name"] or "") assert user_info.email == expected_email + # The profile email is absent/null, so /user/emails should be called + assert mock_get.call_count == 2 + + @patch("httpx.get", autospec=True) + def test_should_skip_email_endpoint_when_profile_email_present(self, mock_get, oauth): + """When the /user profile already contains an email, do not call /user/emails.""" + user_response = MagicMock() + user_response.json.return_value = { + "id": 12345, + "login": "testuser", + "name": "Test User", + "email": "profile@example.com", + } + mock_get.return_value = user_response + + user_info = oauth.get_user_info("test_token") + + assert user_info.email == "profile@example.com" + # Only /user should be called; /user/emails should be skipped + mock_get.assert_called_once() @pytest.mark.parametrize( ("user_data", "email_data"), [ - # User with no emails + # User with no emails at all ({"id": 12345, "login": "testuser", "name": "Test User"}, []), - # User with only secondary email + # User with only unverified secondary email ( {"id": 12345, "login": "testuser", "name": "Test User"}, - [{"email": "secondary@example.com", "primary": False}], + [{"email": "secondary@example.com", "primary": False, "verified": False}], ), - # User with private email and no primary in emails endpoint + # User with private email and no entries in emails endpoint ( {"id": 12345, "login": "testuser", "name": None, "email": None}, [], @@ -137,7 +163,7 @@ class TestGitHubOAuth(BaseOAuthTest): ], ) @patch("httpx.get", autospec=True) - def test_should_raise_error_when_no_primary_email(self, mock_get, oauth, user_data, email_data): + def test_should_use_noreply_email_when_no_usable_email(self, mock_get, oauth, user_data, email_data): user_response = MagicMock() user_response.json.return_value = user_data @@ -146,11 +172,13 @@ class TestGitHubOAuth(BaseOAuthTest): mock_get.side_effect = [user_response, email_response] - with pytest.raises(ValueError, match="Keep my email addresses private"): - oauth.get_user_info("test_token") + user_info = oauth.get_user_info("test_token") + + assert user_info.id == str(user_data["id"]) + assert user_info.email == "12345@users.noreply.github.com" @patch("httpx.get", autospec=True) - def test_should_raise_error_when_email_endpoint_fails(self, mock_get, oauth): + def test_should_use_noreply_email_when_email_endpoint_fails(self, mock_get, oauth): user_response = MagicMock() user_response.json.return_value = {"id": 12345, "login": "testuser", "name": "Test User"} @@ -161,8 +189,10 @@ class TestGitHubOAuth(BaseOAuthTest): mock_get.side_effect = [user_response, email_response] - with pytest.raises(ValueError, match="Keep my email addresses private"): - oauth.get_user_info("test_token") + user_info = oauth.get_user_info("test_token") + + assert user_info.id == "12345" + assert user_info.email == "12345@users.noreply.github.com" @patch("httpx.get", autospec=True) def test_should_handle_network_errors(self, mock_get, oauth):