refactor: select in metadata_service (#34479)

This commit is contained in:
Renzo
2026-04-02 14:34:38 +02:00
committed by GitHub
parent 894826771a
commit d243de26ec
2 changed files with 65 additions and 90 deletions

View File

@@ -1,6 +1,8 @@
import copy
import logging
from sqlalchemy import delete, func, select
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
from extensions.ext_database import db
from extensions.ext_redis import redis_client
@@ -25,10 +27,14 @@ class MetadataService:
raise ValueError("Metadata name cannot exceed 255 characters.")
current_user, current_tenant_id = current_account_with_tenant()
# check if metadata name already exists
if (
db.session.query(DatasetMetadata)
.filter_by(tenant_id=current_tenant_id, dataset_id=dataset_id, name=metadata_args.name)
.first()
if db.session.scalar(
select(DatasetMetadata)
.where(
DatasetMetadata.tenant_id == current_tenant_id,
DatasetMetadata.dataset_id == dataset_id,
DatasetMetadata.name == metadata_args.name,
)
.limit(1)
):
raise ValueError("Metadata name already exists.")
for field in BuiltInField:
@@ -54,10 +60,14 @@ class MetadataService:
lock_key = f"dataset_metadata_lock_{dataset_id}"
# check if metadata name already exists
current_user, current_tenant_id = current_account_with_tenant()
if (
db.session.query(DatasetMetadata)
.filter_by(tenant_id=current_tenant_id, dataset_id=dataset_id, name=name)
.first()
if db.session.scalar(
select(DatasetMetadata)
.where(
DatasetMetadata.tenant_id == current_tenant_id,
DatasetMetadata.dataset_id == dataset_id,
DatasetMetadata.name == name,
)
.limit(1)
):
raise ValueError("Metadata name already exists.")
for field in BuiltInField:
@@ -65,7 +75,11 @@ class MetadataService:
raise ValueError("Metadata name already exists in Built-in fields.")
try:
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
metadata = db.session.scalar(
select(DatasetMetadata)
.where(DatasetMetadata.id == metadata_id, DatasetMetadata.dataset_id == dataset_id)
.limit(1)
)
if metadata is None:
raise ValueError("Metadata not found.")
old_name = metadata.name
@@ -74,9 +88,9 @@ class MetadataService:
metadata.updated_at = naive_utc_now()
# update related documents
dataset_metadata_bindings = (
db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
)
dataset_metadata_bindings = db.session.scalars(
select(DatasetMetadataBinding).where(DatasetMetadataBinding.metadata_id == metadata_id)
).all()
if dataset_metadata_bindings:
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
documents = DocumentService.get_document_by_ids(document_ids)
@@ -101,15 +115,19 @@ class MetadataService:
lock_key = f"dataset_metadata_lock_{dataset_id}"
try:
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
metadata = db.session.scalar(
select(DatasetMetadata)
.where(DatasetMetadata.id == metadata_id, DatasetMetadata.dataset_id == dataset_id)
.limit(1)
)
if metadata is None:
raise ValueError("Metadata not found.")
db.session.delete(metadata)
# deal related documents
dataset_metadata_bindings = (
db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
)
dataset_metadata_bindings = db.session.scalars(
select(DatasetMetadataBinding).where(DatasetMetadataBinding.metadata_id == metadata_id)
).all()
if dataset_metadata_bindings:
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
documents = DocumentService.get_document_by_ids(document_ids)
@@ -224,16 +242,23 @@ class MetadataService:
# deal metadata binding (in the same transaction as the doc_metadata update)
if not operation.partial_update:
db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
db.session.execute(
delete(DatasetMetadataBinding).where(
DatasetMetadataBinding.document_id == operation.document_id
)
)
current_user, current_tenant_id = current_account_with_tenant()
for metadata_value in operation.metadata_list:
# check if binding already exists
if operation.partial_update:
existing_binding = (
db.session.query(DatasetMetadataBinding)
.filter_by(document_id=operation.document_id, metadata_id=metadata_value.id)
.first()
existing_binding = db.session.scalar(
select(DatasetMetadataBinding)
.where(
DatasetMetadataBinding.document_id == operation.document_id,
DatasetMetadataBinding.metadata_id == metadata_value.id,
)
.limit(1)
)
if existing_binding:
continue
@@ -275,9 +300,13 @@ class MetadataService:
"id": item.get("id"),
"name": item.get("name"),
"type": item.get("type"),
"count": db.session.query(DatasetMetadataBinding)
.filter_by(metadata_id=item.get("id"), dataset_id=dataset.id)
.count(),
"count": db.session.scalar(
select(func.count(DatasetMetadataBinding.id)).where(
DatasetMetadataBinding.metadata_id == item.get("id"),
DatasetMetadataBinding.dataset_id == dataset.id,
)
)
or 0,
}
for item in dataset.doc_metadata or []
if item.get("id") != "built-in"

View File

@@ -401,10 +401,7 @@ class TestMetadataServiceCreateMetadata:
metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name="category", metadata_type="string")
# Mock query to return None (no existing metadata with same name)
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = None
mock_db_session.query.return_value = mock_query
mock_db_session.scalar.return_value = None
# Mock BuiltInField enum iteration
with patch("services.metadata_service.BuiltInField") as mock_builtin:
@@ -417,10 +414,6 @@ class TestMetadataServiceCreateMetadata:
assert result is not None
assert isinstance(result, DatasetMetadata)
# Verify query was made to check for duplicates
mock_db_session.query.assert_called()
mock_query.filter_by.assert_called()
# Verify metadata was added and committed
mock_db_session.add.assert_called_once()
mock_db_session.commit.assert_called_once()
@@ -468,10 +461,7 @@ class TestMetadataServiceCreateMetadata:
# Mock existing metadata with same name
existing_metadata = MetadataTestDataFactory.create_metadata_mock(name="category")
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = existing_metadata
mock_db_session.query.return_value = mock_query
mock_db_session.scalar.return_value = existing_metadata
# Act & Assert
with pytest.raises(ValueError, match="Metadata name already exists"):
@@ -500,10 +490,7 @@ class TestMetadataServiceCreateMetadata:
)
# Mock query to return None (no duplicate in database)
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = None
mock_db_session.query.return_value = mock_query
mock_db_session.scalar.return_value = None
# Mock BuiltInField to include the conflicting name
with patch("services.metadata_service.BuiltInField") as mock_builtin:
@@ -597,27 +584,11 @@ class TestMetadataServiceUpdateMetadataName:
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
# Mock query for duplicate check (no duplicate)
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = None
mock_db_session.query.return_value = mock_query
# Mock metadata retrieval
def query_side_effect(model):
if model == DatasetMetadata:
mock_meta_query = Mock()
mock_meta_query.filter_by.return_value = mock_meta_query
mock_meta_query.first.return_value = existing_metadata
return mock_meta_query
return mock_query
mock_db_session.query.side_effect = query_side_effect
# Mock scalar calls: first for duplicate check (None), second for metadata retrieval
mock_db_session.scalar.side_effect = [None, existing_metadata]
# Mock no metadata bindings (no documents to update)
mock_binding_query = Mock()
mock_binding_query.filter_by.return_value = mock_binding_query
mock_binding_query.all.return_value = []
mock_db_session.scalars.return_value.all.return_value = []
# Mock BuiltInField enum
with patch("services.metadata_service.BuiltInField") as mock_builtin:
@@ -655,22 +626,8 @@ class TestMetadataServiceUpdateMetadataName:
metadata_id = "non-existent-metadata"
new_name = "updated_category"
# Mock query for duplicate check (no duplicate)
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = None
mock_db_session.query.return_value = mock_query
# Mock metadata retrieval to return None
def query_side_effect(model):
if model == DatasetMetadata:
mock_meta_query = Mock()
mock_meta_query.filter_by.return_value = mock_meta_query
mock_meta_query.first.return_value = None # Not found
return mock_meta_query
return mock_query
mock_db_session.query.side_effect = query_side_effect
# Mock scalar calls: first for duplicate check (None), second for metadata retrieval (None = not found)
mock_db_session.scalar.side_effect = [None, None]
# Mock BuiltInField enum
with patch("services.metadata_service.BuiltInField") as mock_builtin:
@@ -746,15 +703,10 @@ class TestMetadataServiceDeleteMetadata:
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
# Mock metadata retrieval
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = existing_metadata
mock_db_session.query.return_value = mock_query
mock_db_session.scalar.return_value = existing_metadata
# Mock no metadata bindings (no documents to update)
mock_binding_query = Mock()
mock_binding_query.filter_by.return_value = mock_binding_query
mock_binding_query.all.return_value = []
mock_db_session.scalars.return_value.all.return_value = []
# Act
result = MetadataService.delete_metadata(dataset_id, metadata_id)
@@ -788,10 +740,7 @@ class TestMetadataServiceDeleteMetadata:
metadata_id = "non-existent-metadata"
# Mock metadata retrieval to return None
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.first.return_value = None
mock_db_session.query.return_value = mock_query
mock_db_session.scalar.return_value = None
# Act & Assert
with pytest.raises(ValueError, match="Metadata not found"):
@@ -1013,10 +962,7 @@ class TestMetadataServiceGetDatasetMetadatas:
)
# Mock usage count queries
mock_query = Mock()
mock_query.filter_by.return_value = mock_query
mock_query.count.return_value = 5 # 5 documents use this metadata
mock_db_session.query.return_value = mock_query
mock_db_session.scalar.return_value = 5 # 5 documents use this metadata
# Act
result = MetadataService.get_dataset_metadatas(dataset)