mirror of
https://github.com/langgenius/dify.git
synced 2026-04-05 02:19:20 +08:00
refactor: select in metadata_service (#34479)
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import copy
|
||||
import logging
|
||||
|
||||
from sqlalchemy import delete, func, select
|
||||
|
||||
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
@@ -25,10 +27,14 @@ class MetadataService:
|
||||
raise ValueError("Metadata name cannot exceed 255 characters.")
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
# check if metadata name already exists
|
||||
if (
|
||||
db.session.query(DatasetMetadata)
|
||||
.filter_by(tenant_id=current_tenant_id, dataset_id=dataset_id, name=metadata_args.name)
|
||||
.first()
|
||||
if db.session.scalar(
|
||||
select(DatasetMetadata)
|
||||
.where(
|
||||
DatasetMetadata.tenant_id == current_tenant_id,
|
||||
DatasetMetadata.dataset_id == dataset_id,
|
||||
DatasetMetadata.name == metadata_args.name,
|
||||
)
|
||||
.limit(1)
|
||||
):
|
||||
raise ValueError("Metadata name already exists.")
|
||||
for field in BuiltInField:
|
||||
@@ -54,10 +60,14 @@ class MetadataService:
|
||||
lock_key = f"dataset_metadata_lock_{dataset_id}"
|
||||
# check if metadata name already exists
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
if (
|
||||
db.session.query(DatasetMetadata)
|
||||
.filter_by(tenant_id=current_tenant_id, dataset_id=dataset_id, name=name)
|
||||
.first()
|
||||
if db.session.scalar(
|
||||
select(DatasetMetadata)
|
||||
.where(
|
||||
DatasetMetadata.tenant_id == current_tenant_id,
|
||||
DatasetMetadata.dataset_id == dataset_id,
|
||||
DatasetMetadata.name == name,
|
||||
)
|
||||
.limit(1)
|
||||
):
|
||||
raise ValueError("Metadata name already exists.")
|
||||
for field in BuiltInField:
|
||||
@@ -65,7 +75,11 @@ class MetadataService:
|
||||
raise ValueError("Metadata name already exists in Built-in fields.")
|
||||
try:
|
||||
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
||||
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
|
||||
metadata = db.session.scalar(
|
||||
select(DatasetMetadata)
|
||||
.where(DatasetMetadata.id == metadata_id, DatasetMetadata.dataset_id == dataset_id)
|
||||
.limit(1)
|
||||
)
|
||||
if metadata is None:
|
||||
raise ValueError("Metadata not found.")
|
||||
old_name = metadata.name
|
||||
@@ -74,9 +88,9 @@ class MetadataService:
|
||||
metadata.updated_at = naive_utc_now()
|
||||
|
||||
# update related documents
|
||||
dataset_metadata_bindings = (
|
||||
db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
|
||||
)
|
||||
dataset_metadata_bindings = db.session.scalars(
|
||||
select(DatasetMetadataBinding).where(DatasetMetadataBinding.metadata_id == metadata_id)
|
||||
).all()
|
||||
if dataset_metadata_bindings:
|
||||
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
|
||||
documents = DocumentService.get_document_by_ids(document_ids)
|
||||
@@ -101,15 +115,19 @@ class MetadataService:
|
||||
lock_key = f"dataset_metadata_lock_{dataset_id}"
|
||||
try:
|
||||
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
||||
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
|
||||
metadata = db.session.scalar(
|
||||
select(DatasetMetadata)
|
||||
.where(DatasetMetadata.id == metadata_id, DatasetMetadata.dataset_id == dataset_id)
|
||||
.limit(1)
|
||||
)
|
||||
if metadata is None:
|
||||
raise ValueError("Metadata not found.")
|
||||
db.session.delete(metadata)
|
||||
|
||||
# deal related documents
|
||||
dataset_metadata_bindings = (
|
||||
db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
|
||||
)
|
||||
dataset_metadata_bindings = db.session.scalars(
|
||||
select(DatasetMetadataBinding).where(DatasetMetadataBinding.metadata_id == metadata_id)
|
||||
).all()
|
||||
if dataset_metadata_bindings:
|
||||
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
|
||||
documents = DocumentService.get_document_by_ids(document_ids)
|
||||
@@ -224,16 +242,23 @@ class MetadataService:
|
||||
|
||||
# deal metadata binding (in the same transaction as the doc_metadata update)
|
||||
if not operation.partial_update:
|
||||
db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
|
||||
db.session.execute(
|
||||
delete(DatasetMetadataBinding).where(
|
||||
DatasetMetadataBinding.document_id == operation.document_id
|
||||
)
|
||||
)
|
||||
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
for metadata_value in operation.metadata_list:
|
||||
# check if binding already exists
|
||||
if operation.partial_update:
|
||||
existing_binding = (
|
||||
db.session.query(DatasetMetadataBinding)
|
||||
.filter_by(document_id=operation.document_id, metadata_id=metadata_value.id)
|
||||
.first()
|
||||
existing_binding = db.session.scalar(
|
||||
select(DatasetMetadataBinding)
|
||||
.where(
|
||||
DatasetMetadataBinding.document_id == operation.document_id,
|
||||
DatasetMetadataBinding.metadata_id == metadata_value.id,
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
if existing_binding:
|
||||
continue
|
||||
@@ -275,9 +300,13 @@ class MetadataService:
|
||||
"id": item.get("id"),
|
||||
"name": item.get("name"),
|
||||
"type": item.get("type"),
|
||||
"count": db.session.query(DatasetMetadataBinding)
|
||||
.filter_by(metadata_id=item.get("id"), dataset_id=dataset.id)
|
||||
.count(),
|
||||
"count": db.session.scalar(
|
||||
select(func.count(DatasetMetadataBinding.id)).where(
|
||||
DatasetMetadataBinding.metadata_id == item.get("id"),
|
||||
DatasetMetadataBinding.dataset_id == dataset.id,
|
||||
)
|
||||
)
|
||||
or 0,
|
||||
}
|
||||
for item in dataset.doc_metadata or []
|
||||
if item.get("id") != "built-in"
|
||||
|
||||
@@ -401,10 +401,7 @@ class TestMetadataServiceCreateMetadata:
|
||||
metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name="category", metadata_type="string")
|
||||
|
||||
# Mock query to return None (no existing metadata with same name)
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = None
|
||||
mock_db_session.query.return_value = mock_query
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
# Mock BuiltInField enum iteration
|
||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||
@@ -417,10 +414,6 @@ class TestMetadataServiceCreateMetadata:
|
||||
assert result is not None
|
||||
assert isinstance(result, DatasetMetadata)
|
||||
|
||||
# Verify query was made to check for duplicates
|
||||
mock_db_session.query.assert_called()
|
||||
mock_query.filter_by.assert_called()
|
||||
|
||||
# Verify metadata was added and committed
|
||||
mock_db_session.add.assert_called_once()
|
||||
mock_db_session.commit.assert_called_once()
|
||||
@@ -468,10 +461,7 @@ class TestMetadataServiceCreateMetadata:
|
||||
|
||||
# Mock existing metadata with same name
|
||||
existing_metadata = MetadataTestDataFactory.create_metadata_mock(name="category")
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = existing_metadata
|
||||
mock_db_session.query.return_value = mock_query
|
||||
mock_db_session.scalar.return_value = existing_metadata
|
||||
|
||||
# Act & Assert
|
||||
with pytest.raises(ValueError, match="Metadata name already exists"):
|
||||
@@ -500,10 +490,7 @@ class TestMetadataServiceCreateMetadata:
|
||||
)
|
||||
|
||||
# Mock query to return None (no duplicate in database)
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = None
|
||||
mock_db_session.query.return_value = mock_query
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
# Mock BuiltInField to include the conflicting name
|
||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||
@@ -597,27 +584,11 @@ class TestMetadataServiceUpdateMetadataName:
|
||||
|
||||
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
|
||||
|
||||
# Mock query for duplicate check (no duplicate)
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = None
|
||||
mock_db_session.query.return_value = mock_query
|
||||
|
||||
# Mock metadata retrieval
|
||||
def query_side_effect(model):
|
||||
if model == DatasetMetadata:
|
||||
mock_meta_query = Mock()
|
||||
mock_meta_query.filter_by.return_value = mock_meta_query
|
||||
mock_meta_query.first.return_value = existing_metadata
|
||||
return mock_meta_query
|
||||
return mock_query
|
||||
|
||||
mock_db_session.query.side_effect = query_side_effect
|
||||
# Mock scalar calls: first for duplicate check (None), second for metadata retrieval
|
||||
mock_db_session.scalar.side_effect = [None, existing_metadata]
|
||||
|
||||
# Mock no metadata bindings (no documents to update)
|
||||
mock_binding_query = Mock()
|
||||
mock_binding_query.filter_by.return_value = mock_binding_query
|
||||
mock_binding_query.all.return_value = []
|
||||
mock_db_session.scalars.return_value.all.return_value = []
|
||||
|
||||
# Mock BuiltInField enum
|
||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||
@@ -655,22 +626,8 @@ class TestMetadataServiceUpdateMetadataName:
|
||||
metadata_id = "non-existent-metadata"
|
||||
new_name = "updated_category"
|
||||
|
||||
# Mock query for duplicate check (no duplicate)
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = None
|
||||
mock_db_session.query.return_value = mock_query
|
||||
|
||||
# Mock metadata retrieval to return None
|
||||
def query_side_effect(model):
|
||||
if model == DatasetMetadata:
|
||||
mock_meta_query = Mock()
|
||||
mock_meta_query.filter_by.return_value = mock_meta_query
|
||||
mock_meta_query.first.return_value = None # Not found
|
||||
return mock_meta_query
|
||||
return mock_query
|
||||
|
||||
mock_db_session.query.side_effect = query_side_effect
|
||||
# Mock scalar calls: first for duplicate check (None), second for metadata retrieval (None = not found)
|
||||
mock_db_session.scalar.side_effect = [None, None]
|
||||
|
||||
# Mock BuiltInField enum
|
||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||
@@ -746,15 +703,10 @@ class TestMetadataServiceDeleteMetadata:
|
||||
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
|
||||
|
||||
# Mock metadata retrieval
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = existing_metadata
|
||||
mock_db_session.query.return_value = mock_query
|
||||
mock_db_session.scalar.return_value = existing_metadata
|
||||
|
||||
# Mock no metadata bindings (no documents to update)
|
||||
mock_binding_query = Mock()
|
||||
mock_binding_query.filter_by.return_value = mock_binding_query
|
||||
mock_binding_query.all.return_value = []
|
||||
mock_db_session.scalars.return_value.all.return_value = []
|
||||
|
||||
# Act
|
||||
result = MetadataService.delete_metadata(dataset_id, metadata_id)
|
||||
@@ -788,10 +740,7 @@ class TestMetadataServiceDeleteMetadata:
|
||||
metadata_id = "non-existent-metadata"
|
||||
|
||||
# Mock metadata retrieval to return None
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.first.return_value = None
|
||||
mock_db_session.query.return_value = mock_query
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
# Act & Assert
|
||||
with pytest.raises(ValueError, match="Metadata not found"):
|
||||
@@ -1013,10 +962,7 @@ class TestMetadataServiceGetDatasetMetadatas:
|
||||
)
|
||||
|
||||
# Mock usage count queries
|
||||
mock_query = Mock()
|
||||
mock_query.filter_by.return_value = mock_query
|
||||
mock_query.count.return_value = 5 # 5 documents use this metadata
|
||||
mock_db_session.query.return_value = mock_query
|
||||
mock_db_session.scalar.return_value = 5 # 5 documents use this metadata
|
||||
|
||||
# Act
|
||||
result = MetadataService.get_dataset_metadatas(dataset)
|
||||
|
||||
Reference in New Issue
Block a user