mirror of
https://github.com/langgenius/dify.git
synced 2026-04-05 09:19:22 +08:00
feat: server multi models support (#799)
This commit is contained in:
@@ -3,7 +3,7 @@ from typing import Any, Dict, Optional, Sequence
|
||||
from langchain.schema import Document
|
||||
from sqlalchemy import func
|
||||
|
||||
from core.llm.token_calculator import TokenCalculator
|
||||
from core.model_providers.model_factory import ModelFactory
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset, DocumentSegment
|
||||
|
||||
@@ -13,12 +13,10 @@ class DatesetDocumentStore:
|
||||
self,
|
||||
dataset: Dataset,
|
||||
user_id: str,
|
||||
embedding_model_name: str,
|
||||
document_id: Optional[str] = None,
|
||||
):
|
||||
self._dataset = dataset
|
||||
self._user_id = user_id
|
||||
self._embedding_model_name = embedding_model_name
|
||||
self._document_id = document_id
|
||||
|
||||
@classmethod
|
||||
@@ -39,10 +37,6 @@ class DatesetDocumentStore:
|
||||
def user_id(self) -> Any:
|
||||
return self._user_id
|
||||
|
||||
@property
|
||||
def embedding_model_name(self) -> Any:
|
||||
return self._embedding_model_name
|
||||
|
||||
@property
|
||||
def docs(self) -> Dict[str, Document]:
|
||||
document_segments = db.session.query(DocumentSegment).filter(
|
||||
@@ -74,6 +68,10 @@ class DatesetDocumentStore:
|
||||
if max_position is None:
|
||||
max_position = 0
|
||||
|
||||
embedding_model = ModelFactory.get_embedding_model(
|
||||
tenant_id=self._dataset.tenant_id
|
||||
)
|
||||
|
||||
for doc in docs:
|
||||
if not isinstance(doc, Document):
|
||||
raise ValueError("doc must be a Document")
|
||||
@@ -88,7 +86,7 @@ class DatesetDocumentStore:
|
||||
)
|
||||
|
||||
# calc embedding use tokens
|
||||
tokens = TokenCalculator.get_num_tokens(self._embedding_model_name, doc.page_content)
|
||||
tokens = embedding_model.get_num_tokens(doc.page_content)
|
||||
|
||||
if not segment_document:
|
||||
max_position += 1
|
||||
|
||||
Reference in New Issue
Block a user