mirror of
https://github.com/langgenius/dify.git
synced 2026-04-05 09:19:22 +08:00
feat: universal chat in explore (#649)
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
This commit is contained in:
@@ -1,32 +0,0 @@
|
||||
from typing import Optional
|
||||
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.chain.sensitive_word_avoidance_chain import SensitiveWordAvoidanceChain
|
||||
from core.chain.tool_chain import ToolChain
|
||||
|
||||
|
||||
class ChainBuilder:
|
||||
@classmethod
|
||||
def to_tool_chain(cls, tool, **kwargs) -> ToolChain:
|
||||
return ToolChain(
|
||||
tool=tool,
|
||||
input_key=kwargs.get('input_key', 'input'),
|
||||
output_key=kwargs.get('output_key', 'tool_output'),
|
||||
callbacks=[DifyStdOutCallbackHandler()]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def to_sensitive_word_avoidance_chain(cls, tool_config: dict, **kwargs) -> Optional[
|
||||
SensitiveWordAvoidanceChain]:
|
||||
sensitive_words = tool_config.get("words", "")
|
||||
if tool_config.get("enabled", False) \
|
||||
and sensitive_words:
|
||||
return SensitiveWordAvoidanceChain(
|
||||
sensitive_words=sensitive_words.split(","),
|
||||
canned_response=tool_config.get("canned_response", ''),
|
||||
output_key="sensitive_word_avoidance_output",
|
||||
callbacks=[DifyStdOutCallbackHandler()],
|
||||
**kwargs
|
||||
)
|
||||
|
||||
return None
|
||||
@@ -1,111 +0,0 @@
|
||||
"""Base classes for LLM-powered router chains."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, Type, cast, NamedTuple
|
||||
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from pydantic import root_validator
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.prompts import BasePromptTemplate
|
||||
from langchain.schema import BaseOutputParser, OutputParserException
|
||||
|
||||
from libs.json_in_md_parser import parse_and_check_json_markdown
|
||||
|
||||
|
||||
class Route(NamedTuple):
|
||||
destination: Optional[str]
|
||||
next_inputs: Dict[str, Any]
|
||||
|
||||
|
||||
class LLMRouterChain(Chain):
|
||||
"""A router chain that uses an LLM chain to perform routing."""
|
||||
|
||||
llm_chain: LLMChain
|
||||
"""LLM chain used to perform routing"""
|
||||
|
||||
@root_validator()
|
||||
def validate_prompt(cls, values: dict) -> dict:
|
||||
prompt = values["llm_chain"].prompt
|
||||
if prompt.output_parser is None:
|
||||
raise ValueError(
|
||||
"LLMRouterChain requires base llm_chain prompt to have an output"
|
||||
" parser that converts LLM text output to a dictionary with keys"
|
||||
" 'destination' and 'next_inputs'. Received a prompt with no output"
|
||||
" parser."
|
||||
)
|
||||
return values
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Will be whatever keys the LLM chain prompt expects.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return self.llm_chain.input_keys
|
||||
|
||||
def _validate_outputs(self, outputs: Dict[str, Any]) -> None:
|
||||
super()._validate_outputs(outputs)
|
||||
if not isinstance(outputs["next_inputs"], dict):
|
||||
raise ValueError
|
||||
|
||||
def _call(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[CallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, Any]:
|
||||
output = cast(
|
||||
Dict[str, Any],
|
||||
self.llm_chain.predict_and_parse(**inputs),
|
||||
)
|
||||
return output
|
||||
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls, llm: BaseLanguageModel, prompt: BasePromptTemplate, **kwargs: Any
|
||||
) -> LLMRouterChain:
|
||||
"""Convenience constructor."""
|
||||
llm_chain = LLMChain(llm=llm, prompt=prompt)
|
||||
return cls(llm_chain=llm_chain, **kwargs)
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
return ["destination", "next_inputs"]
|
||||
|
||||
def route(self, inputs: Dict[str, Any]) -> Route:
|
||||
result = self(inputs)
|
||||
return Route(result["destination"], result["next_inputs"])
|
||||
|
||||
|
||||
class RouterOutputParser(BaseOutputParser[Dict[str, str]]):
|
||||
"""Parser for output of router chain int he multi-prompt chain."""
|
||||
|
||||
default_destination: str = "DEFAULT"
|
||||
next_inputs_type: Type = str
|
||||
next_inputs_inner_key: str = "input"
|
||||
|
||||
def parse(self, text: str) -> Dict[str, Any]:
|
||||
try:
|
||||
expected_keys = ["destination", "next_inputs"]
|
||||
parsed = parse_and_check_json_markdown(text, expected_keys)
|
||||
if not isinstance(parsed["destination"], str):
|
||||
raise ValueError("Expected 'destination' to be a string.")
|
||||
if not isinstance(parsed["next_inputs"], self.next_inputs_type):
|
||||
raise ValueError(
|
||||
f"Expected 'next_inputs' to be {self.next_inputs_type}."
|
||||
)
|
||||
parsed["next_inputs"] = {self.next_inputs_inner_key: parsed["next_inputs"]}
|
||||
if (
|
||||
parsed["destination"].strip().lower()
|
||||
== self.default_destination.lower()
|
||||
):
|
||||
parsed["destination"] = None
|
||||
else:
|
||||
parsed["destination"] = parsed["destination"].strip()
|
||||
return parsed
|
||||
except Exception as e:
|
||||
raise OutputParserException(
|
||||
f"Parsing text\n{text}\n of llm router raised following error:\n{e}"
|
||||
)
|
||||
@@ -1,110 +0,0 @@
|
||||
from typing import Optional, List, cast
|
||||
|
||||
from langchain.chains import SequentialChain
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.memory.chat_memory import BaseChatMemory
|
||||
|
||||
from core.callback_handler.main_chain_gather_callback_handler import MainChainGatherCallbackHandler
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.chain.chain_builder import ChainBuilder
|
||||
from core.chain.multi_dataset_router_chain import MultiDatasetRouterChain
|
||||
from core.conversation_message_task import ConversationMessageTask
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset
|
||||
|
||||
|
||||
class MainChainBuilder:
|
||||
@classmethod
|
||||
def to_langchain_components(cls, tenant_id: str, agent_mode: dict, memory: Optional[BaseChatMemory],
|
||||
rest_tokens: int,
|
||||
conversation_message_task: ConversationMessageTask):
|
||||
first_input_key = "input"
|
||||
final_output_key = "output"
|
||||
|
||||
chains = []
|
||||
|
||||
chain_callback_handler = MainChainGatherCallbackHandler(conversation_message_task)
|
||||
|
||||
# agent mode
|
||||
tool_chains, chains_output_key = cls.get_agent_chains(
|
||||
tenant_id=tenant_id,
|
||||
agent_mode=agent_mode,
|
||||
rest_tokens=rest_tokens,
|
||||
memory=memory,
|
||||
conversation_message_task=conversation_message_task
|
||||
)
|
||||
chains += tool_chains
|
||||
|
||||
if chains_output_key:
|
||||
final_output_key = chains_output_key
|
||||
|
||||
if len(chains) == 0:
|
||||
return None
|
||||
|
||||
for chain in chains:
|
||||
chain = cast(Chain, chain)
|
||||
chain.callbacks.append(chain_callback_handler)
|
||||
|
||||
# build main chain
|
||||
overall_chain = SequentialChain(
|
||||
chains=chains,
|
||||
input_variables=[first_input_key],
|
||||
output_variables=[final_output_key],
|
||||
memory=memory, # only for use the memory prompt input key
|
||||
)
|
||||
|
||||
return overall_chain
|
||||
|
||||
@classmethod
|
||||
def get_agent_chains(cls, tenant_id: str, agent_mode: dict,
|
||||
rest_tokens: int,
|
||||
memory: Optional[BaseChatMemory],
|
||||
conversation_message_task: ConversationMessageTask):
|
||||
# agent mode
|
||||
chains = []
|
||||
if agent_mode and agent_mode.get('enabled'):
|
||||
tools = agent_mode.get('tools', [])
|
||||
|
||||
pre_fixed_chains = []
|
||||
# agent_tools = []
|
||||
datasets = []
|
||||
for tool in tools:
|
||||
tool_type = list(tool.keys())[0]
|
||||
tool_config = list(tool.values())[0]
|
||||
if tool_type == 'sensitive-word-avoidance':
|
||||
chain = ChainBuilder.to_sensitive_word_avoidance_chain(tool_config)
|
||||
if chain:
|
||||
pre_fixed_chains.append(chain)
|
||||
elif tool_type == "dataset":
|
||||
# get dataset from dataset id
|
||||
dataset = db.session.query(Dataset).filter(
|
||||
Dataset.tenant_id == tenant_id,
|
||||
Dataset.id == tool_config.get("id")
|
||||
).first()
|
||||
|
||||
if dataset:
|
||||
datasets.append(dataset)
|
||||
|
||||
# add pre-fixed chains
|
||||
chains += pre_fixed_chains
|
||||
|
||||
if len(datasets) > 0:
|
||||
# tool to chain
|
||||
multi_dataset_router_chain = MultiDatasetRouterChain.from_datasets(
|
||||
tenant_id=tenant_id,
|
||||
datasets=datasets,
|
||||
conversation_message_task=conversation_message_task,
|
||||
rest_tokens=rest_tokens,
|
||||
callbacks=[DifyStdOutCallbackHandler()]
|
||||
)
|
||||
chains.append(multi_dataset_router_chain)
|
||||
|
||||
final_output_key = cls.get_chains_output_key(chains)
|
||||
|
||||
return chains, final_output_key
|
||||
|
||||
@classmethod
|
||||
def get_chains_output_key(cls, chains: List[Chain]):
|
||||
if len(chains) > 0:
|
||||
return chains[-1].output_keys[0]
|
||||
return None
|
||||
@@ -1,198 +0,0 @@
|
||||
import math
|
||||
import re
|
||||
from typing import Mapping, List, Dict, Any, Optional
|
||||
|
||||
from langchain import PromptTemplate
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from pydantic import Extra
|
||||
|
||||
from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.chain.llm_router_chain import LLMRouterChain, RouterOutputParser
|
||||
from core.conversation_message_task import ConversationMessageTask
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
from core.tool.dataset_index_tool import DatasetTool
|
||||
from models.dataset import Dataset, DatasetProcessRule
|
||||
|
||||
DEFAULT_K = 2
|
||||
CONTEXT_TOKENS_PERCENT = 0.3
|
||||
MULTI_PROMPT_ROUTER_TEMPLATE = """
|
||||
Given a raw text input to a language model select the model prompt best suited for \
|
||||
the input. You will be given the names of the available prompts and a description of \
|
||||
what the prompt is best suited for. You may also revise the original input if you \
|
||||
think that revising it will ultimately lead to a better response from the language \
|
||||
model.
|
||||
|
||||
<< FORMATTING >>
|
||||
Return a markdown code snippet with a JSON object formatted to look like, \
|
||||
no any other string out of markdown code snippet:
|
||||
```json
|
||||
{{{{
|
||||
"destination": string \\ name of the prompt to use or "DEFAULT"
|
||||
"next_inputs": string \\ a potentially modified version of the original input
|
||||
}}}}
|
||||
```
|
||||
|
||||
REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR \
|
||||
it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.
|
||||
REMEMBER: "next_inputs" can just be the original input if you don't think any \
|
||||
modifications are needed.
|
||||
|
||||
<< CANDIDATE PROMPTS >>
|
||||
{destinations}
|
||||
|
||||
<< INPUT >>
|
||||
{{input}}
|
||||
|
||||
<< OUTPUT >>
|
||||
"""
|
||||
|
||||
|
||||
class MultiDatasetRouterChain(Chain):
|
||||
"""Use a single chain to route an input to one of multiple candidate chains."""
|
||||
|
||||
router_chain: LLMRouterChain
|
||||
"""Chain for deciding a destination chain and the input to it."""
|
||||
dataset_tools: Mapping[str, DatasetTool]
|
||||
"""Map of name to candidate chains that inputs can be routed to."""
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
extra = Extra.forbid
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Will be whatever keys the router chain prompt expects.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return self.router_chain.input_keys
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
return ["text"]
|
||||
|
||||
@classmethod
|
||||
def from_datasets(
|
||||
cls,
|
||||
tenant_id: str,
|
||||
datasets: List[Dataset],
|
||||
conversation_message_task: ConversationMessageTask,
|
||||
rest_tokens: int,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""Convenience constructor for instantiating from destination prompts."""
|
||||
llm = LLMBuilder.to_llm(
|
||||
tenant_id=tenant_id,
|
||||
model_name='gpt-3.5-turbo',
|
||||
temperature=0,
|
||||
max_tokens=1024,
|
||||
callbacks=[DifyStdOutCallbackHandler()]
|
||||
)
|
||||
|
||||
destinations = ["[[{}]]: {}".format(d.id, d.description.replace('\n', ' ') if d.description
|
||||
else ('useful for when you want to answer queries about the ' + d.name))
|
||||
for d in datasets]
|
||||
destinations_str = "\n".join(destinations)
|
||||
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
|
||||
destinations=destinations_str
|
||||
)
|
||||
|
||||
router_prompt = PromptTemplate(
|
||||
template=router_template,
|
||||
input_variables=["input"],
|
||||
output_parser=RouterOutputParser(),
|
||||
)
|
||||
|
||||
router_chain = LLMRouterChain.from_llm(llm, router_prompt)
|
||||
dataset_tools = {}
|
||||
for dataset in datasets:
|
||||
# fulfill description when it is empty
|
||||
if dataset.available_document_count == 0 or dataset.available_document_count == 0:
|
||||
continue
|
||||
|
||||
description = dataset.description
|
||||
if not description:
|
||||
description = 'useful for when you want to answer queries about the ' + dataset.name
|
||||
|
||||
k = cls._dynamic_calc_retrieve_k(dataset, rest_tokens)
|
||||
if k == 0:
|
||||
continue
|
||||
|
||||
dataset_tool = DatasetTool(
|
||||
name=f"dataset-{dataset.id}",
|
||||
description=description,
|
||||
k=k,
|
||||
dataset=dataset,
|
||||
callbacks=[DatasetToolCallbackHandler(conversation_message_task), DifyStdOutCallbackHandler()]
|
||||
)
|
||||
|
||||
dataset_tools[str(dataset.id)] = dataset_tool
|
||||
|
||||
return cls(
|
||||
router_chain=router_chain,
|
||||
dataset_tools=dataset_tools,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _dynamic_calc_retrieve_k(cls, dataset: Dataset, rest_tokens: int) -> int:
|
||||
processing_rule = dataset.latest_process_rule
|
||||
if not processing_rule:
|
||||
return DEFAULT_K
|
||||
|
||||
if processing_rule.mode == "custom":
|
||||
rules = processing_rule.rules_dict
|
||||
if not rules:
|
||||
return DEFAULT_K
|
||||
|
||||
segmentation = rules["segmentation"]
|
||||
segment_max_tokens = segmentation["max_tokens"]
|
||||
else:
|
||||
segment_max_tokens = DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens']
|
||||
|
||||
# when rest_tokens is less than default context tokens
|
||||
if rest_tokens < segment_max_tokens * DEFAULT_K:
|
||||
return rest_tokens // segment_max_tokens
|
||||
|
||||
context_limit_tokens = math.floor(rest_tokens * CONTEXT_TOKENS_PERCENT)
|
||||
|
||||
# when context_limit_tokens is less than default context tokens, use default_k
|
||||
if context_limit_tokens <= segment_max_tokens * DEFAULT_K:
|
||||
return DEFAULT_K
|
||||
|
||||
# Expand the k value when there's still some room left in the 30% rest tokens space
|
||||
return context_limit_tokens // segment_max_tokens
|
||||
|
||||
def _call(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[CallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, Any]:
|
||||
if len(self.dataset_tools) == 0:
|
||||
return {"text": ''}
|
||||
elif len(self.dataset_tools) == 1:
|
||||
return {"text": next(iter(self.dataset_tools.values())).run(inputs['input'])}
|
||||
|
||||
route = self.router_chain.route(inputs)
|
||||
|
||||
destination = ''
|
||||
if route.destination:
|
||||
pattern = r'\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b'
|
||||
match = re.search(pattern, route.destination, re.IGNORECASE)
|
||||
if match:
|
||||
destination = match.group()
|
||||
|
||||
if not destination:
|
||||
return {"text": ''}
|
||||
elif destination in self.dataset_tools:
|
||||
return {"text": self.dataset_tools[destination].run(
|
||||
route.next_inputs['input']
|
||||
)}
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Received invalid destination chain name '{destination}'"
|
||||
)
|
||||
@@ -1,51 +0,0 @@
|
||||
from typing import List, Dict, Optional, Any
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun, AsyncCallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
|
||||
class ToolChain(Chain):
|
||||
input_key: str = "input" #: :meta private:
|
||||
output_key: str = "output" #: :meta private:
|
||||
|
||||
tool: BaseTool
|
||||
|
||||
@property
|
||||
def _chain_type(self) -> str:
|
||||
return "tool_chain"
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Expect input key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.input_key]
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
"""Return output key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.output_key]
|
||||
|
||||
def _call(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[CallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, Any]:
|
||||
input = inputs[self.input_key]
|
||||
output = self.tool.run(input, self.verbose)
|
||||
return {self.output_key: output}
|
||||
|
||||
async def _acall(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Run the logic of this chain and return the output."""
|
||||
input = inputs[self.input_key]
|
||||
output = await self.tool.arun(input, self.verbose)
|
||||
return {self.output_key: output}
|
||||
Reference in New Issue
Block a user