mirror of
https://github.com/langgenius/dify.git
synced 2026-04-05 10:01:46 +08:00
Initial commit
This commit is contained in:
2
api/.dockerignore
Normal file
2
api/.dockerignore
Normal file
@@ -0,0 +1,2 @@
|
||||
.env
|
||||
storage/privkeys/*
|
||||
85
api/.env.example
Normal file
85
api/.env.example
Normal file
@@ -0,0 +1,85 @@
|
||||
# Server Edition
|
||||
EDITION=SELF_HOSTED
|
||||
|
||||
# Your App secret key will be used for securely signing the session cookie
|
||||
# Make sure you are changing this key for your deployment with a strong key.
|
||||
# You can generate a strong key using `openssl rand -base64 42`.
|
||||
# Alternatively you can set it with `SECRET_KEY` environment variable.
|
||||
SECRET_KEY=
|
||||
|
||||
# Console API base URL
|
||||
CONSOLE_URL=http://127.0.0.1:5001
|
||||
|
||||
# Service API base URL
|
||||
API_URL=http://127.0.0.1:5001
|
||||
|
||||
# Web APP base URL
|
||||
APP_URL=http://127.0.0.1:5001
|
||||
|
||||
# celery configuration
|
||||
CELERY_BROKER_URL=redis://:difyai123456@localhost:6379/1
|
||||
|
||||
# redis configuration
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=difyai123456
|
||||
REDIS_DB=0
|
||||
|
||||
# PostgreSQL database configuration
|
||||
DB_USERNAME=postgres
|
||||
DB_PASSWORD=difyai123456
|
||||
DB_HOST=localhost
|
||||
DB_PORT=5432
|
||||
DB_DATABASE=dify
|
||||
|
||||
# Storage configuration
|
||||
# Used to store uploaded files, private keys, etc.
|
||||
# storage type: local, s3
|
||||
STORAGE_TYPE=local
|
||||
STORAGE_LOCAL_PATH=storage
|
||||
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
|
||||
S3_BUCKET_NAME=your-bucket-name
|
||||
S3_ACCESS_KEY=your-access-key
|
||||
S3_SECRET_KEY=your-secret-key
|
||||
S3_REGION=your-region
|
||||
|
||||
# CORS configuration
|
||||
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
|
||||
# Cookie configuration
|
||||
COOKIE_HTTPONLY=true
|
||||
COOKIE_SAMESITE=None
|
||||
COOKIE_SECURE=true
|
||||
|
||||
# Session configuration
|
||||
SESSION_PERMANENT=true
|
||||
SESSION_USE_SIGNER=true
|
||||
|
||||
## support redis, sqlalchemy
|
||||
SESSION_TYPE=redis
|
||||
|
||||
# session redis configuration
|
||||
SESSION_REDIS_HOST=localhost
|
||||
SESSION_REDIS_PORT=6379
|
||||
SESSION_REDIS_PASSWORD=difyai123456
|
||||
SESSION_REDIS_DB=2
|
||||
|
||||
# Vector database configuration, support: weaviate, qdrant
|
||||
VECTOR_STORE=weaviate
|
||||
|
||||
# Weaviate configuration
|
||||
WEAVIATE_ENDPOINT=http://localhost:8080
|
||||
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
||||
WEAVIATE_GRPC_ENABLED=false
|
||||
|
||||
# Qdrant configuration, use `path:` prefix for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
|
||||
QDRANT_URL=path:storage/qdrant
|
||||
QDRANT_API_KEY=your-qdrant-api-key
|
||||
|
||||
# Sentry configuration
|
||||
SENTRY_DSN=
|
||||
|
||||
# DEBUG
|
||||
DEBUG=false
|
||||
SQLALCHEMY_ECHO=false
|
||||
28
api/Dockerfile
Normal file
28
api/Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
||||
# Image for the Dify backend API.
FROM langgenius/base:1.0.0-bullseye-slim AS langgenius-api

LABEL maintainer="takatost@gmail.com"

# Default runtime configuration (key=value form; the space-separated
# `ENV key value` form is a legacy syntax).
ENV FLASK_APP=app.py
ENV EDITION=SELF_HOSTED
ENV DEPLOY_ENV=PRODUCTION
ENV CONSOLE_URL=http://127.0.0.1:5001
ENV API_URL=http://127.0.0.1:5001
ENV APP_URL=http://127.0.0.1:5001

EXPOSE 5001

WORKDIR /app/api

# Install dependencies from requirements.txt before copying the sources.
# --no-cache-dir keeps pip's download cache out of the image layer.
COPY requirements.txt /app/api/requirements.txt

RUN pip install --no-cache-dir -r requirements.txt

COPY . /app/api/

COPY docker/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Build-time commit id, exposed to the application as an environment variable.
ARG COMMIT_SHA
ENV COMMIT_SHA=${COMMIT_SHA}

ENTRYPOINT ["/entrypoint.sh"]
|
||||
35
api/README.md
Normal file
35
api/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Dify Backend API
|
||||
|
||||
## Usage
|
||||
|
||||
1. Start the docker-compose stack
|
||||
|
||||
The backend requires some middleware, including PostgreSQL, Redis, and Weaviate, which can be started together using `docker-compose`.
|
||||
|
||||
```bash
|
||||
cd ../docker
|
||||
docker-compose -f docker-compose.middleware.yaml up -d
|
||||
cd ../api
|
||||
```
|
||||
2. Copy `.env.example` to `.env`
|
||||
3. Generate a `SECRET_KEY` in the `.env` file.
|
||||
|
||||
```bash
|
||||
openssl rand -base64 42
|
||||
```
|
||||
4. Install dependencies
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
5. Run migrations
|
||||
|
||||
Before the first launch, migrate the database to the latest version.
|
||||
|
||||
```bash
|
||||
flask db upgrade
|
||||
```
|
||||
6. Start backend:
|
||||
```bash
|
||||
flask run --host 0.0.0.0 --port=5001 --debug
|
||||
```
|
||||
7. Set up your application by visiting http://localhost:5001/console/api/setup, or call the other APIs.
|
||||
222
api/app.py
Normal file
222
api/app.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import os
|
||||
# Apply gevent's monkey patching unless DEBUG is set to "true"
# (case-insensitive).  An unset or empty DEBUG counts as "not debug".
if (os.environ.get("DEBUG") or "").lower() != 'true':
    from gevent import monkey

    monkey.patch_all()
|
||||
|
||||
import logging
|
||||
import json
|
||||
import threading
|
||||
|
||||
from flask import Flask, request, Response, session
|
||||
import flask_login
|
||||
from flask_cors import CORS
|
||||
|
||||
from extensions import ext_session, ext_celery, ext_sentry, ext_redis, ext_login, ext_vector_store, ext_migrate, \
|
||||
ext_database, ext_storage
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_login import login_manager
|
||||
|
||||
# DO NOT REMOVE BELOW
|
||||
from models import model, account, dataset, web, task
|
||||
from events import event_handlers
|
||||
# DO NOT REMOVE ABOVE
|
||||
|
||||
import core
|
||||
from config import Config, CloudEditionConfig
|
||||
from commands import register_commands
|
||||
from models.account import TenantAccountJoin
|
||||
from models.model import Account, EndUser, App
|
||||
|
||||
import warnings
|
||||
warnings.simplefilter("ignore", ResourceWarning)
|
||||
|
||||
|
||||
class DifyApp(Flask):
    """Application class for this service; currently adds nothing to ``Flask``."""
    pass
|
||||
|
||||
# -------------
|
||||
# Configuration
|
||||
# -------------
|
||||
|
||||
|
||||
# Edition selector read from the EDITION environment variable; falls back to
# the self-hosted (community) edition when the variable is not set.
config_type = os.environ.get('EDITION', 'SELF_HOSTED')  # ce edition first
|
||||
|
||||
# ----------------------------
|
||||
# Application Factory Function
|
||||
# ----------------------------
|
||||
|
||||
|
||||
def create_app(test_config=None) -> Flask:
    """Build and configure the Flask application.

    :param test_config: optional configuration object; when truthy it is
        loaded instead of the edition-specific configuration.
    :return: the configured application instance.
    """
    flask_app = DifyApp(__name__)

    # An explicit test configuration wins; otherwise the module-level
    # ``config_type`` chooses between the cloud and the default edition.
    if test_config:
        settings = test_config
    elif config_type == "CLOUD":
        settings = CloudEditionConfig()
    else:
        settings = Config()
    flask_app.config.from_object(settings)

    flask_app.secret_key = flask_app.config['SECRET_KEY']
    logging.basicConfig(level=flask_app.config.get('LOG_LEVEL', 'INFO'))

    initialize_extensions(flask_app)
    register_blueprints(flask_app)
    register_commands(flask_app)
    core.init_app(flask_app)

    return flask_app
|
||||
|
||||
|
||||
def initialize_extensions(app):
    """Bind every extension module to the freshly created ``app``.

    The initialisation order is the same as the order of the calls below.
    """
    ext_database.init_app(app)
    # The migration extension is the only one that also takes the db handle.
    ext_migrate.init(app, db)

    remaining_extensions = (
        ext_redis,
        ext_vector_store,
        ext_storage,
        ext_celery,
        ext_session,
        ext_login,
        ext_sentry,
    )
    for extension in remaining_extensions:
        extension.init_app(app)
|
||||
|
||||
|
||||
# Flask-Login configuration
|
||||
@login_manager.user_loader
def load_user(user_id):
    """Load the console account for ``user_id`` and resolve its workspace.

    Only requests routed through the ``console`` blueprint are handled; for
    any other blueprint ``None`` is returned.

    :param user_id: either a plain account id or the old
        ``"<tenant_id>.<account_id>"`` format.
    :return: the ``Account`` with ``current_tenant_id`` set when a tenant
        membership exists, or ``None`` when no account matches.
    """
    if request.blueprint != 'console':
        return None

    # Old-format ids carry a tenant prefix; only the part after the last dot
    # is the account id.  rsplit never raises, unlike two-target unpacking of
    # split('.') which failed on ids containing more than one dot.
    account_id = user_id.rsplit('.', 1)[-1]

    account = db.session.query(Account).filter(Account.id == account_id).first()
    if not account:
        return None

    workspace_id = session.get('workspace_id')

    # Prefer the membership for the workspace stored in the session ...
    tenant_account_join = None
    if workspace_id:
        tenant_account_join = db.session.query(TenantAccountJoin).filter(
            TenantAccountJoin.account_id == account.id,
            TenantAccountJoin.tenant_id == workspace_id
        ).first()

    # ... and fall back to any membership of this account.
    if not tenant_account_join:
        tenant_account_join = db.session.query(TenantAccountJoin).filter(
            TenantAccountJoin.account_id == account.id).first()

    # The account is only bound to a tenant it has a membership row for.
    if tenant_account_join:
        account.current_tenant_id = tenant_account_join.tenant_id
        if workspace_id != account.current_tenant_id:
            session['workspace_id'] = account.current_tenant_id

    # Log in the user with the updated user_id
    flask_login.login_user(account, remember=True)

    return account
|
||||
|
||||
|
||||
@login_manager.unauthorized_handler
def unauthorized_handler():
    """Answer unauthorized requests with a JSON body and HTTP status 401."""
    body = json.dumps({
        'code': 'unauthorized',
        'message': "Unauthorized."
    })
    return Response(body, status=401, content_type="application/json")
|
||||
|
||||
|
||||
# register blueprint routers
|
||||
def register_blueprints(app):
    """Register the service API, web and console blueprints on ``app``.

    The web and console blueprints get CORS applied before registration;
    their allowed origins come from ``WEB_API_CORS_ALLOW_ORIGINS`` and
    ``CONSOLE_CORS_ALLOW_ORIGINS`` in the app config respectively.
    """
    from controllers.service_api import bp as service_api_bp
    from controllers.web import bp as web_bp
    from controllers.console import bp as console_app_bp

    app.register_blueprint(service_api_bp)

    cors_targets = (
        (web_bp, 'WEB_API_CORS_ALLOW_ORIGINS'),
        (console_app_bp, 'CONSOLE_CORS_ALLOW_ORIGINS'),
    )
    for blueprint, origins_key in cors_targets:
        # Options are rebuilt per blueprint so no list object is shared.
        CORS(blueprint,
             resources={
                 r"/*": {"origins": app.config[origins_key]}},
             supports_credentials=True,
             allow_headers=['Content-Type', 'Authorization'],
             methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH'],
             expose_headers=['X-Version', 'X-Env']
             )
        app.register_blueprint(blueprint)
|
||||
|
||||
|
||||
# create app
|
||||
# Module-level application instance, built once at import time.
app = create_app()
# Celery object that ext_celery stored in the app's extension registry.
celery = app.extensions["celery"]

# Make it visible on stdout when the TESTING flag is enabled.
if app.config['TESTING']:
    print("App is running in TESTING mode")
|
||||
|
||||
|
||||
@app.after_request
def after_request(response):
    """Add Version headers to the response."""
    header_sources = (
        ('X-Version', 'CURRENT_VERSION'),
        ('X-Env', 'DEPLOY_ENV'),
    )
    for header_name, config_key in header_sources:
        response.headers.add(header_name, app.config[config_key])
    return response
|
||||
|
||||
|
||||
@app.route('/health')
def health():
    """Return a JSON document with status ``ok`` and the configured version."""
    payload = {
        'status': 'ok',
        'version': app.config['CURRENT_VERSION']
    }
    return Response(json.dumps(payload), status=200, content_type="application/json")
|
||||
|
||||
|
||||
@app.route('/threads')
def threads():
    """Return the active thread count and name/id/liveness of each thread."""
    active_total = threading.active_count()
    thread_details = [
        {
            'name': current.name,
            'id': current.ident,
            'is_alive': current.is_alive()
        }
        for current in threading.enumerate()
    ]

    return {
        'thread_num': active_total,
        'threads': thread_details
    }
|
||||
|
||||
|
||||
# When executed directly, serve on all interfaces on port 5001.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5001)
|
||||
160
api/commands.py
Normal file
160
api/commands.py
Normal file
@@ -0,0 +1,160 @@
|
||||
import datetime
|
||||
import json
|
||||
import random
|
||||
import string
|
||||
|
||||
import click
|
||||
|
||||
from libs.password import password_pattern, valid_password, hash_password
|
||||
from libs.helper import email as email_validate
|
||||
from extensions.ext_database import db
|
||||
from models.account import InvitationCode
|
||||
from models.model import Account, AppModelConfig, ApiToken, Site, App, RecommendedApp
|
||||
import secrets
|
||||
import base64
|
||||
|
||||
|
||||
@click.command('reset-password', help='Reset the account password.')
@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
@click.option('--new-password', prompt=True, help='the new password.')
@click.option('--password-confirm', prompt=True, help='the new password confirm.')
def reset_password(email, new_password, password_confirm):
    """Replace the password of the account identified by ``email``.

    Aborts with a red message when the two passwords differ, the account
    does not exist, or ``valid_password`` rejects the new password.
    Otherwise a fresh salt and the salted hash are stored base64-encoded.
    """
    if str(new_password).strip() != str(password_confirm).strip():
        click.echo(click.style('sorry. The two passwords do not match.', fg='red'))
        return

    account = db.session.query(Account). \
        filter(Account.email == email). \
        one_or_none()
    if not account:
        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
        return

    try:
        valid_password(new_password)
    except Exception:
        # Any validation failure is reported; unlike a bare ``except:`` this
        # lets KeyboardInterrupt / SystemExit propagate.
        click.echo(
            click.style('sorry. The passwords must match {} '.format(password_pattern), fg='red'))
        return

    # generate password salt
    salt = secrets.token_bytes(16)
    base64_salt = base64.b64encode(salt).decode()

    # encrypt password with salt
    password_hashed = hash_password(new_password, salt)
    base64_password_hashed = base64.b64encode(password_hashed).decode()

    account.password = base64_password_hashed
    account.password_salt = base64_salt
    db.session.commit()
    click.echo(click.style('Congratulations!, password has been reset.', fg='green'))
|
||||
|
||||
|
||||
@click.command('reset-email', help='Reset the account email.')
@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
@click.option('--new-email', prompt=True, help='the new email.')
@click.option('--email-confirm', prompt=True, help='the new email confirm.')
def reset_email(email, new_email, email_confirm):
    """Change the email address of the account currently using ``email``.

    Aborts with a red message when the new email and its confirmation
    differ, the account does not exist, or ``email_validate`` rejects the
    new address.
    """
    if str(new_email).strip() != str(email_confirm).strip():
        click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
        return

    account = db.session.query(Account). \
        filter(Account.email == email). \
        one_or_none()
    if not account:
        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
        return

    try:
        email_validate(new_email)
    except Exception:
        # Report the address that failed validation (the new one, not the
        # account's current email), and let KeyboardInterrupt propagate.
        click.echo(
            click.style('sorry. {} is not a valid email. '.format(new_email), fg='red'))
        return

    account.email = new_email
    db.session.commit()
    click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
|
||||
|
||||
|
||||
@click.command('generate-invitation-codes', help='Generate invitation codes.')
@click.option('--batch', help='The batch of invitation codes.')
@click.option('--count', prompt=True, help='Invitation codes count.')
def generate_invitation_codes(batch, count):
    """Create ``count`` invitation codes, store them and write them to a file.

    :param batch: batch label; defaults to the current time formatted as
        ``%Y%m%d%H%M%S`` when empty.
    :param count: number of codes to create; must parse as an integer
        greater than zero, otherwise an error is printed and nothing is done.
    """
    if not batch:
        now = datetime.datetime.now()
        batch = now.strftime('%Y%m%d%H%M%S')

    # A non-numeric count is treated like an invalid count instead of
    # crashing with a ValueError traceback.
    try:
        count = int(count)
    except (TypeError, ValueError):
        count = 0

    if count <= 0:
        click.echo(click.style('sorry. the count must be greater than 0.', fg='red'))
        return

    click.echo('Start generate {} invitation codes for batch {}.'.format(count, batch))

    generated_codes = []
    for _ in range(count):
        code = generate_invitation_code()
        invitation_code = InvitationCode(
            code=code,
            batch=batch
        )
        db.session.add(invitation_code)
        click.echo(code)
        generated_codes.append(code)
    db.session.commit()

    filename = 'storage/invitation-codes-{}.txt'.format(batch)

    # One code per line, each line newline-terminated.
    with open(filename, 'w') as f:
        f.write(''.join(code + "\n" for code in generated_codes))

    click.echo(click.style(
        'Congratulations! Generated {} invitation codes for batch {} and saved to the file \'{}\''.format(count, batch,
                                                                                                          filename),
        fg='green'))
|
||||
|
||||
|
||||
def generate_invitation_code():
    """Return a generated code that no stored ``InvitationCode`` row uses."""
    while True:
        candidate = generate_upper_string()
        matches = db.session.query(InvitationCode).filter(InvitationCode.code == candidate).count()
        # Keep drawing until the candidate is not present in the table.
        if not matches > 0:
            return candidate
|
||||
|
||||
|
||||
def generate_upper_string():
    """Return a random 8-character string of uppercase ASCII letters and digits.

    The characters are drawn with ``secrets.choice`` because the result is
    used as an invitation code and should not be predictable.
    """
    alphabet = string.ascii_uppercase + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(8))
|
||||
|
||||
|
||||
@click.command('gen-recommended-apps', help='Generate a recommended-app record for every existing app.')
def generate_recommended_apps():
    """Insert one ``RecommendedApp`` row with placeholder data per ``App``.

    Category, position, install count and copyright year are random
    placeholder values; the command takes no options.
    """
    print('Generating recommended app data...')
    apps = App.query.all()
    for app in apps:
        recommended_app = RecommendedApp(
            app_id=app.id,
            description={
                'en': 'Description for ' + app.name,
                'zh': '描述 ' + app.name
            },
            copyright='Copyright ' + str(random.randint(1990, 2020)),
            privacy_policy='https://privacypolicy.example.com',
            category=random.choice(['Games', 'News', 'Music', 'Sports']),
            position=random.randint(1, 100),
            install_count=random.randint(100, 100000)
        )
        db.session.add(recommended_app)
    db.session.commit()
    print('Done!')
|
||||
|
||||
|
||||
def register_commands(app):
    """Add this module's click commands to the CLI of ``app``."""
    cli_commands = (
        reset_password,
        reset_email,
        generate_invitation_codes,
        generate_recommended_apps,
    )
    for command in cli_commands:
        app.cli.add_command(command)
|
||||
200
api/config.py
Normal file
200
api/config.py
Normal file
@@ -0,0 +1,200 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import os
|
||||
from datetime import timedelta
|
||||
|
||||
import dotenv
|
||||
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Fallback values used by get_env() when an environment variable is not set.
# Most values are strings; the pool size and the Sentry sample rates are
# numeric.
DEFAULTS = {
    # cookies
    'COOKIE_HTTPONLY': 'True',
    'COOKIE_SECURE': 'True',
    'COOKIE_SAMESITE': 'None',

    # PostgreSQL
    'DB_USERNAME': 'postgres',
    'DB_PASSWORD': '',
    'DB_HOST': 'localhost',
    'DB_PORT': '5432',
    'DB_DATABASE': 'dify',

    # redis
    'REDIS_HOST': 'localhost',
    'REDIS_PORT': '6379',
    'REDIS_DB': '0',

    # session redis
    'SESSION_REDIS_HOST': 'localhost',
    'SESSION_REDIS_PORT': '6379',
    'SESSION_REDIS_DB': '2',

    # OAuth redirects
    'OAUTH_REDIRECT_PATH': '/console/api/oauth/authorize',
    'OAUTH_REDIRECT_INDEX_PATH': '/',

    # base URLs
    'CONSOLE_URL': 'https://cloud.dify.ai',
    'API_URL': 'https://api.dify.ai',
    'APP_URL': 'https://udify.app',

    # storage
    'STORAGE_TYPE': 'local',
    'STORAGE_LOCAL_PATH': 'storage',

    'CHECK_UPDATE_URL': 'https://updates.dify.ai',

    # sessions
    'SESSION_TYPE': 'sqlalchemy',
    'SESSION_PERMANENT': 'True',
    'SESSION_USE_SIGNER': 'True',

    'DEPLOY_ENV': 'PRODUCTION',

    # SQLAlchemy
    'SQLALCHEMY_POOL_SIZE': 30,
    'SQLALCHEMY_ECHO': 'False',

    # Sentry
    'SENTRY_TRACES_SAMPLE_RATE': 1.0,
    'SENTRY_PROFILES_SAMPLE_RATE': 1.0,

    'WEAVIATE_GRPC_ENABLED': 'True',
    'CELERY_BACKEND': 'database',
    'PDF_PREVIEW': 'True',
    'LOG_LEVEL': 'INFO',
}
|
||||
|
||||
|
||||
def get_env(key):
    """Return the environment value for ``key``, else its ``DEFAULTS`` entry.

    ``None`` is returned when the key is in neither place.
    """
    fallback = DEFAULTS.get(key)
    return os.environ.get(key, fallback)
|
||||
|
||||
|
||||
def get_bool_env(key):
    """Return True when the setting for ``key`` equals "true" (any case).

    A setting that is missing from both the environment and ``DEFAULTS``
    yields False instead of raising ``AttributeError`` on ``None``.
    """
    value = get_env(key)
    if value is None:
        return False
    # str() guards against non-string defaults.
    return str(value).lower() == 'true'
|
||||
|
||||
|
||||
def get_cors_allow_origins(env, default):
    """Return the list of CORS origins configured under ``env``.

    The setting is a comma-separated string.  Whitespace around each entry
    is removed and empty entries are dropped, so ``"a, b,"`` gives
    ``['a', 'b']``.

    :param env: name of the setting to read via ``get_env``.
    :param default: single origin used when the setting is unset, empty, or
        contains no usable entry.
    :return: list of origin strings.
    """
    raw_value = get_env(env)
    if raw_value:
        origins = [origin.strip() for origin in raw_value.split(',') if origin.strip()]
        if origins:
            return origins

    return [default]
|
||||
|
||||
|
||||
class Config:
    """Application configuration class.

    Every attribute is resolved once, at construction time, from the
    environment with a fallback to the module-level defaults.
    """

    def __init__(self):
        # ---- app settings ----
        self.CONSOLE_URL = get_env('CONSOLE_URL')
        self.API_URL = get_env('API_URL')
        self.APP_URL = get_env('APP_URL')
        self.CURRENT_VERSION = "0.2.0"
        self.COMMIT_SHA = get_env('COMMIT_SHA')
        self.EDITION = "SELF_HOSTED"
        self.DEPLOY_ENV = get_env('DEPLOY_ENV')
        self.TESTING = False
        self.LOG_LEVEL = get_env('LOG_LEVEL')
        self.PDF_PREVIEW = get_bool_env('PDF_PREVIEW')

        # Your App secret key will be used for securely signing the session cookie
        # Make sure you are changing this key for your deployment with a strong key.
        # You can generate a strong key using `openssl rand -base64 42`.
        # Alternatively you can set it with `SECRET_KEY` environment variable.
        self.SECRET_KEY = get_env('SECRET_KEY')

        # ---- cookie settings ----
        # The remember-me cookie and the session cookie share the same flags.
        cookie_httponly = get_bool_env('COOKIE_HTTPONLY')
        cookie_samesite = get_env('COOKIE_SAMESITE')
        cookie_secure = get_bool_env('COOKIE_SECURE')
        self.REMEMBER_COOKIE_HTTPONLY = cookie_httponly
        self.SESSION_COOKIE_HTTPONLY = cookie_httponly
        self.REMEMBER_COOKIE_SAMESITE = cookie_samesite
        self.SESSION_COOKIE_SAMESITE = cookie_samesite
        self.REMEMBER_COOKIE_SECURE = cookie_secure
        self.SESSION_COOKIE_SECURE = cookie_secure
        self.PERMANENT_SESSION_LIFETIME = timedelta(days=7)

        # ---- session settings, only support sqlalchemy, redis ----
        self.SESSION_TYPE = get_env('SESSION_TYPE')
        self.SESSION_PERMANENT = get_bool_env('SESSION_PERMANENT')
        self.SESSION_USE_SIGNER = get_bool_env('SESSION_USE_SIGNER')

        # ---- redis settings ----
        self.REDIS_HOST = get_env('REDIS_HOST')
        self.REDIS_PORT = get_env('REDIS_PORT')
        self.REDIS_PASSWORD = get_env('REDIS_PASSWORD')
        self.REDIS_DB = get_env('REDIS_DB')

        # ---- session redis settings ----
        self.SESSION_REDIS_HOST = get_env('SESSION_REDIS_HOST')
        self.SESSION_REDIS_PORT = get_env('SESSION_REDIS_PORT')
        self.SESSION_REDIS_PASSWORD = get_env('SESSION_REDIS_PASSWORD')
        self.SESSION_REDIS_DB = get_env('SESSION_REDIS_DB')

        # ---- storage settings ----
        self.STORAGE_TYPE = get_env('STORAGE_TYPE')
        self.STORAGE_LOCAL_PATH = get_env('STORAGE_LOCAL_PATH')
        self.S3_ENDPOINT = get_env('S3_ENDPOINT')
        self.S3_BUCKET_NAME = get_env('S3_BUCKET_NAME')
        self.S3_ACCESS_KEY = get_env('S3_ACCESS_KEY')
        self.S3_SECRET_KEY = get_env('S3_SECRET_KEY')
        self.S3_REGION = get_env('S3_REGION')

        # ---- vector store settings, only support weaviate, qdrant ----
        self.VECTOR_STORE = get_env('VECTOR_STORE')

        # ---- weaviate settings ----
        self.WEAVIATE_ENDPOINT = get_env('WEAVIATE_ENDPOINT')
        self.WEAVIATE_API_KEY = get_env('WEAVIATE_API_KEY')
        self.WEAVIATE_GRPC_ENABLED = get_bool_env('WEAVIATE_GRPC_ENABLED')

        # ---- qdrant settings ----
        self.QDRANT_URL = get_env('QDRANT_URL')
        self.QDRANT_API_KEY = get_env('QDRANT_API_KEY')

        # ---- cors settings ----
        # Console origins default to the console URL, web API origins to "*".
        self.CONSOLE_CORS_ALLOW_ORIGINS = get_cors_allow_origins(
            'CONSOLE_CORS_ALLOW_ORIGINS', self.CONSOLE_URL)
        self.WEB_API_CORS_ALLOW_ORIGINS = get_cors_allow_origins(
            'WEB_API_CORS_ALLOW_ORIGINS', '*')

        # ---- sentry settings ----
        self.SENTRY_DSN = get_env('SENTRY_DSN')
        self.SENTRY_TRACES_SAMPLE_RATE = float(get_env('SENTRY_TRACES_SAMPLE_RATE'))
        self.SENTRY_PROFILES_SAMPLE_RATE = float(get_env('SENTRY_PROFILES_SAMPLE_RATE'))

        # ---- check update url ----
        self.CHECK_UPDATE_URL = get_env('CHECK_UPDATE_URL')

        # ---- database settings ----
        db_user = get_env('DB_USERNAME')
        db_password = get_env('DB_PASSWORD')
        db_host = get_env('DB_HOST')
        db_port = get_env('DB_PORT')
        db_name = get_env('DB_DATABASE')

        self.SQLALCHEMY_DATABASE_URI = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
        self.SQLALCHEMY_ENGINE_OPTIONS = {'pool_size': int(get_env('SQLALCHEMY_POOL_SIZE'))}

        self.SQLALCHEMY_ECHO = get_bool_env('SQLALCHEMY_ECHO')

        # ---- celery settings ----
        self.CELERY_BROKER_URL = get_env('CELERY_BROKER_URL')
        self.CELERY_BACKEND = get_env('CELERY_BACKEND')
        # Results go to the SQL database when the backend is "database",
        # otherwise to the broker URL.
        if self.CELERY_BACKEND == 'database':
            self.CELERY_RESULT_BACKEND = 'db+{}'.format(self.SQLALCHEMY_DATABASE_URI)
        else:
            self.CELERY_RESULT_BACKEND = self.CELERY_BROKER_URL

        # ---- hosted provider credentials ----
        self.OPENAI_API_KEY = get_env('OPENAI_API_KEY')
|
||||
|
||||
|
||||
class CloudEditionConfig(Config):
    """Configuration for the cloud edition: base settings plus OAuth values."""

    def __init__(self):
        super().__init__()

        self.EDITION = "CLOUD"

        # Each OAuth attribute is named after the setting it is read from.
        oauth_settings = (
            'GITHUB_CLIENT_ID',
            'GITHUB_CLIENT_SECRET',
            'GOOGLE_CLIENT_ID',
            'GOOGLE_CLIENT_SECRET',
            'OAUTH_REDIRECT_PATH',
        )
        for setting in oauth_settings:
            setattr(self, setting, get_env(setting))
|
||||
|
||||
|
||||
class TestConfig(Config):
    """Configuration for tests: TESTING enabled and a separate database."""

    def __init__(self):
        super().__init__()

        self.EDITION = "SELF_HOSTED"
        self.TESTING = True

        db_user = get_env('DB_USERNAME')
        db_password = get_env('DB_PASSWORD')
        db_host = get_env('DB_HOST')
        db_port = get_env('DB_PORT')

        # use a different database for testing: dify_test
        self.SQLALCHEMY_DATABASE_URI = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/dify_test"
|
||||
0
api/constants/__init__.py
Normal file
0
api/constants/__init__.py
Normal file
322
api/constants/model_template.py
Normal file
322
api/constants/model_template.py
Normal file
@@ -0,0 +1,322 @@
|
||||
import json
|
||||
|
||||
from models.model import AppModelConfig, App
|
||||
|
||||
def _default_model_template(mode, model_id):
    """Build the default template for one app mode.

    Both default templates are identical apart from the app mode and the
    OpenAI model id.  Note that the key is ``max_token`` under ``configs``
    but ``max_tokens`` in the JSON-encoded ``model`` entry.
    """
    app_defaults = {
        'mode': mode,
        'enable_site': True,
        'enable_api': True,
        'is_demo': False,
        'api_rpm': 0,
        'api_rph': 0,
        'status': 'normal'
    }
    legacy_configs = {
        'prompt_template': '',
        'prompt_variables': [],
        'completion_params': {
            'max_token': 512,
            'temperature': 1,
            'top_p': 1,
            'presence_penalty': 0,
            'frequency_penalty': 0,
        }
    }
    model_json = json.dumps({
        "provider": "openai",
        "name": model_id,
        "completion_params": {
            "max_tokens": 512,
            "temperature": 1,
            "top_p": 1,
            "presence_penalty": 0,
            "frequency_penalty": 0
        }
    })
    return {
        'app': app_defaults,
        'model_config': {
            'provider': 'openai',
            'model_id': model_id,
            'configs': legacy_configs,
            'model': model_json
        }
    }


model_templates = {
    # completion default mode
    'completion_default': _default_model_template('completion', 'text-davinci-003'),

    # chat default mode
    'chat_default': _default_model_template('chat', 'gpt-3.5-turbo'),
}
|
||||
|
||||
|
||||
demo_model_templates = {
|
||||
'en-US': [
|
||||
{
|
||||
'name': 'Translation Assistant',
|
||||
'icon': '',
|
||||
'icon_background': '',
|
||||
'description': 'A multilingual translator that provides translation capabilities in multiple languages, translating user input into the language they need.',
|
||||
'mode': 'completion',
|
||||
'model_config': AppModelConfig(
|
||||
provider='openai',
|
||||
model_id='text-davinci-003',
|
||||
configs={
|
||||
'prompt_template': "Please translate the following text into {{target_language}}:\n",
|
||||
'prompt_variables': [
|
||||
{
|
||||
"key": "target_language",
|
||||
"name": "Target Language",
|
||||
"description": "The language you want to translate into.",
|
||||
"type": "select",
|
||||
"default": "Chinese",
|
||||
'options': [
|
||||
'Chinese',
|
||||
'English',
|
||||
'Japanese',
|
||||
'French',
|
||||
'Russian',
|
||||
'German',
|
||||
'Spanish',
|
||||
'Korean',
|
||||
'Italian',
|
||||
]
|
||||
}
|
||||
],
|
||||
'completion_params': {
|
||||
'max_token': 1000,
|
||||
'temperature': 0,
|
||||
'top_p': 0,
|
||||
'presence_penalty': 0.1,
|
||||
'frequency_penalty': 0.1,
|
||||
}
|
||||
},
|
||||
opening_statement='',
|
||||
suggested_questions=None,
|
||||
pre_prompt="Please translate the following text into {{target_language}}:\n",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "text-davinci-003",
|
||||
"completion_params": {
|
||||
"max_tokens": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1
|
||||
}
|
||||
}),
|
||||
user_input_form=json.dumps([
|
||||
{
|
||||
"select": {
|
||||
"label": "Target Language",
|
||||
"variable": "target_language",
|
||||
"description": "The language you want to translate into.",
|
||||
"default": "Chinese",
|
||||
"required": True,
|
||||
'options': [
|
||||
'Chinese',
|
||||
'English',
|
||||
'Japanese',
|
||||
'French',
|
||||
'Russian',
|
||||
'German',
|
||||
'Spanish',
|
||||
'Korean',
|
||||
'Italian',
|
||||
]
|
||||
}
|
||||
}
|
||||
])
|
||||
)
|
||||
},
|
||||
{
|
||||
'name': 'AI Front-end Interviewer',
|
||||
'icon': '',
|
||||
'icon_background': '',
|
||||
'description': 'A simulated front-end interviewer that tests the skill level of front-end development through questioning.',
|
||||
'mode': 'chat',
|
||||
'model_config': AppModelConfig(
|
||||
provider='openai',
|
||||
model_id='gpt-3.5-turbo',
|
||||
configs={
|
||||
'introduction': 'Hi, welcome to our interview. I am the interviewer for this technology company, and I will test your web front-end development skills. Next, I will ask you some technical questions. Please answer them as thoroughly as possible. ',
|
||||
'prompt_template': "You will play the role of an interviewer for a technology company, examining the user's web front-end development skills and posing 5-10 sharp technical questions.\n\nPlease note:\n- Only ask one question at a time.\n- After the user answers a question, ask the next question directly, without trying to correct any mistakes made by the candidate.\n- If you think the user has not answered correctly for several consecutive questions, ask fewer questions.\n- After asking the last question, you can ask this question: Why did you leave your last job? After the user answers this question, please express your understanding and support.\n",
|
||||
'prompt_variables': [],
|
||||
'completion_params': {
|
||||
'max_token': 300,
|
||||
'temperature': 0.8,
|
||||
'top_p': 0.9,
|
||||
'presence_penalty': 0.1,
|
||||
'frequency_penalty': 0.1,
|
||||
}
|
||||
},
|
||||
opening_statement='Hi, welcome to our interview. I am the interviewer for this technology company, and I will test your web front-end development skills. Next, I will ask you some technical questions. Please answer them as thoroughly as possible. ',
|
||||
suggested_questions=None,
|
||||
pre_prompt="You will play the role of an interviewer for a technology company, examining the user's web front-end development skills and posing 5-10 sharp technical questions.\n\nPlease note:\n- Only ask one question at a time.\n- After the user answers a question, ask the next question directly, without trying to correct any mistakes made by the candidate.\n- If you think the user has not answered correctly for several consecutive questions, ask fewer questions.\n- After asking the last question, you can ask this question: Why did you leave your last job? After the user answers this question, please express your understanding and support.\n",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "gpt-3.5-turbo",
|
||||
"completion_params": {
|
||||
"max_tokens": 300,
|
||||
"temperature": 0.8,
|
||||
"top_p": 0.9,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1
|
||||
}
|
||||
}),
|
||||
user_input_form=None
|
||||
)
|
||||
}
|
||||
],
|
||||
|
||||
'zh-Hans': [
|
||||
{
|
||||
'name': '翻译助手',
|
||||
'icon': '',
|
||||
'icon_background': '',
|
||||
'description': '一个多语言翻译器,提供多种语言翻译能力,将用户输入的文本翻译成他们需要的语言。',
|
||||
'mode': 'completion',
|
||||
'model_config': AppModelConfig(
|
||||
provider='openai',
|
||||
model_id='text-davinci-003',
|
||||
configs={
|
||||
'prompt_template': "请将以下文本翻译为{{target_language}}:\n",
|
||||
'prompt_variables': [
|
||||
{
|
||||
"key": "target_language",
|
||||
"name": "目标语言",
|
||||
"description": "翻译的目标语言",
|
||||
"type": "select",
|
||||
"default": "中文",
|
||||
"options": [
|
||||
"中文",
|
||||
"英文",
|
||||
"日语",
|
||||
"法语",
|
||||
"俄语",
|
||||
"德语",
|
||||
"西班牙语",
|
||||
"韩语",
|
||||
"意大利语",
|
||||
]
|
||||
}
|
||||
],
|
||||
'completion_params': {
|
||||
'max_token': 1000,
|
||||
'temperature': 0,
|
||||
'top_p': 0,
|
||||
'presence_penalty': 0.1,
|
||||
'frequency_penalty': 0.1,
|
||||
}
|
||||
},
|
||||
opening_statement='',
|
||||
suggested_questions=None,
|
||||
pre_prompt="请将以下文本翻译为{{target_language}}:\n",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "text-davinci-003",
|
||||
"completion_params": {
|
||||
"max_tokens": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1
|
||||
}
|
||||
}),
|
||||
user_input_form=json.dumps([
|
||||
{
|
||||
"select": {
|
||||
"label": "目标语言",
|
||||
"variable": "target_language",
|
||||
"description": "翻译的目标语言",
|
||||
"default": "中文",
|
||||
"required": True,
|
||||
'options': [
|
||||
"中文",
|
||||
"英文",
|
||||
"日语",
|
||||
"法语",
|
||||
"俄语",
|
||||
"德语",
|
||||
"西班牙语",
|
||||
"韩语",
|
||||
"意大利语",
|
||||
]
|
||||
}
|
||||
}
|
||||
])
|
||||
)
|
||||
},
|
||||
{
|
||||
'name': 'AI 前端面试官',
|
||||
'icon': '',
|
||||
'icon_background': '',
|
||||
'description': '一个模拟的前端面试官,通过提问的方式对前端开发的技能水平进行检验。',
|
||||
'mode': 'chat',
|
||||
'model_config': AppModelConfig(
|
||||
provider='openai',
|
||||
model_id='gpt-3.5-turbo',
|
||||
configs={
|
||||
'introduction': '你好,欢迎来参加我们的面试,我是这家科技公司的面试官,我将考察你的 Web 前端开发技能。接下来我会向您提出一些技术问题,请您尽可能详尽地回答。',
|
||||
'prompt_template': "你将扮演一个科技公司的面试官,考察用户作为候选人的 Web 前端开发水平,提出 5-10 个犀利的技术问题。\n\n请注意:\n- 每次只问一个问题\n- 用户回答问题后请直接问下一个问题,而不要试图纠正候选人的错误;\n- 如果你认为用户连续几次回答的都不对,就少问一点;\n- 问完最后一个问题后,你可以问这样一个问题:上一份工作为什么离职?用户回答该问题后,请表示理解与支持。\n",
|
||||
'prompt_variables': [],
|
||||
'completion_params': {
|
||||
'max_token': 300,
|
||||
'temperature': 0.8,
|
||||
'top_p': 0.9,
|
||||
'presence_penalty': 0.1,
|
||||
'frequency_penalty': 0.1,
|
||||
}
|
||||
},
|
||||
opening_statement='你好,欢迎来参加我们的面试,我是这家科技公司的面试官,我将考察你的 Web 前端开发技能。接下来我会向您提出一些技术问题,请您尽可能详尽地回答。',
|
||||
suggested_questions=None,
|
||||
pre_prompt="你将扮演一个科技公司的面试官,考察用户作为候选人的 Web 前端开发水平,提出 5-10 个犀利的技术问题。\n\n请注意:\n- 每次只问一个问题\n- 用户回答问题后请直接问下一个问题,而不要试图纠正候选人的错误;\n- 如果你认为用户连续几次回答的都不对,就少问一点;\n- 问完最后一个问题后,你可以问这样一个问题:上一份工作为什么离职?用户回答该问题后,请表示理解与支持。\n",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "gpt-3.5-turbo",
|
||||
"completion_params": {
|
||||
"max_tokens": 300,
|
||||
"temperature": 0.8,
|
||||
"top_p": 0.9,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1
|
||||
}
|
||||
}),
|
||||
user_input_form=None
|
||||
)
|
||||
}
|
||||
],
|
||||
}
|
||||
4
api/controllers/__init__.py
Normal file
4
api/controllers/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
|
||||
|
||||
|
||||
20
api/controllers/console/__init__.py
Normal file
20
api/controllers/console/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from flask import Blueprint
|
||||
|
||||
from libs.external_api import ExternalApi
|
||||
|
||||
bp = Blueprint('console', __name__, url_prefix='/console/api')
|
||||
api = ExternalApi(bp)
|
||||
|
||||
# Import app controllers
|
||||
from .app import app, site, explore, completion, model_config, statistic, conversation, message
|
||||
|
||||
# Import auth controllers
|
||||
from .auth import login, oauth
|
||||
|
||||
# Import datasets controllers
|
||||
from .datasets import datasets, datasets_document, datasets_segments, file, hit_testing
|
||||
|
||||
# Import other controllers
|
||||
from . import setup, version, apikey
|
||||
|
||||
from .workspace import workspace, members, providers, account
|
||||
175
api/controllers/console/apikey.py
Normal file
175
api/controllers/console/apikey.py
Normal file
@@ -0,0 +1,175 @@
|
||||
from flask_login import login_required, current_user
|
||||
import flask_restful
|
||||
from flask_restful import Resource, fields, marshal_with
|
||||
from werkzeug.exceptions import Forbidden
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.model import App, ApiToken
|
||||
from models.dataset import Dataset
|
||||
|
||||
from . import api
|
||||
from .setup import setup_required
|
||||
from .wraps import account_initialization_required
|
||||
from libs.helper import TimestampField
|
||||
|
||||
# Marshalling schema for a single ApiToken row.
api_key_fields = {
    'id': fields.String,
    'type': fields.String,
    'token': fields.String,
    'last_used_at': TimestampField,
    'created_at': TimestampField,
}

# Envelope for the list endpoint: the handler returns {"items": [...]},
# which is exposed to clients under the "data" key.
api_key_list = {
    'data': fields.List(
        fields.Nested(api_key_fields),
        attribute="items",
    ),
}
|
||||
|
||||
|
||||
def _get_resource(resource_id, tenant_id, resource_model):
    """Return the ``resource_model`` row matching both the id and the tenant.

    Aborts the request with a 404 (message "<ModelName> not found.") when
    no row has this ``resource_id`` inside ``tenant_id``.
    """
    lookup = resource_model.query.filter_by(id=resource_id, tenant_id=tenant_id)
    found = lookup.first()

    if found is not None:
        return found

    flask_restful.abort(404, message=f"{resource_model.__name__} not found.")
|
||||
|
||||
|
||||
class BaseApiKeyListResource(Resource):
    """List and create API keys attached to one resource.

    Concrete subclasses bind ``resource_type``, ``resource_model``,
    ``resource_id_field`` (the ApiToken column holding the owner id) and
    ``token_prefix``.
    """

    method_decorators = [account_initialization_required, login_required, setup_required]

    resource_type = None
    resource_model = None
    resource_id_field = None
    token_prefix = None
    # Upper bound on the number of keys one resource may own.
    max_keys = 10

    def _token_query(self, resource_id):
        """Build the query selecting this resource's tokens of ``resource_type``."""
        owner_column = getattr(ApiToken, self.resource_id_field)
        return db.session.query(ApiToken).filter(
            ApiToken.type == self.resource_type,
            owner_column == resource_id,
        )

    @marshal_with(api_key_list)
    def get(self, resource_id):
        """Return every API key of the resource, after checking tenant ownership."""
        resource_id = str(resource_id)
        # Called only for its 404 side effect when the resource is not in the tenant.
        _get_resource(resource_id, current_user.current_tenant_id, self.resource_model)

        return {"items": self._token_query(resource_id).all()}

    @marshal_with(api_key_fields)
    def post(self, resource_id):
        """Create a new API key for the resource and return it with status 201.

        Raises Forbidden unless the current user's role is admin or owner;
        aborts with 400 / ``max_keys_exceeded`` once ``max_keys`` keys exist.
        """
        resource_id = str(resource_id)
        _get_resource(resource_id, current_user.current_tenant_id, self.resource_model)

        # Only tenant admins and owners may mint keys.
        role = current_user.current_tenant.current_role
        if role not in ['admin', 'owner']:
            raise Forbidden()

        existing_count = self._token_query(resource_id).count()
        if existing_count >= self.max_keys:
            flask_restful.abort(
                400,
                message=f"Cannot create more than {self.max_keys} API keys for this resource type.",
                code='max_keys_exceeded'
            )

        token_value = ApiToken.generate_api_key(self.token_prefix, 24)

        api_token = ApiToken()
        setattr(api_token, self.resource_id_field, resource_id)
        api_token.token = token_value
        api_token.type = self.resource_type

        db.session.add(api_token)
        db.session.commit()
        return api_token, 201
|
||||
|
||||
|
||||
class BaseApiKeyResource(Resource):
    """Delete a single API key of a resource.

    Concrete subclasses bind ``resource_type``, ``resource_model`` and
    ``resource_id_field`` (the ApiToken column holding the owner id).
    """

    method_decorators = [account_initialization_required, login_required, setup_required]

    resource_type = None
    resource_model = None
    resource_id_field = None

    def delete(self, resource_id, api_key_id):
        """Remove the key ``api_key_id`` belonging to ``resource_id``.

        Raises Forbidden unless the current user's role is admin or owner;
        aborts with 404 when the key does not belong to this resource/type.
        """
        resource_id, api_key_id = str(resource_id), str(api_key_id)
        # Called only for its 404 side effect when the resource is not in the tenant.
        _get_resource(resource_id, current_user.current_tenant_id, self.resource_model)

        # Only tenant admins and owners may revoke keys.
        role = current_user.current_tenant.current_role
        if role not in ['admin', 'owner']:
            raise Forbidden()

        owner_column = getattr(ApiToken, self.resource_id_field)
        existing = db.session.query(ApiToken).filter(
            owner_column == resource_id,
            ApiToken.type == self.resource_type,
            ApiToken.id == api_key_id,
        ).first()

        if existing is None:
            flask_restful.abort(404, message='API key not found')

        db.session.query(ApiToken).filter(ApiToken.id == api_key_id).delete()
        db.session.commit()

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
class AppApiKeyListResource(BaseApiKeyListResource):
    """API-key list/create endpoint bound to App rows (tokens prefixed ``app-``)."""

    resource_type = 'app'
    resource_model = App
    resource_id_field = 'app_id'
    token_prefix = 'app-'

    def after_request(self, resp):
        """Stamp permissive CORS headers on ``resp`` and hand it back."""
        # NOTE(review): a wildcard origin combined with credentials is
        # presumably rejected by browsers - confirm this hook is needed.
        resp.headers.update({
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Credentials': 'true',
        }) if False else None
        resp.headers['Access-Control-Allow-Origin'] = '*'
        resp.headers['Access-Control-Allow-Credentials'] = 'true'
        return resp
|
||||
|
||||
|
||||
class AppApiKeyResource(BaseApiKeyResource):
    """API-key delete endpoint bound to App rows."""

    resource_type = 'app'
    resource_model = App
    resource_id_field = 'app_id'

    def after_request(self, resp):
        """Stamp permissive CORS headers on ``resp`` and hand it back."""
        cors_headers = (
            ('Access-Control-Allow-Origin', '*'),
            ('Access-Control-Allow-Credentials', 'true'),
        )
        for header_name, header_value in cors_headers:
            resp.headers[header_name] = header_value
        return resp
|
||||
|
||||
|
||||
class DatasetApiKeyListResource(BaseApiKeyListResource):
    """API-key list/create endpoint bound to Dataset rows (tokens prefixed ``ds-``)."""

    resource_type = 'dataset'
    resource_model = Dataset
    resource_id_field = 'dataset_id'
    token_prefix = 'ds-'

    def after_request(self, resp):
        """Stamp permissive CORS headers on ``resp`` and hand it back."""
        cors_headers = (
            ('Access-Control-Allow-Origin', '*'),
            ('Access-Control-Allow-Credentials', 'true'),
        )
        for header_name, header_value in cors_headers:
            resp.headers[header_name] = header_value
        return resp
|
||||
|
||||
|
||||
class DatasetApiKeyResource(BaseApiKeyResource):
    """API-key delete endpoint bound to Dataset rows."""

    resource_type = 'dataset'
    resource_model = Dataset
    resource_id_field = 'dataset_id'

    def after_request(self, resp):
        """Stamp permissive CORS headers on ``resp`` and hand it back."""
        cors_headers = (
            ('Access-Control-Allow-Origin', '*'),
            ('Access-Control-Allow-Credentials', 'true'),
        )
        for header_name, header_value in cors_headers:
            resp.headers[header_name] = header_value
        return resp
|
||||
|
||||
|
||||
# Route table for the API-key endpoints of apps and datasets.
for _resource_cls, _url_rule in (
    (AppApiKeyListResource, '/apps/<uuid:resource_id>/api-keys'),
    (AppApiKeyResource, '/apps/<uuid:resource_id>/api-keys/<uuid:api_key_id>'),
    (DatasetApiKeyListResource, '/datasets/<uuid:resource_id>/api-keys'),
    (DatasetApiKeyResource, '/datasets/<uuid:resource_id>/api-keys/<uuid:api_key_id>'),
):
    api.add_resource(_resource_cls, _url_rule)
|
||||
22
api/controllers/console/app/__init__.py
Normal file
22
api/controllers/console/app/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from flask_login import current_user
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console.app.error import AppUnavailableError
|
||||
from extensions.ext_database import db
|
||||
from models.model import App
|
||||
|
||||
|
||||
def _get_app(app_id, mode=None):
    """Load a 'normal'-status app of the current user's tenant.

    Raises NotFound when no such app exists, and AppUnavailableError when
    ``mode`` is given and differs from the app's own mode.
    """
    criteria = (
        App.id == app_id,
        App.tenant_id == current_user.current_tenant_id,
        App.status == 'normal',
    )
    found = db.session.query(App).filter(*criteria).first()

    if not found:
        raise NotFound("App not found")

    mode_mismatch = mode and found.mode != mode
    if mode_mismatch:
        raise AppUnavailableError()

    return found
|
||||
518
api/controllers/console/app/app.py
Normal file
518
api/controllers/console/app/app.py
Normal file
@@ -0,0 +1,518 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
import flask
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal_with, abort, inputs
|
||||
from werkzeug.exceptions import Unauthorized, Forbidden
|
||||
|
||||
from constants.model_template import model_templates, demo_model_templates
|
||||
from controllers.console import api
|
||||
from controllers.console.app.error import AppNotFoundError, ProviderNotInitializeError, ProviderQuotaExceededError, \
|
||||
CompletionRequestError, ProviderModelCurrentlyNotSupportError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.generator.llm_generator import LLMGenerator
|
||||
from core.llm.error import ProviderTokenNotInitError, QuotaExceededError, LLMBadRequestError, LLMAPIConnectionError, \
|
||||
LLMAPIUnavailableError, LLMRateLimitError, LLMAuthorizationError, ModelCurrentlyNotSupportError
|
||||
from events.app_event import app_was_created, app_was_deleted
|
||||
from libs.helper import TimestampField
|
||||
from extensions.ext_database import db
|
||||
from models.model import App, AppModelConfig, Site, InstalledApp
|
||||
from services.account_service import TenantService
|
||||
from services.app_model_config_service import AppModelConfigService
|
||||
|
||||
# Marshalling schema for an AppModelConfig. Most entries read a derived
# ``*_list`` / ``*_dict`` attribute of the model instead of the raw column.
model_config_fields = {
    'opening_statement': fields.String,
    'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
    'suggested_questions_after_answer': fields.Raw(
        attribute='suggested_questions_after_answer_dict'),
    'more_like_this': fields.Raw(attribute='more_like_this_dict'),
    'model': fields.Raw(attribute='model_dict'),
    'user_input_form': fields.Raw(attribute='user_input_form_list'),
    'pre_prompt': fields.String,
    'agent_mode': fields.Raw(attribute='agent_mode_dict'),
}

# Marshalling schema for a full App record; the nested model config is read
# from the app's ``app_model_config`` attribute.
app_detail_fields = {
    'id': fields.String,
    'name': fields.String,
    'mode': fields.String,
    'icon': fields.String,
    'icon_background': fields.String,
    'enable_site': fields.Boolean,
    'enable_api': fields.Boolean,
    'api_rpm': fields.Integer,
    'api_rph': fields.Integer,
    'is_demo': fields.Boolean,
    'model_config': fields.Nested(
        model_config_fields,
        attribute='app_model_config',
    ),
    'created_at': TimestampField,
}
|
||||
|
||||
|
||||
def _get_app(app_id, tenant_id):
    """Return the app with ``app_id`` owned by ``tenant_id``.

    Raises AppNotFoundError when no row matches both values.
    """
    owned_app = (
        db.session.query(App)
        .filter(App.id == app_id, App.tenant_id == tenant_id)
        .first()
    )
    if owned_app:
        return owned_app
    raise AppNotFoundError
|
||||
|
||||
|
||||
class AppListApi(Resource):
    """Console endpoints that list the tenant's apps and create a new app."""

    prompt_config_fields = {
        'prompt_template': fields.String,
    }

    model_config_partial_fields = {
        'model': fields.Raw(attribute='model_dict'),
        'pre_prompt': fields.String,
    }

    app_partial_fields = {
        'id': fields.String,
        'name': fields.String,
        'mode': fields.String,
        'icon': fields.String,
        'icon_background': fields.String,
        'enable_site': fields.Boolean,
        'enable_api': fields.Boolean,
        'is_demo': fields.Boolean,
        'model_config': fields.Nested(model_config_partial_fields, attribute='app_model_config'),
        'created_at': TimestampField
    }

    # Maps the pagination object returned by db.paginate onto the response shape.
    app_pagination_fields = {
        'page': fields.Integer,
        'limit': fields.Integer(attribute='per_page'),
        'total': fields.Integer,
        'has_more': fields.Boolean(attribute='has_next'),
        'data': fields.List(fields.Nested(app_partial_fields), attribute='items')
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_pagination_fields)
    def get(self):
        """Get app list

        Query args: ``page`` (1-99999, default 1) and ``limit`` (1-100,
        default 20). Apps of the current tenant are returned newest first.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('page', type=inputs.int_range(1, 99999), required=False, default=1, location='args')
        parser.add_argument('limit', type=inputs.int_range(1, 100), required=False, default=20, location='args')
        args = parser.parse_args()

        app_models = db.paginate(
            db.select(App).where(App.tenant_id == current_user.current_tenant_id).order_by(App.created_at.desc()),
            page=args['page'],
            per_page=args['limit'],
            error_out=False)

        return app_models

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self):
        """Create app

        JSON body: ``name`` (required), ``mode`` ('completion' or 'chat',
        required), optional ``icon``, ``icon_background`` and
        ``model_config``. Without ``model_config`` the '<mode>_default'
        template is used. Returns the created app with status 201.

        Raises Forbidden unless the current user's role is admin or owner;
        aborts with 400 when ``mode`` is missing.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('name', type=str, required=True, location='json')
        parser.add_argument('mode', type=str, choices=['completion', 'chat'], location='json')
        parser.add_argument('icon', type=str, location='json')
        parser.add_argument('icon_background', type=str, location='json')
        parser.add_argument('model_config', type=dict, location='json')
        args = parser.parse_args()

        # Only tenant admins and owners may create apps.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        # Both branches below need a mode (validation and app.mode), so reject a
        # missing one up front instead of only in the template branch.
        if args.get('mode') is None:
            abort(400, message="mode is required")

        if args['model_config'] is not None:
            # validate config
            model_configuration = AppModelConfigService.validate_configuration(
                account=current_user,
                config=args['model_config'],
                mode=args['mode']
            )

            app = App(
                enable_site=True,
                enable_api=True,
                is_demo=False,
                api_rpm=0,
                api_rph=0,
                status='normal'
            )

            app_model_config = AppModelConfig(
                provider="",
                model_id="",
                configs={},
                opening_statement=model_configuration['opening_statement'],
                suggested_questions=json.dumps(model_configuration['suggested_questions']),
                suggested_questions_after_answer=json.dumps(model_configuration['suggested_questions_after_answer']),
                more_like_this=json.dumps(model_configuration['more_like_this']),
                model=json.dumps(model_configuration['model']),
                user_input_form=json.dumps(model_configuration['user_input_form']),
                pre_prompt=model_configuration['pre_prompt'],
                agent_mode=json.dumps(model_configuration['agent_mode']),
            )
        else:
            model_config_template = model_templates[args['mode'] + '_default']

            app = App(**model_config_template['app'])
            app_model_config = AppModelConfig(**model_config_template['model_config'])

        app.name = args['name']
        app.mode = args['mode']
        app.icon = args['icon']
        app.icon_background = args['icon_background']
        app.tenant_id = current_user.current_tenant_id

        # Flush so app.id exists before the config row references it.
        db.session.add(app)
        db.session.flush()

        app_model_config.app_id = app.id
        db.session.add(app_model_config)
        db.session.flush()

        app.app_model_config_id = app_model_config.id

        account = current_user

        site = Site(
            app_id=app.id,
            title=app.name,
            default_language=account.interface_language,
            customize_token_strategy='not_allow',
            code=Site.generate_code(16)
        )

        db.session.add(site)
        db.session.commit()

        app_was_created.send(app)

        return app, 201
|
||||
|
||||
|
||||
class AppTemplateApi(Resource):
    """Console endpoint returning the demo app templates for the user's language."""

    template_fields = {
        'name': fields.String,
        'icon': fields.String,
        'icon_background': fields.String,
        'description': fields.String,
        'mode': fields.String,
        'model_config': fields.Nested(model_config_fields),
    }

    template_list_fields = {
        'data': fields.List(fields.Nested(template_fields)),
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(template_list_fields)
    def get(self):
        """Get app demo templates"""
        # Templates are keyed by interface language; an unknown language yields None.
        templates = demo_model_templates.get(current_user.interface_language)
        return {'data': templates}
|
||||
|
||||
|
||||
class AppApi(Resource):
    """Console endpoints to read or delete one app of the current tenant."""

    site_fields = {
        # The site's code is exposed under both names.
        'access_token': fields.String(attribute='code'),
        'code': fields.String,
        'title': fields.String,
        'icon': fields.String,
        'icon_background': fields.String,
        'description': fields.String,
        'default_language': fields.String,
        'customize_domain': fields.String,
        'copyright': fields.String,
        'privacy_policy': fields.String,
        'customize_token_strategy': fields.String,
        'prompt_public': fields.Boolean,
        'app_base_url': fields.String,
    }

    app_detail_fields_with_site = {
        'id': fields.String,
        'name': fields.String,
        'mode': fields.String,
        'icon': fields.String,
        'icon_background': fields.String,
        'enable_site': fields.Boolean,
        'enable_api': fields.Boolean,
        'api_rpm': fields.Integer,
        'api_rph': fields.Integer,
        'is_demo': fields.Boolean,
        'model_config': fields.Nested(model_config_fields, attribute='app_model_config'),
        'site': fields.Nested(site_fields),
        'api_base_url': fields.String,
        'created_at': TimestampField,
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields_with_site)
    def get(self, app_id):
        """Get app detail"""
        return _get_app(str(app_id), current_user.current_tenant_id)

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, app_id):
        """Delete app"""
        target = _get_app(str(app_id), current_user.current_tenant_id)

        db.session.delete(target)
        db.session.commit()

        # todo delete related data??
        # model_config, site, api_token, conversation, message, message_feedback, message_annotation

        # Notify listeners only after the deletion has been committed.
        app_was_deleted.send(target)

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
class AppNameApi(Resource):
    """Console endpoint that renames an app of the current tenant."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self, app_id):
        """Set the app's name from the JSON ``name`` field and return the app.

        Raises Forbidden unless the current user's role is admin or owner,
        and AppNotFoundError when the app is not in the user's tenant.
        """
        # Only tenant admins and owners may rename apps.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        parser = reqparse.RequestParser()
        parser.add_argument('name', type=str, required=True, location='json')
        args = parser.parse_args()

        # Resolve ownership through the same helper and tenant source
        # (current_user.current_tenant_id) as the other app endpoints,
        # rather than a tenant id read from the Flask session.
        app = _get_app(str(app_id), current_user.current_tenant_id)

        app.name = args.get('name')
        app.updated_at = datetime.utcnow()
        db.session.commit()
        return app
|
||||
|
||||
|
||||
class AppIconApi(Resource):
    """Console endpoint that updates an app's icon and icon background."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self, app_id):
        """Store the JSON ``icon`` / ``icon_background`` values and return the app.

        Both fields are optional; an omitted field is stored as None.
        Raises Forbidden unless the current user's role is admin or owner,
        and AppNotFoundError when the app is not in the user's tenant.
        """
        # Only tenant admins and owners may change the icon.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        parser = reqparse.RequestParser()
        parser.add_argument('icon', type=str, location='json')
        parser.add_argument('icon_background', type=str, location='json')
        args = parser.parse_args()

        # Resolve ownership through the same helper and tenant source
        # (current_user.current_tenant_id) as the other app endpoints,
        # rather than a tenant id read from the Flask session.
        app = _get_app(str(app_id), current_user.current_tenant_id)

        app.icon = args.get('icon')
        app.icon_background = args.get('icon_background')
        app.updated_at = datetime.utcnow()
        db.session.commit()

        return app
|
||||
|
||||
|
||||
class AppSiteStatus(Resource):
    """Console endpoint that switches an app's ``enable_site`` flag."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self, app_id):
        """Apply the JSON ``enable_site`` value; a no-op when it is unchanged.

        Raises AppNotFoundError when the app is not in the user's tenant.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('enable_site', type=bool, required=True, location='json')
        requested = parser.parse_args().get('enable_site')

        # Same tenant-scoped lookup (and same error) as the inline query it replaces.
        app = _get_app(str(app_id), current_user.current_tenant_id)

        if requested != app.enable_site:
            app.enable_site = requested
            app.updated_at = datetime.utcnow()
            db.session.commit()

        return app
|
||||
|
||||
|
||||
class AppApiStatus(Resource):
    """Console endpoint that switches an app's ``enable_api`` flag."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self, app_id):
        """Apply the JSON ``enable_api`` value; a no-op when it is unchanged.

        Raises AppNotFoundError when the app is not in the user's tenant.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('enable_api', type=bool, required=True, location='json')
        requested = parser.parse_args().get('enable_api')

        app = _get_app(str(app_id), current_user.current_tenant_id)

        if requested != app.enable_api:
            app.enable_api = requested
            app.updated_at = datetime.utcnow()
            db.session.commit()

        return app
|
||||
|
||||
|
||||
class AppRateLimit(Resource):
    """Console endpoint that updates an app's API rate limits."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self, app_id):
        """Update ``api_rpm`` and/or ``api_rph`` from the JSON body and return the app.

        Both fields are optional non-negative integers; an omitted field
        keeps its stored value. Raises AppNotFoundError when the app is not
        in the user's tenant.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('api_rpm', type=inputs.natural, required=False, location='json')
        parser.add_argument('api_rph', type=inputs.natural, required=False, location='json')
        args = parser.parse_args()

        app_id = str(app_id)
        app = _get_app(app_id, current_user.current_tenant_id)

        # Compare against None rather than truthiness: 0 is a valid value
        # (apps are created with 0) and must be storable so that a
        # previously raised limit can be reset.
        if args.get('api_rpm') is not None:
            app.api_rpm = args.get('api_rpm')
        if args.get('api_rph') is not None:
            app.api_rph = args.get('api_rph')
        app.updated_at = datetime.utcnow()
        db.session.commit()
        return app
|
||||
|
||||
|
||||
class AppCopy(Resource):
    """Console endpoint that duplicates an app together with its model config."""

    @staticmethod
    def create_app_copy(app):
        """Build an unsaved App mirroring ``app``, named '<name> copy'.

        The copy initially points at the source's model config id; ``post``
        repoints it when a config row is duplicated.
        """
        copy_app = App(
            name=app.name + ' copy',
            icon=app.icon,
            icon_background=app.icon_background,
            tenant_id=app.tenant_id,
            mode=app.mode,
            app_model_config_id=app.app_model_config_id,
            enable_site=app.enable_site,
            enable_api=app.enable_api,
            api_rpm=app.api_rpm,
            api_rph=app.api_rph
        )
        return copy_app

    @staticmethod
    def create_app_model_config_copy(app_config, copy_app_id):
        """Build an unsaved AppModelConfig cloning ``app_config`` for ``copy_app_id``."""
        copy_app_model_config = AppModelConfig(
            app_id=copy_app_id,
            provider=app_config.provider,
            model_id=app_config.model_id,
            configs=app_config.configs,
            opening_statement=app_config.opening_statement,
            suggested_questions=app_config.suggested_questions,
            suggested_questions_after_answer=app_config.suggested_questions_after_answer,
            more_like_this=app_config.more_like_this,
            model=app_config.model,
            user_input_form=app_config.user_input_form,
            pre_prompt=app_config.pre_prompt,
            agent_mode=app_config.agent_mode
        )
        return copy_app_model_config

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_detail_fields)
    def post(self, app_id):
        """Copy the app and its model config; return the copy with status 201.

        Raises AppNotFoundError when the app is not in the user's tenant.
        """
        app_id = str(app_id)
        app = _get_app(app_id, current_user.current_tenant_id)

        copy_app = self.create_app_copy(app)
        db.session.add(copy_app)
        # Flush explicitly so copy_app.id is assigned before the config copy
        # references it, instead of relying on an implicit autoflush.
        db.session.flush()

        app_config = db.session.query(AppModelConfig). \
            filter(AppModelConfig.app_id == app_id). \
            one_or_none()

        if app_config:
            copy_app_model_config = self.create_app_model_config_copy(app_config, copy_app.id)
            db.session.add(copy_app_model_config)
            db.session.flush()
            copy_app.app_model_config_id = copy_app_model_config.id

        # Single commit: the app copy and its config copy are persisted together.
        db.session.commit()

        return copy_app, 201
|
||||
|
||||
|
||||
class AppExport(Resource):
    """Placeholder for the app export endpoint (not implemented yet)."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id):
        """Do nothing for now and return None."""
        # todo
        return None
|
||||
|
||||
|
||||
class IntroductionGenerateApi(Resource):
    """Console endpoint that asks the LLM to write an introduction for a prompt."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Generate an introduction from the JSON ``prompt_template`` field.

        Returns ``{'introduction': <text>}``. Provider and LLM failures are
        translated into the console's own error types.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('prompt_template', type=str, required=True, location='json')
        prompt_template = parser.parse_args()['prompt_template']

        tenant_id = current_user.current_tenant_id

        try:
            introduction = LLMGenerator.generate_introduction(tenant_id, prompt_template)
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            # Surface the upstream message to the client.
            raise CompletionRequestError(str(llm_error))

        return {'introduction': introduction}
|
||||
|
||||
|
||||
# Route table for the app management endpoints.
for _resource_cls, _url_rule in (
    (AppListApi, '/apps'),
    (AppTemplateApi, '/app-templates'),
    (AppApi, '/apps/<uuid:app_id>'),
    (AppCopy, '/apps/<uuid:app_id>/copy'),
    (AppNameApi, '/apps/<uuid:app_id>/name'),
    (AppSiteStatus, '/apps/<uuid:app_id>/site-enable'),
    (AppApiStatus, '/apps/<uuid:app_id>/api-enable'),
    (AppRateLimit, '/apps/<uuid:app_id>/rate-limit'),
    (IntroductionGenerateApi, '/introduction-generate'),
):
    api.add_resource(_resource_cls, _url_rule)
|
||||
206
api/controllers/console/app/completion.py
Normal file
206
api/controllers/console/app/completion.py
Normal file
@@ -0,0 +1,206 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import json
|
||||
import logging
|
||||
from typing import Generator, Union
|
||||
|
||||
import flask_login
|
||||
from flask import Response, stream_with_context
|
||||
from flask_login import login_required
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.app import _get_app
|
||||
from controllers.console.app.error import ConversationCompletedError, AppUnavailableError, \
|
||||
ProviderNotInitializeError, CompletionRequestError, ProviderQuotaExceededError, \
|
||||
ProviderModelCurrentlyNotSupportError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.conversation_message_task import PubHandler
|
||||
from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthorizationError, LLMAPIConnectionError, \
|
||||
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
|
||||
from libs.helper import uuid_value
|
||||
from flask_restful import Resource, reqparse
|
||||
|
||||
from services.completion_service import CompletionService
|
||||
|
||||
|
||||
# define completion message api for user
|
||||
class CompletionMessageApi(Resource):
    """Console endpoint that runs a streaming completion against a completion-mode app."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id):
        """Run the completion and return the result of ``compact_response``.

        JSON body: ``inputs`` (dict, required), ``query`` (optional) and
        ``model_config`` (dict, required), which overrides the stored config.
        Service and LLM failures are translated into console error types;
        ValueError propagates unchanged and anything else becomes a logged
        InternalServerError.
        """
        # get app info
        app_model = _get_app(str(app_id), 'completion')

        parser = reqparse.RequestParser()
        for arg_name, arg_options in (
            ('inputs', dict(type=dict, required=True, location='json')),
            ('query', dict(type=str, location='json')),
            ('model_config', dict(type=dict, required=True, location='json')),
        ):
            parser.add_argument(arg_name, **arg_options)
        args = parser.parse_args()

        account = flask_login.current_user

        try:
            response = CompletionService.completion(
                app_model=app_model,
                user=account,
                args=args,
                from_source='console',
                streaming=True,
                is_model_config_override=True
            )

            return compact_response(response)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError:
            # Let ValueError through untouched so the generic handler below does not mask it.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
class CompletionMessageStopApi(Resource):
    """Console endpoint that asks a running completion task to stop."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id, task_id):
        """Signal ``task_id`` to stop on behalf of the logged-in account.

        The app is looked up first (as a completion app) purely for validation;
        its return value is not used.
        """
        # get app info
        _get_app(str(app_id), 'completion')

        requester = flask_login.current_user
        PubHandler.stop(requester, task_id)

        return {'result': 'success'}, 200
|
||||
|
||||
|
||||
class ChatMessageApi(Resource):
    """Console endpoint that sends a chat message to a chat-mode app."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id):
        """Send one chat message using the model config supplied in the request body.

        The JSON body must contain ``inputs``, ``query`` and ``model_config``;
        ``conversation_id`` is optional and validated by ``uuid_value``. The service
        is called with ``streaming=True`` and its result is wrapped by
        ``compact_response``. Service and LLM errors are translated into the
        console's HTTP error classes.
        """
        # get app info
        app_model = _get_app(str(app_id), 'chat')

        request_parser = reqparse.RequestParser()
        request_parser.add_argument('inputs', type=dict, required=True, location='json')
        request_parser.add_argument('query', type=str, required=True, location='json')
        request_parser.add_argument('model_config', type=dict, required=True, location='json')
        request_parser.add_argument('conversation_id', type=uuid_value, location='json')
        payload = request_parser.parse_args()

        requester = flask_login.current_user

        try:
            result = CompletionService.completion(
                app_model=app_model,
                user=requester,
                args=payload,
                from_source='console',
                streaming=True,
                is_model_config_override=True
            )
            return compact_response(result)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError:
            # Propagate unchanged so it is not swallowed by the generic handler below.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
def compact_response(response: Union[dict, Generator]) -> Response:
    """Wrap a completion result in a Flask ``Response``.

    :param response: either a ``dict`` (returned as a single JSON body) or an
        iterable of chunks that is streamed as ``text/event-stream``.
    :return: a ``Response`` created with status 200 in both cases.

    Because the streaming response is created with status 200 before any chunk
    is produced, an error raised while iterating is reported in-band: it is
    converted with ``api.handle_error`` and emitted as one final ``data:`` event.
    """
    if isinstance(response, dict):
        return Response(response=json.dumps(response), status=200, mimetype='application/json')

    def error_event(error) -> str:
        # Render an exception as a server-sent event carrying the API's JSON error body.
        return "data: " + json.dumps(api.handle_error(error).get_json()) + "\n\n"

    def generate() -> Generator:
        try:
            for chunk in response:
                yield chunk
        except services.errors.conversation.ConversationNotExistsError:
            yield error_event(NotFound("Conversation Not Exists."))
        except services.errors.conversation.ConversationCompletedError:
            yield error_event(ConversationCompletedError())
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            yield error_event(AppUnavailableError())
        except ProviderTokenNotInitError:
            yield error_event(ProviderNotInitializeError())
        except QuotaExceededError:
            yield error_event(ProviderQuotaExceededError())
        except ModelCurrentlyNotSupportError:
            yield error_event(ProviderModelCurrentlyNotSupportError())
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            yield error_event(CompletionRequestError(str(e)))
        except ValueError as e:
            yield error_event(e)
        except Exception:
            logging.exception("internal server error.")
            yield error_event(InternalServerError())

    return Response(stream_with_context(generate()), status=200,
                    mimetype='text/event-stream')
|
||||
|
||||
|
||||
class ChatMessageStopApi(Resource):
    """Console endpoint that asks a running chat task to stop."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id, task_id):
        """Signal ``task_id`` to stop on behalf of the logged-in account.

        The app is looked up first (as a chat app) purely for validation; its
        return value is not used.
        """
        # get app info
        _get_app(str(app_id), 'chat')

        requester = flask_login.current_user
        PubHandler.stop(requester, task_id)

        return {'result': 'success'}, 200
|
||||
|
||||
|
||||
# Route registrations for the completion/chat message endpoints and their stop endpoints.
# `app_id` must be a UUID; `task_id` is accepted as a plain string.
api.add_resource(CompletionMessageApi, '/apps/<uuid:app_id>/completion-messages')
api.add_resource(CompletionMessageStopApi, '/apps/<uuid:app_id>/completion-messages/<string:task_id>/stop')
api.add_resource(ChatMessageApi, '/apps/<uuid:app_id>/chat-messages')
api.add_resource(ChatMessageStopApi, '/apps/<uuid:app_id>/chat-messages/<string:task_id>/stop')
|
||||
384
api/controllers/console/app/conversation.py
Normal file
384
api/controllers/console/app/conversation.py
Normal file
@@ -0,0 +1,384 @@
|
||||
from datetime import datetime
|
||||
|
||||
import pytz
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal_with
|
||||
from flask_restful.inputs import int_range
|
||||
from sqlalchemy import or_, func
|
||||
from sqlalchemy.orm import joinedload
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.app import _get_app
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import TimestampField, datetime_string, uuid_value
|
||||
from extensions.ext_database import db
|
||||
from models.model import Message, MessageAnnotation, Conversation
|
||||
|
||||
# Flask-RESTful marshalling schemas shared by the conversation endpoints below.
# Each dict maps an output key to the field type used to serialise it.

# Minimal account representation nested inside feedbacks and annotations.
account_fields = {
    'id': fields.String,
    'name': fields.String,
    'email': fields.String
}

# One feedback entry; 'from_account' is serialised as null when absent.
feedback_fields = {
    'rating': fields.String,
    'content': fields.String,
    'from_source': fields.String,
    'from_end_user_id': fields.String,
    'from_account': fields.Nested(account_fields, allow_null=True),
}

# One annotation; 'account' is serialised as null when absent.
annotation_fields = {
    'content': fields.String,
    'account': fields.Nested(account_fields, allow_null=True),
    'created_at': TimestampField
}

# Full message payload, including its feedbacks and optional annotation.
message_detail_fields = {
    'id': fields.String,
    'conversation_id': fields.String,
    'inputs': fields.Raw,
    'query': fields.String,
    'message': fields.Raw,
    'message_tokens': fields.Integer,
    'answer': fields.String,
    'answer_tokens': fields.Integer,
    'provider_response_latency': fields.Integer,
    'from_source': fields.String,
    'from_end_user_id': fields.String,
    'from_account_id': fields.String,
    'feedbacks': fields.List(fields.Nested(feedback_fields)),
    'annotation': fields.Nested(annotation_fields, allow_null=True),
    'created_at': TimestampField
}

# Like/dislike counters, used for both user and admin feedback statistics.
feedback_stat_fields = {
    'like': fields.Integer,
    'dislike': fields.Integer
}

# Model configuration as exposed by the conversation detail endpoints.
model_config_fields = {
    'opening_statement': fields.String,
    'suggested_questions': fields.Raw,
    'model': fields.Raw,
    'user_input_form': fields.Raw,
    'pre_prompt': fields.String,
    'agent_mode': fields.Raw,
}
|
||||
|
||||
|
||||
class CompletionConversationApi(Resource):
    """Paginated, filterable listing of the conversations of a completion-mode app."""

    class MessageTextField(fields.Raw):
        """Field that outputs the 'text' entry of the first element of the value."""

        def format(self, value):
            # An empty/None value is rendered as an empty string.
            return value[0]['text'] if value else ''

    # NOTE(review): not referenced anywhere else in this class — confirm whether it is still needed.
    simple_configs_fields = {
        'prompt_template': fields.String,
    }

    # Reduced model config for list rows; 'model' is read from the `model_dict` attribute.
    simple_model_config_fields = {
        'model': fields.Raw(attribute='model_dict'),
        'pre_prompt': fields.String,
    }

    # Reduced message payload for list rows.
    simple_message_detail_fields = {
        'inputs': fields.Raw,
        'query': fields.String,
        'message': MessageTextField,
        'answer': fields.String,
    }

    # One row of the listing; 'message' is read from the `first_message` attribute.
    conversation_fields = {
        'id': fields.String,
        'status': fields.String,
        'from_source': fields.String,
        'from_end_user_id': fields.String,
        'from_account_id': fields.String,
        'read_at': TimestampField,
        'created_at': TimestampField,
        'annotation': fields.Nested(annotation_fields, allow_null=True),
        'model_config': fields.Nested(simple_model_config_fields),
        'user_feedback_stats': fields.Nested(feedback_stat_fields),
        'admin_feedback_stats': fields.Nested(feedback_stat_fields),
        'message': fields.Nested(simple_message_detail_fields, attribute='first_message')
    }

    # Envelope built from the pagination object's page/per_page/total/has_next/items attributes.
    conversation_pagination_fields = {
        'page': fields.Integer,
        'limit': fields.Integer(attribute='per_page'),
        'total': fields.Integer,
        'has_more': fields.Boolean(attribute='has_next'),
        'data': fields.List(fields.Nested(conversation_fields), attribute='items')
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(conversation_pagination_fields)
    def get(self, app_id):
        """List conversations of the app, newest first.

        Query-string filters: ``keyword``, ``start``/``end`` (``YYYY-MM-DD HH:MM``,
        interpreted in the current account's timezone), ``annotation_status``
        (``annotated``/``not_annotated``/``all``), plus ``page`` and ``limit``.
        """
        app_id = str(app_id)

        parser = reqparse.RequestParser()
        parser.add_argument('keyword', type=str, location='args')
        parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('annotation_status', type=str,
                            choices=['annotated', 'not_annotated', 'all'], default='all', location='args')
        parser.add_argument('page', type=int_range(1, 99999), default=1, location='args')
        parser.add_argument('limit', type=int_range(1, 100), default=20, location='args')
        args = parser.parse_args()

        # get app info
        app = _get_app(app_id, 'completion')

        query = db.select(Conversation).where(Conversation.app_id == app.id, Conversation.mode == 'completion')

        if args['keyword']:
            # Case-insensitive substring match on the message query or answer.
            # NOTE(review): '%' and '_' inside the keyword are not escaped, so they act as
            # LIKE wildcards. The join may also yield one row per matching message — confirm
            # whether duplicate conversations can reach the paginator.
            query = query.join(
                Message, Message.conversation_id == Conversation.id
            ).filter(
                or_(
                    Message.query.ilike('%{}%'.format(args['keyword'])),
                    Message.answer.ilike('%{}%'.format(args['keyword']))
                )
            )

        account = current_user
        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc

        if args['start']:
            # The value is parsed again here, so the validator presumably returns the raw string.
            start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
            # The format carries no seconds, so seconds are already 0 at this point.
            start_datetime = start_datetime.replace(second=0)

            # Interpret the naive value in the account's timezone, then convert to UTC.
            start_datetime_timezone = timezone.localize(start_datetime)
            start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)

            # Lower bound is inclusive.
            query = query.where(Conversation.created_at >= start_datetime_utc)

        if args['end']:
            end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
            end_datetime = end_datetime.replace(second=0)

            end_datetime_timezone = timezone.localize(end_datetime)
            end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)

            # Upper bound is exclusive.
            query = query.where(Conversation.created_at < end_datetime_utc)

        if args['annotation_status'] == "annotated":
            # Inner join: keeps only conversations that have at least one annotation.
            query = query.options(joinedload(Conversation.message_annotations)).join(
                MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
            )
        elif args['annotation_status'] == "not_annotated":
            # Outer join + HAVING count == 0: keeps only conversations without annotations.
            query = query.outerjoin(
                MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
            ).group_by(Conversation.id).having(func.count(MessageAnnotation.id) == 0)

        query = query.order_by(Conversation.created_at.desc())

        conversations = db.paginate(
            query,
            page=args['page'],
            per_page=args['limit'],
            error_out=False
        )

        return conversations
|
||||
|
||||
|
||||
class CompletionConversationDetailApi(Resource):
    """Detail view of a single conversation of a completion-mode app."""

    # Response schema; 'message' is read from the conversation's `first_message` attribute.
    conversation_detail_fields = {
        'id': fields.String,
        'status': fields.String,
        'from_source': fields.String,
        'from_end_user_id': fields.String,
        'from_account_id': fields.String,
        'created_at': TimestampField,
        'model_config': fields.Nested(model_config_fields),
        'message': fields.Nested(message_detail_fields, attribute='first_message'),
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(conversation_detail_fields)
    def get(self, app_id, conversation_id):
        """Return the conversation, delegating lookup to ``_get_conversation``."""
        return _get_conversation(str(app_id), str(conversation_id), 'completion')
|
||||
|
||||
|
||||
class ChatConversationApi(Resource):
    """Paginated, filterable listing of the conversations of a chat-mode app."""

    # NOTE(review): not referenced anywhere else in this class — confirm whether it is still needed.
    simple_configs_fields = {
        'prompt_template': fields.String,
    }

    # Reduced model config for list rows; 'model' is read from the `model_dict` attribute.
    simple_model_config_fields = {
        'model': fields.Raw(attribute='model_dict'),
        'pre_prompt': fields.String,
    }

    # One row of the listing; 'summary' is read from the `summary_or_query` attribute.
    conversation_fields = {
        'id': fields.String,
        'status': fields.String,
        'from_source': fields.String,
        'from_end_user_id': fields.String,
        'from_account_id': fields.String,
        'summary': fields.String(attribute='summary_or_query'),
        'read_at': TimestampField,
        'created_at': TimestampField,
        'annotated': fields.Boolean,
        'model_config': fields.Nested(simple_model_config_fields),
        'message_count': fields.Integer,
        'user_feedback_stats': fields.Nested(feedback_stat_fields),
        'admin_feedback_stats': fields.Nested(feedback_stat_fields)
    }

    # Envelope built from the pagination object's page/per_page/total/has_next/items attributes.
    conversation_pagination_fields = {
        'page': fields.Integer,
        'limit': fields.Integer(attribute='per_page'),
        'total': fields.Integer,
        'has_more': fields.Boolean(attribute='has_next'),
        'data': fields.List(fields.Nested(conversation_fields), attribute='items')
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(conversation_pagination_fields)
    def get(self, app_id):
        """List conversations of the app, newest first.

        Query-string filters: ``keyword``, ``start``/``end`` (``YYYY-MM-DD HH:MM``,
        interpreted in the current account's timezone), ``annotation_status``
        (``annotated``/``not_annotated``/``all``), ``message_count_gte``, plus
        ``page`` and ``limit``.
        """
        app_id = str(app_id)

        parser = reqparse.RequestParser()
        parser.add_argument('keyword', type=str, location='args')
        parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('annotation_status', type=str,
                            choices=['annotated', 'not_annotated', 'all'], default='all', location='args')
        parser.add_argument('message_count_gte', type=int_range(1, 99999), required=False, location='args')
        parser.add_argument('page', type=int_range(1, 99999), required=False, default=1, location='args')
        parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
        args = parser.parse_args()

        # get app info
        app = _get_app(app_id, 'chat')

        query = db.select(Conversation).where(Conversation.app_id == app.id, Conversation.mode == 'chat')

        if args['keyword']:
            # Case-insensitive substring match on message query/answer or on the
            # conversation's name/introduction.
            # NOTE(review): '%' and '_' inside the keyword are not escaped, so they act as
            # LIKE wildcards. The join may also yield one row per matching message — confirm
            # whether duplicate conversations can reach the paginator.
            query = query.join(
                Message, Message.conversation_id == Conversation.id
            ).filter(
                or_(
                    Message.query.ilike('%{}%'.format(args['keyword'])),
                    Message.answer.ilike('%{}%'.format(args['keyword'])),
                    Conversation.name.ilike('%{}%'.format(args['keyword'])),
                    Conversation.introduction.ilike('%{}%'.format(args['keyword'])),
                ),

            )

        account = current_user
        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc

        if args['start']:
            # The value is parsed again here, so the validator presumably returns the raw string.
            start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
            # The format carries no seconds, so seconds are already 0 at this point.
            start_datetime = start_datetime.replace(second=0)

            # Interpret the naive value in the account's timezone, then convert to UTC.
            start_datetime_timezone = timezone.localize(start_datetime)
            start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)

            # Lower bound is inclusive.
            query = query.where(Conversation.created_at >= start_datetime_utc)

        if args['end']:
            end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
            end_datetime = end_datetime.replace(second=0)

            end_datetime_timezone = timezone.localize(end_datetime)
            end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)

            # Upper bound is exclusive.
            query = query.where(Conversation.created_at < end_datetime_utc)

        if args['annotation_status'] == "annotated":
            # Inner join: keeps only conversations that have at least one annotation.
            query = query.options(joinedload(Conversation.message_annotations)).join(
                MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
            )
        elif args['annotation_status'] == "not_annotated":
            # Outer join + HAVING count == 0: keeps only conversations without annotations.
            query = query.outerjoin(
                MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
            ).group_by(Conversation.id).having(func.count(MessageAnnotation.id) == 0)

        # The `>= 1` test is redundant with int_range(1, 99999) on the argument.
        # NOTE(review): when `keyword` is also given, Message has already been joined above;
        # confirm that joining it a second time here produces valid SQL and the intended count
        # (the keyword filter would restrict which messages are counted).
        if args['message_count_gte'] and args['message_count_gte'] >= 1:
            query = (
                query.options(joinedload(Conversation.messages))
                .join(Message, Message.conversation_id == Conversation.id)
                .group_by(Conversation.id)
                .having(func.count(Message.id) >= args['message_count_gte'])
            )

        query = query.order_by(Conversation.created_at.desc())

        conversations = db.paginate(
            query,
            page=args['page'],
            per_page=args['limit'],
            error_out=False
        )

        return conversations
|
||||
|
||||
|
||||
class ChatConversationDetailApi(Resource):
    """Detail view of a single conversation of a chat-mode app."""

    # Response schema for one chat conversation.
    conversation_detail_fields = {
        'id': fields.String,
        'status': fields.String,
        'from_source': fields.String,
        'from_end_user_id': fields.String,
        'from_account_id': fields.String,
        'created_at': TimestampField,
        'annotated': fields.Boolean,
        'model_config': fields.Nested(model_config_fields),
        'message_count': fields.Integer,
        'user_feedback_stats': fields.Nested(feedback_stat_fields),
        'admin_feedback_stats': fields.Nested(feedback_stat_fields)
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(conversation_detail_fields)
    def get(self, app_id, conversation_id):
        """Return the conversation, delegating lookup to ``_get_conversation``."""
        return _get_conversation(str(app_id), str(conversation_id), 'chat')
|
||||
|
||||
|
||||
|
||||
|
||||
# Route registrations for the conversation list and detail endpoints.
# Both `app_id` and `conversation_id` must be UUIDs.
api.add_resource(CompletionConversationApi, '/apps/<uuid:app_id>/completion-conversations')
api.add_resource(CompletionConversationDetailApi, '/apps/<uuid:app_id>/completion-conversations/<uuid:conversation_id>')
api.add_resource(ChatConversationApi, '/apps/<uuid:app_id>/chat-conversations')
api.add_resource(ChatConversationDetailApi, '/apps/<uuid:app_id>/chat-conversations/<uuid:conversation_id>')
|
||||
|
||||
|
||||
def _get_conversation(app_id, conversation_id, mode):
    """Fetch a conversation of the given app and mark it as read on first access.

    :param app_id: id of the owning app, passed to ``_get_app`` together with ``mode``.
    :param conversation_id: id of the conversation to load.
    :param mode: app mode forwarded to ``_get_app``.
    :return: the matching ``Conversation``.
    :raises NotFound: when the app has no conversation with that id.
    """
    # get app info
    app = _get_app(app_id, mode)

    lookup = db.session.query(Conversation).filter(
        Conversation.id == conversation_id,
        Conversation.app_id == app.id,
    )
    conversation = lookup.first()

    if not conversation:
        raise NotFound("Conversation Not Exists.")

    if conversation.read_at:
        # Already read: nothing to record.
        return conversation

    # First read: remember when it happened and which account read it.
    conversation.read_at = datetime.utcnow()
    conversation.read_account_id = current_user.id
    db.session.commit()

    return conversation
|
||||
49
api/controllers/console/app/error.py
Normal file
49
api/controllers/console/app/error.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class AppNotFoundError(BaseHTTPException):
    """HTTP 404 error with error code ``app_not_found``."""

    error_code = 'app_not_found'
    description = "App not found."
    code = 404
|
||||
|
||||
|
||||
class ProviderNotInitializeError(BaseHTTPException):
    """HTTP 400 error reporting that the provider token has not been initialised."""

    error_code = 'provider_not_initialize'
    description = "Provider Token not initialize."
    code = 400
|
||||
|
||||
|
||||
class ProviderQuotaExceededError(BaseHTTPException):
    """HTTP 400 error reporting that the provider quota has been exceeded."""

    error_code = 'provider_quota_exceeded'
    description = "Provider quota exceeded."
    code = 400
|
||||
|
||||
|
||||
class ProviderModelCurrentlyNotSupportError(BaseHTTPException):
    """HTTP 400 error with error code ``model_currently_not_support``."""

    error_code = 'model_currently_not_support'
    # NOTE(review): the message names GPT-4 specifically although the class name is generic —
    # confirm the wording is intended.
    description = "GPT-4 currently not support."
    code = 400
|
||||
|
||||
|
||||
class ConversationCompletedError(BaseHTTPException):
    """HTTP 400 error reporting that the conversation was already completed."""

    error_code = 'conversation_completed'
    description = "Conversation was completed."
    code = 400
|
||||
|
||||
|
||||
class AppUnavailableError(BaseHTTPException):
    """HTTP 400 error with error code ``app_unavailable``."""

    error_code = 'app_unavailable'
    description = "App unavailable."
    code = 400
|
||||
|
||||
|
||||
class CompletionRequestError(BaseHTTPException):
    """HTTP 400 error reporting a failed completion request."""

    error_code = 'completion_request_error'
    # Class-level default text for the error.
    description = "Completion request failed."
    code = 400
|
||||
|
||||
|
||||
class AppMoreLikeThisDisabledError(BaseHTTPException):
    """HTTP 403 error reporting that "more like this" is disabled."""

    error_code = 'app_more_like_this_disabled'
    description = "More like this disabled."
    code = 403
|
||||
209
api/controllers/console/app/explore.py
Normal file
209
api/controllers/console/app/explore.py
Normal file
@@ -0,0 +1,209 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from datetime import datetime
|
||||
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal_with, abort, inputs
|
||||
from sqlalchemy import and_
|
||||
|
||||
from controllers.console import api
|
||||
from extensions.ext_database import db
|
||||
from models.model import Tenant, App, InstalledApp, RecommendedApp
|
||||
from services.account_service import TenantService
|
||||
|
||||
# Flask-RESTful marshalling schemas for the installed-apps and explore endpoints below.
# Each dict maps an output key to the field type used to serialise it.

# Minimal app representation nested inside installed/recommended app entries.
app_fields = {
    'id': fields.String,
    'name': fields.String,
    'mode': fields.String,
    'icon': fields.String,
    'icon_background': fields.String
}

# One installed app entry.
installed_app_fields = {
    'id': fields.String,
    'app': fields.Nested(app_fields, attribute='app'),
    'app_owner_tenant_id': fields.String,
    'is_pinned': fields.Boolean,
    'last_used_at': fields.DateTime,
    'editable': fields.Boolean
}

# Envelope for the installed-apps listing.
installed_app_list_fields = {
    'installed_apps': fields.List(fields.Nested(installed_app_fields))
}

# One recommended app entry.
recommended_app_fields = {
    'app': fields.Nested(app_fields, attribute='app'),
    'app_id': fields.String,
    'description': fields.String(attribute='description'),
    'copyright': fields.String,
    'privacy_policy': fields.String,
    'category': fields.String,
    'position': fields.Integer,
    'is_listed': fields.Boolean,
    'install_count': fields.Integer,
    'installed': fields.Boolean,
    'editable': fields.Boolean
}

# Envelope for the recommended-apps listing, together with the categories in use.
recommended_app_list_fields = {
    'recommended_apps': fields.List(fields.Nested(recommended_app_fields)),
    'categories': fields.List(fields.String)
}
|
||||
|
||||
|
||||
class InstalledAppsListResource(Resource):
    """List the apps installed in the current tenant and install new ones."""

    @login_required
    @marshal_with(installed_app_list_fields)
    def get(self):
        """Return the current tenant's installed apps, pinned apps first.

        Within the pinned and unpinned groups, entries are ordered by ascending
        ``last_used_at``; entries without a ``last_used_at`` come last.
        """
        # Use the logged-in user's tenant, as the delete and explore endpoints do.
        current_tenant_id = current_user.current_tenant_id
        installed_apps = db.session.query(InstalledApp).filter(
            InstalledApp.tenant_id == current_tenant_id
        ).all()

        current_user.role = TenantService.get_user_role(current_user, current_user.current_tenant)
        editable = current_user.role in ["owner", "admin"]

        # ORM objects are not mappings, so they cannot be expanded with `**`;
        # build the marshalled entries explicitly.
        installed_apps = [
            {
                'id': installed_app.id,
                'app': installed_app.app,
                'app_owner_tenant_id': installed_app.app_owner_tenant_id,
                'is_pinned': installed_app.is_pinned,
                'last_used_at': installed_app.last_used_at,
                "editable": editable,
            }
            for installed_app in installed_apps
        ]
        # The middle key keeps None out of the datetime comparison.
        installed_apps.sort(key=lambda item: (
            not item['is_pinned'],
            item['last_used_at'] is None,
            item['last_used_at'],
        ))

        return {'installed_apps': installed_apps}

    @login_required
    def post(self):
        """Install a public, recommended app into the current tenant.

        Responds 404 when the app does not exist or is not a recommended app, and
        403 when it is not public. Installing an app that is already installed is
        a no-op that still reports success.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('app_id', type=str, required=True, help='Invalid app_id')
        args = parser.parse_args()

        # Use the logged-in user's tenant, as the delete and explore endpoints do.
        current_tenant_id = current_user.current_tenant_id
        app = App.query.get(args['app_id'])
        if app is None:
            abort(404, message='App not found')
        recommended_app = RecommendedApp.query.filter(RecommendedApp.app_id == args['app_id']).first()
        if recommended_app is None:
            abort(404, message='App not found')
        if not app.is_public:
            abort(403, message="You can't install a non-public app")

        installed_app = InstalledApp.query.filter(and_(
            InstalledApp.app_id == args['app_id'],
            InstalledApp.tenant_id == current_tenant_id
        )).first()

        if installed_app is None:
            # todo: position
            recommended_app.install_count += 1

            new_installed_app = InstalledApp(
                app_id=args['app_id'],
                tenant_id=current_tenant_id,
                is_pinned=False,
                last_used_at=datetime.utcnow()
            )
            db.session.add(new_installed_app)
            db.session.commit()

        return {'message': 'App installed successfully'}
|
||||
|
||||
|
||||
class InstalledAppResource(Resource):
    """Uninstall or update a single installed app of the current tenant."""

    @login_required
    def delete(self, installed_app_id):
        """Uninstall the app.

        Responds 404 when the installed app is not found in the current tenant and
        400 when the app is owned by the current tenant.
        """
        installed_app = InstalledApp.query.filter(and_(
            InstalledApp.id == str(installed_app_id),
            InstalledApp.tenant_id == current_user.current_tenant_id
        )).first()

        if installed_app is None:
            abort(404, message='App not found')

        if installed_app.app_owner_tenant_id == current_user.current_tenant_id:
            abort(400, message="You can't uninstall an app owned by the current tenant")

        db.session.delete(installed_app)
        db.session.commit()

        return {'result': 'success', 'message': 'App uninstalled successfully'}

    @login_required
    def patch(self, installed_app_id):
        """Update the pinned flag of the installed app.

        ``is_pinned`` is optional; when it is omitted nothing is changed. Responds
        404 when the installed app is not found in the current tenant.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('is_pinned', type=inputs.boolean)
        args = parser.parse_args()

        # Use the logged-in user's tenant, consistent with delete().
        installed_app = InstalledApp.query.filter(and_(
            InstalledApp.id == str(installed_app_id),
            InstalledApp.tenant_id == current_user.current_tenant_id
        )).first()

        if installed_app is None:
            abort(404, message='Installed app not found')

        # reqparse stores omitted arguments as None, so a membership test is always
        # true; compare against None to avoid overwriting the flag with None.
        if args['is_pinned'] is not None:
            installed_app.is_pinned = args['is_pinned']
            db.session.commit()

        return {'result': 'success', 'message': 'App info updated successfully'}
|
||||
|
||||
|
||||
class RecommendedAppsResource(Resource):
    """List the recommended apps shown on the explore page."""

    @login_required
    @marshal_with(recommended_app_list_fields)
    def get(self):
        """Return all listed recommended apps plus the set of their categories.

        Each entry carries a description chosen by the user's interface language
        (falling back to ``'en'``), whether the current tenant has already
        installed the app, and whether the current user may edit it.
        """
        recommended_apps = db.session.query(RecommendedApp).filter(
            RecommendedApp.is_listed == True
        ).all()

        current_user.role = TenantService.get_user_role(current_user, current_user.current_tenant)

        # Values that do not depend on the individual app are computed once.
        editable = current_user.role in ['owner', 'admin']
        # An unset interface language falls back to English, like the description lookup below.
        language_prefix = (current_user.interface_language or 'en').split('-')[0]

        # Load the tenant's installed app ids in one query instead of one query per app.
        installed_app_ids = {
            app_id for (app_id,) in db.session.query(InstalledApp.app_id).filter(
                InstalledApp.tenant_id == current_user.current_tenant_id
            ).all()
        }

        categories = set()
        recommended_apps_result = []
        for recommended_app in recommended_apps:
            desc = None
            if recommended_app.description:
                if language_prefix in recommended_app.description:
                    desc = recommended_app.description[language_prefix]
                elif 'en' in recommended_app.description:
                    desc = recommended_app.description['en']

            recommended_app_result = {
                'id': recommended_app.id,
                'app': recommended_app.app,
                'app_id': recommended_app.app_id,
                'description': desc,
                'copyright': recommended_app.copyright,
                'privacy_policy': recommended_app.privacy_policy,
                'category': recommended_app.category,
                'position': recommended_app.position,
                'is_listed': recommended_app.is_listed,
                'install_count': recommended_app.install_count,
                'installed': recommended_app.app_id in installed_app_ids,
                'editable': editable,
            }
            recommended_apps_result.append(recommended_app_result)

            categories.add(recommended_app.category)  # add category to categories

        return {'recommended_apps': recommended_apps_result, 'categories': list(categories)}
|
||||
|
||||
|
||||
# Route registrations for the installed-apps and explore endpoints.
# `installed_app_id` must be a UUID.
api.add_resource(InstalledAppsListResource, '/installed-apps')
api.add_resource(InstalledAppResource, '/installed-apps/<uuid:installed_app_id>')
api.add_resource(RecommendedAppsResource, '/explore/apps')
|
||||
361
api/controllers/console/app/message.py
Normal file
361
api/controllers/console/app/message.py
Normal file
@@ -0,0 +1,361 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import Union, Generator
|
||||
|
||||
from flask import Response, stream_with_context
|
||||
from flask_login import current_user, login_required
|
||||
from flask_restful import Resource, reqparse, marshal_with, fields
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.app import _get_app
|
||||
from controllers.console.app.error import CompletionRequestError, ProviderNotInitializeError, \
|
||||
AppMoreLikeThisDisabledError, ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.llm.error import LLMRateLimitError, LLMBadRequestError, LLMAuthorizationError, LLMAPIConnectionError, \
|
||||
ProviderTokenNotInitError, LLMAPIUnavailableError, QuotaExceededError, ModelCurrentlyNotSupportError
|
||||
from libs.helper import uuid_value, TimestampField
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from extensions.ext_database import db
|
||||
from models.model import MessageAnnotation, Conversation, Message, MessageFeedback
|
||||
from services.completion_service import CompletionService
|
||||
from services.errors.app import MoreLikeThisDisabledError
|
||||
from services.errors.conversation import ConversationNotExistsError
|
||||
from services.errors.message import MessageNotExistsError
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
class ChatMessageApi(Resource):
    """Console endpoint listing the messages of one conversation of a chat app.

    Messages are paged backwards in time: a request returns at most ``limit``
    messages older than the message identified by ``first_id`` (or the most
    recent ones when ``first_id`` is absent), ordered oldest first.
    """

    # Account summary embedded in feedback and annotation payloads.
    account_fields = {
        'id': fields.String,
        'name': fields.String,
        'email': fields.String
    }

    # One feedback entry attached to a message.
    feedback_fields = {
        'rating': fields.String,
        'content': fields.String,
        'from_source': fields.String,
        'from_end_user_id': fields.String,
        'from_account': fields.Nested(account_fields, allow_null=True),
    }

    # The annotation attached to a message, if any.
    annotation_fields = {
        'content': fields.String,
        'account': fields.Nested(account_fields, allow_null=True),
        'created_at': TimestampField
    }

    # Full representation of a single message.
    message_detail_fields = {
        'id': fields.String,
        'conversation_id': fields.String,
        'inputs': fields.Raw,
        'query': fields.String,
        'message': fields.Raw,
        'message_tokens': fields.Integer,
        'answer': fields.String,
        'answer_tokens': fields.Integer,
        'provider_response_latency': fields.Integer,
        'from_source': fields.String,
        'from_end_user_id': fields.String,
        'from_account_id': fields.String,
        'feedbacks': fields.List(fields.Nested(feedback_fields)),
        'annotation': fields.Nested(annotation_fields, allow_null=True),
        'created_at': TimestampField
    }

    # Envelope returned by ``get``: one page of messages plus paging hints.
    message_infinite_scroll_pagination_fields = {
        'limit': fields.Integer,
        'has_more': fields.Boolean,
        'data': fields.List(fields.Nested(message_detail_fields))
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(message_infinite_scroll_pagination_fields)
    def get(self, app_id):
        """Return one page of a conversation's messages, oldest first.

        Query-string arguments:
            conversation_id: required UUID of the conversation.
            first_id: optional UUID of the oldest message the client already
                has; only messages created before it are returned.
            limit: page size between 1 and 100 (default 20).

        Raises:
            NotFound: when the conversation does not belong to the app, or
                ``first_id`` does not identify a message of the conversation.
        """
        chat_app = _get_app(str(app_id), 'chat')

        query_parser = reqparse.RequestParser()
        query_parser.add_argument('conversation_id', required=True, type=uuid_value, location='args')
        query_parser.add_argument('first_id', type=uuid_value, location='args')
        query_parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
        params = query_parser.parse_args()
        page_size = params['limit']

        conversation = db.session.query(Conversation).filter(
            Conversation.id == params['conversation_id'],
            Conversation.app_id == chat_app.id
        ).first()
        if not conversation:
            raise NotFound("Conversation Not Exists.")

        # Build the filter for this page: everything in the conversation, or
        # only what precedes the anchor message when one was supplied.
        if params['first_id']:
            anchor = db.session.query(Message).filter(
                Message.conversation_id == conversation.id,
                Message.id == params['first_id']
            ).first()
            if not anchor:
                raise NotFound("First message not found")

            criteria = [
                Message.conversation_id == conversation.id,
                Message.created_at < anchor.created_at,
                Message.id != anchor.id
            ]
        else:
            criteria = [Message.conversation_id == conversation.id]

        newest_first = db.session.query(Message).filter(*criteria) \
            .order_by(Message.created_at.desc()).limit(page_size).all()

        # A full page may or may not be the last one: check whether anything
        # older than the oldest message on this page still exists.
        has_more = False
        if len(newest_first) == page_size:
            oldest_on_page = newest_first[-1]
            older_count = db.session.query(Message).filter(
                Message.conversation_id == conversation.id,
                Message.created_at < oldest_on_page.created_at,
                Message.id != oldest_on_page.id
            ).count()
            has_more = older_count > 0

        # The query ran newest-first; the response is chronological.
        return InfiniteScrollPagination(
            data=newest_first[::-1],
            limit=page_size,
            has_more=has_more
        )
|
||||
|
||||
|
||||
class MessageFeedbackApi(Resource):
    """Console endpoint that sets, changes or clears the admin rating of a message."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id):
        """Create, update or delete the admin feedback of a message.

        JSON body:
            message_id: required UUID of the message.
            rating: ``'like'``, ``'dislike'`` or null. A null rating deletes
                the existing admin feedback.

        Raises:
            NotFound: when the message does not belong to the app.
            ValueError: when no rating is given and there is no feedback to
                delete.
        """
        target_app = _get_app(str(app_id))

        body_parser = reqparse.RequestParser()
        body_parser.add_argument('message_id', required=True, type=uuid_value, location='json')
        body_parser.add_argument('rating', type=str, choices=['like', 'dislike', None], location='json')
        payload = body_parser.parse_args()

        rating = payload['rating']

        message = db.session.query(Message).filter(
            Message.id == str(payload['message_id']),
            Message.app_id == target_app.id
        ).first()
        if not message:
            raise NotFound("Message Not Exists.")

        existing = message.admin_feedback

        if existing:
            # Feedback already recorded: overwrite it, or drop it when the
            # rating was cleared.
            if rating:
                existing.rating = rating
            else:
                db.session.delete(existing)
        elif rating:
            db.session.add(MessageFeedback(
                app_id=target_app.id,
                conversation_id=message.conversation_id,
                message_id=message.id,
                rating=rating,
                from_source='admin',
                from_account_id=current_user.id
            ))
        else:
            raise ValueError('rating cannot be None when feedback not exists')

        db.session.commit()

        return {'result': 'success'}
|
||||
|
||||
|
||||
class MessageAnnotationApi(Resource):
    """Console endpoint that creates or overwrites the annotation of a message."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id):
        """Attach annotation text to a message, replacing any existing text.

        JSON body:
            message_id: required UUID of the message.
            content: the annotation text.

        Raises:
            NotFound: when the message does not belong to the app.
        """
        target_app = _get_app(str(app_id))

        body_parser = reqparse.RequestParser()
        body_parser.add_argument('message_id', required=True, type=uuid_value, location='json')
        body_parser.add_argument('content', type=str, location='json')
        payload = body_parser.parse_args()

        message = db.session.query(Message).filter(
            Message.id == str(payload['message_id']),
            Message.app_id == target_app.id
        ).first()
        if not message:
            raise NotFound("Message Not Exists.")

        current_annotation = message.annotation

        if not current_annotation:
            # First annotation for this message, authored by the current account.
            db.session.add(MessageAnnotation(
                app_id=target_app.id,
                conversation_id=message.conversation_id,
                message_id=message.id,
                content=payload['content'],
                account_id=current_user.id
            ))
        else:
            current_annotation.content = payload['content']

        db.session.commit()

        return {'result': 'success'}
|
||||
|
||||
|
||||
class MessageAnnotationCountApi(Resource):
    """Console endpoint reporting how many message annotations an app has."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, app_id):
        """Return ``{'count': n}`` with the number of annotations stored for the app."""
        target_app = _get_app(str(app_id))

        annotation_total = db.session.query(MessageAnnotation) \
            .filter(MessageAnnotation.app_id == target_app.id) \
            .count()

        return {'count': annotation_total}
|
||||
|
||||
|
||||
class MessageMoreLikeThisApi(Resource):
    """Console endpoint generating another result similar to a completion message."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, app_id, message_id):
        """Generate a "more like this" result for a message of a completion app.

        Query-string arguments:
            response_mode: required, ``'blocking'`` or ``'streaming'``.

        Service and LLM errors are translated into the console API's own
        error types; ``ValueError`` is re-raised unchanged and any other
        exception is logged and reported as an internal server error.
        """
        app_id = str(app_id)
        message_id = str(message_id)

        query_parser = reqparse.RequestParser()
        query_parser.add_argument('response_mode', type=str, required=True,
                                  choices=['blocking', 'streaming'], location='args')
        params = query_parser.parse_args()

        use_streaming = params['response_mode'] == 'streaming'

        completion_app = _get_app(app_id, 'completion')

        try:
            result = CompletionService.generate_more_like_this(
                completion_app, current_user, message_id, use_streaming)
            return compact_response(result)
        except MessageNotExistsError:
            raise NotFound("Message Not Exists.")
        except MoreLikeThisDisabledError:
            raise AppMoreLikeThisDisabledError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError as value_error:
            # Let ValueError through so it is not masked as a 500 below.
            raise value_error
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
def compact_response(response: Union[dict, Generator]) -> Response:
    """Wrap a completion result in a Flask ``Response``.

    Args:
        response: either a dict (blocking mode) or a generator of
            already-formatted event chunks (streaming mode).

    Returns:
        A JSON response for a dict, otherwise a ``text/event-stream``
        response that relays the generator's chunks. Both use status 200.

    Errors raised while the stream is being consumed cannot change the HTTP
    status any more, so they are converted to the API's error payload and
    emitted as a final ``data:`` event instead.
    """
    if isinstance(response, dict):
        return Response(response=json.dumps(response), status=200, mimetype='application/json')

    def error_event(error: Exception) -> str:
        # Render an exception as one event carrying the API's JSON error body.
        return "data: " + json.dumps(api.handle_error(error).get_json()) + "\n\n"

    def generate() -> Generator:
        try:
            for chunk in response:
                yield chunk
        except MessageNotExistsError:
            yield error_event(NotFound("Message Not Exists."))
        except MoreLikeThisDisabledError:
            yield error_event(AppMoreLikeThisDisabledError())
        except ProviderTokenNotInitError:
            yield error_event(ProviderNotInitializeError())
        except QuotaExceededError:
            yield error_event(ProviderQuotaExceededError())
        except ModelCurrentlyNotSupportError:
            yield error_event(ProviderModelCurrentlyNotSupportError())
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            yield error_event(CompletionRequestError(str(e)))
        except ValueError as e:
            yield error_event(e)
        except Exception:
            logging.exception("internal server error.")
            yield error_event(InternalServerError())

    return Response(stream_with_context(generate()), status=200,
                    mimetype='text/event-stream')
|
||||
|
||||
|
||||
class MessageSuggestedQuestionApi(Resource):
    """Console endpoint returning suggested follow-up questions for a chat message."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, app_id, message_id):
        """Return ``{'data': questions}`` for the given message of a chat app.

        The service is called with ``check_enabled=False``. Service and LLM
        errors are translated into the console API's own error types; any
        other exception is logged and reported as an internal server error.
        """
        chat_app = _get_app(str(app_id), 'chat')

        try:
            suggested = MessageService.get_suggested_questions_after_answer(
                app_model=chat_app,
                user=current_user,
                message_id=str(message_id),
                check_enabled=False
            )
        except MessageNotExistsError:
            raise NotFound("Message not found")
        except ConversationNotExistsError:
            raise NotFound("Conversation not found")
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()

        return {'data': suggested}
|
||||
|
||||
|
||||
# Route registrations for the message resources defined in this module.
# ChatMessageApi is given the explicit endpoint name 'chat_messages'.
api.add_resource(MessageMoreLikeThisApi, '/apps/<uuid:app_id>/completion-messages/<uuid:message_id>/more-like-this')
api.add_resource(MessageSuggestedQuestionApi, '/apps/<uuid:app_id>/chat-messages/<uuid:message_id>/suggested-questions')
api.add_resource(ChatMessageApi, '/apps/<uuid:app_id>/chat-messages', endpoint='chat_messages')
api.add_resource(MessageFeedbackApi, '/apps/<uuid:app_id>/feedbacks')
api.add_resource(MessageAnnotationApi, '/apps/<uuid:app_id>/annotations')
api.add_resource(MessageAnnotationCountApi, '/apps/<uuid:app_id>/annotations/count')
|
||||
65
api/controllers/console/app/model_config.py
Normal file
65
api/controllers/console/app/model_config.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import json
|
||||
|
||||
from flask import request
|
||||
from flask_restful import Resource
|
||||
from flask_login import login_required, current_user
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.app import _get_app
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from events.app_event import app_model_config_was_updated
|
||||
from extensions.ext_database import db
|
||||
from models.model import AppModelConfig
|
||||
from services.app_model_config_service import AppModelConfigService
|
||||
|
||||
|
||||
class ModelConfigResource(Resource):
    """Console endpoint that replaces an app's model configuration."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, app_id):
        """Validate the posted configuration and make it the app's current one.

        The request JSON is validated for the app's mode, stored as a new
        ``AppModelConfig`` row, and the app is pointed at that row. After the
        commit, ``app_model_config_was_updated`` is sent with the app as
        sender.
        """
        app_model = _get_app(str(app_id))

        validated = AppModelConfigService.validate_configuration(
            account=current_user,
            config=request.json,
            mode=app_model.mode
        )

        # Structured settings are stored as JSON text; plain strings as-is.
        config_row = AppModelConfig(
            app_id=app_model.id,
            provider="",
            model_id="",
            configs={},
            opening_statement=validated['opening_statement'],
            suggested_questions=json.dumps(validated['suggested_questions']),
            suggested_questions_after_answer=json.dumps(validated['suggested_questions_after_answer']),
            more_like_this=json.dumps(validated['more_like_this']),
            model=json.dumps(validated['model']),
            user_input_form=json.dumps(validated['user_input_form']),
            pre_prompt=validated['pre_prompt'],
            agent_mode=json.dumps(validated['agent_mode']),
        )

        db.session.add(config_row)
        # Flush before reading config_row.id so the app can reference the new row.
        db.session.flush()

        app_model.app_model_config_id = config_row.id
        db.session.commit()

        app_model_config_was_updated.send(app_model, app_model_config=config_row)

        return {'result': 'success'}
|
||||
|
||||
|
||||
# Route registration for the model-config resource defined in this module.
api.add_resource(ModelConfigResource, '/apps/<uuid:app_id>/model-config')
|
||||
114
api/controllers/console/app/site.py
Normal file
114
api/controllers/console/app/site.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal_with
|
||||
from werkzeug.exceptions import NotFound, Forbidden
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.app import _get_app
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import supported_language
|
||||
from extensions.ext_database import db
|
||||
from models.model import Site
|
||||
|
||||
# Marshalling schema applied (via marshal_with) to the Site objects returned
# by the resources in this module.
app_site_fields = {
    'app_id': fields.String,
    # 'access_token' is read from the same attribute as 'code'.
    'access_token': fields.String(attribute='code'),
    'code': fields.String,
    'title': fields.String,
    'icon': fields.String,
    'icon_background': fields.String,
    'description': fields.String,
    'default_language': fields.String,
    'customize_domain': fields.String,
    'copyright': fields.String,
    'privacy_policy': fields.String,
    'customize_token_strategy': fields.String,
    'prompt_public': fields.Boolean
}
|
||||
|
||||
|
||||
def parse_app_site_args():
    """Parse the optional site settings from the request's JSON body.

    Every argument is optional. ``default_language`` is checked with
    ``supported_language`` and ``customize_token_strategy`` must be one of
    ``'must'``, ``'allow'`` or ``'not_allow'``.

    Returns:
        The parsed arguments as returned by ``RequestParser.parse_args``.
    """
    # (argument name, extra keyword arguments), in registration order.
    argument_specs = (
        ('title', {'type': str}),
        ('icon', {'type': str}),
        ('icon_background', {'type': str}),
        ('description', {'type': str}),
        ('default_language', {'type': supported_language}),
        ('customize_domain', {'type': str}),
        ('copyright', {'type': str}),
        ('privacy_policy', {'type': str}),
        ('customize_token_strategy', {'type': str, 'choices': ['must', 'allow', 'not_allow']}),
        ('prompt_public', {'type': bool}),
    )

    site_parser = reqparse.RequestParser()
    for arg_name, extra in argument_specs:
        site_parser.add_argument(arg_name, required=False, location='json', **extra)

    return site_parser.parse_args()
|
||||
|
||||
|
||||
class AppSite(Resource):
    """Console endpoint that updates the site settings of an app."""

    # Site attributes a client may change through this endpoint.
    _EDITABLE_ATTRIBUTES = (
        'title',
        'icon',
        'icon_background',
        'description',
        'default_language',
        'customize_domain',
        'copyright',
        'privacy_policy',
        'customize_token_strategy',
        'prompt_public',
    )

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_site_fields)
    def post(self, app_id):
        """Apply the provided settings to the app's site and return the site.

        Arguments that are missing or null in the request leave the
        corresponding attribute untouched.

        Raises:
            Forbidden: when the current user's role in the current tenant is
                neither ``admin`` nor ``owner``.
        """
        submitted = parse_app_site_args()

        app_model = _get_app(str(app_id))

        # Only tenant admins and owners may edit site settings.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        site = db.session.query(Site) \
            .filter(Site.app_id == app_model.id) \
            .one_or_404()

        for attribute in self._EDITABLE_ATTRIBUTES:
            new_value = submitted.get(attribute)
            if new_value is None:
                continue
            setattr(site, attribute, new_value)

        db.session.commit()

        return site
|
||||
|
||||
|
||||
class AppSiteAccessTokenReset(Resource):
    """Console endpoint that regenerates the access code of an app's site."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(app_site_fields)
    def post(self, app_id):
        """Replace the site's ``code`` with a freshly generated one and return the site.

        Raises:
            Forbidden: when the current user's role in the current tenant is
                neither ``admin`` nor ``owner``.
            NotFound: when the app has no site.
        """
        app_model = _get_app(str(app_id))

        # Only tenant admins and owners may reset the access code.
        role = current_user.current_tenant.current_role
        if role not in ['admin', 'owner']:
            raise Forbidden()

        site = db.session.query(Site).filter(Site.app_id == app_model.id).first()
        if not site:
            raise NotFound

        site.code = Site.generate_code(16)
        db.session.commit()

        return site
|
||||
|
||||
|
||||
# Route registrations for the site resources defined in this module.
api.add_resource(AppSite, '/apps/<uuid:app_id>/site')
api.add_resource(AppSiteAccessTokenReset, '/apps/<uuid:app_id>/site/access-token-reset')
|
||||
202
api/controllers/console/app/statistic.py
Normal file
202
api/controllers/console/app/statistic.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from datetime import datetime
|
||||
|
||||
import pytz
|
||||
from flask import jsonify
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.app import _get_app
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import datetime_string
|
||||
from extensions.ext_database import db
|
||||
|
||||
|
||||
class DailyConversationStatistic(Resource):
    """Console endpoint reporting the number of distinct conversations per day."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, app_id):
        """Return daily distinct-conversation counts for the app's messages.

        Optional query-string arguments ``start`` and ``end`` use the format
        ``YYYY-MM-DD HH:MM`` and are interpreted in the current account's
        timezone; ``start`` is inclusive and ``end`` exclusive. Days are also
        bucketed in the account's timezone.
        """
        account = current_user
        app_model = _get_app(str(app_id))

        parser = reqparse.RequestParser()
        parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        args = parser.parse_args()

        sql_query = '''
        SELECT date(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, count(distinct messages.conversation_id) AS conversation_count
            FROM messages where app_id = :app_id
        '''
        arg_dict = {'tz': account.timezone, 'app_id': app_model.id}

        account_tz = pytz.timezone(account.timezone)

        # Each supplied bound adds one condition and one bind parameter.
        range_bounds = (
            ('start', ' and created_at >= :start'),
            ('end', ' and created_at < :end'),
        )
        for bound_name, condition in range_bounds:
            raw_value = args[bound_name]
            if not raw_value:
                continue

            naive_bound = datetime.strptime(raw_value, '%Y-%m-%d %H:%M').replace(second=0)
            # Interpret the bound as account-local time, then compare in UTC.
            bound_utc = account_tz.localize(naive_bound).astimezone(pytz.utc)

            sql_query += condition
            arg_dict[bound_name] = bound_utc

        sql_query += ' GROUP BY date order by date'
        rows = db.session.execute(sql_query, arg_dict)

        daily_counts = [
            {
                'date': str(row.date),
                'conversation_count': row.conversation_count
            }
            for row in rows
        ]

        return jsonify({
            'data': daily_counts
        })
|
||||
|
||||
|
||||
class DailyTerminalsStatistic(Resource):
    """Console endpoint reporting the number of distinct end users per day."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, app_id):
        """Return daily counts of distinct ``from_end_user_id`` values for the app's messages.

        Optional query-string arguments ``start`` and ``end`` use the format
        ``YYYY-MM-DD HH:MM`` and are interpreted in the current account's
        timezone; ``start`` is inclusive and ``end`` exclusive. Days are also
        bucketed in the account's timezone.
        """
        account = current_user
        app_model = _get_app(str(app_id))

        parser = reqparse.RequestParser()
        parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        args = parser.parse_args()

        sql_query = '''
        SELECT date(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, count(distinct messages.from_end_user_id) AS terminal_count
            FROM messages where app_id = :app_id
        '''
        arg_dict = {'tz': account.timezone, 'app_id': app_model.id}

        account_tz = pytz.timezone(account.timezone)

        # Each supplied bound adds one condition and one bind parameter.
        range_bounds = (
            ('start', ' and created_at >= :start'),
            ('end', ' and created_at < :end'),
        )
        for bound_name, condition in range_bounds:
            raw_value = args[bound_name]
            if not raw_value:
                continue

            naive_bound = datetime.strptime(raw_value, '%Y-%m-%d %H:%M').replace(second=0)
            # Interpret the bound as account-local time, then compare in UTC.
            bound_utc = account_tz.localize(naive_bound).astimezone(pytz.utc)

            sql_query += condition
            arg_dict[bound_name] = bound_utc

        sql_query += ' GROUP BY date order by date'
        rows = db.session.execute(sql_query, arg_dict)

        daily_counts = [
            {
                'date': str(row.date),
                'terminal_count': row.terminal_count
            }
            for row in rows
        ]

        return jsonify({
            'data': daily_counts
        })
|
||||
|
||||
|
||||
class DailyTokenCostStatistic(Resource):
    """Console endpoint reporting token usage and total price per day."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, app_id):
        """Return daily token counts and summed prices for the app's messages.

        ``token_count`` is the sum of message tokens plus answer tokens. Every
        entry is labelled with the currency ``'USD'``.

        Optional query-string arguments ``start`` and ``end`` use the format
        ``YYYY-MM-DD HH:MM`` and are interpreted in the current account's
        timezone; ``start`` is inclusive and ``end`` exclusive. Days are also
        bucketed in the account's timezone.
        """
        account = current_user
        app_model = _get_app(str(app_id))

        parser = reqparse.RequestParser()
        parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
        args = parser.parse_args()

        sql_query = '''
        SELECT date(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
            (sum(messages.message_tokens) + sum(messages.answer_tokens)) as token_count,
            sum(total_price) as total_price
            FROM messages where app_id = :app_id
        '''
        arg_dict = {'tz': account.timezone, 'app_id': app_model.id}

        account_tz = pytz.timezone(account.timezone)

        # Each supplied bound adds one condition and one bind parameter.
        range_bounds = (
            ('start', ' and created_at >= :start'),
            ('end', ' and created_at < :end'),
        )
        for bound_name, condition in range_bounds:
            raw_value = args[bound_name]
            if not raw_value:
                continue

            naive_bound = datetime.strptime(raw_value, '%Y-%m-%d %H:%M').replace(second=0)
            # Interpret the bound as account-local time, then compare in UTC.
            bound_utc = account_tz.localize(naive_bound).astimezone(pytz.utc)

            sql_query += condition
            arg_dict[bound_name] = bound_utc

        sql_query += ' GROUP BY date order by date'
        rows = db.session.execute(sql_query, arg_dict)

        daily_costs = [
            {
                'date': str(row.date),
                'token_count': row.token_count,
                'total_price': row.total_price,
                'currency': 'USD'
            }
            for row in rows
        ]

        return jsonify({
            'data': daily_costs
        })
|
||||
|
||||
|
||||
# Route registrations for the statistics resources defined in this module.
# DailyTerminalsStatistic is exposed under the 'daily-end-users' path.
api.add_resource(DailyConversationStatistic, '/apps/<uuid:app_id>/statistics/daily-conversations')
api.add_resource(DailyTerminalsStatistic, '/apps/<uuid:app_id>/statistics/daily-end-users')
api.add_resource(DailyTokenCostStatistic, '/apps/<uuid:app_id>/statistics/token-costs')
|
||||
109
api/controllers/console/auth/login.py
Normal file
109
api/controllers/console/auth/login.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import flask
|
||||
import flask_login
|
||||
from flask import request, current_app
|
||||
from flask_restful import Resource, reqparse
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.error import AccountNotLinkTenantError
|
||||
from controllers.console.setup import setup_required
|
||||
from libs.helper import email
|
||||
from libs.password import valid_password
|
||||
from services.account_service import AccountService, TenantService
|
||||
|
||||
|
||||
class LoginApi(Resource):
    """Console endpoint that signs an account in with email and password."""

    @setup_required
    def post(self):
        """Authenticate the posted credentials and start a login session.

        JSON body:
            email: required, checked with the ``email`` validator.
            password: required, checked with ``valid_password``.
            remember_me: optional boolean, defaults to False.

        Returns:
            ``{'result': 'success'}``, or an ``unauthorized`` payload with
            status 401 when authentication fails.

        Raises:
            AccountNotLinkTenantError: when switching to the account's tenant
                fails for any reason.
        """
        login_parser = reqparse.RequestParser()
        login_parser.add_argument('email', type=email, required=True, location='json')
        login_parser.add_argument('password', type=valid_password, required=True, location='json')
        login_parser.add_argument('remember_me', type=bool, required=False, default=False, location='json')
        credentials = login_parser.parse_args()

        # todo: Verify the recaptcha

        try:
            account = AccountService.authenticate(credentials['email'], credentials['password'])
        except services.errors.account.AccountLoginError:
            unauthorized = {'code': 'unauthorized', 'message': 'Invalid email or password'}
            return unauthorized, 401

        try:
            TenantService.switch_tenant(account)
        except Exception:
            raise AccountNotLinkTenantError("Account not link tenant")

        flask_login.login_user(account, remember=credentials['remember_me'])
        AccountService.update_last_login(account, request)

        # todo: return the user info

        return {'result': 'success'}
|
||||
|
||||
|
||||
class LogoutApi(Resource):
    """Console endpoint that ends the current login session."""

    @setup_required
    def get(self):
        """Drop the session's ``workspace_id`` (if set) and log the user out."""
        session_data = flask.session
        session_data.pop('workspace_id', None)

        flask_login.logout_user()

        return {'result': 'success'}
|
||||
|
||||
|
||||
class ResetPasswordApi(Resource):
    """Unfinished endpoint for resetting an account password by email.

    The account lookup, password regeneration and mail delivery are still
    commented out: the handler only validates the email, reads the Mailchimp
    API key from the app config and builds the message it would send.
    """

    @setup_required
    def get(self):
        """Build (but do not yet send) the password-reset email.

        JSON body:
            email: required, checked with the ``email`` validator.

        Returns:
            ``{'result': 'success'}``.

        Raises:
            KeyError: when ``MAILCHIMP_TRANSACTIONAL_API_KEY`` is missing from
                the app config.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('email', type=email, required=True, location='json')
        args = parser.parse_args()

        # import mailchimp_transactional as MailchimpTransactional
        # from mailchimp_transactional.api_client import ApiClientError

        # Placeholder until the real account lookup below is enabled.
        account = {'email': args['email']}
        # account = AccountService.get_by_email(args['email'])
        # if account is None:
        #     raise ValueError('Email not found')
        # new_password = AccountService.generate_password()
        # AccountService.update_password(account, new_password)

        # todo: Send email
        MAILCHIMP_API_KEY = current_app.config['MAILCHIMP_TRANSACTIONAL_API_KEY']
        # mailchimp = MailchimpTransactional(MAILCHIMP_API_KEY)

        message = {
            'from_email': 'noreply@example.com',
            # `account` is a plain dict here, so it must be indexed; attribute
            # access (`account.email`) raised AttributeError on every request.
            'to': [{'email': account['email']}],
            'subject': 'Reset your Dify password',
            # NOTE(review): `{new_password}` is never substituted; it has to
            # be formatted in once password generation is enabled.
            'html': """
                <p>Dear User,</p>
                <p>The Dify team has generated a new password for you, details as follows:</p>
                <p><strong>{new_password}</strong></p>
                <p>Please change your password to log in as soon as possible.</p>
                <p>Regards,</p>
                <p>The Dify Team</p>
            """
        }

        # response = mailchimp.messages.send({
        #     'message': message,
        #     # required for transactional email
        #     ' settings': {
        #         'sandbox_mode': current_app.config['MAILCHIMP_SANDBOX_MODE'],
        #     },
        # })

        # Check if MSG was sent
        # if response.status_code != 200:
        #     # handle error
        #     pass

        return {'result': 'success'}
|
||||
|
||||
|
||||
# Route registrations for the login and logout resources.
# NOTE(review): ResetPasswordApi is defined in this module but not registered
# here - confirm that this is intentional.
api.add_resource(LoginApi, '/login')
api.add_resource(LogoutApi, '/logout')
|
||||
126
api/controllers/console/auth/oauth.py
Normal file
126
api/controllers/console/auth/oauth.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
import flask_login
|
||||
import requests
|
||||
from flask import request, redirect, current_app, session
|
||||
from flask_restful import Resource
|
||||
|
||||
from libs.oauth import OAuthUserInfo, GitHubOAuth, GoogleOAuth
|
||||
from extensions.ext_database import db
|
||||
from models.account import Account, AccountStatus
|
||||
from services.account_service import AccountService, RegisterService
|
||||
from .. import api
|
||||
|
||||
|
||||
def get_oauth_providers():
    """Build an OAuth client for every supported login provider.

    Client credentials and the console base URL are read from the Flask
    application config on each call.

    Returns:
        dict: provider key ('github' or 'google') mapped to its OAuth client.
    """
    with current_app.app_context():
        config = current_app.config
        console_url = config.get('CONSOLE_URL')

        # Each provider redirects back to its own authorize endpoint.
        return {
            'github': GitHubOAuth(
                client_id=config.get('GITHUB_CLIENT_ID'),
                client_secret=config.get('GITHUB_CLIENT_SECRET'),
                redirect_uri=console_url + '/console/api/oauth/authorize/github',
            ),
            'google': GoogleOAuth(
                client_id=config.get('GOOGLE_CLIENT_ID'),
                client_secret=config.get('GOOGLE_CLIENT_SECRET'),
                redirect_uri=console_url + '/console/api/oauth/authorize/google',
            ),
        }
|
||||
|
||||
|
||||
class OAuthLogin(Resource):
    """Entry point that starts the OAuth login flow for a provider."""

    def get(self, provider: str):
        """Redirect the browser to the provider's authorization page.

        Args:
            provider: Provider key taken from the URL path.

        Returns:
            A redirect to the provider's authorization URL, or a JSON error
            body with HTTP 400 when the provider key is unknown.
        """
        OAUTH_PROVIDERS = get_oauth_providers()
        with current_app.app_context():
            oauth_provider = OAUTH_PROVIDERS.get(provider)

        # An unknown key yields None, so validate before touching the
        # provider object. The provider's attributes are never printed or
        # logged because it is constructed with the OAuth client secret.
        if not oauth_provider:
            return {'error': 'Invalid provider'}, 400

        auth_url = oauth_provider.get_authorization_url()
        return redirect(auth_url)
|
||||
|
||||
|
||||
class OAuthCallback(Resource):
    """Endpoint the OAuth provider redirects back to after authorization."""

    def get(self, provider: str):
        """Finish the OAuth flow and log the matching account in.

        Args:
            provider: Provider key taken from the URL path.

        Returns:
            A redirect to the console on success, or a JSON error body with
            HTTP 400 (unknown provider / failed token exchange) or 403
            (banned or closed account).
        """
        providers = get_oauth_providers()
        with current_app.app_context():
            oauth_provider = providers.get(provider)
        if not oauth_provider:
            return {'error': 'Invalid provider'}, 400

        # Exchange the authorization code for a token, then fetch the profile.
        authorization_code = request.args.get('code')
        try:
            access_token = oauth_provider.get_access_token(authorization_code)
            user_info = oauth_provider.get_user_info(access_token)
        except requests.exceptions.HTTPError as e:
            logging.exception(
                f"An error occurred during the OAuth process with {provider}: {e.response.text}")
            return {'error': 'OAuth process failed'}, 400

        account = _generate_account(provider, user_info)

        # Banned and closed accounts must not be able to sign in.
        rejected_statuses = (AccountStatus.BANNED.value, AccountStatus.CLOSED.value)
        if account.status in rejected_statuses:
            return {'error': 'Account is banned or closed.'}, 403

        # A pending account is activated by its first successful login.
        if account.status == AccountStatus.PENDING.value:
            account.status = AccountStatus.ACTIVE.value
            account.initialized_at = datetime.utcnow()
            db.session.commit()

        # Start from an empty session before logging the user in.
        session.clear()
        flask_login.login_user(account, remember=True)
        AccountService.update_last_login(account, request)

        return redirect(f'{current_app.config.get("CONSOLE_URL")}?oauth_login=success')
|
||||
|
||||
|
||||
def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) -> Optional[Account]:
    """Find the account for an OAuth identity.

    The provider/open-id pair is tried first; when that finds nothing the
    lookup falls back to the e-mail address in ``user_info``.

    Returns:
        The matching account, or None when neither lookup finds one.
    """
    return (
        Account.get_by_openid(provider, user_info.id)
        or Account.query.filter_by(email=user_info.email).first()
    )
|
||||
|
||||
|
||||
def _generate_account(provider: str, user_info: OAuthUserInfo):
    """Return the account for an OAuth identity, registering it if needed.

    Args:
        provider: Provider key the user authenticated with.
        user_info: Profile returned by the provider.

    Returns:
        The existing or newly registered account, linked to the provider.
    """
    account = _get_account_by_openid_or_email(provider, user_info)

    if not account:
        # First login with this identity: register a password-less account.
        account = RegisterService.register(
            email=user_info.email,
            name=user_info.name or 'Dify',
            password=None,
            open_id=user_info.id,
            provider=provider
        )

        # Pick the interface language from the browser's Accept-Language.
        prefers_chinese = request.accept_languages.best_match(['zh', 'en']) == 'zh'
        account.interface_language = 'zh-Hans' if prefers_chinese else 'en-US'
        db.session.commit()

    # Linking runs for both new and pre-existing accounts.
    AccountService.link_account_integrate(provider, user_info.id, account)

    return account
|
||||
|
||||
|
||||
# Register the OAuth endpoints; <provider> is the provider key in the URL.
api.add_resource(OAuthLogin, '/oauth/login/<provider>')
api.add_resource(OAuthCallback, '/oauth/authorize/<provider>')
|
||||
281
api/controllers/console/datasets/datasets.py
Normal file
281
api/controllers/console/datasets/datasets.py
Normal file
@@ -0,0 +1,281 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask import request
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal, marshal_with
|
||||
from werkzeug.exceptions import NotFound, Forbidden
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.datasets.error import DatasetNameDuplicateError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.indexing_runner import IndexingRunner
|
||||
from libs.helper import TimestampField
|
||||
from extensions.ext_database import db
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DatasetService
|
||||
|
||||
# Marshalling schema for a dataset as returned by the dataset endpoints.
dataset_detail_fields = {
    'id': fields.String,
    'name': fields.String,
    'description': fields.String,
    'provider': fields.String,
    'permission': fields.String,
    'data_source_type': fields.String,
    'indexing_technique': fields.String,
    'app_count': fields.Integer,
    'document_count': fields.Integer,
    'word_count': fields.Integer,
    'created_by': fields.String,
    'created_at': TimestampField,
    'updated_by': fields.String,
    'updated_at': TimestampField,
}

# Marshalling schema for one entry of a dataset's query history.
dataset_query_detail_fields = {
    'id': fields.String,
    'content': fields.String,
    'source': fields.String,
    'source_app_id': fields.String,
    'created_by_role': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
}
|
||||
|
||||
|
||||
def _validate_name(name):
|
||||
if not name or len(name) < 1 or len(name) > 40:
|
||||
raise ValueError('Name must be between 1 to 40 characters.')
|
||||
return name
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if len(description) > 200:
|
||||
raise ValueError('Description cannot exceed 200 characters.')
|
||||
return description
|
||||
|
||||
|
||||
class DatasetListApi(Resource):
    """Collection endpoint: list datasets and create an empty dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return one page of datasets for the current tenant.

        When one or more ``ids`` query parameters are present the datasets
        are fetched by id; otherwise a page filtered by ``provider`` is read.
        """
        query_args = request.args
        page = query_args.get('page', default=1, type=int)
        limit = query_args.get('limit', default=20, type=int)
        ids = query_args.getlist('ids')
        provider = query_args.get('provider', default="vendor")
        tenant_id = current_user.current_tenant_id

        if ids:
            datasets, total = DatasetService.get_datasets_by_ids(ids, tenant_id)
        else:
            datasets, total = DatasetService.get_datasets(
                page, limit, provider, tenant_id, current_user)

        return {
            'data': marshal(datasets, dataset_detail_fields),
            # A completely filled page is treated as "more may follow".
            'has_more': len(datasets) == limit,
            'limit': limit,
            'total': total,
            'page': page
        }, 200

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create an empty dataset in the current tenant.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            DatasetNameDuplicateError: the service reports a duplicate name.
        """
        parser = reqparse.RequestParser()
        parser.add_argument(
            'name',
            nullable=False,
            required=True,
            help='type is required. Name must be between 1 to 40 characters.',
            type=_validate_name,
        )
        parser.add_argument(
            'indexing_technique',
            type=str,
            location='json',
            choices=('high_quality', 'economy'),
            help='Invalid indexing technique.',
        )
        args = parser.parse_args()

        # Only tenant admins and owners may create datasets.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        try:
            dataset = DatasetService.create_empty_dataset(
                tenant_id=current_user.current_tenant_id,
                name=args['name'],
                indexing_technique=args['indexing_technique'],
                account=current_user
            )
        except services.errors.dataset.DatasetNameDuplicateError:
            # Translate the service error into the controller-level error.
            raise DatasetNameDuplicateError()

        return marshal(dataset, dataset_detail_fields), 201
|
||||
|
||||
|
||||
class DatasetApi(Resource):
    """Item endpoint: read, update and delete a single dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id):
        """Return the dataset identified by ``dataset_id``.

        Raises:
            NotFound: no dataset with that id exists.
            Forbidden: the permission check rejects the current user.
        """
        dataset = DatasetService.get_dataset(str(dataset_id))
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        return marshal(dataset, dataset_detail_fields), 200

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id):
        """Update the dataset's name, description, indexing technique or permission.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            NotFound: the service returns no dataset for the id.
        """
        dataset_id_str = str(dataset_id)

        parser = reqparse.RequestParser()
        parser.add_argument(
            'name',
            nullable=False,
            help='type is required. Name must be between 1 to 40 characters.',
            type=_validate_name,
        )
        parser.add_argument(
            'description',
            location='json',
            store_missing=False,
            type=_validate_description_length,
        )
        parser.add_argument(
            'indexing_technique',
            type=str,
            location='json',
            choices=('high_quality', 'economy'),
            help='Invalid indexing technique.',
        )
        parser.add_argument(
            'permission',
            type=str,
            location='json',
            choices=('only_me', 'all_team_members'),
            help='Invalid permission.',
        )
        args = parser.parse_args()

        # Only tenant admins and owners may modify datasets.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        dataset = DatasetService.update_dataset(dataset_id_str, args, current_user)
        if dataset is None:
            raise NotFound("Dataset not found.")

        return marshal(dataset, dataset_detail_fields), 200

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, dataset_id):
        """Delete the dataset identified by ``dataset_id``.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            NotFound: the service reports that nothing was deleted.
        """
        dataset_id_str = str(dataset_id)

        # Only tenant admins and owners may delete datasets.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        if not DatasetService.delete_dataset(dataset_id_str, current_user):
            raise NotFound("Dataset not found.")

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
class DatasetQueryApi(Resource):
    """Paginated query history of a dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id):
        """Return one page of queries recorded for the dataset.

        Raises:
            NotFound: no dataset with that id exists.
            Forbidden: the permission check rejects the current user.
        """
        dataset = DatasetService.get_dataset(str(dataset_id))
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        page = request.args.get('page', default=1, type=int)
        limit = request.args.get('limit', default=20, type=int)

        dataset_queries, total = DatasetService.get_dataset_queries(
            dataset_id=dataset.id,
            page=page,
            per_page=limit
        )

        return {
            'data': marshal(dataset_queries, dataset_query_detail_fields),
            # A completely filled page is treated as "more may follow".
            'has_more': len(dataset_queries) == limit,
            'limit': limit,
            'total': total,
            'page': page
        }, 200
|
||||
|
||||
|
||||
class DatasetIndexingEstimateApi(Resource):
    """Indexing estimate for an uploaded file that is not yet a document."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Estimate indexing of the uploaded file named in the JSON body.

        The body must carry ``file_id`` and ``process_rule``; the file is
        looked up within the current tenant only.

        Raises:
            NotFound: the file does not exist in the current tenant.
        """
        payload = request.get_json()
        upload_file = db.session.query(UploadFile).filter(
            UploadFile.tenant_id == current_user.current_tenant_id,
            UploadFile.id == payload["file_id"]
        ).first()

        if upload_file is None:
            raise NotFound("File not found.")

        estimate = IndexingRunner().indexing_estimate(upload_file, payload['process_rule'])
        return estimate, 200
|
||||
|
||||
|
||||
class DatasetRelatedAppListApi(Resource):
    """Lists the apps that are joined to a dataset."""

    # Fields exposed for each related app.
    app_detail_kernel_fields = {
        'id': fields.String,
        'name': fields.String,
        'mode': fields.String,
        'icon': fields.String,
        'icon_background': fields.String,
    }

    # Response envelope used by marshal_with on get().
    related_app_list = {
        'data': fields.List(fields.Nested(app_detail_kernel_fields)),
        'total': fields.Integer,
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(related_app_list)
    def get(self, dataset_id):
        """Return the apps related to the dataset together with their count.

        Raises:
            NotFound: no dataset with that id exists.
            Forbidden: the permission check rejects the current user.
        """
        dataset = DatasetService.get_dataset(str(dataset_id))
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        joins = DatasetService.get_related_apps(dataset.id)

        # Read `.app` exactly once per join and drop joins without an app.
        joined_apps = (join.app for join in joins)
        related_apps = [app_model for app_model in joined_apps if app_model]

        return {
            'data': related_apps,
            'total': len(related_apps)
        }, 200
|
||||
|
||||
|
||||
# Register the dataset endpoints on the console API.
api.add_resource(DatasetListApi, '/datasets')
api.add_resource(DatasetApi, '/datasets/<uuid:dataset_id>')
api.add_resource(DatasetQueryApi, '/datasets/<uuid:dataset_id>/queries')
api.add_resource(DatasetIndexingEstimateApi, '/datasets/file-indexing-estimate')
api.add_resource(DatasetRelatedAppListApi, '/datasets/<uuid:dataset_id>/related-apps')
|
||||
682
api/controllers/console/datasets/datasets_document.py
Normal file
682
api/controllers/console/datasets/datasets_document.py
Normal file
@@ -0,0 +1,682 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import random
|
||||
from datetime import datetime
|
||||
|
||||
from flask import request
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, fields, marshal, marshal_with, reqparse
|
||||
from sqlalchemy import desc, asc
|
||||
from werkzeug.exceptions import NotFound, Forbidden
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.app.error import ProviderNotInitializeError
|
||||
from controllers.console.datasets.error import DocumentAlreadyFinishedError, InvalidActionError, DocumentIndexingError, \
|
||||
InvalidMetadataError, ArchivedDocumentImmutableError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.indexing_runner import IndexingRunner
|
||||
from core.llm.error import ProviderTokenNotInitError
|
||||
from extensions.ext_redis import redis_client
|
||||
from libs.helper import TimestampField
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import DatasetProcessRule, Dataset
|
||||
from models.dataset import Document, DocumentSegment
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DocumentService, DatasetService
|
||||
from tasks.add_document_to_index_task import add_document_to_index_task
|
||||
from tasks.remove_document_from_index_task import remove_document_from_index_task
|
||||
|
||||
# Marshalling schema for the dataset part of a dataset-init response.
dataset_fields = {
    'id': fields.String,
    'name': fields.String,
    'description': fields.String,
    'permission': fields.String,
    'data_source_type': fields.String,
    'indexing_technique': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
}

# Marshalling schema for a document in list and create responses.
document_fields = {
    'id': fields.String,
    'position': fields.Integer,
    'data_source_type': fields.String,
    # Exposed as `data_source_info` but read from `data_source_info_dict`.
    'data_source_info': fields.Raw(attribute='data_source_info_dict'),
    'dataset_process_rule_id': fields.String,
    'name': fields.String,
    'created_from': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
    'tokens': fields.Integer,
    'indexing_status': fields.String,
    'error': fields.String,
    'enabled': fields.Boolean,
    'disabled_at': TimestampField,
    'disabled_by': fields.String,
    'archived': fields.Boolean,
    'display_status': fields.String,
    'word_count': fields.Integer,
    'hit_count': fields.Integer,
}
|
||||
|
||||
|
||||
class DocumentResource(Resource):
    """Base resource for endpoints that operate on a single document."""

    def get_document(self, dataset_id: str, document_id: str) -> Document:
        """Load a document after checking access to its dataset.

        Raises:
            NotFound: the dataset or the document does not exist.
            Forbidden: the dataset permission check fails, or the document
                belongs to a different tenant than the current user's.
        """
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        found = DocumentService.get_document(dataset_id, document_id)
        if not found:
            raise NotFound('Document not found.')

        # Guard against documents owned by another tenant.
        if found.tenant_id != current_user.current_tenant_id:
            raise Forbidden('No permission.')

        return found
|
||||
|
||||
|
||||
class GetProcessRuleApi(Resource):
    """Returns the process rule to pre-fill when adding a document."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return the processing mode and rules to use.

        Without a ``document_id`` query parameter the default rules are
        returned. With one, the most recently created process rule of that
        document's dataset is returned; if the dataset has no process rule
        yet, the default rules are used instead of failing.

        Raises:
            NotFound: the document or its dataset does not exist.
            Forbidden: the permission check rejects the current user.
        """
        # Start from the defaults; they are overridden below when the
        # dataset already has a stored process rule.
        mode = DocumentService.DEFAULT_RULES['mode']
        rules = DocumentService.DEFAULT_RULES['rules']

        document_id = request.args.get('document_id')
        if document_id:
            document = Document.query.get_or_404(document_id)

            dataset = DatasetService.get_dataset(document.dataset_id)
            if not dataset:
                raise NotFound('Dataset not found.')

            try:
                DatasetService.check_dataset_permission(dataset, current_user)
            except services.errors.account.NoPermissionError as e:
                raise Forbidden(str(e))

            # Latest process rule of the dataset; one_or_none() yields None
            # when the dataset has no rule rows at all.
            dataset_process_rule = db.session.query(DatasetProcessRule). \
                filter(DatasetProcessRule.dataset_id == document.dataset_id). \
                order_by(DatasetProcessRule.created_at.desc()). \
                limit(1). \
                one_or_none()

            if dataset_process_rule is not None:
                mode = dataset_process_rule.mode
                rules = dataset_process_rule.rules_dict

        return {
            'mode': mode,
            'rules': rules
        }
|
||||
|
||||
|
||||
class DatasetDocumentListApi(Resource):
    """Collection endpoint for the documents of one dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id):
        """Return one page of the dataset's documents.

        Query parameters: ``page``, ``limit``, ``search`` (substring of the
        document name) and ``sort`` (``hit_count`` or ``created_at``, with a
        leading ``-`` for descending order).

        Raises:
            NotFound: no dataset with that id exists.
            Forbidden: the permission check rejects the current user.
        """
        dataset_id = str(dataset_id)
        query_args = request.args
        page = query_args.get('page', default=1, type=int)
        limit = query_args.get('limit', default=20, type=int)
        search = query_args.get('search', default=None, type=str)
        sort = query_args.get('sort', default='-created_at', type=str)

        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        query = Document.query.filter_by(
            dataset_id=dataset_id, tenant_id=current_user.current_tenant_id)

        if search:
            query = query.filter(Document.name.like(f'%{search}%'))

        # A leading '-' selects descending order for the named field.
        descending = sort.startswith('-')
        sort_field = sort[1:] if descending else sort
        direction = desc if descending else asc

        if sort_field == 'hit_count':
            # Sum segment hit counts per document; documents without any
            # segment are ordered as 0 through the outer join + coalesce.
            hit_totals = db.select(
                DocumentSegment.document_id,
                db.func.sum(DocumentSegment.hit_count).label("total_hit_count")
            ).group_by(DocumentSegment.document_id).subquery()

            query = query.outerjoin(hit_totals, hit_totals.c.document_id == Document.id)
            query = query.order_by(direction(db.func.coalesce(hit_totals.c.total_hit_count, 0)))
        elif sort_field == 'created_at':
            query = query.order_by(direction(Document.created_at))
        else:
            # Unknown sort keys fall back to newest first.
            query = query.order_by(desc(Document.created_at))

        paginated_documents = query.paginate(
            page=page, per_page=limit, max_per_page=100, error_out=False)
        documents = paginated_documents.items

        return {
            'data': marshal(documents, document_fields),
            # A completely filled page is treated as "more may follow".
            'has_more': len(documents) == limit,
            'limit': limit,
            'total': paginated_documents.total,
            'page': page
        }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(document_fields)
    def post(self, dataset_id):
        """Create a document in the dataset from the JSON body.

        Raises:
            NotFound: no dataset with that id exists.
            Forbidden: the current user is neither admin nor owner, or the
                dataset permission check fails.
            ValueError: neither the dataset nor the request names an
                indexing technique.
            ProviderNotInitializeError: the service raises
                ProviderTokenNotInitError while saving.
        """
        dataset_id = str(dataset_id)

        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        # Only tenant admins and owners may add documents.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        parser = reqparse.RequestParser()
        parser.add_argument(
            'indexing_technique',
            type=str,
            choices=Dataset.INDEXING_TECHNIQUE_LIST,
            nullable=False,
            location='json',
        )
        parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
        parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
        parser.add_argument('duplicate', type=bool, nullable=False, location='json')
        args = parser.parse_args()

        # The technique must come from the dataset or from this request.
        if not dataset.indexing_technique and not args['indexing_technique']:
            raise ValueError('indexing_technique is required.')

        DocumentService.document_create_args_validate(args)

        try:
            return DocumentService.save_document_with_dataset_id(dataset, args, current_user)
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
|
||||
|
||||
|
||||
class DatasetInitApi(Resource):
    """Creates a dataset together with its first document in one request."""

    # Response envelope: the new dataset plus the new document.
    dataset_and_document_fields = {
        'dataset': fields.Nested(dataset_fields),
        'document': fields.Nested(document_fields)
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(dataset_and_document_fields)
    def post(self):
        """Create a dataset and its first document from the JSON body.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            ProviderNotInitializeError: the service raises
                ProviderTokenNotInitError while saving.
        """
        # Only tenant admins and owners may create datasets; this check
        # runs before the request body is parsed.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        parser = reqparse.RequestParser()
        parser.add_argument(
            'indexing_technique',
            type=str,
            choices=Dataset.INDEXING_TECHNIQUE_LIST,
            required=True,
            nullable=False,
            location='json',
        )
        parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
        parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
        args = parser.parse_args()

        DocumentService.document_create_args_validate(args)

        try:
            dataset, document = DocumentService.save_document_without_dataset_id(
                tenant_id=current_user.current_tenant_id,
                document_data=args,
                account=current_user
            )
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()

        return {
            'dataset': dataset,
            'document': document
        }
|
||||
|
||||
|
||||
class DocumentIndexingEstimateApi(DocumentResource):
    """Indexing estimate for a document that has not finished indexing."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id, document_id):
        """Return the indexing estimate for the document.

        Documents that are not backed by an uploaded file (or whose source
        info has no ``upload_file_id``) get an all-zero estimate.

        Raises:
            DocumentAlreadyFinishedError: indexing is completed or errored.
            NotFound: the referenced upload file does not exist.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        if document.indexing_status in ['completed', 'error']:
            raise DocumentAlreadyFinishedError()

        process_rule = document.dataset_process_rule.to_dict()

        empty_estimate = {
            "tokens": 0,
            "total_price": 0,
            "currency": "USD",
            "total_segments": 0,
            "preview": []
        }

        if document.data_source_type != 'upload_file':
            return empty_estimate

        source_info = document.data_source_info_dict
        if not source_info or 'upload_file_id' not in source_info:
            return empty_estimate

        # Look the file up within the document's own tenant.
        upload_file = db.session.query(UploadFile).filter(
            UploadFile.tenant_id == document.tenant_id,
            UploadFile.id == source_info['upload_file_id']
        ).first()
        if not upload_file:
            raise NotFound('File not found.')

        return IndexingRunner().indexing_estimate(upload_file, process_rule)
|
||||
|
||||
|
||||
class DocumentIndexingStatusApi(DocumentResource):
    """Reports how far the indexing of a document has progressed."""

    # Marshalling schema; the two segment counters are attached in get().
    document_status_fields = {
        'id': fields.String,
        'indexing_status': fields.String,
        'processing_started_at': TimestampField,
        'parsing_completed_at': TimestampField,
        'cleaning_completed_at': TimestampField,
        'splitting_completed_at': TimestampField,
        'completed_at': TimestampField,
        'paused_at': TimestampField,
        'error': fields.String,
        'stopped_at': TimestampField,
        'completed_segments': fields.Integer,
        'total_segments': fields.Integer,
    }

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id, document_id):
        """Return the indexing status of the document with segment counts."""
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        # Segments with a completion timestamp count as completed.
        finished_count = DocumentSegment.query.filter(
            DocumentSegment.completed_at.isnot(None),
            DocumentSegment.document_id == document_id
        ).count()
        overall_count = DocumentSegment.query.filter_by(document_id=document_id).count()

        # Attach the counters to the loaded object so marshal() can read them.
        document.completed_segments = finished_count
        document.total_segments = overall_count

        return marshal(document, self.document_status_fields)
|
||||
|
||||
|
||||
class DocumentDetailApi(DocumentResource):
    """Detail view of a document, with or without its metadata."""

    # Accepted values of the `metadata` query parameter.
    METADATA_CHOICES = {'all', 'only', 'without'}

    def _epoch_or_none(self, moment):
        """Return ``moment`` as whole epoch seconds, or None when unset."""
        return int(moment.timestamp()) if moment else None

    def _core_fields(self, document, data_source_info, process_rules):
        """Build the leading part of the detail response (id .. archived)."""
        return {
            'id': document.id,
            'position': document.position,
            'data_source_type': document.data_source_type,
            'data_source_info': data_source_info,
            'dataset_process_rule_id': document.dataset_process_rule_id,
            'dataset_process_rule': process_rules,
            'name': document.name,
            'created_from': document.created_from,
            'created_by': document.created_by,
            # Unlike the other timestamps this one is not truncated to int.
            'created_at': document.created_at.timestamp(),
            'tokens': document.tokens,
            'indexing_status': document.indexing_status,
            'completed_at': self._epoch_or_none(document.completed_at),
            'updated_at': self._epoch_or_none(document.updated_at),
            'indexing_latency': document.indexing_latency,
            'error': document.error,
            'enabled': document.enabled,
            'disabled_at': self._epoch_or_none(document.disabled_at),
            'disabled_by': document.disabled_by,
            'archived': document.archived,
        }

    def _usage_fields(self, document):
        """Build the trailing part of the detail response (segment stats)."""
        return {
            'segment_count': document.segment_count,
            'average_segment_length': document.average_segment_length,
            'hit_count': document.hit_count,
            'display_status': document.display_status
        }

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id, document_id):
        """Return the document detail selected by the ``metadata`` parameter.

        ``only`` returns just id, doc_type and doc_metadata; ``without``
        returns everything except the metadata; ``all`` (the default)
        returns both.

        Raises:
            InvalidMetadataError: ``metadata`` is not one of the choices.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        metadata = request.args.get('metadata', 'all')
        if metadata not in self.METADATA_CHOICES:
            raise InvalidMetadataError(f'Invalid metadata value: {metadata}')

        if metadata == 'only':
            return {
                'id': document.id,
                'doc_type': document.doc_type,
                'doc_metadata': document.doc_metadata
            }, 200

        process_rules = DatasetService.get_process_rules(dataset_id)

        if metadata == 'without':
            data_source_info = document.data_source_detail_dict
            response = self._core_fields(document, data_source_info, process_rules)
        else:
            # NOTE(review): this branch calls `data_source_detail_dict_()` as a
            # method while the 'without' branch reads the
            # `data_source_detail_dict` attribute; confirm both exist on Document.
            data_source_info = document.data_source_detail_dict_()
            response = self._core_fields(document, data_source_info, process_rules)
            response['doc_type'] = document.doc_type
            response['doc_metadata'] = document.doc_metadata

        response.update(self._usage_fields(document))
        return response, 200
|
||||
|
||||
|
||||
class DocumentProcessingApi(DocumentResource):
    """Pauses or resumes the indexing of a document."""

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id, document_id, action):
        """Apply ``action`` ("pause" or "resume") to the document.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            InvalidActionError: the action is unknown, or the document's
                indexing status does not allow it.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        # Only tenant admins and owners may control indexing.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        if action not in ("pause", "resume"):
            raise InvalidActionError()

        pausing = action == "pause"
        if pausing:
            if document.indexing_status != "indexing":
                raise InvalidActionError('Document not in indexing state.')
        elif document.indexing_status not in ["paused", "error"]:
            raise InvalidActionError('Document not in paused or error state.')

        # Pausing records who paused and when; resuming clears both again.
        document.paused_by = current_user.id if pausing else None
        document.paused_at = datetime.utcnow() if pausing else None
        document.is_paused = pausing
        db.session.commit()

        return {'result': 'success'}, 200
|
||||
|
||||
|
||||
class DocumentDeleteApi(DocumentResource):
    """Deletes a single document."""

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, dataset_id, document_id):
        """Delete the document identified by the two ids.

        Raises:
            DocumentIndexingError: the service refuses the deletion with its
                own DocumentIndexingError.
        """
        target = self.get_document(str(dataset_id), str(document_id))

        try:
            DocumentService.delete_document(target)
        except services.errors.document.DocumentIndexingError:
            # Translate the service error into the controller-level error.
            raise DocumentIndexingError('Cannot delete document during indexing.')

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
class DocumentMetadataApi(DocumentResource):
    """Console endpoint that replaces a document's type and metadata."""

    @setup_required
    @login_required
    @account_initialization_required
    def put(self, dataset_id, document_id):
        """Validate and store ``doc_type`` / ``doc_metadata`` from the JSON body.

        Only keys declared in ``DocumentService.DOCUMENT_METADATA_SCHEMA`` for
        the given ``doc_type`` are kept, and only when the value has the
        declared type; everything else in ``doc_metadata`` is dropped.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            ValueError: the body is not a JSON object, a required field is
                missing, ``doc_type`` is unknown, or ``doc_metadata`` is not
                a dictionary.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        req_data = request.get_json()

        # Only admins and owners of the current tenant may edit metadata.
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        # A body such as `null` or `[]` parses successfully but has no `.get`;
        # reject it explicitly instead of failing with AttributeError.
        if not isinstance(req_data, dict):
            raise ValueError('Both doc_type and doc_metadata must be provided.')

        doc_type = req_data.get('doc_type')
        doc_metadata = req_data.get('doc_metadata')

        if doc_type is None or doc_metadata is None:
            raise ValueError('Both doc_type and doc_metadata must be provided.')

        if doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
            raise ValueError('Invalid doc_type.')

        if not isinstance(doc_metadata, dict):
            raise ValueError('doc_metadata must be a dictionary.')

        metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]

        # Build the filtered metadata first and assign it once, so the stored
        # value never depends on in-place mutation of an already-assigned dict.
        accepted_metadata = {}
        for key, value_type in metadata_schema.items():
            value = doc_metadata.get(key)
            if value is not None and isinstance(value, value_type):
                accepted_metadata[key] = value

        document.doc_metadata = accepted_metadata
        document.doc_type = doc_type
        document.updated_at = datetime.utcnow()
        db.session.commit()

        return {'result': 'success', 'message': 'Document metadata updated.'}, 200
|
||||
|
||||
|
||||
class DocumentStatusApi(DocumentResource):
    """Console endpoint that enables, disables or archives a document."""

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id, document_id, action):
        """Apply ``enable``, ``disable`` or ``archive`` to a document.

        After the database change is committed, a Redis key is set for 600
        seconds and the matching index task is queued. The same key is checked
        on entry, so a second status change is refused while it exists.

        Raises:
            Forbidden: the current user is neither admin nor owner.
            InvalidActionError: the action is unknown, the document is already
                in the requested state, or the indexing key is present.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        # Only admins and owners of the current tenant may change status.
        role = current_user.current_tenant.current_role
        if role not in ['admin', 'owner']:
            raise Forbidden()

        lock_key = f'document_{document.id}_indexing'
        if redis_client.get(lock_key) is not None:
            raise InvalidActionError("Document is being indexed, please try again later")

        if action == "enable":
            if document.enabled:
                raise InvalidActionError('Document already enabled.')

            document.enabled = True
            document.disabled_at = None
            document.disabled_by = None
            document.updated_at = datetime.utcnow()
            db.session.commit()

            # Set cache to prevent indexing the same document multiple times
            redis_client.setex(lock_key, 600, 1)
            add_document_to_index_task.delay(document_id)

        elif action == "disable":
            if not document.enabled:
                raise InvalidActionError('Document already disabled.')

            document.enabled = False
            document.disabled_at = datetime.utcnow()
            document.disabled_by = current_user.id
            document.updated_at = datetime.utcnow()
            db.session.commit()

            # Set cache to prevent indexing the same document multiple times
            redis_client.setex(lock_key, 600, 1)
            remove_document_from_index_task.delay(document_id)

        elif action == "archive":
            if document.archived:
                raise InvalidActionError('Document already archived.')

            document.archived = True
            document.archived_at = datetime.utcnow()
            document.archived_by = current_user.id
            document.updated_at = datetime.utcnow()
            db.session.commit()

            # Only an enabled document is removed from the index on archive.
            if document.enabled:
                # Set cache to prevent indexing the same document multiple times
                redis_client.setex(lock_key, 600, 1)
                remove_document_from_index_task.delay(document_id)

        else:
            raise InvalidActionError()

        return {'result': 'success'}, 200
|
||||
|
||||
|
||||
class DocumentPauseApi(DocumentResource):
    """Console endpoint that pauses a document via ``DocumentService``."""

    # These decorators are the same three applied to the other document
    # endpoints in this module; without them this handler ran with no setup,
    # login or account-initialization check.
    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id, document_id):
        """Pause document.

        Raises:
            NotFound: the dataset or the document does not exist.
            ArchivedDocumentImmutableError: the document is archived.
            DocumentIndexingError: the service layer refused to pause it.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)

        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        document = DocumentService.get_document(dataset.id, document_id)

        # 404 if document not found
        if document is None:
            raise NotFound("Document Not Exists.")

        # 403 if document is archived
        if DocumentService.check_archived(document):
            raise ArchivedDocumentImmutableError()

        try:
            # pause document
            DocumentService.pause_document(document)
        except services.errors.document.DocumentIndexingError:
            raise DocumentIndexingError('Cannot pause completed document.')

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
class DocumentRecoverApi(DocumentResource):
    """Console endpoint that resumes a paused document via ``DocumentService``."""

    # These decorators are the same three applied to the other document
    # endpoints in this module; without them this handler ran with no setup,
    # login or account-initialization check.
    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id, document_id):
        """Recover document.

        Raises:
            NotFound: the dataset or the document does not exist.
            ArchivedDocumentImmutableError: the document is archived.
            DocumentIndexingError: the service layer refused to recover it.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)

        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        document = DocumentService.get_document(dataset.id, document_id)

        # 404 if document not found
        if document is None:
            raise NotFound("Document Not Exists.")

        # 403 if document is archived
        if DocumentService.check_archived(document):
            raise ArchivedDocumentImmutableError()

        try:
            # recover document
            DocumentService.recover_document(document)
        except services.errors.document.DocumentIndexingError:
            raise DocumentIndexingError('Document is not in paused status.')

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
# Route registration for the dataset-document console endpoints.
api.add_resource(
    GetProcessRuleApi,
    '/datasets/process-rule',
)
api.add_resource(
    DatasetDocumentListApi,
    '/datasets/<uuid:dataset_id>/documents',
)
api.add_resource(
    DatasetInitApi,
    '/datasets/init',
)
api.add_resource(
    DocumentIndexingEstimateApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-estimate',
)
api.add_resource(
    DocumentIndexingStatusApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status',
)
api.add_resource(
    DocumentDetailApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>',
)
api.add_resource(
    DocumentProcessingApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/<string:action>',
)
api.add_resource(
    DocumentDeleteApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>',
)
api.add_resource(
    DocumentMetadataApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata',
)
api.add_resource(
    DocumentStatusApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>',
)
# NOTE(review): the two literal routes below overlap with the
# `processing/<string:action>` route registered for DocumentProcessingApi, and
# all three resources handle PATCH — confirm which one is meant to serve
# pause/resume requests.
api.add_resource(
    DocumentPauseApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause',
)
api.add_resource(
    DocumentRecoverApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume',
)
|
||||
203
api/controllers/console/datasets/datasets_segments.py
Normal file
203
api/controllers/console/datasets/datasets_segments.py
Normal file
@@ -0,0 +1,203 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from datetime import datetime
|
||||
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal
|
||||
from werkzeug.exceptions import NotFound, Forbidden
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.datasets.error import InvalidActionError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.dataset import DocumentSegment
|
||||
|
||||
from libs.helper import TimestampField
|
||||
from services.dataset_service import DatasetService, DocumentService
|
||||
from tasks.add_segment_to_index_task import add_segment_to_index_task
|
||||
from tasks.remove_segment_from_index_task import remove_segment_from_index_task
|
||||
|
||||
# flask-restful marshalling map for one DocumentSegment.
segment_fields = {
    # identity and content
    'id': fields.String,
    'position': fields.Integer,
    'document_id': fields.String,
    'content': fields.String,
    'word_count': fields.Integer,
    'tokens': fields.Integer,
    'keywords': fields.List(fields.String),
    # index bookkeeping
    'index_node_id': fields.String,
    'index_node_hash': fields.String,
    'hit_count': fields.Integer,
    # enable / disable state
    'enabled': fields.Boolean,
    'disabled_at': TimestampField,
    'disabled_by': fields.String,
    # lifecycle
    'status': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
    'indexing_at': TimestampField,
    'completed_at': TimestampField,
    'error': fields.String,
    'stopped_at': TimestampField,
}
|
||||
|
||||
# Marshalling map for a page of segments.
# NOTE(review): nothing in this module passes this map to marshal(); the list
# endpoint builds its response dict by hand and also returns a 'total' key.
segment_list_response = {
    'data': fields.List(fields.Nested(segment_fields)),
    'has_more': fields.Boolean,
    'limit': fields.Integer,
}
|
||||
|
||||
|
||||
class DatasetDocumentSegmentListApi(Resource):
    """Console endpoint that lists the segments of one document."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id, document_id):
        """Return one page of segments ordered by ``position``.

        Query-string arguments:
            last_id: id of the last segment already seen; only segments with a
                greater position are returned.
            limit: page size, clamped to the range 0..100 (default 20).
            status: repeatable; keep only segments whose status is listed.
            hit_count_gte: keep only segments with at least this hit count.
            enabled: ``true``, ``false`` or ``all`` (default).

        Returns:
            A dict with ``data``, ``has_more``, ``limit`` and ``total``, plus
            HTTP status 200.

        Raises:
            NotFound: the dataset or the document does not exist.
            Forbidden: the current user may not access the dataset.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        document = DocumentService.get_document(dataset_id, document_id)

        if not document:
            raise NotFound('Document not found.')

        parser = reqparse.RequestParser()
        parser.add_argument('last_id', type=str, default=None, location='args')
        parser.add_argument('limit', type=int, default=20, location='args')
        parser.add_argument('status', type=str,
                            action='append', default=[], location='args')
        parser.add_argument('hit_count_gte', type=int,
                            default=None, location='args')
        parser.add_argument('enabled', type=str, default='all', location='args')
        args = parser.parse_args()

        last_id = args['last_id']
        # Cap the page size at 100 and keep a negative value out of SQL LIMIT.
        limit = max(0, min(args['limit'], 100))
        status_list = args['status']
        hit_count_gte = args['hit_count_gte']

        query = DocumentSegment.query.filter(
            DocumentSegment.document_id == str(document_id),
            DocumentSegment.tenant_id == current_user.current_tenant_id
        )

        if last_id is not None:
            last_segment = DocumentSegment.query.get(str(last_id))
            if last_segment:
                query = query.filter(
                    DocumentSegment.position > last_segment.position)
            else:
                # Unknown cursor: return an empty page with the same keys as
                # the normal response, including 'total'.
                return {'data': [], 'has_more': False, 'limit': limit, 'total': 0}, 200

        if status_list:
            query = query.filter(DocumentSegment.status.in_(status_list))

        if hit_count_gte is not None:
            query = query.filter(DocumentSegment.hit_count >= hit_count_gte)

        enabled_filter = args['enabled'].lower()
        if enabled_filter == 'true':
            query = query.filter(DocumentSegment.enabled == True)
        elif enabled_filter == 'false':
            query = query.filter(DocumentSegment.enabled == False)

        total = query.count()
        # Fetch one extra row to learn whether another page exists.
        segments = query.order_by(DocumentSegment.position).limit(limit + 1).all()

        has_more = False
        if len(segments) > limit:
            has_more = True
            segments = segments[:-1]

        return {
            'data': marshal(segments, segment_fields),
            'has_more': has_more,
            'limit': limit,
            'total': total
        }, 200
|
||||
|
||||
|
||||
class DatasetDocumentSegmentApi(Resource):
    """Console endpoint that enables or disables a single segment."""

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id, segment_id, action):
        """Apply ``enable`` or ``disable`` to a segment of the current tenant.

        After the change is committed, a per-segment Redis key is set for 600
        seconds and the matching index task is queued. The request is refused
        while that key, or the per-document indexing key, exists.

        Raises:
            NotFound: the dataset or the segment does not exist.
            Forbidden: the user is not admin/owner or lacks dataset permission.
            InvalidActionError: the action is unknown, the segment is already
                in the requested state, or an indexing key is present.
        """
        dataset = DatasetService.get_dataset(str(dataset_id))
        if not dataset:
            raise NotFound('Dataset not found.')

        # Only admins and owners of the current tenant may toggle segments.
        role = current_user.current_tenant.current_role
        if role not in ['admin', 'owner']:
            raise Forbidden()

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        segment = DocumentSegment.query.filter(
            DocumentSegment.id == str(segment_id),
            DocumentSegment.tenant_id == current_user.current_tenant_id
        ).first()
        if not segment:
            raise NotFound('Segment not found.')

        document_lock_key = f'document_{segment.document_id}_indexing'
        if redis_client.get(document_lock_key) is not None:
            raise InvalidActionError("Document is being indexed, please try again later")

        segment_lock_key = f'segment_{segment.id}_indexing'
        if redis_client.get(segment_lock_key) is not None:
            raise InvalidActionError("Segment is being indexed, please try again later")

        if action == "enable":
            if segment.enabled:
                raise InvalidActionError("Segment is already enabled.")

            segment.enabled = True
            segment.disabled_at = None
            segment.disabled_by = None
            db.session.commit()

            # Set cache to prevent indexing the same segment multiple times
            redis_client.setex(segment_lock_key, 600, 1)
            add_segment_to_index_task.delay(segment.id)

        elif action == "disable":
            if not segment.enabled:
                raise InvalidActionError("Segment is already disabled.")

            segment.enabled = False
            segment.disabled_at = datetime.utcnow()
            segment.disabled_by = current_user.id
            db.session.commit()

            # Set cache to prevent indexing the same segment multiple times
            redis_client.setex(segment_lock_key, 600, 1)
            remove_segment_from_index_task.delay(segment.id)

        else:
            raise InvalidActionError()

        return {'result': 'success'}, 200
|
||||
|
||||
|
||||
# Route registration for the segment console endpoints.
api.add_resource(
    DatasetDocumentSegmentListApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments',
)
api.add_resource(
    DatasetDocumentSegmentApi,
    '/datasets/<uuid:dataset_id>/segments/<uuid:segment_id>/<string:action>',
)
|
||||
73
api/controllers/console/datasets/error.py
Normal file
73
api/controllers/console/datasets/error.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class NoFileUploadedError(BaseHTTPException):
    """HTTP 400 error: the request carried no uploaded file."""

    code = 400
    error_code = 'no_file_uploaded'
    description = "No file uploaded."
|
||||
|
||||
|
||||
class TooManyFilesError(BaseHTTPException):
    """HTTP 400 error: more than one file was sent where one is allowed."""

    code = 400
    error_code = 'too_many_files'
    description = "Only one file is allowed."
|
||||
|
||||
|
||||
class FileTooLargeError(BaseHTTPException):
    """HTTP 413 error: the uploaded file exceeds the size limit."""

    code = 413
    error_code = 'file_too_large'
    # NOTE(review): the description contains a `{message}` placeholder —
    # confirm that BaseHTTPException fills it from the constructor argument.
    description = "File size exceeded. {message}"
|
||||
|
||||
|
||||
class UnsupportedFileTypeError(BaseHTTPException):
    """HTTP 415 error: the file type is not allowed."""

    code = 415
    error_code = 'unsupported_file_type'
    description = "File type not allowed."
|
||||
|
||||
|
||||
class HighQualityDatasetOnlyError(BaseHTTPException):
    """HTTP 400 error: the operation requires a high-quality dataset."""

    code = 400
    error_code = 'high_quality_dataset_only'
    description = "High quality dataset only."
|
||||
|
||||
|
||||
class DatasetNotInitializedError(BaseHTTPException):
    """HTTP 400 error: the dataset has not been initialized."""

    code = 400
    error_code = 'dataset_not_initialized'
    description = "Dataset not initialized."
|
||||
|
||||
|
||||
class ArchivedDocumentImmutableError(BaseHTTPException):
    """HTTP 403 error: an archived document cannot be processed."""

    code = 403
    error_code = 'archived_document_immutable'
    description = "Cannot process an archived document."
|
||||
|
||||
|
||||
class DatasetNameDuplicateError(BaseHTTPException):
    """HTTP 409 error: a dataset with this name already exists."""

    code = 409
    error_code = 'dataset_name_duplicate'
    description = "Dataset name already exists."
|
||||
|
||||
|
||||
class InvalidActionError(BaseHTTPException):
    """HTTP 400 error: the requested action is invalid."""

    code = 400
    error_code = 'invalid_action'
    description = "Invalid action."
|
||||
|
||||
|
||||
class DocumentAlreadyFinishedError(BaseHTTPException):
    """HTTP 400 error: the document has already finished."""

    code = 400
    error_code = 'document_already_finished'
    description = "Document already finished."
|
||||
|
||||
|
||||
class DocumentIndexingError(BaseHTTPException):
    """HTTP 400 error: the operation conflicts with document indexing."""

    code = 400
    error_code = 'document_indexing'
    description = "Document indexing."
|
||||
|
||||
|
||||
class InvalidMetadataError(BaseHTTPException):
    """HTTP 400 error: the supplied metadata is invalid."""

    code = 400
    error_code = 'invalid_metadata'
    description = "Invalid metadata."
|
||||
147
api/controllers/console/datasets/file.py
Normal file
147
api/controllers/console/datasets/file.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from cachetools import TTLCache
|
||||
from flask import request, current_app
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, marshal_with, fields
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.datasets.error import NoFileUploadedError, TooManyFilesError, FileTooLargeError, \
|
||||
UnsupportedFileTypeError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.index.readers.html_parser import HTMLParser
|
||||
from core.index.readers.pdf_parser import PDFParser
|
||||
from extensions.ext_storage import storage
|
||||
from libs.helper import TimestampField
|
||||
from extensions.ext_database import db
|
||||
from models.model import UploadFile
|
||||
|
||||
# Preview cache with a 30 second time-to-live.
# NOTE(review): nothing in this module stores into `cache`, and maxsize=None
# should be checked against the cachetools documentation before adding writes.
cache = TTLCache(maxsize=None, ttl=30)

# Upload size ceiling in bytes.
FILE_SIZE_LIMIT = 15 * 1024 * 1024  # 15MB
# Extensions accepted on upload and supported by the preview endpoint.
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm']
# Maximum number of characters returned by the preview endpoint.
PREVIEW_WORDS_LIMIT = 3000
|
||||
|
||||
|
||||
class FileApi(Resource):
    """Console endpoint that stores one uploaded file and records it."""

    # Marshalling map for the created UploadFile row.
    file_fields = {
        'id': fields.String,
        'name': fields.String,
        'size': fields.Integer,
        'extension': fields.String,
        'mime_type': fields.String,
        'created_by': fields.String,
        'created_at': TimestampField,
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(file_fields)
    def post(self):
        """Validate the upload, save it to storage and insert an UploadFile.

        Returns:
            The new ``UploadFile`` (marshalled with ``file_fields``) and 201.

        Raises:
            NoFileUploadedError: the request has no ``file`` part.
            TooManyFilesError: the request has more than one file part.
            FileTooLargeError: the content exceeds ``FILE_SIZE_LIMIT``.
            UnsupportedFileTypeError: the extension is not allowed.
        """
        # Check presence BEFORE indexing request.files: indexing a missing key
        # raises on its own, which made the NoFileUploadedError unreachable.
        if 'file' not in request.files:
            raise NoFileUploadedError()

        if len(request.files) > 1:
            raise TooManyFilesError()

        # get file from request
        file = request.files['file']

        file_content = file.read()
        file_size = len(file_content)

        if file_size > FILE_SIZE_LIMIT:
            # f-string so the actual numbers are reported, not literal braces.
            message = f"({file_size} > {FILE_SIZE_LIMIT})"
            raise FileTooLargeError(message)

        # Lower-case so e.g. "REPORT.PDF" matches ALLOWED_EXTENSIONS and the
        # lower-case comparisons done by the preview endpoint.
        extension = file.filename.split('.')[-1].lower()
        if extension not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()

        # use a uuid as the stored file name
        file_uuid = str(uuid.uuid4())
        file_key = 'upload_files/' + current_user.current_tenant_id + '/' + file_uuid + '.' + extension

        # save file to storage
        storage.save(file_key, file_content)

        # save file to db
        config = current_app.config
        upload_file = UploadFile(
            tenant_id=current_user.current_tenant_id,
            storage_type=config['STORAGE_TYPE'],
            key=file_key,
            name=file.filename,
            size=file_size,
            extension=extension,
            mime_type=file.mimetype,
            created_by=current_user.id,
            created_at=datetime.datetime.utcnow(),
            used=False,
            hash=hashlib.sha3_256(file_content).hexdigest()
        )

        db.session.add(upload_file)
        db.session.commit()

        return upload_file, 201
|
||||
|
||||
|
||||
class FilePreviewApi(Resource):
    """Console endpoint that returns a text preview of an uploaded file."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, file_id):
        """Extract text from the stored file and return its first characters.

        Returns:
            ``{'content': text}`` where ``text`` is at most
            ``PREVIEW_WORDS_LIMIT`` characters long.

        Raises:
            NotFound: no UploadFile row has this id.
            UnsupportedFileTypeError: the stored extension is not allowed.
        """
        file_id = str(file_id)

        # NOTE(review): nothing in this module writes to `cache`, so this
        # lookup currently always misses.
        key = file_id + request.path
        cached_response = cache.get(key)
        if cached_response and time.time() - cached_response['timestamp'] < cache.ttl:
            return cached_response['response']

        # NOTE(review): the lookup is by id only and is not restricted to the
        # current tenant — confirm whether that is intended.
        upload_file = db.session.query(UploadFile) \
            .filter(UploadFile.id == file_id) \
            .first()

        if not upload_file:
            raise NotFound("File not found")

        # extract text from file
        extension = upload_file.extension
        if extension not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()

        with tempfile.TemporaryDirectory() as temp_dir:
            suffix = Path(upload_file.key).suffix
            # A random hex name replaces the private
            # tempfile._get_candidate_names() helper; the directory is already
            # unique, so any fresh name works.
            filepath = f"{temp_dir}/{uuid.uuid4().hex}{suffix}"
            storage.download(upload_file.key, filepath)

            if extension == 'pdf':
                parser = PDFParser({'upload_file': upload_file})
                text = parser.parse_file(Path(filepath))
            elif extension in ['html', 'htm']:
                # Use BeautifulSoup to extract text
                parser = HTMLParser()
                text = parser.parse_file(Path(filepath))
            else:
                # ['txt', 'markdown', 'md']
                with open(filepath, "rb") as fp:
                    data = fp.read()
                    text = data.decode(encoding='utf-8').strip() if data else ''

        text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
        return {'content': text}
|
||||
|
||||
|
||||
# Route registration for the file console endpoints.
api.add_resource(
    FileApi,
    '/files/upload',
)
api.add_resource(
    FilePreviewApi,
    '/files/<uuid:file_id>/preview',
)
|
||||
100
api/controllers/console/datasets/hit_testing.py
Normal file
100
api/controllers/console/datasets/hit_testing.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import logging
|
||||
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, marshal, fields
|
||||
from werkzeug.exceptions import InternalServerError, NotFound, Forbidden
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.datasets.error import HighQualityDatasetOnlyError, DatasetNotInitializedError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import TimestampField
|
||||
from services.dataset_service import DatasetService
|
||||
from services.hit_testing_service import HitTestingService
|
||||
|
||||
# Marshalling map for the document summary nested in each hit-testing segment.
document_fields = {
    'id': fields.String,
    'data_source_type': fields.String,
    'name': fields.String,
    'doc_type': fields.String
}
|
||||
|
||||
# Marshalling map for a segment returned by hit testing; it carries a nested
# 'document' entry described by `document_fields`.
segment_fields = {
    # identity and content
    'id': fields.String,
    'position': fields.Integer,
    'document_id': fields.String,
    'content': fields.String,
    'word_count': fields.Integer,
    'tokens': fields.Integer,
    'keywords': fields.List(fields.String),
    # index bookkeeping
    'index_node_id': fields.String,
    'index_node_hash': fields.String,
    'hit_count': fields.Integer,
    # enable / disable state
    'enabled': fields.Boolean,
    'disabled_at': TimestampField,
    'disabled_by': fields.String,
    # lifecycle
    'status': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
    'indexing_at': TimestampField,
    'completed_at': TimestampField,
    'error': fields.String,
    'stopped_at': TimestampField,
    # owning document
    'document': fields.Nested(document_fields)
}
|
||||
|
||||
# Marshalling map for one hit-testing record: the segment, its score and a
# raw (unmarshalled) 'tsne_position' value.
hit_testing_record_fields = {
    'segment': fields.Nested(segment_fields),
    'score': fields.Float,
    'tsne_position': fields.Raw,
}
|
||||
|
||||
|
||||
class HitTestingApi(Resource):
    """Console endpoint that runs a retrieval test query against a dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, dataset_id):
        """Retrieve up to 10 records for the JSON ``query`` field.

        Raises:
            NotFound: the dataset does not exist.
            Forbidden: the current user may not access the dataset.
            HighQualityDatasetOnlyError: the dataset's indexing technique is
                not ``high_quality``.
            ValueError: the query is empty or longer than 250 characters.
            DatasetNotInitializedError: the index is not initialized.
            InternalServerError: any other failure during retrieval.
        """
        dataset = DatasetService.get_dataset(str(dataset_id))
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        # only high quality dataset can be used for hit testing
        if dataset.indexing_technique != 'high_quality':
            raise HighQualityDatasetOnlyError()

        parser = reqparse.RequestParser()
        parser.add_argument('query', type=str, location='json')
        query = parser.parse_args()['query']

        if not query or len(query) > 250:
            raise ValueError('Query is required and cannot exceed 250 characters')

        try:
            response = HitTestingService.retrieve(
                dataset=dataset,
                query=query,
                account=current_user,
                limit=10,
            )
            records = marshal(response['records'], hit_testing_record_fields)
            return {"query": response['query'], 'records': records}
        except services.errors.index.IndexNotInitializedError:
            raise DatasetNotInitializedError()
        except Exception as e:
            logging.exception("Hit testing failed.")
            raise InternalServerError(str(e))
|
||||
|
||||
|
||||
# Route registration for the hit-testing console endpoint.
api.add_resource(
    HitTestingApi,
    '/datasets/<uuid:dataset_id>/hit-testing',
)
|
||||
19
api/controllers/console/error.py
Normal file
19
api/controllers/console/error.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class AlreadySetupError(BaseHTTPException):
    """HTTP 403 error: the application has already been set up."""

    code = 403
    error_code = 'already_setup'
    description = "Application already setup."
|
||||
|
||||
|
||||
class NotSetupError(BaseHTTPException):
    """HTTP 401 error: the application has not been set up yet."""

    code = 401
    error_code = 'not_setup'
    description = "Application not setup."
|
||||
|
||||
|
||||
class AccountNotLinkTenantError(BaseHTTPException):
    """HTTP 403 error: the account is not linked to a tenant."""

    code = 403
    error_code = 'account_not_link_tenant'
    description = "Account not link tenant."
|
||||
93
api/controllers/console/setup.py
Normal file
93
api/controllers/console/setup.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from functools import wraps
|
||||
|
||||
import flask_login
|
||||
from flask import request, current_app
|
||||
from flask_restful import Resource, reqparse
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.model import DifySetup
|
||||
from services.account_service import AccountService, TenantService, RegisterService
|
||||
|
||||
from libs.helper import email, str_len
|
||||
from libs.password import valid_password
|
||||
|
||||
from . import api
|
||||
from .error import AlreadySetupError, NotSetupError
|
||||
from .wraps import only_edition_self_hosted
|
||||
|
||||
|
||||
class SetupApi(Resource):
    """Console endpoint for the one-time setup of a self-hosted install."""

    @only_edition_self_hosted
    def get(self):
        """Report whether setup has finished and, if so, when."""
        status = get_setup_status()
        if not status:
            return {'step': 'not_start'}

        return {
            'step': 'finished',
            'setup_at': status.setup_at.isoformat()
        }

    @only_edition_self_hosted
    def post(self):
        """Register the first account, record the setup and log the user in.

        Raises:
            AlreadySetupError: setup is already recorded, or a tenant exists.
        """
        # Refuse when setup is recorded or any tenant has been created.
        if get_setup_status() or TenantService.get_tenant_count() > 0:
            raise AlreadySetupError()

        parser = reqparse.RequestParser()
        parser.add_argument('email', type=email, required=True, location='json')
        parser.add_argument('name', type=str_len(30), required=True, location='json')
        parser.add_argument('password', type=valid_password, required=True, location='json')
        args = parser.parse_args()

        # Register
        account = RegisterService.register(
            email=args['email'],
            name=args['name'],
            password=args['password']
        )

        setup()

        # Login
        flask_login.login_user(account)
        AccountService.update_last_login(account, request)

        return {'result': 'success'}, 201
|
||||
|
||||
|
||||
def setup():
    """Add a DifySetup row for the configured version to the session.

    NOTE(review): this function only adds to the session and does not commit;
    presumably a later commit in the request persists the row — confirm.
    """
    current_version = current_app.config['CURRENT_VERSION']
    db.session.add(DifySetup(version=current_version))
|
||||
|
||||
|
||||
def setup_required(view):
    """Decorator that rejects the call with NotSetupError until setup is done."""

    @wraps(view)
    def wrapper(*args, **kwargs):
        # check setup
        if get_setup_status():
            return view(*args, **kwargs)
        raise NotSetupError()

    return wrapper
|
||||
|
||||
|
||||
def get_setup_status():
    """Return the setup state for the running edition.

    For the ``SELF_HOSTED`` edition this is the first DifySetup row, or
    ``None`` when there is none; every other edition returns ``True``.
    """
    if current_app.config['EDITION'] != 'SELF_HOSTED':
        return True

    return DifySetup.query.first()
|
||||
|
||||
# Route registration for the setup console endpoint.
api.add_resource(
    SetupApi,
    '/setup',
)
|
||||
39
api/controllers/console/version.py
Normal file
39
api/controllers/console/version.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from flask import current_app
|
||||
from flask_restful import reqparse, Resource
|
||||
from werkzeug.exceptions import InternalServerError
|
||||
|
||||
from . import api
|
||||
|
||||
|
||||
class VersionApi(Resource):
    """Console endpoint that asks the update server for the latest release."""

    def get(self):
        """Query ``CHECK_UPDATE_URL`` with the caller's ``current_version``.

        Returns:
            A dict with ``version``, ``release_date``, ``release_notes`` and
            ``can_auto_update`` taken from the update server's JSON reply.

        Raises:
            InternalServerError: the request failed or timed out, or the reply
                was not the expected JSON document.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('current_version', type=str, required=True, location='args')
        args = parser.parse_args()
        check_update_url = current_app.config['CHECK_UPDATE_URL']

        try:
            # Without a timeout a stalled update server would block this
            # request indefinitely; the tuple is (connect, read) in seconds.
            response = requests.get(
                check_update_url,
                params={'current_version': args.get('current_version')},
                timeout=(3, 10),
            )
            # Parse inside the try so malformed replies are logged as well.
            content = json.loads(response.content)
            result = {
                'version': content['version'],
                'release_date': content['releaseDate'],
                'release_notes': content['releaseNotes'],
                'can_auto_update': content['canAutoUpdate']
            }
        except Exception:
            logging.exception("Check update error.")
            raise InternalServerError()

        return result
|
||||
|
||||
|
||||
# Route registration for the version-check console endpoint.
api.add_resource(
    VersionApi,
    '/version',
)
|
||||
0
api/controllers/console/workspace/__init__.py
Normal file
0
api/controllers/console/workspace/__init__.py
Normal file
263
api/controllers/console/workspace/account.py
Normal file
263
api/controllers/console/workspace/account.py
Normal file
@@ -0,0 +1,263 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from datetime import datetime
|
||||
|
||||
import pytz
|
||||
from flask import current_app, request
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, fields, marshal_with
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.workspace.error import AccountAlreadyInitedError, InvalidInvitationCodeError, \
|
||||
RepeatPasswordNotMatchError
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import TimestampField, supported_language, timezone
|
||||
from extensions.ext_database import db
|
||||
from models.account import InvitationCode, AccountIntegrate
|
||||
from services.account_service import AccountService
|
||||
|
||||
|
||||
# Marshalling schema for an account as returned by the /account/* endpoints.
account_fields = {
    # identity
    'id': fields.String,
    'name': fields.String,
    'avatar': fields.String,
    'email': fields.String,
    # user preferences
    'interface_language': fields.String,
    'interface_theme': fields.String,
    'timezone': fields.String,
    # audit information
    'last_login_at': TimestampField,
    'last_login_ip': fields.String,
    'created_at': TimestampField,
}
|
||||
|
||||
|
||||
class AccountInitApi(Resource):
    """Finish the first-time initialization of the logged-in account."""

    @setup_required
    @login_required
    def post(self):
        """Store language/timezone and activate the current account.

        In the CLOUD edition an unused invitation code must also be supplied;
        it is marked as used by this account and its current tenant.

        Raises:
            AccountAlreadyInitedError: the account status is already 'active'.
            ValueError: CLOUD edition and no invitation code was sent.
            InvalidInvitationCodeError: the code is unknown or not 'unused'.
        """
        account = current_user
        if account.status == 'active':
            raise AccountAlreadyInitedError()

        is_cloud = current_app.config['EDITION'] == 'CLOUD'

        parser = reqparse.RequestParser()
        if is_cloud:
            parser.add_argument('invitation_code', type=str, location='json')
        parser.add_argument('interface_language', type=supported_language,
                            required=True, location='json')
        parser.add_argument('timezone', type=timezone, required=True, location='json')
        args = parser.parse_args()

        if is_cloud:
            code_value = args['invitation_code']
            if not code_value:
                raise ValueError('invitation_code is required')

            # Only codes that are still unused may be redeemed.
            code_record = (
                db.session.query(InvitationCode)
                .filter(
                    InvitationCode.code == code_value,
                    InvitationCode.status == 'unused',
                )
                .first()
            )
            if not code_record:
                raise InvalidInvitationCodeError()

            code_record.status = 'used'
            code_record.used_at = datetime.utcnow()
            code_record.used_by_tenant_id = account.current_tenant_id
            code_record.used_by_account_id = account.id

        account.interface_language = args['interface_language']
        account.timezone = args['timezone']
        account.interface_theme = 'light'
        account.status = 'active'
        account.initialized_at = datetime.utcnow()
        # One commit persists both the account and (if any) the invitation code.
        db.session.commit()

        return {'result': 'success'}
|
||||
|
||||
|
||||
class AccountProfileApi(Resource):
    """Read-only view of the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_fields)
    def get(self):
        """Return the current user, serialized through ``account_fields``."""
        profile = current_user
        return profile
|
||||
|
||||
|
||||
class AccountNameApi(Resource):
    """Rename the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_fields)
    def post(self):
        """Update the account name and return the updated account.

        Raises:
            ValueError: the name is shorter than 3 or longer than 30 characters.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('name', type=str, required=True, location='json')
        new_name = parser.parse_args()['name']

        # Validate account name length
        if not 3 <= len(new_name) <= 30:
            raise ValueError(
                "Account name must be between 3 and 30 characters.")

        return AccountService.update_account(current_user, name=new_name)
|
||||
|
||||
|
||||
class AccountAvatarApi(Resource):
    """Change the avatar of the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_fields)
    def post(self):
        """Store the ``avatar`` string from the JSON body and return the account."""
        parser = reqparse.RequestParser()
        parser.add_argument('avatar', type=str, required=True, location='json')
        new_avatar = parser.parse_args()['avatar']

        return AccountService.update_account(current_user, avatar=new_avatar)
|
||||
|
||||
|
||||
class AccountInterfaceLanguageApi(Resource):
    """Change the interface language of the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_fields)
    def post(self):
        """Store ``interface_language`` (checked by ``supported_language``) and return the account."""
        parser = reqparse.RequestParser()
        parser.add_argument('interface_language', type=supported_language,
                            required=True, location='json')
        language = parser.parse_args()['interface_language']

        return AccountService.update_account(current_user, interface_language=language)
|
||||
|
||||
|
||||
class AccountInterfaceThemeApi(Resource):
    """Change the interface theme of the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_fields)
    def post(self):
        """Store ``interface_theme`` ('light' or 'dark') and return the account."""
        parser = reqparse.RequestParser()
        parser.add_argument(
            'interface_theme',
            type=str,
            choices=['light', 'dark'],
            required=True,
            location='json',
        )
        theme = parser.parse_args()['interface_theme']

        return AccountService.update_account(current_user, interface_theme=theme)
|
||||
|
||||
|
||||
class AccountTimezoneApi(Resource):
    """Change the timezone of the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_fields)
    def post(self):
        """Store ``timezone`` and return the updated account.

        Raises:
            ValueError: the value is not one of ``pytz.all_timezones``.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('timezone', type=str, required=True, location='json')
        tz_name = parser.parse_args()['timezone']

        # Validate timezone string, e.g. America/New_York, Asia/Shanghai
        if tz_name not in pytz.all_timezones:
            raise ValueError("Invalid timezone string.")

        return AccountService.update_account(current_user, timezone=tz_name)
|
||||
|
||||
|
||||
class AccountPasswordApi(Resource):
    """Change the password of the logged-in account."""

    # The result dict is returned unmarshalled on purpose: running
    # {"result": "success"} through ``account_fields`` would drop the
    # ``result`` key and emit an account object whose fields are all null.
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Set a new password for the current user.

        JSON body:
            password: current password (optional).
            new_password: the password to set (required).
            repeat_new_password: must equal ``new_password`` (required).

        Returns:
            ``{"result": "success"}`` once the password has been updated.

        Raises:
            RepeatPasswordNotMatchError: the two new-password values differ.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('password', type=str,
                            required=False, location='json')
        parser.add_argument('new_password', type=str,
                            required=True, location='json')
        parser.add_argument('repeat_new_password', type=str,
                            required=True, location='json')
        args = parser.parse_args()

        if args['new_password'] != args['repeat_new_password']:
            raise RepeatPasswordNotMatchError()

        AccountService.update_account_password(
            current_user, args['password'], args['new_password'])

        return {"result": "success"}
|
||||
|
||||
|
||||
class AccountIntegrateApi(Resource):
    """List the OAuth providers and whether the current account is bound to each."""

    # Schema of one provider entry. It has no 'id' field, so the 'id' values
    # put into the entries below are not part of the marshalled output.
    integrate_fields = {
        'provider': fields.String,
        'created_at': TimestampField,
        'is_bound': fields.Boolean,
        'link': fields.String,
    }

    integrate_list_fields = {
        'data': fields.List(fields.Nested(integrate_fields)),
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(integrate_list_fields)
    def get(self):
        """Return one entry per known provider ("github", "google").

        Bound providers carry the binding's creation time and no link; unbound
        providers carry an OAuth login link built from the request's root URL.
        """
        account = current_user
        account_integrates = db.session.query(AccountIntegrate).filter(
            AccountIntegrate.account_id == account.id).all()

        base_url = request.url_root.rstrip('/')
        oauth_base_path = "/console/api/oauth/login"

        integrate_data = []
        for provider in ["github", "google"]:
            # First binding of this account for the provider, if any.
            binding = next(
                (item for item in account_integrates if item.provider == provider),
                None,
            )
            if not binding:
                entry = {
                    'id': None,
                    'provider': provider,
                    'created_at': None,
                    'is_bound': False,
                    'link': f'{base_url}{oauth_base_path}/{provider}'
                }
            else:
                entry = {
                    'id': binding.id,
                    'provider': provider,
                    'created_at': binding.created_at,
                    'is_bound': True,
                    'link': None
                }
            integrate_data.append(entry)

        return {'data': integrate_data}
|
||||
|
||||
|
||||
# Register API resources
# -- account initialization and profile
api.add_resource(AccountInitApi, '/account/init')
api.add_resource(AccountProfileApi, '/account/profile')
# -- single-attribute updates
api.add_resource(AccountNameApi, '/account/name')
api.add_resource(AccountAvatarApi, '/account/avatar')
api.add_resource(AccountInterfaceLanguageApi, '/account/interface-language')
api.add_resource(AccountInterfaceThemeApi, '/account/interface-theme')
api.add_resource(AccountTimezoneApi, '/account/timezone')
api.add_resource(AccountPasswordApi, '/account/password')
# -- OAuth integrations
api.add_resource(AccountIntegrateApi, '/account/integrates')
# Not enabled: the resources below are not defined in this module.
# api.add_resource(AccountEmailApi, '/account/email')
# api.add_resource(AccountEmailVerifyApi, '/account/email-verify')
|
||||
31
api/controllers/console/workspace/error.py
Normal file
31
api/controllers/console/workspace/error.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class RepeatPasswordNotMatchError(BaseHTTPException):
    """HTTP 400: the new password and its repetition are not equal."""

    code = 400
    error_code = 'repeat_password_not_match'
    description = "New password and repeat password does not match."
|
||||
|
||||
|
||||
class ProviderRequestFailedError(BaseHTTPException):
    """HTTP 400 for a failed provider request; defines no fixed description."""

    code = 400
    error_code = 'provider_request_failed'
    description = None
|
||||
|
||||
|
||||
class InvalidInvitationCodeError(BaseHTTPException):
    """HTTP 400: the supplied invitation code was not accepted."""

    code = 400
    error_code = 'invalid_invitation_code'
    description = "Invalid invitation code."
|
||||
|
||||
|
||||
class AccountAlreadyInitedError(BaseHTTPException):
    """HTTP 400: the account has already been initialized."""

    code = 400
    error_code = 'account_already_inited'
    description = "Account already inited."
|
||||
|
||||
|
||||
class AccountNotInitializedError(BaseHTTPException):
    """HTTP 400: the account has not been initialized yet."""

    code = 400
    error_code = 'account_not_initialized'
    description = "Account not initialized."
|
||||
141
api/controllers/console/workspace/members.py
Normal file
141
api/controllers/console/workspace/members.py
Normal file
@@ -0,0 +1,141 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, marshal_with, abort, fields, marshal
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import TimestampField
|
||||
from extensions.ext_database import db
|
||||
from models.account import Account, TenantAccountJoin
|
||||
from services.account_service import TenantService, RegisterService
|
||||
|
||||
# Marshalling schema for one workspace member.
account_fields = {
    # identity
    'id': fields.String,
    'name': fields.String,
    'avatar': fields.String,
    'email': fields.String,
    # timestamps
    'last_login_at': TimestampField,
    'created_at': TimestampField,
    # membership
    'role': fields.String,
    'status': fields.String,
}

# Envelope for the member list response.
account_list_fields = {
    'accounts': fields.List(fields.Nested(account_fields)),
}
|
||||
|
||||
|
||||
class MemberListApi(Resource):
    """List all members of current tenant."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(account_list_fields)
    def get(self):
        """Return the members of the current user's tenant with HTTP 200."""
        tenant_members = TenantService.get_tenant_members(current_user.current_tenant)
        # NOTE(review): 'result' is not declared in account_list_fields, so it
        # presumably never reaches the client -- confirm whether it is needed.
        payload = {'result': 'success', 'accounts': tenant_members}
        return payload, 200
|
||||
|
||||
|
||||
class MemberInviteEmailApi(Resource):
    """Invite a new member by email."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Invite ``email`` into the inviter's current tenant with ``role``.

        Returns:
            ``({'result': 'success', 'account': ...}, 201)`` on success, or a
            ``{'code', 'message'}`` dict with status 400/403/409/500 when the
            role is invalid or the service raises.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('email', type=str, required=True, location='json')
        parser.add_argument('role', type=str, required=True, default='admin', location='json')
        args = parser.parse_args()

        invitee_email = args['email']
        invitee_role = args['role']
        # 'owner' cannot be granted through an invitation.
        if invitee_role not in ('admin', 'normal'):
            return {'code': 'invalid-role', 'message': 'Invalid role'}, 400

        inviter = current_user

        try:
            RegisterService.invite_new_member(
                inviter.current_tenant, invitee_email, role=invitee_role, inviter=inviter)

            # Re-read the invited account together with its membership role.
            # NOTE(review): the join is not restricted to the inviter's tenant;
            # for an account in several tenants the returned role may belong
            # to another tenant -- confirm.
            row = (
                db.session.query(Account, TenantAccountJoin.role)
                .join(TenantAccountJoin, Account.id == TenantAccountJoin.account_id)
                .filter(Account.email == invitee_email)
                .first()
            )
            invited, role = row
            account = marshal(invited, account_fields)
            account['role'] = role
        except services.errors.account.CannotOperateSelfError as e:
            return {'code': 'cannot-operate-self', 'message': str(e)}, 400
        except services.errors.account.NoPermissionError as e:
            return {'code': 'forbidden', 'message': str(e)}, 403
        except services.errors.account.AccountAlreadyInTenantError as e:
            return {'code': 'email-taken', 'message': str(e)}, 409
        except Exception as e:
            return {'code': 'unexpected-error', 'message': str(e)}, 500

        # todo:413

        return {'result': 'success', 'account': account}, 201
|
||||
|
||||
|
||||
class MemberCancelInviteApi(Resource):
    """Cancel an invitation by member id."""

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, member_id):
        """Remove the account ``member_id`` from the current tenant.

        Returns:
            ``({'result': 'success'}, 204)`` on success, or a
            ``{'code', 'message'}`` dict with status 400/403/404 for the known
            service errors. An unknown account id aborts with 404.

        Raises:
            ValueError: any other exception raised by the service.
        """
        target = Account.query.get(str(member_id))
        if not target:
            abort(404)

        operator = current_user
        try:
            TenantService.remove_member_from_tenant(operator.current_tenant, target, operator)
        except services.errors.account.CannotOperateSelfError as e:
            return {'code': 'cannot-operate-self', 'message': str(e)}, 400
        except services.errors.account.NoPermissionError as e:
            return {'code': 'forbidden', 'message': str(e)}, 403
        except services.errors.account.MemberNotInTenantError as e:
            return {'code': 'member-not-found', 'message': str(e)}, 404
        except Exception as e:
            raise ValueError(str(e))

        return {'result': 'success'}, 204
|
||||
|
||||
|
||||
class MemberUpdateRoleApi(Resource):
    """Update member role."""

    @setup_required
    @login_required
    @account_initialization_required
    def put(self, member_id):
        """Set the role of account ``member_id`` in the current tenant.

        Returns:
            ``{'result': 'success'}``, or an 'invalid-role' dict with status
            400 when the role is not admin/normal/owner. An unknown account id
            aborts with 404.

        Raises:
            ValueError: any exception raised by the service.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('role', type=str, required=True, location='json')
        new_role = parser.parse_args()['role']

        # The role is validated before the member lookup.
        if new_role not in ('admin', 'normal', 'owner'):
            return {'code': 'invalid-role', 'message': 'Invalid role'}, 400

        target = Account.query.get(str(member_id))
        if not target:
            abort(404)

        try:
            TenantService.update_member_role(
                current_user.current_tenant, target, new_role, current_user)
        except Exception as e:
            raise ValueError(str(e))

        # todo: 403

        return {'result': 'success'}
|
||||
|
||||
|
||||
# Member management routes of the current workspace.
# GET: list members
api.add_resource(MemberListApi, '/workspaces/current/members')
# POST: invite a member by email
api.add_resource(MemberInviteEmailApi, '/workspaces/current/members/invite-email')
# DELETE: remove a member
api.add_resource(MemberCancelInviteApi, '/workspaces/current/members/<uuid:member_id>')
# PUT: change a member's role
api.add_resource(MemberUpdateRoleApi, '/workspaces/current/members/<uuid:member_id>/update-role')
|
||||
246
api/controllers/console/workspace/providers.py
Normal file
246
api/controllers/console/workspace/providers.py
Normal file
@@ -0,0 +1,246 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, reqparse, abort
|
||||
from werkzeug.exceptions import Forbidden
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from core.llm.provider.errors import ValidateFailedError
|
||||
from extensions.ext_database import db
|
||||
from libs import rsa
|
||||
from models.provider import Provider, ProviderType, ProviderName
|
||||
from services.provider_service import ProviderService
|
||||
|
||||
|
||||
class ProviderListApi(Resource):
    """List the model providers configured for the current workspace."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return one dict per provider row of the current tenant.

        Each entry carries the provider's name, type and status flags, the
        quota fields for SYSTEM providers only, and an obfuscated ``token``.
        """
        tenant_id = current_user.current_tenant_id

        # If the type is AZURE_OPENAI, decode and return the four fields of
        # azure_api_type, azure_api_version, azure_api_base, azure_api_key as
        # an object, where azure_api_key displays the first 6 bits in
        # plaintext, the rest is replaced by * and the last two bits are
        # displayed in plaintext.
        #
        # If the type is other, decode and return the Token field directly;
        # the field displays the first 6 bits in plaintext, the rest is
        # replaced by * and the last two bits are displayed in plaintext.

        ProviderService.init_supported_provider(current_user.current_tenant, "cloud")
        providers = Provider.query.filter_by(tenant_id=tenant_id).all()

        provider_list = []
        for row in providers:
            entry = {
                'provider_name': row.provider_name,
                'provider_type': row.provider_type,
                'is_valid': row.is_valid,
                'last_used': row.last_used,
                'is_enabled': row.is_enabled,
            }
            # Quota information is only reported for SYSTEM providers.
            if row.provider_type == ProviderType.SYSTEM.value:
                entry['quota_type'] = row.quota_type
                entry['quota_limit'] = row.quota_limit
                entry['quota_used'] = row.quota_used
            entry['token'] = ProviderService.get_obfuscated_api_key(
                current_user.current_tenant, ProviderName(row.provider_name))
            provider_list.append(entry)

        return provider_list
|
||||
|
||||
|
||||
class ProviderTokenApi(Resource):
    """Store the credentials of a CUSTOM provider for the current workspace."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, provider):
        """Validate, encrypt and persist the ``token`` for ``provider``.

        The token is stored even when validation fails; ``is_valid`` records
        the outcome. An unknown provider name aborts with 404.

        Returns:
            ``({'result': 'success'}, 201)``; a ``warning`` key is added for
            the providers that are only mocked.

        Raises:
            Forbidden: the current role is neither admin nor owner.
            ValueError: the parsed token is empty.
        """
        known_names = [member.value for member in ProviderName]
        if provider not in known_names:
            abort(404)

        # The role of the current user in the ta table must be admin or owner
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            logging.error(
                f'User {current_user.id} is not authorized to update provider token, current_role is {current_user.current_tenant.current_role}')
            raise Forbidden()

        token_type = ProviderService.get_token_type(
            tenant=current_user.current_tenant,
            provider_name=ProviderName(provider)
        )
        parser = reqparse.RequestParser()
        parser.add_argument('token', type=token_type, required=True, nullable=False, location='json')
        args = parser.parse_args()

        configs = args['token']
        if not configs:
            raise ValueError('Token is empty')

        # A failed validation does not stop the save; it is only recorded.
        token_is_valid = True
        try:
            ProviderService.validate_provider_configs(
                tenant=current_user.current_tenant,
                provider_name=ProviderName(provider),
                configs=configs
            )
        except ValidateFailedError:
            token_is_valid = False

        tenant = current_user.current_tenant

        base64_encrypted_token = ProviderService.get_encrypted_token(
            tenant=current_user.current_tenant,
            provider_name=ProviderName(provider),
            configs=configs
        )

        # Only allow updating token for CUSTOM provider type
        provider_model = Provider.query.filter_by(
            tenant_id=tenant.id,
            provider_name=provider,
            provider_type=ProviderType.CUSTOM.value,
        ).first()

        if not provider_model:
            provider_model = Provider(
                tenant_id=tenant.id,
                provider_name=provider,
                provider_type=ProviderType.CUSTOM.value,
                encrypted_config=base64_encrypted_token,
                is_valid=token_is_valid,
            )
            db.session.add(provider_model)
        else:
            provider_model.encrypted_config = base64_encrypted_token
            provider_model.is_valid = token_is_valid

        db.session.commit()

        mocked = (
            ProviderName.ANTHROPIC.value,
            ProviderName.AZURE_OPENAI.value,
            ProviderName.COHERE.value,
            ProviderName.HUGGINGFACEHUB.value,
        )
        if provider in mocked:
            return {'result': 'success', 'warning': 'MOCK: This provider is not supported yet.'}, 201

        return {'result': 'success'}, 201
|
||||
|
||||
|
||||
class ProviderTokenValidateApi(Resource):
    """Check provider credentials without storing them."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, provider):
        """Validate the ``token`` for ``provider``.

        Returns:
            ``{'result': 'success'}`` when validation passes (with a
            ``warning`` for the mocked providers, which are not validated), or
            ``{'result': 'error', 'error': <message>}`` when it fails.
            An unknown provider name aborts with 404.
        """
        known_names = [member.value for member in ProviderName]
        if provider not in known_names:
            abort(404)

        token_type = ProviderService.get_token_type(
            tenant=current_user.current_tenant,
            provider_name=ProviderName(provider)
        )
        parser = reqparse.RequestParser()
        parser.add_argument('token', type=token_type, required=True, nullable=False, location='json')
        args = parser.parse_args()

        # todo: remove this when the provider is supported
        mocked = (
            ProviderName.ANTHROPIC.value,
            ProviderName.AZURE_OPENAI.value,
            ProviderName.COHERE.value,
            ProviderName.HUGGINGFACEHUB.value,
        )
        if provider in mocked:
            return {'result': 'success', 'warning': 'MOCK: This provider is not supported yet.'}

        try:
            ProviderService.validate_provider_configs(
                tenant=current_user.current_tenant,
                provider_name=ProviderName(provider),
                configs=args['token']
            )
        except ValidateFailedError as e:
            # The failure is reported in the body, not as an HTTP error.
            return {'result': 'error', 'error': str(e)}

        return {'result': 'success'}
|
||||
|
||||
|
||||
class ProviderSystemApi(Resource):
    """Read or toggle the SYSTEM provider record of the current workspace."""

    @setup_required
    @login_required
    @account_initialization_required
    def put(self, provider):
        """Enable or disable the SYSTEM provider ``provider``.

        JSON body:
            is_enabled: boolean flag to store (required).

        Returns:
            ``{'result': 'success'}``. Aborts with 404 for an unknown provider
            name and with 403 when the existing record is not a SYSTEM one.
        """
        if provider not in [p.value for p in ProviderName]:
            abort(404)

        parser = reqparse.RequestParser()
        parser.add_argument('is_enabled', type=bool, required=True, location='json')
        args = parser.parse_args()

        # Bind the tenant object rather than ``current_tenant_id``: the code
        # below reads ``tenant.id``, which a bare id value does not provide.
        tenant = current_user.current_tenant

        provider_model = Provider.query.filter_by(tenant_id=tenant.id, provider_name=provider).first()

        if provider_model and provider_model.provider_type == ProviderType.SYSTEM.value:
            # NOTE(review): the flag is written to ``is_valid`` although the
            # argument is called ``is_enabled`` -- confirm this is intended.
            provider_model.is_valid = args['is_enabled']
            db.session.commit()
        elif not provider_model:
            # Presumably expects the tenant object, like the other
            # ProviderService calls in this module -- verify against the service.
            ProviderService.create_system_provider(tenant, provider, args['is_enabled'])
        else:
            abort(403)

        return {'result': 'success'}

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, provider):
        """Return status and quota of the SYSTEM provider ``provider``.

        Aborts with 404 for an unknown provider name or when the current
        tenant has no SYSTEM record for it.

        Raises:
            Forbidden: the current role is neither admin nor owner.
        """
        if provider not in [p.value for p in ProviderName]:
            abort(404)

        # The role of the current user in the ta table must be admin or owner
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        provider_model = db.session.query(Provider).filter(
            Provider.tenant_id == current_user.current_tenant_id,
            Provider.provider_name == provider,
            Provider.provider_type == ProviderType.SYSTEM.value,
        ).first()

        if not provider_model:
            abort(404)

        return {
            'result': 'success',
            'provider': {
                'provider_name': provider_model.provider_name,
                'provider_type': provider_model.provider_type,
                'is_valid': provider_model.is_valid,
                'last_used': provider_model.last_used,
                'is_enabled': provider_model.is_enabled,
                'quota_type': provider_model.quota_type,
                'quota_limit': provider_model.quota_limit,
                'quota_used': provider_model.quota_used
            }
        }
|
||||
|
||||
|
||||
# Deprecated aliases, kept next to the workspace-scoped routes below.
api.add_resource(
    ProviderTokenApi,
    '/providers/<provider>/token',
    endpoint='current_providers_token',
)
api.add_resource(
    ProviderTokenValidateApi,
    '/providers/<provider>/token-validate',
    endpoint='current_providers_token_validate',
)

# POST for storing a provider token
api.add_resource(
    ProviderTokenApi,
    '/workspaces/current/providers/<provider>/token',
    endpoint='workspaces_current_providers_token',
)
# POST for validating a provider token
api.add_resource(
    ProviderTokenValidateApi,
    '/workspaces/current/providers/<provider>/token-validate',
    endpoint='workspaces_current_providers_token_validate',
)

# GET for getting the providers list
api.add_resource(ProviderListApi, '/workspaces/current/providers')
# GET for getting provider quota, PUT for updating provider status
api.add_resource(
    ProviderSystemApi,
    '/workspaces/current/providers/<provider>/system',
    endpoint='workspaces_current_providers_system',
)
|
||||
97
api/controllers/console/workspace/workspace.py
Normal file
97
api/controllers/console/workspace/workspace.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import logging
|
||||
|
||||
from flask import request
|
||||
from flask_login import login_required, current_user
|
||||
from flask_restful import Resource, fields, marshal_with, reqparse, marshal
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.error import AccountNotLinkTenantError
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from libs.helper import TimestampField
|
||||
from extensions.ext_database import db
|
||||
from models.account import Tenant
|
||||
from services.account_service import TenantService
|
||||
from services.workspace_service import WorkspaceService
|
||||
|
||||
# Marshalling schema for a provider nested inside a tenant.
provider_fields = {
    'provider_name': fields.String,
    'provider_type': fields.String,
    'is_valid': fields.Boolean,
    'token_is_set': fields.Boolean,
}

# Marshalling schema for the detailed view of a single tenant.
tenant_fields = {
    'id': fields.String,
    'name': fields.String,
    'plan': fields.String,
    'status': fields.String,
    'created_at': TimestampField,
    'role': fields.String,
    'providers': fields.List(fields.Nested(provider_fields)),
    # NOTE(review): 'in_trail' looks like a misspelling of 'in_trial', but it
    # is the emitted key -- confirm with API consumers before renaming.
    'in_trail': fields.Boolean,
    'trial_end_reason': fields.String,
}

# Marshalling schema for one row of the tenant list.
tenants_fields = {
    'id': fields.String,
    'name': fields.String,
    'plan': fields.String,
    'status': fields.String,
    'created_at': TimestampField,
    'current': fields.Boolean,
}
|
||||
|
||||
|
||||
class TenantListApi(Resource):
    """List the tenants the logged-in account has joined."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return the joined tenants, flagging the active one with ``current``."""
        joined = TenantService.get_join_tenants(current_user)
        active_id = current_user.current_tenant_id

        for workspace in joined:
            # Only the active tenant gets the attribute; the others are left as-is.
            if workspace.id == active_id:
                workspace.current = True

        return {'workspaces': marshal(joined, tenants_fields)}, 200
|
||||
|
||||
|
||||
class TenantApi(Resource):
    """Details of the current tenant of the logged-in account."""

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(tenant_fields)
    def get(self):
        """Return the current tenant's info, serialized through ``tenant_fields``."""
        # NOTE(review): request.path presumably includes the blueprint prefix,
        # in which case this comparison never matches -- confirm.
        if request.path == '/info':
            logging.warning('Deprecated URL /info was used.')

        info = WorkspaceService.get_tenant_info(current_user.current_tenant)
        return info, 200
|
||||
|
||||
|
||||
class SwitchWorkspaceApi(Resource):
    """Switch the logged-in account to another tenant."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Make ``tenant_id`` the current tenant and return its info.

        Raises:
            AccountNotLinkTenantError: the switch failed for any reason.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('tenant_id', type=str, required=True, location='json')
        target_id = parser.parse_args()['tenant_id']

        # check if tenant_id is valid, 403 if not
        try:
            TenantService.switch_tenant(current_user, target_id)
        except Exception:
            raise AccountNotLinkTenantError("Account not link tenant")

        # Load the tenant that is now active to describe it in the response.
        new_tenant = db.session.query(Tenant).get(target_id)
        tenant_info = marshal(WorkspaceService.get_tenant_info(new_tenant), tenant_fields)

        return {'result': 'success', 'new_tenant': tenant_info}
|
||||
|
||||
|
||||
# GET for getting all tenants
api.add_resource(TenantListApi, '/workspaces')
# GET for getting current tenant info
api.add_resource(TenantApi, '/workspaces/current', endpoint='workspaces_current')
# Deprecated alias of /workspaces/current
api.add_resource(TenantApi, '/info', endpoint='info')
# POST for switching tenant
api.add_resource(SwitchWorkspaceApi, '/workspaces/switch')
|
||||
43
api/controllers/console/wraps.py
Normal file
43
api/controllers/console/wraps.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from functools import wraps
|
||||
|
||||
from flask import current_app, abort
|
||||
from flask_login import current_user
|
||||
|
||||
from controllers.console.workspace.error import AccountNotInitializedError
|
||||
|
||||
|
||||
def account_initialization_required(view):
    """Decorate ``view`` so it only runs for initialized accounts.

    Raises:
        AccountNotInitializedError: at call time, when the current user's
            status is 'uninitialized'.
    """
    @wraps(view)
    def wrapper(*args, **kwargs):
        # check account initialization
        if current_user.status == 'uninitialized':
            raise AccountNotInitializedError()
        return view(*args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def only_edition_cloud(view):
    """Decorate ``view`` so it aborts with 404 unless EDITION is 'CLOUD'."""
    @wraps(view)
    def wrapper(*args, **kwargs):
        edition = current_app.config['EDITION']
        if edition != 'CLOUD':
            abort(404)
        return view(*args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def only_edition_self_hosted(view):
    """Decorate ``view`` so it aborts with 404 unless EDITION is 'SELF_HOSTED'."""
    @wraps(view)
    def wrapper(*args, **kwargs):
        edition = current_app.config['EDITION']
        if edition != 'SELF_HOSTED':
            abort(404)
        return view(*args, **kwargs)

    return wrapper
|
||||
12
api/controllers/service_api/__init__.py
Normal file
12
api/controllers/service_api/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask import Blueprint
|
||||
|
||||
from libs.external_api import ExternalApi
|
||||
|
||||
# Blueprint of the service API; all of its routes live under /v1.
bp = Blueprint('service_api', __name__, url_prefix='/v1')
# API object that the resource modules import to register their routes.
api = ExternalApi(bp)
|
||||
|
||||
|
||||
from .app import completion, app, conversation, message
|
||||
|
||||
from .dataset import document
|
||||
27
api/controllers/service_api/app/__init__.py
Normal file
27
api/controllers/service_api/app/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from extensions.ext_database import db
|
||||
from models.model import EndUser
|
||||
|
||||
|
||||
def create_or_update_end_user_for_user_id(app_model, user_id):
    """
    Return the service-API end user for ``user_id``, creating it if missing.

    The lookup matches on tenant, session id (``user_id``) and the type
    'service_api'. A new record is committed immediately.
    """
    # NOTE(review): the lookup is not restricted to app_model.id although new
    # records store app_id; the same user_id used with another app of the
    # tenant would reuse that app's end user -- confirm this is intended.
    existing = (
        db.session.query(EndUser)
        .filter(
            EndUser.tenant_id == app_model.tenant_id,
            EndUser.session_id == user_id,
            EndUser.type == 'service_api',
        )
        .first()
    )
    if existing is not None:
        return existing

    created = EndUser(
        tenant_id=app_model.tenant_id,
        app_id=app_model.id,
        type='service_api',
        is_anonymous=True,
        session_id=user_id,
    )
    db.session.add(created)
    db.session.commit()

    return created
|
||||
43
api/controllers/service_api/app/app.py
Normal file
43
api/controllers/service_api/app/app.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_restful import fields, marshal_with
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.wraps import AppApiResource
|
||||
|
||||
|
||||
class AppParameterApi(AppApiResource):
    """Resource for app variables."""

    # Schema of a single input variable; not referenced by the code in this class.
    variable_fields = {
        'key': fields.String,
        'name': fields.String,
        'description': fields.String,
        'type': fields.String,
        'default': fields.String,
        'max_length': fields.Integer,
        'options': fields.List(fields.String),
    }

    # Schema of the response returned by ``get``.
    parameters_fields = {
        'opening_statement': fields.String,
        'suggested_questions': fields.Raw,
        'suggested_questions_after_answer': fields.Raw,
        'more_like_this': fields.Raw,
        'user_input_form': fields.Raw,
    }

    @marshal_with(parameters_fields)
    def get(self, app_model, end_user):
        """Retrieve app parameters."""
        config = app_model.app_model_config

        parameters = {
            'opening_statement': config.opening_statement,
            'suggested_questions': config.suggested_questions_list,
            'suggested_questions_after_answer': config.suggested_questions_after_answer_dict,
            'more_like_this': config.more_like_this_dict,
            'user_input_form': config.user_input_form_list
        }
        return parameters
|
||||
|
||||
|
||||
# Expose the app parameters resource at /parameters on the service API.
api.add_resource(AppParameterApi, '/parameters')
|
||||
182
api/controllers/service_api/app/completion.py
Normal file
182
api/controllers/service_api/app/completion.py
Normal file
@@ -0,0 +1,182 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import Union, Generator
|
||||
|
||||
from flask import stream_with_context, Response
|
||||
from flask_restful import reqparse
|
||||
from werkzeug.exceptions import NotFound, InternalServerError
|
||||
|
||||
import services
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app import create_or_update_end_user_for_user_id
|
||||
from controllers.service_api.app.error import AppUnavailableError, ProviderNotInitializeError, NotChatAppError, \
|
||||
ConversationCompletedError, CompletionRequestError, ProviderQuotaExceededError, \
|
||||
ProviderModelCurrentlyNotSupportError
|
||||
from controllers.service_api.wraps import AppApiResource
|
||||
from core.conversation_message_task import PubHandler
|
||||
from core.llm.error import LLMBadRequestError, LLMAuthorizationError, LLMAPIUnavailableError, LLMAPIConnectionError, \
|
||||
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
|
||||
from libs.helper import uuid_value
|
||||
from services.completion_service import CompletionService
|
||||
|
||||
|
||||
class CompletionApi(AppApiResource):
    """Service API resource that runs a completion for a completion-mode app."""

    def post(self, app_model, end_user):
        """
        Parse the JSON body and delegate to ``CompletionService.completion``.

        Raises ``AppUnavailableError`` when the app is not in ``completion``
        mode.  Service and LLM errors are translated into the HTTP errors of
        this API; a ``ValueError`` is re-raised unchanged.
        """
        if app_model.mode != 'completion':
            raise AppUnavailableError()

        argument_specs = (
            ('inputs', {'type': dict, 'required': True}),
            ('query', {'type': str}),
            ('response_mode', {'type': str, 'choices': ['blocking', 'streaming']}),
            ('user', {'type': str}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        # Resolve the end user from the optional ``user`` field when the
        # caller did not already supply one.
        if end_user is None and args['user'] is not None:
            end_user = create_or_update_end_user_for_user_id(app_model, args['user'])

        try:
            response = CompletionService.completion(
                app_model=app_model,
                user=end_user,
                args=args,
                from_source='api',
                streaming=(args['response_mode'] == 'streaming')
            )

            return compact_response(response)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError:
            # Passed through untouched so the outer error handling sees it.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
class CompletionStopApi(AppApiResource):
    """Service API resource that stops a running completion task."""

    def post(self, app_model, end_user, task_id):
        """Ask ``PubHandler`` to stop ``task_id``; completion-mode apps only."""
        is_completion_app = app_model.mode == 'completion'
        if not is_completion_app:
            raise AppUnavailableError()

        PubHandler.stop(end_user, task_id)

        result = {'result': 'success'}
        return result, 200
|
||||
|
||||
|
||||
class ChatApi(AppApiResource):
    """Service API resource that sends a chat message to a chat-mode app."""

    def post(self, app_model, end_user):
        """
        Parse the JSON body and delegate to ``CompletionService.completion``.

        Raises ``NotChatAppError`` when the app is not in ``chat`` mode.
        Service and LLM errors are translated into the HTTP errors of this
        API; a ``ValueError`` is re-raised unchanged.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        argument_specs = (
            ('inputs', {'type': dict, 'required': True}),
            ('query', {'type': str, 'required': True}),
            ('response_mode', {'type': str, 'choices': ['blocking', 'streaming']}),
            ('conversation_id', {'type': uuid_value}),
            ('user', {'type': str}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        # Resolve the end user from the optional ``user`` field when the
        # caller did not already supply one.
        if end_user is None and args['user'] is not None:
            end_user = create_or_update_end_user_for_user_id(app_model, args['user'])

        try:
            response = CompletionService.completion(
                app_model=app_model,
                user=end_user,
                args=args,
                from_source='api',
                streaming=(args['response_mode'] == 'streaming')
            )

            return compact_response(response)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError:
            # Passed through untouched so the outer error handling sees it.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
class ChatStopApi(AppApiResource):
    """Service API resource that stops a running chat task."""

    def post(self, app_model, end_user, task_id):
        """Ask ``PubHandler`` to stop ``task_id``; chat-mode apps only."""
        is_chat_app = app_model.mode == 'chat'
        if not is_chat_app:
            raise NotChatAppError()

        PubHandler.stop(end_user, task_id)

        result = {'result': 'success'}
        return result, 200
|
||||
|
||||
|
||||
def compact_response(response: Union[dict, Generator]) -> Response:
    """
    Wrap a completion result in a Flask ``Response``.

    A ``dict`` is serialized as a JSON body.  Anything else is iterated and
    streamed chunk by chunk as ``text/event-stream``.  The streaming response
    is created with status 200 before iteration starts, so an error raised
    while iterating is rendered through ``api.handle_error`` and emitted as a
    final ``data:`` event instead of an HTTP error status.

    :param response: blocking result (``dict``) or an iterable of chunks.
    :return: the Flask response object.
    """
    if isinstance(response, dict):
        return Response(response=json.dumps(response), status=200, mimetype='application/json')

    def error_event(error) -> str:
        """Render ``error`` as a single ``data:`` event."""
        return "data: " + json.dumps(api.handle_error(error).get_json()) + "\n\n"

    def generate() -> Generator:
        try:
            for chunk in response:
                yield chunk
        except services.errors.conversation.ConversationNotExistsError:
            yield error_event(NotFound("Conversation Not Exists."))
        except services.errors.conversation.ConversationCompletedError:
            yield error_event(ConversationCompletedError())
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            yield error_event(AppUnavailableError())
        except ProviderTokenNotInitError:
            yield error_event(ProviderNotInitializeError())
        except QuotaExceededError:
            yield error_event(ProviderQuotaExceededError())
        except ModelCurrentlyNotSupportError:
            yield error_event(ProviderModelCurrentlyNotSupportError())
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            yield error_event(CompletionRequestError(str(e)))
        except ValueError as e:
            yield error_event(e)
        except Exception:
            logging.exception("internal server error.")
            yield error_event(InternalServerError())

    return Response(stream_with_context(generate()), status=200,
                    mimetype='text/event-stream')
|
||||
|
||||
|
||||
# Route registrations for the completion and chat endpoints; the stop
# routes take the task id as a string path segment.
api.add_resource(CompletionApi, '/completion-messages')
api.add_resource(CompletionStopApi, '/completion-messages/<string:task_id>/stop')
api.add_resource(ChatApi, '/chat-messages')
api.add_resource(ChatStopApi, '/chat-messages/<string:task_id>/stop')
|
||||
|
||||
76
api/controllers/service_api/app/conversation.py
Normal file
76
api/controllers/service_api/app/conversation.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_restful import fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app import create_or_update_end_user_for_user_id
|
||||
from controllers.service_api.app.error import NotChatAppError
|
||||
from controllers.service_api.wraps import AppApiResource
|
||||
from libs.helper import TimestampField, uuid_value
|
||||
import services
|
||||
from services.conversation_service import ConversationService
|
||||
|
||||
# Marshalling schema for a single conversation.
conversation_fields = {
    'id': fields.String,
    'name': fields.String,
    'inputs': fields.Raw,
    'status': fields.String,
    'introduction': fields.String,
    'created_at': TimestampField
}

# Marshalling schema for one page of conversations: the page size, whether
# more results exist, and the conversations themselves.
conversation_infinite_scroll_pagination_fields = {
    'limit': fields.Integer,
    'has_more': fields.Boolean,
    'data': fields.List(fields.Nested(conversation_fields))
}
|
||||
|
||||
|
||||
class ConversationApi(AppApiResource):
    """Service API resource that lists conversations of a chat-mode app."""

    @marshal_with(conversation_infinite_scroll_pagination_fields)
    def get(self, app_model, end_user):
        """
        Return one page of conversations, paginated by ``last_id``.

        Raises ``NotChatAppError`` for non-chat apps and ``NotFound`` when
        the service reports that the ``last_id`` conversation does not exist.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        argument_specs = (
            ('last_id', {'type': uuid_value}),
            ('limit', {'type': int_range(1, 100), 'required': False, 'default': 20}),
            ('user', {'type': str}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='args', **options)
        args = parser.parse_args()

        # Resolve the end user from the optional ``user`` query parameter.
        if end_user is None and args['user'] is not None:
            end_user = create_or_update_end_user_for_user_id(app_model, args['user'])

        try:
            page = ConversationService.pagination_by_last_id(
                app_model, end_user, args['last_id'], args['limit'])
        except services.errors.conversation.LastConversationNotExistsError:
            raise NotFound("Last Conversation Not Exists.")
        return page
|
||||
|
||||
|
||||
class ConversationRenameApi(AppApiResource):
    """Service API resource that renames a conversation of a chat-mode app."""

    @marshal_with(conversation_fields)
    def post(self, app_model, end_user, c_id):
        """
        Rename conversation ``c_id`` to the ``name`` given in the JSON body.

        Raises ``NotChatAppError`` for non-chat apps and ``NotFound`` when
        the service reports that the conversation does not exist.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        conversation_id = str(c_id)

        argument_specs = (
            ('name', {'type': str, 'required': True}),
            ('user', {'type': str}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        # Resolve the end user from the optional ``user`` field.
        if end_user is None and args['user'] is not None:
            end_user = create_or_update_end_user_for_user_id(app_model, args['user'])

        try:
            renamed = ConversationService.rename(app_model, conversation_id, end_user, args['name'])
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        return renamed
|
||||
|
||||
|
||||
# Route registrations for the conversation endpoints; the rename route is
# given an explicit endpoint name.
api.add_resource(ConversationRenameApi, '/conversations/<uuid:c_id>/name', endpoint='conversation_name')
api.add_resource(ConversationApi, '/conversations')
|
||||
51
api/controllers/service_api/app/error.py
Normal file
51
api/controllers/service_api/app/error.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class AppUnavailableError(BaseHTTPException):
    """Error with status code 400 and error code ``app_unavailable``."""

    code = 400
    error_code = 'app_unavailable'
    description = "App unavailable."
|
||||
|
||||
|
||||
class NotCompletionAppError(BaseHTTPException):
    """Error with status code 400 and error code ``not_completion_app``."""

    code = 400
    error_code = 'not_completion_app'
    description = "Not Completion App"
|
||||
|
||||
|
||||
class NotChatAppError(BaseHTTPException):
    """Error with status code 400 and error code ``not_chat_app``."""

    code = 400
    error_code = 'not_chat_app'
    description = "Not Chat App"
|
||||
|
||||
|
||||
class ConversationCompletedError(BaseHTTPException):
    """Error with status code 400 and error code ``conversation_completed``."""

    code = 400
    error_code = 'conversation_completed'
    description = "Conversation Completed."
|
||||
|
||||
|
||||
class ProviderNotInitializeError(BaseHTTPException):
    """Error with status code 400 and error code ``provider_not_initialize``."""

    code = 400
    error_code = 'provider_not_initialize'
    description = "Provider Token not initialize."
|
||||
|
||||
|
||||
class ProviderQuotaExceededError(BaseHTTPException):
    """Error with status code 400 and error code ``provider_quota_exceeded``."""

    code = 400
    error_code = 'provider_quota_exceeded'
    description = "Provider quota exceeded."
|
||||
|
||||
|
||||
class ProviderModelCurrentlyNotSupportError(BaseHTTPException):
    """Error with status code 400 and error code ``model_currently_not_support``."""

    code = 400
    error_code = 'model_currently_not_support'
    description = "GPT-4 currently not support."
|
||||
|
||||
|
||||
class CompletionRequestError(BaseHTTPException):
    """Error with status code 400 and error code ``completion_request_error``."""

    code = 400
    error_code = 'completion_request_error'
    description = "Completion request failed."
|
||||
|
||||
81
api/controllers/service_api/app/message.py
Normal file
81
api/controllers/service_api/app/message.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_restful import fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
import services
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app import create_or_update_end_user_for_user_id
|
||||
from controllers.service_api.app.error import NotChatAppError
|
||||
from controllers.service_api.wraps import AppApiResource
|
||||
from libs.helper import TimestampField, uuid_value
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
class MessageListApi(AppApiResource):
    """Service API resource that lists the messages of a conversation."""

    # Marshalling schema for the feedback attached to a message.
    feedback_fields = dict(rating=fields.String)

    # Marshalling schema for a single message; ``feedback`` is read from the
    # message's ``user_feedback`` attribute and may be null.
    message_fields = dict(
        id=fields.String,
        conversation_id=fields.String,
        inputs=fields.Raw,
        query=fields.String,
        answer=fields.String,
        feedback=fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
        created_at=TimestampField,
    )

    # Marshalling schema for one page of messages.
    message_infinite_scroll_pagination_fields = dict(
        limit=fields.Integer,
        has_more=fields.Boolean,
        data=fields.List(fields.Nested(message_fields)),
    )

    @marshal_with(message_infinite_scroll_pagination_fields)
    def get(self, app_model, end_user):
        """
        Return one page of messages, paginated by ``first_id``.

        Raises ``NotChatAppError`` for non-chat apps and ``NotFound`` when
        the service reports a missing conversation or first message.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        argument_specs = (
            ('conversation_id', {'required': True, 'type': uuid_value}),
            ('first_id', {'type': uuid_value}),
            ('limit', {'type': int_range(1, 100), 'required': False, 'default': 20}),
            ('user', {'type': str}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='args', **options)
        args = parser.parse_args()

        # Resolve the end user from the optional ``user`` query parameter.
        if end_user is None and args['user'] is not None:
            end_user = create_or_update_end_user_for_user_id(app_model, args['user'])

        try:
            page = MessageService.pagination_by_first_id(
                app_model, end_user,
                args['conversation_id'], args['first_id'], args['limit'])
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.message.FirstMessageNotExistsError:
            raise NotFound("First Message Not Exists.")
        return page
|
||||
|
||||
|
||||
class MessageFeedbackApi(AppApiResource):
    """Service API resource that records feedback for a message."""

    def post(self, app_model, end_user, message_id):
        """
        Store the ``rating`` from the JSON body as feedback on ``message_id``.

        ``rating`` may be ``'like'``, ``'dislike'`` or null.  Raises
        ``NotFound`` when the service reports that the message does not exist.
        """
        message_id = str(message_id)

        argument_specs = (
            ('rating', {'type': str, 'choices': ['like', 'dislike', None]}),
            ('user', {'type': str}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        # Resolve the end user from the optional ``user`` field.
        if end_user is None and args['user'] is not None:
            end_user = create_or_update_end_user_for_user_id(app_model, args['user'])

        try:
            MessageService.create_feedback(app_model, message_id, end_user, args['rating'])
        except services.errors.message.MessageNotExistsError:
            raise NotFound("Message Not Exists.")

        return {'result': 'success'}
|
||||
|
||||
|
||||
# Route registrations for the message listing and feedback endpoints.
api.add_resource(MessageListApi, '/messages')
api.add_resource(MessageFeedbackApi, '/messages/<uuid:message_id>/feedbacks')
|
||||
0
api/controllers/service_api/dataset/__init__.py
Normal file
0
api/controllers/service_api/dataset/__init__.py
Normal file
129
api/controllers/service_api/dataset/document.py
Normal file
129
api/controllers/service_api/dataset/document.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import datetime
|
||||
import uuid
|
||||
|
||||
from flask import current_app
|
||||
from flask_restful import reqparse
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
import services.dataset_service
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app.error import ProviderNotInitializeError
|
||||
from controllers.service_api.dataset.error import ArchivedDocumentImmutableError, DocumentIndexingError, \
|
||||
DatasetNotInitedError
|
||||
from controllers.service_api.wraps import DatasetApiResource
|
||||
from core.llm.error import ProviderTokenNotInitError
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DocumentService
|
||||
|
||||
|
||||
class DocumentListApi(DatasetApiResource):
    """Resource for documents."""

    def post(self, dataset):
        """
        Create a document in ``dataset`` from plain text sent in the JSON body.

        The text is written to storage under a ``.txt`` key, recorded as an
        ``UploadFile`` row and then handed to ``DocumentService``.  Optional
        ``doc_type`` / ``doc_metadata`` are applied afterwards, keeping only
        metadata keys declared in the schema for that type whose values have
        the declared type.

        Returns a dict holding the id of the new document.
        """
        argument_specs = (
            ('name', {'type': str, 'required': True, 'nullable': False}),
            ('text', {'type': str, 'required': True, 'nullable': False}),
            ('doc_type', {'type': str}),
            ('doc_metadata', {'type': dict}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        if not dataset.indexing_technique:
            raise DatasetNotInitedError("Dataset indexing technique must be set.")

        doc_type = args.get('doc_type')
        doc_metadata = args.get('doc_metadata')

        if doc_type and doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
            raise ValueError('Invalid doc_type.')

        text = args.get('text')

        # A random uuid serves as the stored file name.
        file_uuid = str(uuid.uuid4())
        file_key = 'upload_files/' + dataset.tenant_id + '/' + file_uuid + '.txt'

        # Write the text to storage.
        storage.save(file_key, text)

        # Record the stored file in the database, attributed to the
        # dataset's creator.
        storage_type = current_app.config['STORAGE_TYPE']
        upload_file = UploadFile(
            tenant_id=dataset.tenant_id,
            storage_type=storage_type,
            key=file_key,
            name=args.get('name') + '.txt',
            size=len(text),
            extension='txt',
            mime_type='text/plain',
            created_by=dataset.created_by,
            created_at=datetime.datetime.utcnow(),
            used=True,
            used_by=dataset.created_by,
            used_at=datetime.datetime.utcnow()
        )
        db.session.add(upload_file)
        db.session.commit()

        data_source = {'type': 'upload_file', 'info': upload_file.id}

        try:
            document = DocumentService.save_document_with_dataset_id(
                dataset=dataset,
                document_data={'data_source': data_source},
                account=dataset.created_by_account,
                dataset_process_rule=dataset.latest_process_rule,
                created_from='api'
            )
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()

        if doc_type and doc_metadata:
            metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]

            document.doc_metadata = {}
            for key, value_type in metadata_schema.items():
                value = doc_metadata.get(key)
                # Skip keys that are absent or carry a value of the wrong type.
                if value is None or not isinstance(value, value_type):
                    continue
                document.doc_metadata[key] = value

            document.doc_type = doc_type
            document.updated_at = datetime.datetime.utcnow()
            db.session.commit()

        return {'id': document.id}
|
||||
|
||||
|
||||
class DocumentApi(DatasetApiResource):
    """Resource for a single document of a dataset."""

    def delete(self, dataset, document_id):
        """
        Delete document.

        Raises ``NotFound`` when the document does not exist,
        ``ArchivedDocumentImmutableError`` when it is archived and
        ``DocumentIndexingError`` when the service refuses the deletion
        because the document is being indexed.
        """
        document = DocumentService.get_document(dataset.id, str(document_id))

        if document is None:
            raise NotFound("Document Not Exists.")

        if DocumentService.check_archived(document):
            raise ArchivedDocumentImmutableError()

        try:
            DocumentService.delete_document(document)
        except services.errors.document.DocumentIndexingError:
            raise DocumentIndexingError('Cannot delete document during indexing.')

        result = {'result': 'success'}
        return result, 204
|
||||
|
||||
|
||||
# Route registrations for the dataset document endpoints.
api.add_resource(DocumentListApi, '/documents')
api.add_resource(DocumentApi, '/documents/<uuid:document_id>')
|
||||
20
api/controllers/service_api/dataset/error.py
Normal file
20
api/controllers/service_api/dataset/error.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class ArchivedDocumentImmutableError(BaseHTTPException):
    """Error with status code 403 and error code ``archived_document_immutable``."""

    code = 403
    error_code = 'archived_document_immutable'
    description = "Cannot operate when document was archived."
|
||||
|
||||
|
||||
class DocumentIndexingError(BaseHTTPException):
    """Error with status code 403 and error code ``document_indexing``."""

    code = 403
    error_code = 'document_indexing'
    description = "Cannot operate document during indexing."
|
||||
|
||||
|
||||
class DatasetNotInitedError(BaseHTTPException):
    """Error with status code 403 and error code ``dataset_not_inited``."""

    code = 403
    error_code = 'dataset_not_inited'
    description = "Dataset not inited."
|
||||
95
api/controllers/service_api/wraps.py
Normal file
95
api/controllers/service_api/wraps.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from datetime import datetime
|
||||
from functools import wraps
|
||||
|
||||
from flask import request
|
||||
from flask_restful import Resource
|
||||
from werkzeug.exceptions import NotFound, Unauthorized
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset
|
||||
from models.model import ApiToken, App
|
||||
|
||||
|
||||
def validate_app_token(view=None):
    """
    Decorator that authenticates an app-scoped API token and loads its app.

    The wrapped view is called as ``view(app_model, None, *args, **kwargs)``.
    ``NotFound`` is raised when the app does not exist, its status is not
    ``'normal'`` or its API is not enabled.

    Works both as ``@validate_app_token`` and as ``@validate_app_token()``.
    """
    def decorator(view):
        @wraps(view)
        def decorated(*args, **kwargs):
            api_token = validate_and_get_api_token('app')
            app_model = db.session.query(App).get(api_token.app_id)

            unavailable = (
                not app_model
                or app_model.status != 'normal'
                or not app_model.enable_api
            )
            if unavailable:
                raise NotFound()

            return view(app_model, None, *args, **kwargs)
        return decorated

    # Given a view (bare decorator, or applied through method_decorators):
    # wrap it right away.  Given nothing (called with parentheses): hand
    # back the decorator itself.
    return decorator(view) if view else decorator
|
||||
|
||||
|
||||
def validate_dataset_token(view=None):
    """
    Decorator that authenticates a dataset-scoped API token and loads its dataset.

    The wrapped view is called as ``view(dataset, *args, **kwargs)``.
    ``NotFound`` is raised when the dataset does not exist.

    Works both as ``@validate_dataset_token`` and as
    ``@validate_dataset_token()``.
    """
    def decorator(view):
        @wraps(view)
        def decorated(*args, **kwargs):
            api_token = validate_and_get_api_token('dataset')
            dataset = db.session.query(Dataset).get(api_token.dataset_id)

            if dataset:
                return view(dataset, *args, **kwargs)
            raise NotFound()
        return decorated

    # Given a view (bare decorator, or applied through method_decorators):
    # wrap it right away.  Given nothing (called with parentheses): hand
    # back the decorator itself.
    return decorator(view) if view else decorator
|
||||
|
||||
|
||||
def validate_and_get_api_token(scope=None):
    """
    Validate the ``Authorization`` header and return the matching API token.

    The header must have the form ``Bearer <token>`` (the scheme is compared
    case-insensitively) and a token with that value and ``type`` equal to
    ``scope`` must exist.  On success the token's ``last_used_at`` is set to
    the current UTC time and committed.

    :param scope: token type to match; callers in this module pass
        ``'app'`` or ``'dataset'``.
    :return: the matching ``ApiToken``.
    :raises Unauthorized: if the header is missing or malformed, the scheme
        is not ``bearer``, or no token matches.
    """
    auth_header = request.headers.get('Authorization')
    if auth_header is None:
        raise Unauthorized()

    # A header without a "<scheme> <token>" pair (e.g. just "Bearer" or an
    # empty value) is rejected here; unpacking it directly would raise
    # ValueError instead of Unauthorized.
    header_parts = auth_header.split(None, 1)
    if len(header_parts) != 2:
        raise Unauthorized()

    auth_scheme, auth_token = header_parts
    if auth_scheme.lower() != 'bearer':
        raise Unauthorized()

    api_token = db.session.query(ApiToken).filter(
        ApiToken.token == auth_token,
        ApiToken.type == scope,
    ).first()

    if not api_token:
        raise Unauthorized()

    # Record the time of use on every successful validation.
    api_token.last_used_at = datetime.utcnow()
    db.session.commit()

    return api_token
|
||||
|
||||
|
||||
class AppApiResource(Resource):
    # Every HTTP method handler is wrapped with the app token check.
    method_decorators = [validate_app_token]
|
||||
|
||||
|
||||
class DatasetApiResource(Resource):
    # Every HTTP method handler is wrapped with the dataset token check.
    method_decorators = [validate_dataset_token]
|
||||
10
api/controllers/web/__init__.py
Normal file
10
api/controllers/web/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask import Blueprint
|
||||
|
||||
from libs.external_api import ExternalApi
|
||||
|
||||
# Blueprint for the web app API, mounted under /api, and the API object the
# controller modules of this package register their resources on.
bp = Blueprint('web', __name__, url_prefix='/api')
api = ExternalApi(bp)
|
||||
|
||||
|
||||
from . import completion, app, conversation, message, site, saved_message
|
||||
42
api/controllers/web/app.py
Normal file
42
api/controllers/web/app.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_restful import marshal_with, fields
|
||||
|
||||
from controllers.web import api
|
||||
from controllers.web.wraps import WebApiResource
|
||||
|
||||
|
||||
class AppParameterApi(WebApiResource):
    """Resource for app variables."""

    # Marshalling schema for one input variable (not referenced by ``get``).
    variable_fields = dict(
        key=fields.String,
        name=fields.String,
        description=fields.String,
        type=fields.String,
        default=fields.String,
        max_length=fields.Integer,
        options=fields.List(fields.String),
    )

    # Marshalling schema for the response of ``get``.
    parameters_fields = dict(
        opening_statement=fields.String,
        suggested_questions=fields.Raw,
        suggested_questions_after_answer=fields.Raw,
        more_like_this=fields.Raw,
        user_input_form=fields.Raw,
    )

    @marshal_with(parameters_fields)
    def get(self, app_model, end_user):
        """Retrieve app parameters from the app's model config."""
        config = app_model.app_model_config

        return dict(
            opening_statement=config.opening_statement,
            suggested_questions=config.suggested_questions_list,
            suggested_questions_after_answer=config.suggested_questions_after_answer_dict,
            more_like_this=config.more_like_this_dict,
            user_input_form=config.user_input_form_list,
        )
|
||||
|
||||
|
||||
# Expose the app parameters resource at /parameters on the web API.
api.add_resource(AppParameterApi, '/parameters')
|
||||
175
api/controllers/web/completion.py
Normal file
175
api/controllers/web/completion.py
Normal file
@@ -0,0 +1,175 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import json
|
||||
import logging
|
||||
from typing import Generator, Union
|
||||
|
||||
from flask import Response, stream_with_context
|
||||
from flask_restful import reqparse
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.web import api
|
||||
from controllers.web.error import AppUnavailableError, ConversationCompletedError, \
|
||||
ProviderNotInitializeError, NotChatAppError, NotCompletionAppError, CompletionRequestError, \
|
||||
ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from core.conversation_message_task import PubHandler
|
||||
from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthorizationError, LLMAPIConnectionError, \
|
||||
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
|
||||
from libs.helper import uuid_value
|
||||
from services.completion_service import CompletionService
|
||||
|
||||
|
||||
# define completion api for user
|
||||
class CompletionApi(WebApiResource):
    """Web API resource that runs a completion for a completion-mode app."""

    def post(self, app_model, end_user):
        """
        Parse the JSON body and delegate to ``CompletionService.completion``.

        Raises ``NotCompletionAppError`` when the app is not in
        ``completion`` mode.  Service and LLM errors are translated into the
        HTTP errors of this API; a ``ValueError`` is re-raised unchanged.
        """
        if app_model.mode != 'completion':
            raise NotCompletionAppError()

        argument_specs = (
            ('inputs', {'type': dict, 'required': True}),
            ('query', {'type': str}),
            ('response_mode', {'type': str, 'choices': ['blocking', 'streaming']}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        try:
            response = CompletionService.completion(
                app_model=app_model,
                user=end_user,
                args=args,
                from_source='api',
                streaming=(args['response_mode'] == 'streaming')
            )

            return compact_response(response)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError:
            # Passed through untouched so the outer error handling sees it.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
class CompletionStopApi(WebApiResource):
    """Web API resource that stops a running completion task."""

    def post(self, app_model, end_user, task_id):
        """Ask ``PubHandler`` to stop ``task_id``; completion-mode apps only."""
        is_completion_app = app_model.mode == 'completion'
        if not is_completion_app:
            raise NotCompletionAppError()

        PubHandler.stop(end_user, task_id)

        result = {'result': 'success'}
        return result, 200
|
||||
|
||||
|
||||
class ChatApi(WebApiResource):
    """Web API resource that sends a chat message to a chat-mode app."""

    def post(self, app_model, end_user):
        """
        Parse the JSON body and delegate to ``CompletionService.completion``.

        Raises ``NotChatAppError`` when the app is not in ``chat`` mode.
        Service and LLM errors are translated into the HTTP errors of this
        API; a ``ValueError`` is re-raised unchanged.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        argument_specs = (
            ('inputs', {'type': dict, 'required': True}),
            ('query', {'type': str, 'required': True}),
            ('response_mode', {'type': str, 'choices': ['blocking', 'streaming']}),
            ('conversation_id', {'type': uuid_value}),
        )
        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, location='json', **options)
        args = parser.parse_args()

        try:
            response = CompletionService.completion(
                app_model=app_model,
                user=end_user,
                args=args,
                from_source='api',
                streaming=(args['response_mode'] == 'streaming')
            )

            return compact_response(response)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError:
            # Passed through untouched so the outer error handling sees it.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
class ChatStopApi(WebApiResource):
    """Web endpoint that forwards a stop request for a chat-app task."""

    def post(self, app_model, end_user, task_id):
        """Ask ``PubHandler`` to stop ``task_id`` for ``end_user``.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.

        Returns:
            A ``({'result': 'success'}, 200)`` tuple.
        """
        is_chat_app = app_model.mode == 'chat'
        if not is_chat_app:
            raise NotChatAppError()

        PubHandler.stop(end_user, task_id)

        payload = {'result': 'success'}
        return payload, 200
|
||||
|
||||
|
||||
def compact_response(response: Union[dict, Generator]) -> Response:
    """Wrap a completion result in a Flask ``Response``.

    Args:
        response: a ``dict`` (blocking mode) or an iterable of already
            formatted stream chunks (streaming mode).

    Returns:
        A JSON response for a ``dict``; otherwise a ``text/event-stream``
        response that relays the chunks. Both use status 200.

    Errors raised while the stream is being consumed cannot change the HTTP
    status any more, so they are rendered through ``api.handle_error`` and
    emitted as one final ``data:`` frame instead.
    """
    if isinstance(response, dict):
        return Response(response=json.dumps(response), status=200, mimetype='application/json')

    def error_event(error) -> str:
        """Render ``error`` via the API error handler as one SSE ``data:`` frame."""
        return "data: " + json.dumps(api.handle_error(error).get_json()) + "\n\n"

    def generate() -> Generator:
        try:
            for chunk in response:
                yield chunk
        except services.errors.conversation.ConversationNotExistsError:
            yield error_event(NotFound("Conversation Not Exists."))
        except services.errors.conversation.ConversationCompletedError:
            yield error_event(ConversationCompletedError())
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            yield error_event(AppUnavailableError())
        except ProviderTokenNotInitError:
            yield error_event(ProviderNotInitializeError())
        except QuotaExceededError:
            yield error_event(ProviderQuotaExceededError())
        except ModelCurrentlyNotSupportError:
            yield error_event(ProviderModelCurrentlyNotSupportError())
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            yield error_event(CompletionRequestError(str(e)))
        except ValueError as e:
            yield error_event(e)
        except Exception:
            logging.exception("internal server error.")
            yield error_event(InternalServerError())

    return Response(stream_with_context(generate()), status=200,
                    mimetype='text/event-stream')
|
||||
|
||||
|
||||
# Route table for the web-app completion and chat endpoints.
api.add_resource(CompletionApi, '/completion-messages')
api.add_resource(CompletionStopApi, '/completion-messages/<string:task_id>/stop')
api.add_resource(ChatApi, '/chat-messages')
api.add_resource(ChatStopApi, '/chat-messages/<string:task_id>/stop')
|
||||
121
api/controllers/web/conversation.py
Normal file
121
api/controllers/web/conversation.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_restful import fields, reqparse, marshal_with
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.web import api
|
||||
from controllers.web.error import NotChatAppError
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from libs.helper import TimestampField, uuid_value
|
||||
from services.conversation_service import ConversationService
|
||||
from services.errors.conversation import LastConversationNotExistsError, ConversationNotExistsError
|
||||
from services.web_conversation_service import WebConversationService
|
||||
|
||||
# Marshalling schema for a single conversation.
conversation_fields = {
    'id': fields.String,
    'name': fields.String,
    'inputs': fields.Raw,
    'status': fields.String,
    'introduction': fields.String,
    'created_at': TimestampField,
}

# Marshalling schema for one page of conversations: the page size, whether
# more pages follow, and the conversations themselves.
conversation_infinite_scroll_pagination_fields = {
    'limit': fields.Integer,
    'has_more': fields.Boolean,
    'data': fields.List(fields.Nested(conversation_fields)),
}
|
||||
|
||||
|
||||
class ConversationListApi(WebApiResource):
    """Web endpoint that lists an end user's conversations page by page."""

    @marshal_with(conversation_infinite_scroll_pagination_fields)
    def get(self, app_model, end_user):
        """Return one page of conversations.

        Query arguments: ``last_id`` (UUID cursor), ``limit`` (1-100,
        default 20) and ``pinned`` ('true' / 'false'; omitted means no
        pinned filter is passed on).

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NotFound: the service reports that ``last_id`` does not exist.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        query_parser = reqparse.RequestParser()
        query_parser.add_argument('last_id', type=uuid_value, location='args')
        query_parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
        query_parser.add_argument('pinned', type=str, choices=['true', 'false', None], location='args')
        args = query_parser.parse_args()

        # Tri-state flag: None (not supplied), True or False.
        raw_pinned = args.get('pinned')
        pinned = None if raw_pinned is None else raw_pinned == 'true'

        try:
            return WebConversationService.pagination_by_last_id(
                app_model=app_model,
                end_user=end_user,
                last_id=args['last_id'],
                limit=args['limit'],
                pinned=pinned,
            )
        except LastConversationNotExistsError:
            raise NotFound("Last Conversation Not Exists.")
|
||||
|
||||
|
||||
class ConversationApi(WebApiResource):
    """Web endpoint that deletes one conversation of the current end user."""

    def delete(self, app_model, end_user, c_id):
        """Delete conversation ``c_id`` and remove its pin.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NotFound: the service reports the conversation as missing.

        Returns:
            A ``({"result": "success"}, 204)`` tuple.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        conversation_id = str(c_id)

        # The rename and pin endpoints of this module map a missing
        # conversation to 404; do the same here instead of letting the
        # service error surface as a server error.
        # NOTE(review): assumes ConversationService.delete raises
        # ConversationNotExistsError for unknown ids - confirm in the service.
        try:
            ConversationService.delete(app_model, conversation_id, end_user)
        except ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")

        WebConversationService.unpin(app_model, conversation_id, end_user)

        return {"result": "success"}, 204
|
||||
|
||||
|
||||
class ConversationRenameApi(WebApiResource):
    """Web endpoint that renames one conversation."""

    @marshal_with(conversation_fields)
    def post(self, app_model, end_user, c_id):
        """Rename conversation ``c_id`` to the ``name`` given in the JSON body.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NotFound: the service reports the conversation as missing.

        Returns:
            Whatever ``ConversationService.rename`` returns, marshalled with
            ``conversation_fields``.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        conversation_id = str(c_id)

        body_parser = reqparse.RequestParser()
        body_parser.add_argument('name', type=str, required=True, location='json')
        new_name = body_parser.parse_args()['name']

        try:
            return ConversationService.rename(app_model, conversation_id, end_user, new_name)
        except ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
|
||||
|
||||
|
||||
class ConversationPinApi(WebApiResource):
    """Web endpoint that pins one conversation."""

    def patch(self, app_model, end_user, c_id):
        """Pin conversation ``c_id`` for ``end_user``.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NotFound: the service reports the conversation as missing.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        try:
            WebConversationService.pin(app_model, str(c_id), end_user)
        except ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")

        return {"result": "success"}
|
||||
|
||||
|
||||
class ConversationUnPinApi(WebApiResource):
    """Web endpoint that removes the pin from one conversation."""

    def patch(self, app_model, end_user, c_id):
        """Unpin conversation ``c_id`` for ``end_user``.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        WebConversationService.unpin(app_model, str(c_id), end_user)

        return {"result": "success"}
|
||||
|
||||
|
||||
# Route table for the web-app conversation endpoints. The rename route is
# registered under the explicit endpoint name 'web_conversation_name'.
api.add_resource(ConversationRenameApi, '/conversations/<uuid:c_id>/name', endpoint='web_conversation_name')
api.add_resource(ConversationListApi, '/conversations')
api.add_resource(ConversationApi, '/conversations/<uuid:c_id>')
api.add_resource(ConversationPinApi, '/conversations/<uuid:c_id>/pin')
api.add_resource(ConversationUnPinApi, '/conversations/<uuid:c_id>/unpin')
|
||||
62
api/controllers/web/error.py
Normal file
62
api/controllers/web/error.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class AppUnavailableError(BaseHTTPException):
    """HTTP 400 error with error_code ``app_unavailable``."""

    code = 400
    error_code = 'app_unavailable'
    description = "App unavailable."
|
||||
|
||||
|
||||
class NotCompletionAppError(BaseHTTPException):
    """HTTP 400 error with error_code ``not_completion_app``."""

    code = 400
    error_code = 'not_completion_app'
    description = "Not Completion App"
|
||||
|
||||
|
||||
class NotChatAppError(BaseHTTPException):
    """HTTP 400 error with error_code ``not_chat_app``."""

    code = 400
    error_code = 'not_chat_app'
    description = "Not Chat App"
|
||||
|
||||
|
||||
class ConversationCompletedError(BaseHTTPException):
    """HTTP 400 error with error_code ``conversation_completed``."""

    code = 400
    error_code = 'conversation_completed'
    description = "Conversation Completed."
|
||||
|
||||
|
||||
class ProviderNotInitializeError(BaseHTTPException):
    """HTTP 400 error with error_code ``provider_not_initialize``."""

    code = 400
    error_code = 'provider_not_initialize'
    description = "Provider Token not initialize."
|
||||
|
||||
|
||||
class ProviderQuotaExceededError(BaseHTTPException):
    """HTTP 400 error with error_code ``provider_quota_exceeded``."""

    code = 400
    error_code = 'provider_quota_exceeded'
    description = "Provider quota exceeded."
|
||||
|
||||
|
||||
class ProviderModelCurrentlyNotSupportError(BaseHTTPException):
    """HTTP 400 error with error_code ``model_currently_not_support``."""

    code = 400
    error_code = 'model_currently_not_support'
    description = "GPT-4 currently not support."
|
||||
|
||||
|
||||
class CompletionRequestError(BaseHTTPException):
    """HTTP 400 error with error_code ``completion_request_error``."""

    code = 400
    error_code = 'completion_request_error'
    description = "Completion request failed."
|
||||
|
||||
|
||||
class AppMoreLikeThisDisabledError(BaseHTTPException):
    """HTTP 403 error with error_code ``app_more_like_this_disabled``."""

    code = 403
    error_code = 'app_more_like_this_disabled'
    description = "More like this disabled."
|
||||
|
||||
|
||||
class AppSuggestedQuestionsAfterAnswerDisabledError(BaseHTTPException):
    """HTTP 403 error with error_code ``app_suggested_questions_after_answer_disabled``."""

    code = 403
    error_code = 'app_suggested_questions_after_answer_disabled'
    description = "Function Suggested questions after answer disabled."
|
||||
189
api/controllers/web/message.py
Normal file
189
api/controllers/web/message.py
Normal file
@@ -0,0 +1,189 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import json
|
||||
import logging
|
||||
from typing import Generator, Union
|
||||
|
||||
from flask import stream_with_context, Response
|
||||
from flask_restful import reqparse, fields, marshal_with
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import NotFound, InternalServerError
|
||||
|
||||
import services
|
||||
from controllers.web import api
|
||||
from controllers.web.error import NotChatAppError, CompletionRequestError, ProviderNotInitializeError, \
|
||||
AppMoreLikeThisDisabledError, NotCompletionAppError, AppSuggestedQuestionsAfterAnswerDisabledError, \
|
||||
ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from core.llm.error import LLMRateLimitError, LLMBadRequestError, LLMAuthorizationError, LLMAPIConnectionError, \
|
||||
ProviderTokenNotInitError, LLMAPIUnavailableError, QuotaExceededError, ModelCurrentlyNotSupportError
|
||||
from libs.helper import uuid_value, TimestampField
|
||||
from services.completion_service import CompletionService
|
||||
from services.errors.app import MoreLikeThisDisabledError
|
||||
from services.errors.conversation import ConversationNotExistsError
|
||||
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
class MessageListApi(WebApiResource):
    """Web endpoint that lists the messages of one conversation page by page."""

    # Marshalling schema for the feedback attached to a message.
    feedback_fields = {
        'rating': fields.String,
    }

    # Marshalling schema for a single message; 'feedback' is read from the
    # object's ``user_feedback`` attribute and may be null.
    message_fields = {
        'id': fields.String,
        'conversation_id': fields.String,
        'inputs': fields.Raw,
        'query': fields.String,
        'answer': fields.String,
        'feedback': fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
        'created_at': TimestampField,
    }

    # Marshalling schema for one page of messages.
    message_infinite_scroll_pagination_fields = {
        'limit': fields.Integer,
        'has_more': fields.Boolean,
        'data': fields.List(fields.Nested(message_fields)),
    }

    @marshal_with(message_infinite_scroll_pagination_fields)
    def get(self, app_model, end_user):
        """Return one page of messages.

        Query arguments: ``conversation_id`` (required UUID), ``first_id``
        (UUID cursor) and ``limit`` (1-100, default 20).

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NotFound: the service reports the conversation or the
                ``first_id`` message as missing.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        query_parser = reqparse.RequestParser()
        query_parser.add_argument('conversation_id', required=True, type=uuid_value, location='args')
        query_parser.add_argument('first_id', type=uuid_value, location='args')
        query_parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
        params = query_parser.parse_args()

        try:
            return MessageService.pagination_by_first_id(
                app_model,
                end_user,
                params['conversation_id'],
                params['first_id'],
                params['limit'],
            )
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.message.FirstMessageNotExistsError:
            raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
class MessageFeedbackApi(WebApiResource):
    """Web endpoint that records an end user's rating of a message."""

    def post(self, app_model, end_user, message_id):
        """Pass the ``rating`` from the JSON body to ``MessageService.create_feedback``.

        ``rating`` may be 'like', 'dislike' or null.

        Raises:
            NotFound: the service reports the message as missing.
        """
        target_id = str(message_id)

        body_parser = reqparse.RequestParser()
        body_parser.add_argument('rating', type=str, choices=['like', 'dislike', None], location='json')
        rating = body_parser.parse_args()['rating']

        try:
            MessageService.create_feedback(app_model, target_id, end_user, rating)
        except services.errors.message.MessageNotExistsError:
            raise NotFound("Message Not Exists.")

        return {'result': 'success'}
|
||||
|
||||
|
||||
class MessageMoreLikeThisApi(WebApiResource):
    """Web endpoint that generates another result similar to a given message."""

    def get(self, app_model, end_user, message_id):
        """Call ``CompletionService.generate_more_like_this`` for ``message_id``.

        The required ``response_mode`` query argument selects 'blocking' or
        'streaming' output. Service and LLM errors are translated into the
        web API's HTTP errors; ``ValueError`` is re-raised unchanged and
        anything else is logged and reported as an internal server error.
        """
        if app_model.mode != 'completion':
            raise NotCompletionAppError()

        target_id = str(message_id)

        query_parser = reqparse.RequestParser()
        query_parser.add_argument('response_mode', type=str, required=True, choices=['blocking', 'streaming'], location='args')
        params = query_parser.parse_args()

        wants_stream = params['response_mode'] == 'streaming'

        try:
            result = CompletionService.generate_more_like_this(app_model, end_user, target_id, wants_stream)
            return compact_response(result)
        except MessageNotExistsError:
            raise NotFound("Message Not Exists.")
        except MoreLikeThisDisabledError:
            raise AppMoreLikeThisDisabledError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as llm_error:
            raise CompletionRequestError(str(llm_error))
        except ValueError as value_error:
            # Deliberately propagated untouched.
            raise value_error
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
|
||||
|
||||
|
||||
def compact_response(response: Union[dict, Generator]) -> Response:
    """Wrap a generation result in a Flask ``Response``.

    Args:
        response: a ``dict`` (blocking mode) or an iterable of already
            formatted stream chunks (streaming mode).

    Returns:
        A JSON response for a ``dict``; otherwise a ``text/event-stream``
        response that relays the chunks. Both use status 200.

    Errors raised while the stream is being consumed cannot change the HTTP
    status any more, so they are rendered through ``api.handle_error`` and
    emitted as one final ``data:`` frame instead.
    """
    if isinstance(response, dict):
        return Response(response=json.dumps(response), status=200, mimetype='application/json')

    def error_event(error) -> str:
        """Render ``error`` via the API error handler as one SSE ``data:`` frame."""
        return "data: " + json.dumps(api.handle_error(error).get_json()) + "\n\n"

    def generate() -> Generator:
        try:
            for chunk in response:
                yield chunk
        except MessageNotExistsError:
            yield error_event(NotFound("Message Not Exists."))
        except MoreLikeThisDisabledError:
            yield error_event(AppMoreLikeThisDisabledError())
        except ProviderTokenNotInitError:
            yield error_event(ProviderNotInitializeError())
        except QuotaExceededError:
            yield error_event(ProviderQuotaExceededError())
        except ModelCurrentlyNotSupportError:
            yield error_event(ProviderModelCurrentlyNotSupportError())
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            yield error_event(CompletionRequestError(str(e)))
        except ValueError as e:
            yield error_event(e)
        except Exception:
            logging.exception("internal server error.")
            yield error_event(InternalServerError())

    return Response(stream_with_context(generate()), status=200,
                    mimetype='text/event-stream')
|
||||
|
||||
|
||||
class MessageSuggestedQuestionApi(WebApiResource):
    """Web endpoint that returns suggested follow-up questions for a message."""

    def get(self, app_model, end_user, message_id):
        """Return ``{'data': questions}`` for ``message_id``.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NotFound: the service reports the message or its conversation
                as missing.
            AppSuggestedQuestionsAfterAnswerDisabledError: the feature is
                disabled for the app.
            ProviderNotInitializeError, ProviderQuotaExceededError,
            ProviderModelCurrentlyNotSupportError, CompletionRequestError:
                translated provider / LLM failures.
            InternalServerError: any other failure (logged first).
        """
        if app_model.mode != 'chat':
            # The guard requires a chat app, so report "not a chat app";
            # the previous NotCompletionAppError contradicted the check.
            raise NotChatAppError()

        message_id = str(message_id)

        try:
            questions = MessageService.get_suggested_questions_after_answer(
                app_model=app_model,
                user=end_user,
                message_id=message_id
            )
        except MessageNotExistsError:
            raise NotFound("Message not found")
        except ConversationNotExistsError:
            raise NotFound("Conversation not found")
        except SuggestedQuestionsAfterAnswerDisabledError:
            raise AppSuggestedQuestionsAfterAnswerDisabledError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            raise CompletionRequestError(str(e))
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()

        return {'data': questions}
|
||||
|
||||
|
||||
# Route table for the web-app message endpoints.
api.add_resource(MessageListApi, '/messages')
api.add_resource(MessageFeedbackApi, '/messages/<uuid:message_id>/feedbacks')
api.add_resource(MessageMoreLikeThisApi, '/messages/<uuid:message_id>/more-like-this')
api.add_resource(MessageSuggestedQuestionApi, '/messages/<uuid:message_id>/suggested-questions')
|
||||
74
api/controllers/web/saved_message.py
Normal file
74
api/controllers/web/saved_message.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from flask_restful import reqparse, marshal_with, fields
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.web import api
|
||||
from controllers.web.error import NotCompletionAppError
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from libs.helper import uuid_value, TimestampField
|
||||
from services.errors.message import MessageNotExistsError
|
||||
from services.saved_message_service import SavedMessageService
|
||||
|
||||
# Marshalling schema for the feedback attached to a saved message.
feedback_fields = {
    'rating': fields.String,
}

# Marshalling schema for a single saved message; 'feedback' is read from the
# object's ``user_feedback`` attribute and may be null.
message_fields = {
    'id': fields.String,
    'inputs': fields.Raw,
    'query': fields.String,
    'answer': fields.String,
    'feedback': fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
    'created_at': TimestampField,
}
|
||||
|
||||
|
||||
class SavedMessageListApi(WebApiResource):
    """Web endpoint that lists and creates saved messages of a completion app."""

    # Marshalling schema for one page of saved messages.
    saved_message_infinite_scroll_pagination_fields = {
        'limit': fields.Integer,
        'has_more': fields.Boolean,
        'data': fields.List(fields.Nested(message_fields)),
    }

    @marshal_with(saved_message_infinite_scroll_pagination_fields)
    def get(self, app_model, end_user):
        """Return one page of saved messages.

        Query arguments: ``last_id`` (UUID cursor) and ``limit`` (1-100,
        default 20).

        Raises:
            NotCompletionAppError: the app's mode is not ``'completion'``.
        """
        if app_model.mode != 'completion':
            raise NotCompletionAppError()

        query_parser = reqparse.RequestParser()
        query_parser.add_argument('last_id', type=uuid_value, location='args')
        query_parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
        params = query_parser.parse_args()

        return SavedMessageService.pagination_by_last_id(
            app_model, end_user, params['last_id'], params['limit'])

    def post(self, app_model, end_user):
        """Save the message whose ``message_id`` is given in the JSON body.

        Raises:
            NotCompletionAppError: the app's mode is not ``'completion'``.
            NotFound: the service reports the message as missing.
        """
        if app_model.mode != 'completion':
            raise NotCompletionAppError()

        body_parser = reqparse.RequestParser()
        body_parser.add_argument('message_id', type=uuid_value, required=True, location='json')
        target_id = body_parser.parse_args()['message_id']

        try:
            SavedMessageService.save(app_model, end_user, target_id)
        except MessageNotExistsError:
            raise NotFound("Message Not Exists.")

        return {'result': 'success'}
|
||||
|
||||
|
||||
class SavedMessageApi(WebApiResource):
    """Web endpoint that removes one saved message of a completion app."""

    def delete(self, app_model, end_user, message_id):
        """Delete saved message ``message_id`` for ``end_user``.

        Raises:
            NotCompletionAppError: the app's mode is not ``'completion'``.
        """
        if app_model.mode != 'completion':
            raise NotCompletionAppError()

        SavedMessageService.delete(app_model, end_user, str(message_id))

        return {'result': 'success'}
|
||||
|
||||
|
||||
# Route table for the web-app saved-message endpoints.
api.add_resource(SavedMessageListApi, '/saved-messages')
api.add_resource(SavedMessageApi, '/saved-messages/<uuid:message_id>')
|
||||
73
api/controllers/web/site.py
Normal file
73
api/controllers/web/site.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from flask_restful import fields, marshal_with
|
||||
from werkzeug.exceptions import Forbidden
|
||||
|
||||
from controllers.web import api
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from extensions.ext_database import db
|
||||
from models.model import Site
|
||||
|
||||
|
||||
class AppSiteApi(WebApiResource):
    """Resource for app sites."""

    # Marshalling schema for the app's model configuration; most entries are
    # read from differently named attributes of the source object.
    model_config_fields = {
        'opening_statement': fields.String,
        'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
        'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'),
        'more_like_this': fields.Raw(attribute='more_like_this_dict'),
        'model': fields.Raw(attribute='model_dict'),
        'user_input_form': fields.Raw(attribute='user_input_form_list'),
        'pre_prompt': fields.String,
    }

    # Marshalling schema for the site record.
    site_fields = {
        'title': fields.String,
        'icon': fields.String,
        'icon_background': fields.String,
        'description': fields.String,
        'copyright': fields.String,
        'privacy_policy': fields.String,
        'default_language': fields.String,
        'prompt_public': fields.Boolean,
    }

    # Marshalling schema for the whole response; 'model_config' may be null.
    app_fields = {
        'app_id': fields.String,
        'end_user_id': fields.String,
        'enable_site': fields.Boolean,
        'site': fields.Nested(site_fields),
        'model_config': fields.Nested(model_config_fields, allow_null=True),
        'plan': fields.String,
    }

    @marshal_with(app_fields)
    def get(self, app_model, end_user):
        """Retrieve app site info.

        Raises:
            Forbidden: no ``Site`` row exists for the app.
        """
        site_record = (
            db.session.query(Site)
            .filter(Site.app_id == app_model.id)
            .first()
        )
        if not site_record:
            raise Forbidden()

        return AppSiteInfo(app_model.tenant, app_model, site_record, end_user.id)
|
||||
|
||||
|
||||
# Route for the web-app site info endpoint.
api.add_resource(AppSiteApi, '/site')
|
||||
|
||||
|
||||
class AppSiteInfo:
    """Class to store site information."""

    def __init__(self, tenant, app, site, end_user):
        """Initialize AppSiteInfo instance.

        Args:
            tenant: object providing ``plan``.
            app: object providing ``id``, ``enable_site`` and
                ``app_model_config``.
            site: object providing ``prompt_public``; stored as ``self.site``.
            end_user: value stored as ``self.end_user_id``.

        ``model_config`` is the app's model config only when the site is
        enabled and its prompt is public; otherwise it is ``None``.
        """
        self.app_id = app.id
        self.end_user_id = end_user
        self.enable_site = app.enable_site
        self.site = site
        self.plan = tenant.plan

        expose_config = app.enable_site and site.prompt_public
        self.model_config = app.app_model_config if expose_config else None
|
||||
107
api/controllers/web/wraps.py
Normal file
107
api/controllers/web/wraps.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
import uuid
|
||||
from functools import wraps
|
||||
|
||||
from flask import request, session
|
||||
from flask_restful import Resource
|
||||
from werkzeug.exceptions import NotFound, Unauthorized
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.model import App, Site, EndUser
|
||||
|
||||
|
||||
def validate_token(view=None):
    """Decorator that authenticates a web request before calling the view.

    Usable both as ``@validate_token`` and as ``@validate_token()``. The
    wrapped view is called with the resolved ``app_model`` and ``end_user``
    inserted ahead of its own arguments.

    Raises (from the wrapper):
        NotFound: the site's app does not exist, its status is not
            ``'normal'``, or its site is not enabled.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            site = validate_and_get_site()

            app_model = db.session.query(App).get(site.app_id)
            unavailable = (
                not app_model
                or app_model.status != 'normal'
                or not app_model.enable_site
            )
            if unavailable:
                raise NotFound()

            end_user = create_or_update_end_user_for_session(app_model)

            return func(app_model, end_user, *args, **kwargs)
        return wrapper

    return decorator(view) if view else decorator
|
||||
|
||||
|
||||
def validate_and_get_site():
    """
    Validate the request's bearer token and return the matching site.

    The ``Authorization`` header must have the form ``Bearer <token>``
    (scheme compared case-insensitively); the token is looked up as the
    ``code`` of a ``Site`` whose status is ``'normal'``.

    Raises:
        Unauthorized: the header is missing, malformed, or not a bearer token.
        NotFound: no matching site exists.
    """
    auth_header = request.headers.get('Authorization')
    if auth_header is None:
        raise Unauthorized()

    # An empty header or a bare scheme without a token ("Bearer") used to
    # break tuple unpacking with ValueError, surfacing as a server error;
    # treat every malformed header as an authentication failure instead.
    parts = auth_header.split(None, 1)
    if len(parts) != 2:
        raise Unauthorized()

    auth_scheme, auth_token = parts
    if auth_scheme.lower() != 'bearer':
        raise Unauthorized()

    site = db.session.query(Site).filter(
        Site.code == auth_token,
        Site.status == 'normal'
    ).first()

    if not site:
        raise NotFound()

    return site
|
||||
|
||||
|
||||
def create_or_update_end_user_for_session(app_model):
    """
    Return the browser end user bound to the current session.

    Stores a fresh session ID in the Flask session when it has none, then
    looks up the ``EndUser`` of type ``'browser'`` with that session ID. If
    none exists, an anonymous one is created for ``app_model``'s tenant and
    app and committed.
    """
    if 'session_id' not in session:
        session['session_id'] = generate_session_id()
    session_id = session.get('session_id')

    existing_user = (
        db.session.query(EndUser)
        .filter(
            EndUser.session_id == session_id,
            EndUser.type == 'browser'
        )
        .first()
    )
    if existing_user is not None:
        return existing_user

    new_user = EndUser(
        tenant_id=app_model.tenant_id,
        app_id=app_model.id,
        type='browser',
        is_anonymous=True,
        session_id=session_id
    )
    db.session.add(new_user)
    db.session.commit()
    return new_user
|
||||
|
||||
|
||||
def generate_session_id():
    """
    Generate a unique session ID.

    Draws random UUID4 strings until one is found that no ``EndUser`` row
    already uses as its ``session_id``.
    """
    while True:
        candidate = str(uuid.uuid4())
        clashes = (
            db.session.query(EndUser)
            .filter(EndUser.session_id == candidate)
            .count()
        )
        if clashes == 0:
            return candidate
|
||||
|
||||
|
||||
class WebApiResource(Resource):
    """Base resource for the web API; lists ``validate_token`` as its method decorator."""

    method_decorators = [validate_token]
|
||||
52
api/core/__init__.py
Normal file
52
api/core/__init__.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import langchain
|
||||
from flask import Flask
|
||||
from jieba.analyse import default_tfidf
|
||||
from langchain import set_handler
|
||||
from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING
|
||||
from llama_index import IndexStructType, QueryMode
|
||||
from llama_index.indices.registry import INDEX_STRUT_TYPE_TO_QUERY_MAP
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.index.keyword_table.jieba_keyword_table import GPTJIEBAKeywordTableIndex
|
||||
from core.index.keyword_table.stopwords import STOPWORDS
|
||||
from core.prompt.prompt_template import OneLineFormatter
|
||||
from core.vector_store.vector_store import VectorStore
|
||||
from core.vector_store.vector_store_index_query import EnhanceGPTVectorStoreIndexQuery
|
||||
|
||||
|
||||
class HostedOpenAICredential(BaseModel):
    """Holds the API key of the hosted OpenAI credential."""

    api_key: str
|
||||
|
||||
|
||||
class HostedLLMCredentials(BaseModel):
    """Container for hosted LLM credentials; ``openai`` stays ``None`` until set."""

    openai: Optional[HostedOpenAICredential] = None
|
||||
|
||||
|
||||
# Module-level credential holder; starts empty and is filled in by init_app()
# when the app config provides OPENAI_API_KEY.
hosted_llm_credentials = HostedLLMCredentials()
|
||||
|
||||
|
||||
def init_app(app: Flask):
    """Install the core package's global hooks and settings.

    Registers the one-line formatter for 'f-string' prompts, the query maps
    for the keyword-table, Weaviate and Qdrant index types, and the jieba
    stop words. When the ``DEBUG`` environment variable equals 'true'
    (case-insensitive), enables verbose langchain output with the stdout
    callback handler. When the app config has ``OPENAI_API_KEY``, stores it
    in ``hosted_llm_credentials``.
    """
    DEFAULT_FORMATTER_MAPPING['f-string'] = OneLineFormatter().format

    INDEX_STRUT_TYPE_TO_QUERY_MAP[IndexStructType.KEYWORD_TABLE] = GPTJIEBAKeywordTableIndex.get_query_map()
    # Both vector stores share the same query class for both query modes;
    # each gets its own mapping object.
    for struct_type in (IndexStructType.WEAVIATE, IndexStructType.QDRANT):
        INDEX_STRUT_TYPE_TO_QUERY_MAP[struct_type] = {
            QueryMode.DEFAULT: EnhanceGPTVectorStoreIndexQuery,
            QueryMode.EMBEDDING: EnhanceGPTVectorStoreIndexQuery,
        }

    default_tfidf.stop_words = STOPWORDS

    debug_enabled = os.environ.get("DEBUG", "").lower() == 'true'
    if debug_enabled:
        langchain.verbose = True
        set_handler(DifyStdOutCallbackHandler())

    openai_api_key = app.config.get("OPENAI_API_KEY")
    if openai_api_key:
        hosted_llm_credentials.openai = HostedOpenAICredential(api_key=openai_api_key)
|
||||
89
api/core/agent/agent_builder.py
Normal file
89
api/core/agent/agent_builder.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from typing import Optional
|
||||
|
||||
from langchain import LLMChain
|
||||
from langchain.agents import ZeroShotAgent, AgentExecutor, ConversationalAgent
|
||||
from langchain.callbacks import CallbackManager
|
||||
from langchain.memory.chat_memory import BaseChatMemory
|
||||
|
||||
from core.callback_handler.agent_loop_gather_callback_handler import AgentLoopGatherCallbackHandler
|
||||
from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
|
||||
|
||||
class AgentBuilder:
    """Class-level helpers that assemble a langchain agent executor."""

    @classmethod
    def to_agent_chain(cls, tenant_id: str, tools, memory: Optional[BaseChatMemory],
                       dataset_tool_callback_handler: DatasetToolCallbackHandler,
                       agent_loop_gather_callback_handler: AgentLoopGatherCallbackHandler):
        """Build an ``AgentExecutor`` that runs ``tools`` with an LLM-driven agent.

        :param tenant_id: tenant the LLM is built for
        :param tools: tools offered to the agent; their ``callback_manager`` is replaced
        :param memory: optional chat memory; its presence selects the conversational agent
        :param dataset_tool_callback_handler: handler attached to every tool
        :param agent_loop_gather_callback_handler: handler attached to the LLM, the tools
            and the executor; its ``model_name`` selects the LLM model
        :return: the configured agent executor
        """
        gather_handler = agent_loop_gather_callback_handler

        llm = LLMBuilder.to_llm(
            tenant_id=tenant_id,
            model_name=gather_handler.model_name,
            temperature=0,
            max_tokens=1024,
            callback_manager=CallbackManager([gather_handler, DifyStdOutCallbackHandler()]),
        )

        # Every tool reports to the same manager.
        shared_tool_manager = CallbackManager(
            [gather_handler, dataset_tool_callback_handler, DifyStdOutCallbackHandler()]
        )
        for tool in tools:
            tool.callback_manager = shared_tool_manager

        agent_llm_chain = LLMChain(
            llm=llm,
            prompt=cls.build_agent_prompt_template(tools=tools, memory=memory),
        )
        agent = cls.build_agent(agent_llm_chain=agent_llm_chain, memory=memory)

        # `generate` will continue to complete the last inference after reaching
        # the iteration limit or request time limit
        return AgentExecutor.from_agent_and_tools(
            tools=tools,
            agent=agent,
            memory=memory,
            callback_manager=CallbackManager([gather_handler, DifyStdOutCallbackHandler()]),
            max_iterations=6,
            early_stopping_method="generate",
        )

    @classmethod
    def build_agent_prompt_template(cls, tools, memory: Optional[BaseChatMemory]):
        """Create the agent prompt: conversational when ``memory`` is truthy, zero-shot otherwise."""
        agent_cls = ConversationalAgent if memory else ZeroShotAgent
        return agent_cls.create_prompt(tools=tools)

    @classmethod
    def build_agent(cls, agent_llm_chain: LLMChain, memory: Optional[BaseChatMemory]):
        """Instantiate the agent around ``agent_llm_chain``, using the same rule as the prompt."""
        agent_cls = ConversationalAgent if memory else ZeroShotAgent
        return agent_cls(llm_chain=agent_llm_chain)
|
||||
178
api/core/callback_handler/agent_loop_gather_callback_handler.py
Normal file
178
api/core/callback_handler/agent_loop_gather_callback_handler.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from typing import Any, Dict, List, Union, Optional
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
from core.callback_handler.entity.agent_loop import AgentLoop
|
||||
from core.conversation_message_task import ConversationMessageTask
|
||||
|
||||
|
||||
class AgentLoopGatherCallbackHandler(BaseCallbackHandler):
    """Callback handler that gathers the loops an agent goes through.

    A loop is opened by ``on_llm_start`` when none is in progress and moves
    through the statuses ``llm_started`` -> ``llm_end`` -> ``agent_action`` ->
    ``tool_end`` or ``agent_finish``. Each finished loop is reported to the
    conversation message task via ``on_agent_end`` and appended to
    ``agent_loops``.
    """

    def __init__(self, model_name, conversation_message_task: ConversationMessageTask) -> None:
        """Initialize callback handler.

        :param model_name: model name forwarded to ``on_agent_end`` with each finished loop
        :param conversation_message_task: task notified when a loop finishes
        """
        self.model_name = model_name
        self.conversation_message_task = conversation_message_task
        self._agent_loops = []
        self._current_loop = None
        # Not set by this class; forwarded as-is to `on_agent_end`.
        self.current_chain = None

    @property
    def agent_loops(self) -> List[AgentLoop]:
        """Loops finished so far, in completion order."""
        return self._agent_loops

    def clear_agent_loops(self) -> None:
        """Forget all finished loops and the loop in progress."""
        self._agent_loops = []
        self._current_loop = None

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    @property
    def ignore_chain(self) -> bool:
        """Whether to ignore chain callbacks."""
        return True

    @staticmethod
    def _extract_thought(log: Optional[str]) -> str:
        """Return the text that precedes the ``Action:`` marker in an agent log.

        Unlike ``str.index``, a log without a ``"\\nAction:"`` marker does not
        raise: a log that starts directly with ``Action:`` has no thought, and
        any other log is returned whole (stripped).
        """
        if not log:
            return ''
        marker_at = log.find("\nAction:")
        if marker_at == -1:
            return '' if log.lstrip().startswith("Action:") else log.strip()
        return log[:marker_at].strip()

    def _complete_current_loop(self, status: str) -> None:
        """Mark the loop in progress as finished, report it and store it."""
        loop = self._current_loop
        loop.status = status
        loop.completed = True
        loop.completed_at = time.perf_counter()
        loop.latency = loop.completed_at - loop.started_at

        self.conversation_message_task.on_agent_end(self.current_chain, self.model_name, loop)

        self._agent_loops.append(loop)
        self._current_loop = None

    def on_llm_start(
            self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Open a new loop for the first prompt unless one is already in progress."""
        # serialized={'name': 'OpenAI'}
        # prompts=['Answer the following questions...\nThought:']
        # kwargs={}
        if not self._current_loop:
            # Agent start with a LLM query
            self._current_loop = AgentLoop(
                position=len(self._agent_loops) + 1,
                prompt=prompts[0],
                status='llm_started',
                started_at=time.perf_counter()
            )

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Record the completion text and token usage of the loop's LLM call."""
        # kwargs={}
        if self._current_loop and self._current_loop.status == 'llm_started':
            self._current_loop.status = 'llm_end'
            self._current_loop.prompt_tokens = response.llm_output['token_usage']['prompt_tokens']
            self._current_loop.completion = response.generations[0][0].text
            self._current_loop.completion_tokens = response.llm_output['token_usage']['completion_tokens']

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error and drop every gathered loop."""
        logging.error(error)
        self.clear_agent_loops()

    def on_chain_start(
            self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_chain_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error; gathered loops are kept."""
        logging.error(error)

    def on_tool_start(
            self,
            serialized: Dict[str, Any],
            input_str: str,
            **kwargs: Any,
    ) -> None:
        """Do nothing."""
        # kwargs={'color': 'green', 'llm_prefix': 'Thought:', 'observation_prefix': 'Observation: '}
        # input_str='action-input'
        # serialized={'description': 'A search engine. Useful for when you need to answer questions about current events. Input should be a search query.', 'name': 'Search'}
        pass

    def on_agent_action(
            self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        """Record the thought, tool name and tool input chosen by the agent."""
        thought = self._extract_thought(action.log)

        if self._current_loop and self._current_loop.status == 'llm_end':
            self._current_loop.status = 'agent_action'
            self._current_loop.thought = thought
            self._current_loop.tool_name = action.tool
            self._current_loop.tool_input = action.tool_input

    def on_tool_end(
            self,
            output: str,
            color: Optional[str] = None,
            observation_prefix: Optional[str] = None,
            llm_prefix: Optional[str] = None,
            **kwargs: Any,
    ) -> None:
        """Finish the loop with the tool output.

        Nothing happens when no loop is waiting in the ``agent_action`` state,
        or when ``output`` is empty or the literal string ``'None'``.
        """
        # kwargs={'name': 'Search'}
        # llm_prefix='Thought:'
        # observation_prefix='Observation: '
        # output='53 years'
        if self._current_loop and self._current_loop.status == 'agent_action' and output and output != 'None':
            self._current_loop.tool_output = output
            self._complete_current_loop('tool_end')

    def on_tool_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error and drop every gathered loop."""
        logging.error(error)
        self.clear_agent_loops()

    def on_text(
            self,
            text: str,
            color: Optional[str] = None,
            end: str = "",
            **kwargs: Optional[str],
    ) -> None:
        """Do nothing."""
        pass

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
        """Finish the loop in progress, or relabel the last stored loop.

        A loop in the ``llm_end`` or ``agent_action`` state is completed with
        status ``agent_finish``. When no loop is in progress but finished loops
        exist, only the status of the last one is changed.
        """
        # Final Answer
        if self._current_loop and self._current_loop.status in ('llm_end', 'agent_action'):
            self._complete_current_loop('agent_finish')
        elif not self._current_loop and self._agent_loops:
            self._agent_loops[-1].status = 'agent_finish'
|
||||
117
api/core/callback_handler/dataset_tool_callback_handler.py
Normal file
117
api/core/callback_handler/dataset_tool_callback_handler.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import logging
|
||||
|
||||
from typing import Any, Dict, List, Union, Optional
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
from core.callback_handler.entity.dataset_query import DatasetQueryObj
|
||||
from core.conversation_message_task import ConversationMessageTask
|
||||
|
||||
|
||||
class DatasetToolCallbackHandler(BaseCallbackHandler):
    """Callback handler that reports dataset tool queries to the conversation message task."""

    # Tool names are expected to look like "dataset-<dataset id>".
    _TOOL_NAME_PREFIX = "dataset-"

    def __init__(self, conversation_message_task: ConversationMessageTask) -> None:
        """Initialize callback handler.

        :param conversation_message_task: task notified of every dataset query
        """
        self.queries = []
        self.conversation_message_task = conversation_message_task

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    @property
    def ignore_llm(self) -> bool:
        """Whether to ignore LLM callbacks."""
        return True

    @property
    def ignore_chain(self) -> bool:
        """Whether to ignore chain callbacks."""
        return True

    @property
    def ignore_agent(self) -> bool:
        """Whether to ignore agent callbacks."""
        return False

    def on_tool_start(
            self,
            serialized: Dict[str, Any],
            input_str: str,
            **kwargs: Any,
    ) -> None:
        """Report the query sent to a dataset tool.

        The dataset id is the part of the tool name after the ``dataset-``
        prefix. A missing name or a name without that prefix is logged and
        skipped, because slicing it would raise or yield a bogus dataset id.
        """
        tool_name = serialized.get('name')
        prefix = self._TOOL_NAME_PREFIX
        if not tool_name or not tool_name.startswith(prefix):
            logging.warning("Skip dataset query record, unexpected tool name: %s", tool_name)
            return

        dataset_id = tool_name[len(prefix):]
        self.conversation_message_task.on_dataset_query_end(DatasetQueryObj(dataset_id=dataset_id, query=input_str))

    def on_tool_end(
            self,
            output: str,
            color: Optional[str] = None,
            observation_prefix: Optional[str] = None,
            llm_prefix: Optional[str] = None,
            **kwargs: Any,
    ) -> None:
        """Do nothing."""
        # kwargs={'name': 'Search'}
        # llm_prefix='Thought:'
        # observation_prefix='Observation: '
        # output='53 years'
        pass

    def on_tool_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error."""
        logging.error(error)

    def on_chain_start(
            self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_chain_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_llm_start(
            self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error."""
        logging.error(error)

    def on_agent_action(
            self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        """Do nothing."""
        pass

    def on_text(
            self,
            text: str,
            color: Optional[str] = None,
            end: str = "",
            **kwargs: Optional[str],
    ) -> None:
        """Do nothing."""
        pass

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
        """Do nothing."""
        pass
|
||||
23
api/core/callback_handler/entity/agent_loop.py
Normal file
23
api/core/callback_handler/entity/agent_loop.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AgentLoop(BaseModel):
    """State of one agent loop: an LLM call, the chosen action and its tool result."""

    # 1-based position of this loop among the gathered loops
    position: int = 1

    # text preceding the "Action:" marker in the agent log
    thought: str = None
    tool_name: str = None
    tool_input: str = None
    tool_output: str = None

    # prompt sent to the LLM and the completion it returned, with token counts
    prompt: str = None
    prompt_tokens: int = None
    completion: str = None
    completion_tokens: int = None

    # completed_at - started_at
    latency: float = None

    # statuses assigned by the gathering handler:
    # 'llm_started', 'llm_end', 'agent_action', 'tool_end', 'agent_finish'
    status: str = 'llm_started'
    completed: bool = False

    # time.perf_counter() readings
    started_at: float = None
    completed_at: float = None
|
||||
16
api/core/callback_handler/entity/chain_result.py
Normal file
16
api/core/callback_handler/entity/chain_result.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ChainResult(BaseModel):
    """Inputs, outputs and timing recorded for one chain run."""

    # name of the chain, as given in the callback's `serialized` payload
    type: str = None
    # chain inputs and chain outputs
    prompt: dict = None
    completion: dict = None

    # 'chain_started' until the chain ends, then 'chain_ended'
    status: str = 'chain_started'
    completed: bool = False

    # time.perf_counter() readings
    started_at: float = None
    completed_at: float = None

    agent_result: dict = None
    """only when type is 'AgentExecutor'"""
|
||||
6
api/core/callback_handler/entity/dataset_query.py
Normal file
6
api/core/callback_handler/entity/dataset_query.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class DatasetQueryObj(BaseModel):
    """A query issued against a dataset."""

    # id of the queried dataset
    dataset_id: str = None
    # query text
    query: str = None
|
||||
9
api/core/callback_handler/entity/llm_message.py
Normal file
9
api/core/callback_handler/entity/llm_message.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class LLMMessage(BaseModel):
    """Prompt, completion, token counts and latency of one LLM call."""

    # NOTE(review): annotated as str, but LLMCallbackHandler.on_llm_start assigns
    # a list of {"role", "text"} dicts to this field - confirm the intended type.
    prompt: str = ''
    prompt_tokens: int = 0
    completion: str = ''
    completion_tokens: int = 0
    # difference between two time.perf_counter() readings
    latency: float = 0.0
|
||||
38
api/core/callback_handler/index_tool_callback_handler.py
Normal file
38
api/core/callback_handler/index_tool_callback_handler.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from llama_index import Response
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import DocumentSegment
|
||||
|
||||
|
||||
class IndexToolCallbackHandler:
    """Keeps the response passed to the most recent ``on_tool_end`` call."""

    def __init__(self) -> None:
        # None until `on_tool_end` has been called
        self._response = None

    @property
    def response(self) -> Response:
        """Response stored by ``on_tool_end``; ``None`` before the first call."""
        return self._response

    def on_tool_end(self, response: Response) -> None:
        """Store ``response`` so it can be read back through ``response``."""
        self._response = response
|
||||
|
||||
|
||||
class DatasetIndexToolCallbackHandler(IndexToolCallbackHandler):
    """Callback handler for dataset tool: counts hits on the returned document segments."""

    def __init__(self, dataset_id: str) -> None:
        """Remember which dataset the hit counts belong to."""
        super().__init__()
        self.dataset_id = dataset_id

    def on_tool_end(self, response: Response) -> None:
        """Increment ``hit_count`` of each segment referenced by ``response.source_nodes``.

        One UPDATE is issued per source node, so a segment listed twice is
        counted twice.
        """
        # NOTE(review): this override neither stores `response` nor calls the
        # parent `on_tool_end`, and no commit is issued here - confirm that
        # both are handled (or not needed) by the caller.
        for source in response.source_nodes:
            index_node_id = source.node.doc_id

            matching_segments = db.session.query(DocumentSegment).filter(
                DocumentSegment.dataset_id == self.dataset_id,
                DocumentSegment.index_node_id == index_node_id,
            )
            # add hit count to document segment
            matching_segments.update(
                {DocumentSegment.hit_count: DocumentSegment.hit_count + 1},
                synchronize_session=False,
            )
|
||||
|
||||
147
api/core/callback_handler/llm_callback_handler.py
Normal file
147
api/core/callback_handler/llm_callback_handler.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Union, Optional
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult, HumanMessage, AIMessage, SystemMessage
|
||||
|
||||
from core.callback_handler.entity.llm_message import LLMMessage
|
||||
from core.conversation_message_task import ConversationMessageTask, ConversationTaskStoppedException
|
||||
from core.llm.streamable_chat_open_ai import StreamableChatOpenAI
|
||||
from core.llm.streamable_open_ai import StreamableOpenAI
|
||||
|
||||
|
||||
class LLMCallbackHandler(BaseCallbackHandler):
    """Tracks prompt, completion, token counts and latency of an LLM call.

    The collected ``LLMMessage`` is saved through the conversation message
    task when the call ends, or when it is stopped while streaming.
    """

    def __init__(self, llm: Union[StreamableOpenAI, StreamableChatOpenAI],
                 conversation_message_task: ConversationMessageTask):
        """Initialize callback handler.

        :param llm: LLM used to count tokens
        :param conversation_message_task: task that receives message text and the final message
        """
        self.llm = llm
        self.llm_message = LLMMessage()
        self.start_at = None
        self.conversation_message_task = conversation_message_task

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    def on_llm_start(
            self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Start timing and record the prompt with its token count.

        For models whose name contains ``Chat`` each prompt is expected to be
        ``"<role>: <content>"``; other models record ``prompts[0]`` as a single
        user prompt.
        """
        self.start_at = time.perf_counter()

        if 'Chat' not in serialized['name']:
            first_prompt = prompts[0]
            self.llm_message.prompt = [{
                "role": 'user',
                "text": first_prompt
            }]
            self.llm_message.prompt_tokens = self.llm.get_num_tokens(first_prompt)
            return

        # prompt role -> (stored role, message class); other roles keep their
        # name and become system messages
        known_roles = {
            'human': ('user', HumanMessage),
            'ai': ('assistant', AIMessage),
        }

        real_prompts = []
        messages = []
        for prompt in prompts:
            raw_role, content = prompt.split(': ', maxsplit=1)
            role, message_cls = known_roles.get(raw_role, (raw_role, SystemMessage))

            real_prompts.append({
                "role": role,
                "text": content
            })
            messages.append(message_cls(content=content))

        self.llm_message.prompt = real_prompts
        self.llm_message.prompt_tokens = self.llm.get_messages_tokens(messages)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Record latency and completion tokens, then save the message.

        When not streaming, the completion text and token usage come from
        ``response``; when streaming, the tokens of the text accumulated by
        ``on_llm_new_token`` are counted.
        """
        self.llm_message.latency = time.perf_counter() - self.start_at

        task = self.conversation_message_task
        if task.streaming:
            self.llm_message.completion_tokens = self.llm.get_num_tokens(self.llm_message.completion)
        else:
            completion_text = response.generations[0][0].text
            task.append_message_text(completion_text)
            self.llm_message.completion = completion_text
            self.llm_message.completion_tokens = response.llm_output['token_usage']['completion_tokens']

        task.save_message(self.llm_message)

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Forward the token to the task and append it to the completion."""
        self.conversation_message_task.append_message_text(token)
        self.llm_message.completion += token

    def on_llm_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Save the partial message when a streaming task was stopped; log any other error."""
        if not isinstance(error, ConversationTaskStoppedException):
            logging.error(error)
            return

        if not self.conversation_message_task.streaming:
            return

        self.llm_message.latency = time.perf_counter() - self.start_at
        self.llm_message.completion_tokens = self.llm.get_num_tokens(self.llm_message.completion)
        self.conversation_message_task.save_message(llm_message=self.llm_message, by_stopped=True)

    def on_chain_start(
            self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_chain_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_tool_start(
            self,
            serialized: Dict[str, Any],
            input_str: str,
            **kwargs: Any,
    ) -> None:
        """Do nothing."""
        pass

    def on_agent_action(
            self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        """Do nothing."""
        pass

    def on_tool_end(
            self,
            output: str,
            color: Optional[str] = None,
            observation_prefix: Optional[str] = None,
            llm_prefix: Optional[str] = None,
            **kwargs: Any,
    ) -> None:
        """Do nothing."""
        pass

    def on_tool_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_text(
            self,
            text: str,
            color: Optional[str] = None,
            end: str = "",
            **kwargs: Optional[str],
    ) -> None:
        """Do nothing."""
        pass

    def on_agent_finish(
            self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass
|
||||
137
api/core/callback_handler/main_chain_gather_callback_handler.py
Normal file
137
api/core/callback_handler/main_chain_gather_callback_handler.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from typing import Any, Dict, List, Union, Optional
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
from core.callback_handler.agent_loop_gather_callback_handler import AgentLoopGatherCallbackHandler
|
||||
from core.callback_handler.entity.chain_result import ChainResult
|
||||
from core.constant import llm_constant
|
||||
from core.conversation_message_task import ConversationMessageTask
|
||||
|
||||
|
||||
class MainChainGatherCallbackHandler(BaseCallbackHandler):
    """Callback handler that tracks one chain run and reports it to the conversation message task.

    It also owns an ``AgentLoopGatherCallbackHandler`` and keeps that
    handler's ``current_chain`` pointing at the chain message being tracked.
    """

    def __init__(self, conversation_message_task: ConversationMessageTask) -> None:
        """Initialize callback handler."""
        self._current_chain_result = None
        self._current_chain_message = None
        self.conversation_message_task = conversation_message_task
        self.agent_loop_gather_callback_handler = AgentLoopGatherCallbackHandler(
            llm_constant.agent_model_name,
            conversation_message_task
        )

    def clear_chain_results(self) -> None:
        """Stop tracking the current chain, here and in the agent loop handler."""
        self._current_chain_result = None
        self._current_chain_message = None
        self.agent_loop_gather_callback_handler.current_chain = None

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    @property
    def ignore_llm(self) -> bool:
        """Whether to ignore LLM callbacks."""
        return True

    @property
    def ignore_agent(self) -> bool:
        """Whether to ignore agent callbacks."""
        return True

    def on_chain_start(
            self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Start tracking the chain unless one is already being tracked."""
        if self._current_chain_result:
            return

        chain_result = ChainResult(
            type=serialized['name'],
            prompt=inputs,
            started_at=time.perf_counter()
        )
        self._current_chain_result = chain_result
        self._current_chain_message = self.conversation_message_task.init_chain(chain_result)
        self.agent_loop_gather_callback_handler.current_chain = self._current_chain_message

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Complete the tracked chain with ``outputs``, report it and stop tracking."""
        chain_result = self._current_chain_result
        if not chain_result or chain_result.status != 'chain_started':
            return

        chain_result.status = 'chain_ended'
        chain_result.completion = outputs
        chain_result.completed = True
        chain_result.completed_at = time.perf_counter()

        self.conversation_message_task.on_chain_end(self._current_chain_message, chain_result)

        self.clear_chain_results()

    def on_chain_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error and stop tracking the chain."""
        logging.error(error)
        self.clear_chain_results()

    def on_llm_start(
            self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error."""
        logging.error(error)

    def on_tool_start(
            self,
            serialized: Dict[str, Any],
            input_str: str,
            **kwargs: Any,
    ) -> None:
        """Do nothing."""
        pass

    def on_agent_action(
            self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        """Do nothing."""
        pass

    def on_tool_end(
            self,
            output: str,
            color: Optional[str] = None,
            observation_prefix: Optional[str] = None,
            llm_prefix: Optional[str] = None,
            **kwargs: Any,
    ) -> None:
        """Do nothing."""
        pass

    def on_tool_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Log the error."""
        logging.error(error)

    def on_text(
            self,
            text: str,
            color: Optional[str] = None,
            end: str = "",
            **kwargs: Optional[str],
    ) -> None:
        """Do nothing."""
        pass

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
        """Do nothing."""
        pass
|
||||
127
api/core/callback_handler/std_out_callback_handler.py
Normal file
127
api/core/callback_handler/std_out_callback_handler.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.input import print_text
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
|
||||
class DifyStdOutCallbackHandler(BaseCallbackHandler):
    """Callback Handler that prints to std out."""

    def __init__(self, color: Optional[str] = None) -> None:
        """Initialize callback handler.

        :param color: fallback color for ``on_text`` when the call passes none
        """
        self.color = color

    @staticmethod
    def _extract_thought(log: Optional[str]) -> str:
        """Return the text that precedes the ``Action:`` marker in an agent log.

        Unlike ``str.index``, a log without a ``"\\nAction:"`` marker does not
        raise: a log that starts directly with ``Action:`` has no thought, and
        any other log is returned whole (stripped).
        """
        if not log:
            return ''
        marker_at = log.find("\nAction:")
        if marker_at == -1:
            return '' if log.lstrip().startswith("Action:") else log.strip()
        return log[:marker_at].strip()

    def on_llm_start(
            self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Print out the prompts."""
        print_text("\n[on_llm_start]\n", color='blue')

        if 'Chat' in serialized['name']:
            for prompt in prompts:
                print_text(prompt + "\n", color='blue')
        else:
            print_text(prompts[0] + "\n", color='blue')

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Print the first generation's text and the raw ``llm_output``."""
        print_text("\n[on_llm_end]\nOutput: " + str(response.generations[0][0].text) + "\nllm_output: " + str(
            response.llm_output) + "\n", color='blue')

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Print the LLM error."""
        print_text("\n[on_llm_error]\nError: " + str(error) + "\n", color='blue')

    def on_chain_start(
            self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Print out that we are entering a chain."""
        class_name = serialized["name"]
        print_text("\n[on_chain_start]\nChain: " + class_name + "\nInputs: " + str(inputs) + "\n", color='pink')

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Print out that we finished a chain."""
        print_text("\n[on_chain_end]\nOutputs: " + str(outputs) + "\n", color='pink')

    def on_chain_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Print the chain error."""
        print_text("\n[on_chain_error]\nError: " + str(error) + "\n", color='pink')

    def on_tool_start(
            self,
            serialized: Dict[str, Any],
            input_str: str,
            **kwargs: Any,
    ) -> None:
        """Print the serialized tool description."""
        print_text("\n[on_tool_start] " + str(serialized), color='yellow')

    def on_agent_action(
            self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        """Print the agent's thought, chosen tool and tool input."""
        thought = self._extract_thought(action.log)

        log = f"Thought: {thought}\nTool: {action.tool}\nTool Input: {action.tool_input}"
        print_text("\n[on_agent_action]\n" + log + "\n", color='green')

    def on_tool_end(
            self,
            output: str,
            color: Optional[str] = None,
            observation_prefix: Optional[str] = None,
            llm_prefix: Optional[str] = None,
            **kwargs: Any,
    ) -> None:
        """Print the tool output, wrapped in the observation and LLM prefixes when given."""
        print_text("\n[on_tool_end]\n", color='yellow')
        if observation_prefix:
            print_text(f"\n{observation_prefix}")
        print_text(output, color='yellow')
        if llm_prefix:
            print_text(f"\n{llm_prefix}")
        print_text("\n")

    def on_tool_error(
            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Print the tool error."""
        print_text("\n[on_tool_error] Error: " + str(error) + "\n", color='yellow')

    def on_text(
            self,
            text: str,
            color: Optional[str] = None,
            end: str = "",
            **kwargs: Optional[str],
    ) -> None:
        """Print additional text from chains and agents, using ``color`` or the handler default."""
        print_text("\n[on_text] " + text + "\n", color=color if color else self.color, end=end)

    def on_agent_finish(
            self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
    ) -> None:
        """Run on agent end."""
        # str() keeps a non-string output from raising TypeError on concatenation
        print_text("[on_agent_finish] " + str(finish.return_values['output']) + "\n", color='green', end="\n")
|
||||
|
||||
|
||||
class DifyStreamingStdOutCallbackHandler(DifyStdOutCallbackHandler):
    """Stdout handler that also echoes streamed tokens. Only works with LLMs that support streaming."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Write the new token to stdout and flush it at once."""
        stream = sys.stdout
        stream.write(token)
        stream.flush()
|
||||
34
api/core/chain/chain_builder.py
Normal file
34
api/core/chain/chain_builder.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from typing import Optional
|
||||
|
||||
from langchain.callbacks import CallbackManager
|
||||
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.chain.sensitive_word_avoidance_chain import SensitiveWordAvoidanceChain
|
||||
from core.chain.tool_chain import ToolChain
|
||||
|
||||
|
||||
class ChainBuilder:
    """Class-level helpers that build the auxiliary chains."""

    @classmethod
    def to_tool_chain(cls, tool, **kwargs) -> ToolChain:
        """Wrap ``tool`` in a ``ToolChain`` that logs to stdout.

        Optional keyword arguments ``input_key`` (default ``'input'``) and
        ``output_key`` (default ``'tool_output'``) name the chain's keys.
        """
        input_key = kwargs.get('input_key', 'input')
        output_key = kwargs.get('output_key', 'tool_output')
        return ToolChain(
            tool=tool,
            input_key=input_key,
            output_key=output_key,
            callback_manager=CallbackManager([DifyStdOutCallbackHandler()])
        )

    @classmethod
    def to_sensitive_word_avoidance_chain(cls, tool_config: dict, **kwargs) -> Optional[
        SensitiveWordAvoidanceChain]:
        """Build the sensitive-word chain described by ``tool_config``.

        :param tool_config: dict with ``enabled``, ``words`` (comma-separated) and
            optional ``canned_response``
        :return: the chain, or ``None`` when the tool is disabled or has no words
        """
        sensitive_words = tool_config.get("words", "")
        if not (tool_config.get("enabled", False) and sensitive_words):
            return None

        return SensitiveWordAvoidanceChain(
            sensitive_words=sensitive_words.split(","),
            canned_response=tool_config.get("canned_response", ''),
            output_key="sensitive_word_avoidance_output",
            callback_manager=CallbackManager([DifyStdOutCallbackHandler()]),
            **kwargs
        )
|
||||
116
api/core/chain/main_chain_builder.py
Normal file
116
api/core/chain/main_chain_builder.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from typing import Optional, List
|
||||
|
||||
from langchain.callbacks import SharedCallbackManager
|
||||
from langchain.chains import SequentialChain
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.memory.chat_memory import BaseChatMemory
|
||||
|
||||
from core.agent.agent_builder import AgentBuilder
|
||||
from core.callback_handler.agent_loop_gather_callback_handler import AgentLoopGatherCallbackHandler
|
||||
from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
|
||||
from core.callback_handler.main_chain_gather_callback_handler import MainChainGatherCallbackHandler
|
||||
from core.chain.chain_builder import ChainBuilder
|
||||
from core.constant import llm_constant
|
||||
from core.conversation_message_task import ConversationMessageTask
|
||||
from core.tool.dataset_tool_builder import DatasetToolBuilder
|
||||
|
||||
|
||||
class MainChainBuilder:
    """Assembles the chains that run before the final LLM call."""

    @classmethod
    def to_langchain_components(cls, tenant_id: str, agent_mode: dict, memory: Optional[BaseChatMemory],
                                conversation_message_task: ConversationMessageTask):
        """Build the overall ``SequentialChain`` for the given agent mode.

        :return: the sequential chain, or ``None`` when no chain was built
        """
        chain_callback_handler = MainChainGatherCallbackHandler(conversation_message_task)

        # agent mode
        tool_chains, chains_output_key = cls.get_agent_chains(
            tenant_id=tenant_id,
            agent_mode=agent_mode,
            memory=memory,
            dataset_tool_callback_handler=DatasetToolCallbackHandler(conversation_message_task),
            agent_loop_gather_callback_handler=chain_callback_handler.agent_loop_gather_callback_handler
        )
        chains = list(tool_chains)
        if not chains:
            return None

        # the last chain's output key wins over the default "output"
        final_output_key = chains_output_key or "output"

        for chain in chains:
            # do not add handler into singleton callback manager
            if isinstance(chain.callback_manager, SharedCallbackManager):
                continue
            chain.callback_manager.add_handler(chain_callback_handler)

        # build main chain
        return SequentialChain(
            chains=chains,
            input_variables=["input"],
            output_variables=[final_output_key],
            memory=memory,  # only for use the memory prompt input key
        )

    @classmethod
    def get_agent_chains(cls, tenant_id: str, agent_mode: dict, memory: Optional[BaseChatMemory],
                         dataset_tool_callback_handler: DatasetToolCallbackHandler,
                         agent_loop_gather_callback_handler: AgentLoopGatherCallbackHandler):
        """Translate the agent-mode tool list into chains.

        Sensitive-word tools become pre-fixed chains; dataset tools are
        collected and turned into a single tool chain (exactly one tool) or
        an agent chain (more than one).

        :return: ``(chains, output key of the last chain or None)``
        """
        chains = []
        if not agent_mode or not agent_mode.get('enabled'):
            return chains, cls.get_chains_output_key(chains)

        pre_fixed_chains = []
        agent_tools = []
        for tool in agent_mode.get('tools', []):
            # each tool entry is a one-item mapping: {type: config}
            tool_type, tool_config = list(tool.items())[0]

            if tool_type == 'sensitive-word-avoidance':
                chain = ChainBuilder.to_sensitive_word_avoidance_chain(tool_config)
                if chain:
                    pre_fixed_chains.append(chain)
            elif tool_type == "dataset":
                dataset_tool = DatasetToolBuilder.build_dataset_tool(
                    tenant_id=tenant_id,
                    dataset_id=tool_config.get("id"),
                    response_mode='no_synthesizer',  # "compact"
                    callback_handler=dataset_tool_callback_handler
                )
                if dataset_tool:
                    agent_tools.append(dataset_tool)

        # add pre-fixed chains
        chains.extend(pre_fixed_chains)

        tool_count = len(agent_tools)
        if tool_count == 1:
            # tool to chain
            chains.append(ChainBuilder.to_tool_chain(tool=agent_tools[0], output_key='tool_output'))
        elif tool_count > 1:
            # build agent config
            chains.append(AgentBuilder.to_agent_chain(
                tenant_id=tenant_id,
                tools=agent_tools,
                memory=memory,
                dataset_tool_callback_handler=dataset_tool_callback_handler,
                agent_loop_gather_callback_handler=agent_loop_gather_callback_handler
            ))

        return chains, cls.get_chains_output_key(chains)

    @classmethod
    def get_chains_output_key(cls, chains: List[Chain]):
        """Return the first output key of the last chain, or ``None`` if there is no chain."""
        if not chains:
            return None
        return chains[-1].output_keys[0]
|
||||
42
api/core/chain/sensitive_word_avoidance_chain.py
Normal file
42
api/core/chain/sensitive_word_avoidance_chain.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from typing import List, Dict
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
|
||||
|
||||
class SensitiveWordAvoidanceChain(Chain):
    """Chain that replaces its input with a canned response when the input
    contains one of the configured sensitive words."""

    input_key: str = "input"  #: :meta private:
    output_key: str = "output"  #: :meta private:

    # words whose presence in the input triggers the canned response
    sensitive_words: List[str] = []
    # text returned instead of the input when a sensitive word is found
    canned_response: str = None

    @property
    def _chain_type(self) -> str:
        return "sensitive_word_avoidance_chain"

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return output key.

        :meta private:
        """
        return [self.output_key]

    def _check_sensitive_word(self, text: str) -> str:
        """Return ``canned_response`` if ``text`` contains a sensitive word,
        otherwise return ``text`` unchanged.

        Matching is a plain, case-sensitive substring test.
        """
        # Skip empty / blank entries: "" is a substring of every string, so a
        # stray empty word would flag every single input.
        for word in self.sensitive_words:
            if word and word.strip() and word in text:
                return self.canned_response
        return text

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Check the value under ``input_key`` and return the result under ``output_key``."""
        text = inputs[self.input_key]
        output = self._check_sensitive_word(text)
        return {self.output_key: output}
|
||||
42
api/core/chain/tool_chain.py
Normal file
42
api/core/chain/tool_chain.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from typing import List, Dict
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
|
||||
class ToolChain(Chain):
    """Chain that passes its single input to one tool and returns the tool's output."""

    input_key: str = "input"  #: :meta private:
    output_key: str = "output"  #: :meta private:

    tool: BaseTool

    @property
    def _chain_type(self) -> str:
        return "tool_chain"

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return output key.

        :meta private:
        """
        return [self.output_key]

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Run the tool on the value stored under ``input_key``."""
        tool_input = inputs[self.input_key]
        result = self.tool.run(tool_input, self.verbose)
        return {self.output_key: result}

    async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Asynchronous counterpart of ``_call``; awaits the tool's ``arun``."""
        tool_input = inputs[self.input_key]
        result = await self.tool.arun(tool_input, self.verbose)
        return {self.output_key: result}
|
||||
326
api/core/completion.py
Normal file
326
api/core/completion.py
Normal file
@@ -0,0 +1,326 @@
|
||||
from typing import Optional, List, Union
|
||||
|
||||
from langchain.callbacks import CallbackManager
|
||||
from langchain.chat_models.base import BaseChatModel
|
||||
from langchain.llms import BaseLLM
|
||||
from langchain.schema import BaseMessage, BaseLanguageModel, HumanMessage
|
||||
from core.constant import llm_constant
|
||||
from core.callback_handler.llm_callback_handler import LLMCallbackHandler
|
||||
from core.callback_handler.std_out_callback_handler import DifyStreamingStdOutCallbackHandler, \
|
||||
DifyStdOutCallbackHandler
|
||||
from core.conversation_message_task import ConversationMessageTask, ConversationTaskStoppedException
|
||||
from core.llm.error import LLMBadRequestError
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
from core.chain.main_chain_builder import MainChainBuilder
|
||||
from core.llm.streamable_chat_open_ai import StreamableChatOpenAI
|
||||
from core.llm.streamable_open_ai import StreamableOpenAI
|
||||
from core.memory.read_only_conversation_token_db_buffer_shared_memory import \
|
||||
ReadOnlyConversationTokenDBBufferSharedMemory
|
||||
from core.memory.read_only_conversation_token_db_string_buffer_shared_memory import \
|
||||
ReadOnlyConversationTokenDBStringBufferSharedMemory
|
||||
from core.prompt.prompt_builder import PromptBuilder
|
||||
from core.prompt.prompt_template import OutLinePromptTemplate
|
||||
from core.prompt.prompts import MORE_LIKE_THIS_GENERATE_PROMPT
|
||||
from models.model import App, AppModelConfig, Account, Conversation, Message
|
||||
|
||||
|
||||
class Completion:
    """Runs an app's pre-processing chains and the final LLM call for a query."""

    @classmethod
    def generate(cls, task_id: str, app: App, app_model_config: AppModelConfig, query: str, inputs: dict,
                 user: Account, conversation: Optional[Conversation], streaming: bool, is_override: bool = False):
        """
        Validate the query length, run the main chain (when one is
        configured) and then the final LLM.

        When ``conversation`` is given, a read-only memory is built from it
        and its stored inputs replace ``inputs``.

        errors: ProviderTokenNotInitError
        """
        cls.validate_query_tokens(app.tenant_id, app_model_config, query)

        memory = None
        if conversation:
            # get memory of conversation (read-only)
            memory = cls.get_memory_from_conversation(
                tenant_id=app.tenant_id,
                app_model_config=app_model_config,
                conversation=conversation
            )

            inputs = conversation.inputs

        conversation_message_task = ConversationMessageTask(
            task_id=task_id,
            app=app,
            app_model_config=app_model_config,
            user=user,
            conversation=conversation,
            is_override=is_override,
            inputs=inputs,
            query=query,
            streaming=streaming
        )

        # build main chain include agent
        main_chain = MainChainBuilder.to_langchain_components(
            tenant_id=app.tenant_id,
            agent_mode=app_model_config.agent_mode_dict,
            memory=ReadOnlyConversationTokenDBStringBufferSharedMemory(memory=memory) if memory else None,
            conversation_message_task=conversation_message_task
        )

        chain_output = ''
        if main_chain:
            chain_output = main_chain.run(query)

        # run the final llm
        try:
            cls.run_final_llm(
                tenant_id=app.tenant_id,
                mode=app.mode,
                app_model_config=app_model_config,
                query=query,
                inputs=inputs,
                chain_output=chain_output,
                conversation_message_task=conversation_message_task,
                memory=memory,
                streaming=streaming
            )
        except ConversationTaskStoppedException:
            # the task was stopped on purpose; nothing more to do
            return

    @classmethod
    def run_final_llm(cls, tenant_id: str, mode: str, app_model_config: AppModelConfig, query: str, inputs: dict,
                      chain_output: str,
                      conversation_message_task: ConversationMessageTask,
                      memory: Optional[ReadOnlyConversationTokenDBBufferSharedMemory], streaming: bool):
        """Build the final LLM and its prompt, cap ``max_tokens`` to what the
        model can still fit, and return the result of ``generate``."""
        final_llm = LLMBuilder.to_llm_from_model(
            tenant_id=tenant_id,
            model=app_model_config.model_dict,
            streaming=streaming
        )

        # get llm prompt
        prompt = cls.get_main_llm_prompt(
            mode=mode,
            llm=final_llm,
            pre_prompt=app_model_config.pre_prompt,
            query=query,
            inputs=inputs,
            chain_output=chain_output,
            memory=memory
        )

        final_llm.callback_manager = cls.get_llm_callback_manager(final_llm, streaming, conversation_message_task)

        cls.recale_llm_max_tokens(
            final_llm=final_llm,
            prompt=prompt,
            mode=mode
        )

        response = final_llm.generate([prompt])

        return response

    @classmethod
    def get_main_llm_prompt(cls, mode: str, llm: BaseLanguageModel, pre_prompt: str, query: str, inputs: dict,
                            chain_output: Optional[str],
                            memory: Optional[ReadOnlyConversationTokenDBBufferSharedMemory]) -> \
            Union[str, List[BaseMessage]]:
        """Build the prompt sent to the final LLM.

        In ``'completion'`` mode the result is a single string, wrapped in
        one ``HumanMessage`` when ``llm`` is a chat model.  In any other
        mode the result is a message list: optional system message
        (context + pre-prompt), history taken from ``memory`` within the
        remaining token budget, then the human query.

        ``inputs`` is only read, never modified.
        """
        pre_prompt = PromptBuilder.process_template(pre_prompt) if pre_prompt else pre_prompt
        if mode == 'completion':
            prompt_template = OutLinePromptTemplate.from_template(
                template=("Use the following pieces of [CONTEXT] to answer the question at the end. "
                          "If you don't know the answer, "
                          "just say that you don't know, don't try to make up an answer. \n"
                          "```\n"
                          "[CONTEXT]\n"
                          "{context}\n"
                          "```\n" if chain_output else "")
                         + (pre_prompt + "\n" if pre_prompt else "")
                         + "{query}\n"
            )

            # Work on a copy so the caller's dict (which may belong to a
            # conversation or message record) does not gain a "context" key.
            template_inputs = dict(inputs) if inputs else {}
            if chain_output:
                template_inputs['context'] = chain_output

            prompt_inputs = {k: template_inputs[k] for k in prompt_template.input_variables
                             if k in template_inputs}
            # The explicit query always wins; this also avoids a duplicate
            # keyword error when the inputs contain a key named "query".
            prompt_inputs['query'] = query
            prompt_content = prompt_template.format(**prompt_inputs)

            if isinstance(llm, BaseChatModel):
                # use chat llm as completion model
                return [HumanMessage(content=prompt_content)]
            else:
                return prompt_content
        else:
            messages: List[BaseMessage] = []

            system_message = None
            if pre_prompt:
                # append pre prompt as system message
                system_message = PromptBuilder.to_system_message(pre_prompt, inputs)

            if chain_output:
                # append context as system message, currently only use simple stuff prompt
                context_message = PromptBuilder.to_system_message(
                    ("Use the following pieces of [CONTEXT] to answer the users question.\n"
                     "If you don't know the answer, just say that you don't know, "
                     "don't try to make up an answer.\n"
                     "```\n"
                     "[CONTEXT]\n"
                     "{context}\n"
                     "```"),
                    {'context': chain_output}
                )

                if not system_message:
                    system_message = context_message
                else:
                    system_message.content = context_message.content + "\n\n" + system_message.content

            if system_message:
                messages.append(system_message)

            human_inputs = {
                "query": query
            }

            # construct main prompt
            human_message = PromptBuilder.to_human_message(
                prompt_content="{query}",
                inputs=human_inputs
            )

            if memory:
                # append chat histories; the budget is what the model context
                # leaves after the reserved completion and the current messages
                tmp_messages = messages.copy() + [human_message]
                curr_message_tokens = memory.llm.get_messages_tokens(tmp_messages)
                rest_tokens = llm_constant.max_context_token_length[
                                  memory.llm.model_name] - memory.llm.max_tokens - curr_message_tokens
                rest_tokens = max(rest_tokens, 0)
                history_messages = cls.get_history_messages_from_memory(memory, rest_tokens)
                messages += history_messages

            messages.append(human_message)

            return messages

    @classmethod
    def get_llm_callback_manager(cls, llm: Union[StreamableOpenAI, StreamableChatOpenAI],
                                 streaming: bool, conversation_message_task: ConversationMessageTask) -> CallbackManager:
        """Return a callback manager holding the LLM callback handler plus the
        streaming or plain stdout handler, depending on ``streaming``."""
        llm_callback_handler = LLMCallbackHandler(llm, conversation_message_task)
        if streaming:
            callback_handlers = [llm_callback_handler, DifyStreamingStdOutCallbackHandler()]
        else:
            callback_handlers = [llm_callback_handler, DifyStdOutCallbackHandler()]

        return CallbackManager(callback_handlers)

    @classmethod
    def get_history_messages_from_memory(cls, memory: ReadOnlyConversationTokenDBBufferSharedMemory,
                                         max_token_limit: int) -> \
            List[BaseMessage]:
        """Get memory messages.

        Sets ``memory.max_token_limit`` to ``max_token_limit`` before loading.
        """
        memory.max_token_limit = max_token_limit
        memory_key = memory.memory_variables[0]
        external_context = memory.load_memory_variables({})
        return external_context[memory_key]

    @classmethod
    def get_memory_from_conversation(cls, tenant_id: str, app_model_config: AppModelConfig,
                                     conversation: Conversation,
                                     **kwargs) -> ReadOnlyConversationTokenDBBufferSharedMemory:
        """Create the read-only memory for ``conversation``.

        Optional ``kwargs`` override the defaults: ``max_token_limit`` (2048),
        ``memory_key`` ("chat_history"), ``return_messages`` (True),
        ``input_key`` ("input"), ``output_key`` ("output") and
        ``message_limit`` (10).
        """
        # only for calc token in memory
        memory_llm = LLMBuilder.to_llm_from_model(
            tenant_id=tenant_id,
            model=app_model_config.model_dict
        )

        # use llm config from conversation
        memory = ReadOnlyConversationTokenDBBufferSharedMemory(
            conversation=conversation,
            llm=memory_llm,
            max_token_limit=kwargs.get("max_token_limit", 2048),
            memory_key=kwargs.get("memory_key", "chat_history"),
            return_messages=kwargs.get("return_messages", True),
            input_key=kwargs.get("input_key", "input"),
            output_key=kwargs.get("output_key", "output"),
            message_limit=kwargs.get("message_limit", 10),
        )

        return memory

    @classmethod
    def validate_query_tokens(cls, tenant_id: str, app_model_config: AppModelConfig, query: str):
        """Raise ``LLMBadRequestError`` when the query alone, together with the
        reserved ``max_tokens``, does not fit into the model's context."""
        llm = LLMBuilder.to_llm_from_model(
            tenant_id=tenant_id,
            model=app_model_config.model_dict
        )

        model_limited_tokens = llm_constant.max_context_token_length[llm.model_name]
        max_tokens = llm.max_tokens

        if model_limited_tokens - max_tokens - llm.get_num_tokens(query) < 0:
            raise LLMBadRequestError("Query is too long")

    @classmethod
    def recale_llm_max_tokens(cls, final_llm: Union[StreamableOpenAI, StreamableChatOpenAI],
                              prompt: Union[str, List[BaseMessage]], mode: str):
        """Lower ``final_llm.max_tokens`` when prompt plus completion would
        exceed the model's context size (never below 16)."""
        # recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
        model_limited_tokens = llm_constant.max_context_token_length[final_llm.model_name]
        max_tokens = final_llm.max_tokens

        if mode == 'completion' and isinstance(final_llm, BaseLLM):
            prompt_tokens = final_llm.get_num_tokens(prompt)
        else:
            prompt_tokens = final_llm.get_messages_tokens(prompt)

        if prompt_tokens + max_tokens > model_limited_tokens:
            max_tokens = max(model_limited_tokens - prompt_tokens, 16)
            final_llm.max_tokens = max_tokens

    @classmethod
    def generate_more_like_this(cls, task_id: str, app: App, message: Message, pre_prompt: str,
                                app_model_config: AppModelConfig, user: Account, streaming: bool):
        """Generate another completion in the spirit of an existing message.

        The original prompt is rebuilt from ``message`` and embedded, together
        with the original answer, into ``MORE_LIKE_THIS_GENERATE_PROMPT``.
        """
        llm: Union[StreamableOpenAI, StreamableChatOpenAI] = LLMBuilder.to_llm(
            tenant_id=app.tenant_id,
            model_name='gpt-3.5-turbo',
            streaming=streaming
        )

        # get llm prompt
        original_prompt = cls.get_main_llm_prompt(
            mode="completion",
            llm=llm,
            pre_prompt=pre_prompt,
            query=message.query,
            inputs=message.inputs,
            chain_output=None,
            memory=None
        )
        if isinstance(original_prompt, list):
            # For chat models the prompt comes back as a list of messages.
            # Embed their text, not the repr of the message objects.
            original_prompt = "\n".join(part.content for part in original_prompt)

        original_completion = message.answer.strip()

        prompt = MORE_LIKE_THIS_GENERATE_PROMPT
        prompt = prompt.format(prompt=original_prompt, original_completion=original_completion)

        if isinstance(llm, BaseChatModel):
            prompt = [HumanMessage(content=prompt)]

        conversation_message_task = ConversationMessageTask(
            task_id=task_id,
            app=app,
            app_model_config=app_model_config,
            user=user,
            inputs=message.inputs,
            query=message.query,
            is_override=bool(message.override_model_configs),
            streaming=streaming
        )

        llm.callback_manager = cls.get_llm_callback_manager(llm, streaming, conversation_message_task)

        cls.recale_llm_max_tokens(
            final_llm=llm,
            prompt=prompt,
            mode='completion'
        )

        llm.generate([prompt])
|
||||
84
api/core/constant/llm_constant.py
Normal file
84
api/core/constant/llm_constant.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from _decimal import Decimal
|
||||
|
||||
# Maps every supported model name to the provider serving it.
models = dict.fromkeys(
    (
        'gpt-4',  # 8,192 tokens
        'gpt-4-32k',  # 32,768 tokens
        'gpt-3.5-turbo',  # 4,096 tokens
        'text-davinci-003',  # 4,097 tokens
        'text-davinci-002',  # 4,097 tokens
        'text-curie-001',  # 2,049 tokens
        'text-babbage-001',  # 2,049 tokens
        'text-ada-001',  # 2,049 tokens
        'text-embedding-ada-002',  # 8191 tokens, 1536 dimensions
    ),
    'openai',
)
|
||||
|
||||
# Context size, in tokens, of every supported model.
max_context_token_length = dict((
    ('gpt-4', 8192),
    ('gpt-4-32k', 32768),
    ('gpt-3.5-turbo', 4096),
    ('text-davinci-003', 4097),
    ('text-davinci-002', 4097),
    ('text-curie-001', 2049),
    ('text-babbage-001', 2049),
    ('text-ada-001', 2049),
    ('text-embedding-ada-002', 8191),
))
|
||||
|
||||
# Model names selectable for each app mode.
models_by_mode = {
    'chat': [
        'gpt-4',  # 8,192 tokens
        'gpt-4-32k',  # 32,768 tokens
        'gpt-3.5-turbo',  # 4,096 tokens
    ],
    'completion': [
        'gpt-4',  # 8,192 tokens
        'gpt-4-32k',  # 32,768 tokens
        'gpt-3.5-turbo',  # 4,096 tokens
        'text-davinci-003',  # 4,097 tokens
        # The comma after this entry matters: without it Python joins the two
        # adjacent string literals into 'text-davinci-002text-curie-001'.
        'text-davinci-002',  # 4,097 tokens
        'text-curie-001',  # 2,049 tokens
        'text-babbage-001',  # 2,049 tokens
        'text-ada-001',  # 2,049 tokens
    ],
    'embedding': [
        'text-embedding-ada-002',  # 8191 tokens, 1536 dimensions
    ]
}
|
||||
|
||||
# Currency every price below is expressed in.
model_currency = 'USD'

# Unit prices per model, split into prompt and completion tokens
# (the embedding model only has a single 'usage' price).
# NOTE(review): presumably prices per 1K tokens - confirm against calc_total_price.
model_prices = {
    'gpt-4': {
        'prompt': Decimal('0.03'),
        'completion': Decimal('0.06'),
    },
    'gpt-4-32k': {
        'prompt': Decimal('0.06'),
        'completion': Decimal('0.12')
    },
    'gpt-3.5-turbo': {
        'prompt': Decimal('0.002'),
        'completion': Decimal('0.002')
    },
    'text-davinci-003': {
        'prompt': Decimal('0.02'),
        'completion': Decimal('0.02')
    },
    # text-davinci-002 is a supported model, so it needs a price entry too;
    # otherwise price lookups by model name raise KeyError for it.
    # NOTE(review): assumed to share the Davinci rate with text-davinci-003 - confirm.
    'text-davinci-002': {
        'prompt': Decimal('0.02'),
        'completion': Decimal('0.02')
    },
    'text-curie-001': {
        'prompt': Decimal('0.002'),
        'completion': Decimal('0.002')
    },
    'text-babbage-001': {
        'prompt': Decimal('0.0005'),
        'completion': Decimal('0.0005')
    },
    'text-ada-001': {
        'prompt': Decimal('0.0004'),
        'completion': Decimal('0.0004')
    },
    'text-embedding-ada-002': {
        'usage': Decimal('0.0004'),
    }
}

# Model used by the agent.
agent_model_name = 'text-davinci-003'
|
||||
388
api/core/conversation_message_task.py
Normal file
388
api/core/conversation_message_task.py
Normal file
@@ -0,0 +1,388 @@
|
||||
import decimal
|
||||
import json
|
||||
from typing import Optional, Union
|
||||
|
||||
from gunicorn.config import User
|
||||
|
||||
from core.callback_handler.entity.agent_loop import AgentLoop
|
||||
from core.callback_handler.entity.dataset_query import DatasetQueryObj
|
||||
from core.callback_handler.entity.llm_message import LLMMessage
|
||||
from core.callback_handler.entity.chain_result import ChainResult
|
||||
from core.constant import llm_constant
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
from core.llm.provider.llm_provider_service import LLMProviderService
|
||||
from core.prompt.prompt_builder import PromptBuilder
|
||||
from core.prompt.prompt_template import OutLinePromptTemplate
|
||||
from events.message_event import message_was_created
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.dataset import DatasetQuery
|
||||
from models.model import AppModelConfig, Conversation, Account, Message, EndUser, App, MessageAgentThought, MessageChain
|
||||
from models.provider import ProviderType, Provider
|
||||
|
||||
|
||||
class ConversationMessageTask:
|
||||
def __init__(self, task_id: str, app: App, app_model_config: AppModelConfig, user: Account,
             inputs: dict, query: str, streaming: bool,
             conversation: Optional[Conversation] = None, is_override: bool = False):
    """Store the request context, create the database records through
    ``init()`` and set up the publish handler for this task.

    :param conversation: existing conversation to continue; when omitted,
        ``init()`` creates a new one
    :param is_override: whether the model config overrides the app's own
    """
    model_dict = app_model_config.model_dict

    # request context
    self.task_id = task_id
    self.user = user
    self.inputs = inputs
    self.query = query
    self.streaming = streaming

    # app and model configuration
    self.app = app
    self.tenant_id = app.tenant_id
    self.mode = app.mode
    self.app_model_config = app_model_config
    self.is_override = is_override
    self.model_dict = model_dict
    self.model_name = model_dict.get('name')

    # records filled in / created by init()
    self.conversation = conversation
    self.is_new_conversation = False
    self.message = None

    self.init()

    # must come after init(): it needs the message and conversation records
    self._pub_handler = PubHandler(
        user=self.user,
        task_id=self.task_id,
        message=self.message,
        conversation=self.conversation,
        chain_pub=False,  # disabled currently
        agent_thought_pub=False  # disabled currently
    )
|
||||
|
||||
def init(self):
    """Create the database records for this task.

    A new ``Conversation`` is added and flushed when none was supplied;
    a ``Message`` with empty answer and zeroed counters is then always
    added and flushed.  Nothing is committed here.
    """
    config = self.app_model_config

    override_model_configs = None
    if self.is_override:
        override_model_configs = {
            "model": config.model_dict,
            "pre_prompt": config.pre_prompt,
            "agent_mode": config.agent_mode_dict,
            "opening_statement": config.opening_statement,
            "suggested_questions": config.suggested_questions_list,
            "suggested_questions_after_answer": config.suggested_questions_after_answer_dict,
            "more_like_this": config.more_like_this_dict,
            "user_input_form": config.user_input_form_list,
        }

    introduction = ''
    system_instruction = ''
    system_instruction_tokens = 0
    if self.mode == 'chat':
        # render the opening statement with the user inputs
        introduction = config.opening_statement
        if introduction:
            prompt_template = OutLinePromptTemplate.from_template(template=PromptBuilder.process_template(introduction))
            prompt_inputs = {k: self.inputs[k] for k in prompt_template.input_variables if k in self.inputs}
            introduction = prompt_template.format(**prompt_inputs)

        # render the pre-prompt as system instruction and count its tokens
        if config.pre_prompt:
            pre_prompt = PromptBuilder.process_template(config.pre_prompt)
            system_message = PromptBuilder.to_system_message(pre_prompt, self.inputs)
            system_instruction = system_message.content
            llm = LLMBuilder.to_llm(self.tenant_id, self.model_name)
            system_instruction_tokens = llm.get_messages_tokens([system_message])

    # values shared by the conversation and the message record
    serialized_override = json.dumps(override_model_configs) if override_model_configs else None
    user = self.user
    from_console = isinstance(user, Account)
    from_source = 'console' if from_console else 'api'
    from_account_id = user.id if from_console else None
    from_end_user_id = user.id if isinstance(user, EndUser) else None
    model_provider = self.model_dict.get('provider')

    if not self.conversation:
        self.is_new_conversation = True
        self.conversation = Conversation(
            app_id=config.app_id,
            app_model_config_id=config.id,
            model_provider=model_provider,
            model_id=self.model_name,
            override_model_configs=serialized_override,
            mode=self.mode,
            name='',
            inputs=self.inputs,
            introduction=introduction,
            system_instruction=system_instruction,
            system_instruction_tokens=system_instruction_tokens,
            status='normal',
            from_source=from_source,
            from_end_user_id=from_end_user_id,
            from_account_id=from_account_id,
        )

        db.session.add(self.conversation)
        # flush so the conversation id is available for the message below
        db.session.flush()

    self.message = Message(
        app_id=config.app_id,
        model_provider=model_provider,
        model_id=self.model_name,
        override_model_configs=serialized_override,
        conversation_id=self.conversation.id,
        inputs=self.inputs,
        query=self.query,
        message="",
        message_tokens=0,
        message_unit_price=0,
        answer="",
        answer_tokens=0,
        answer_unit_price=0,
        provider_response_latency=0,
        total_price=0,
        currency=llm_constant.model_currency,
        from_source=from_source,
        from_end_user_id=from_end_user_id,
        from_account_id=from_account_id,
        agent_based=config.agent_mode_dict.get('enabled'),
    )

    db.session.add(self.message)
    db.session.flush()
|
||||
|
||||
def append_message_text(self, text: str):
    """Hand a piece of answer text to the publish handler."""
    publisher = self._pub_handler
    publisher.pub_text(text)
|
||||
|
||||
def save_message(self, llm_message: LLMMessage, by_stopped: bool = False):
    """Store the LLM result on the message record and commit it.

    Fills in prompt, answer, token counts, unit prices, latency and total
    price, updates the provider quota, commits, sends the
    ``message_was_created`` signal and, unless ``by_stopped`` is set,
    publishes the end event.
    """
    model_name = self.app_model_config.model_dict.get('name')
    prices = llm_constant.model_prices[model_name]

    message_tokens = llm_message.prompt_tokens
    answer_tokens = llm_message.completion_tokens
    message_unit_price = prices['prompt']
    answer_unit_price = prices['completion']

    total_price = self.calc_total_price(message_tokens, message_unit_price, answer_tokens, answer_unit_price)

    record = self.message
    record.message = llm_message.prompt
    record.message_tokens = message_tokens
    record.message_unit_price = message_unit_price
    # a missing / empty completion is stored as an empty answer
    completion = llm_message.completion
    record.answer = completion.strip() if completion else ''
    record.answer_tokens = answer_tokens
    record.answer_unit_price = answer_unit_price
    record.provider_response_latency = llm_message.latency
    record.total_price = total_price

    self.update_provider_quota()

    db.session.commit()

    message_was_created.send(
        self.message,
        conversation=self.conversation,
        is_first_message=self.is_new_conversation
    )

    if by_stopped:
        return
    self._pub_handler.pub_end()
|
||||
|
||||
def update_provider_quota(self):
    """Count one use against the quota of the tenant's system provider.

    Nothing happens unless the provider record looked up for the message's
    model provider is of the SYSTEM type.  Only rows with quota left
    (``quota_limit > quota_used``) are incremented.  No commit is issued
    here.
    """
    llm_provider_service = LLMProviderService(
        tenant_id=self.app.tenant_id,
        provider_name=self.message.model_provider,
    )

    provider = llm_provider_service.get_provider_db_record()
    if not provider or provider.provider_type != ProviderType.SYSTEM.value:
        return

    db.session.query(Provider).filter(
        Provider.tenant_id == self.app.tenant_id,
        # Only system providers consume quota; without this condition the
        # tenant's custom provider rows were incremented as well.
        # NOTE(review): consider narrowing further to the single record
        # fetched above once its key column is confirmed.
        Provider.provider_type == ProviderType.SYSTEM.value,
        Provider.quota_limit > Provider.quota_used
    ).update({'quota_used': Provider.quota_used + 1})
|
||||
|
||||
def init_chain(self, chain_result: ChainResult):
    """Add and flush a ``MessageChain`` row for a chain that is starting.

    The chain's prompt is stored JSON-encoded as input; the output starts
    out empty.

    :return: the new ``MessageChain``
    """
    serialized_input = json.dumps(chain_result.prompt)
    record = MessageChain(
        message_id=self.message.id,
        type=chain_result.type,
        input=serialized_input,
        output=''
    )

    session = db.session
    session.add(record)
    session.flush()

    return record
|
||||
|
||||
def on_chain_end(self, message_chain: MessageChain, chain_result: ChainResult):
    """Store the chain's JSON-encoded completion as output and publish the chain."""
    serialized_output = json.dumps(chain_result.completion)
    message_chain.output = serialized_output

    self._pub_handler.pub_chain(message_chain)
|
||||
|
||||
def on_agent_end(self, message_chain: MessageChain, agent_model_name: str,
                 agent_loop: AgentLoop):
    """Record one finished agent loop as a ``MessageAgentThought`` and publish it.

    Unit prices are read from ``llm_constant.model_prices`` for the given
    model name and combined with the loop's token counts into a total price.
    The row is added and flushed (not committed), then passed to the publish
    handler.
    """
    model_prices = llm_constant.model_prices[agent_model_name]
    prompt_unit_price = model_prices['prompt']
    completion_unit_price = model_prices['completion']

    prompt_tokens = agent_loop.prompt_tokens
    completion_tokens = agent_loop.completion_tokens

    loop_price = self.calc_total_price(
        prompt_tokens,
        prompt_unit_price,
        completion_tokens,
        completion_unit_price
    )

    if isinstance(self.user, Account):
        creator_role = 'account'
    else:
        creator_role = 'end_user'

    thought_record = MessageAgentThought(
        message_id=self.message.id,
        message_chain_id=message_chain.id,
        position=agent_loop.position,
        thought=agent_loop.thought,
        tool=agent_loop.tool_name,
        tool_input=agent_loop.tool_input,
        observation=agent_loop.tool_output,
        tool_process_data='',  # not supported yet
        message=agent_loop.prompt,
        message_token=prompt_tokens,
        message_unit_price=prompt_unit_price,
        answer=agent_loop.completion,
        answer_token=completion_tokens,
        answer_unit_price=completion_unit_price,
        latency=agent_loop.latency,
        tokens=prompt_tokens + completion_tokens,
        total_price=loop_price,
        currency=llm_constant.model_currency,
        created_by_role=creator_role,
        created_by=self.user.id
    )

    session = db.session
    session.add(thought_record)
    session.flush()

    self._pub_handler.pub_agent_thought(thought_record)
|
||||
|
||||
def on_dataset_query_end(self, dataset_query_obj: DatasetQueryObj):
    """Add a ``DatasetQuery`` row for a dataset lookup made by this app.

    The row is only added to the session; nothing is flushed or committed
    here.
    """
    if isinstance(self.user, Account):
        creator_role = 'account'
    else:
        creator_role = 'end_user'

    query_record = DatasetQuery(
        dataset_id=dataset_query_obj.dataset_id,
        content=dataset_query_obj.query,
        source='app',
        source_app_id=self.app.id,
        created_by_role=creator_role,
        created_by=self.user.id
    )

    db.session.add(query_record)
|
||||
|
||||
def calc_total_price(self, message_tokens, message_unit_price, answer_tokens, answer_unit_price):
    """Return the total price of a prompt/answer pair.

    Each token count is converted to units of 1000 tokens (rounded half-up to
    three decimal places), multiplied by its unit price, and the sum is
    rounded half-up to seven decimal places.
    """
    thousandth = decimal.Decimal('0.001')

    def per_thousand(token_count):
        # Token count expressed in thousands, at three-decimal precision.
        units = decimal.Decimal(token_count) / 1000
        return units.quantize(thousandth, rounding=decimal.ROUND_HALF_UP)

    prompt_units = per_thousand(message_tokens)
    completion_units = per_thousand(answer_tokens)

    combined = prompt_units * message_unit_price + completion_units * answer_unit_price
    return combined.quantize(decimal.Decimal('0.0000001'), rounding=decimal.ROUND_HALF_UP)
|
||||
|
||||
|
||||
class PubHandler:
    """Publishes the events of one generation task on a Redis channel.

    The channel name and the "stopped" cache key are both derived from the
    user and the task id. After publishing a text, chain or agent-thought
    event the handler checks the stop flag; when it is set, an ``end`` event
    is published and ``ConversationTaskStoppedException`` is raised.
    """

    def __init__(self, user: Union[Account | User], task_id: str,
                 message: Message, conversation: Conversation,
                 chain_pub: bool = False, agent_thought_pub: bool = False):
        self._channel = PubHandler.generate_channel_name(user, task_id)
        self._stopped_cache_key = PubHandler.generate_stopped_cache_key(user, task_id)

        self._task_id = task_id
        self._message = message
        self._conversation = conversation
        # Chain and agent-thought events are only published when enabled.
        self._chain_pub = chain_pub
        self._agent_thought_pub = agent_thought_pub

    @staticmethod
    def _user_prefix(user):
        # Accounts and end users get distinct prefixes in channel/key names.
        if isinstance(user, Account):
            return 'account-' + user.id
        return 'end-user-' + user.id

    @classmethod
    def generate_channel_name(cls, user: Union[Account | User], task_id: str):
        """Return the pub/sub channel name for this user and task."""
        return "generate_result:{}-{}".format(cls._user_prefix(user), task_id)

    @classmethod
    def generate_stopped_cache_key(cls, user: Union[Account | User], task_id: str):
        """Return the cache key that holds the stop flag for this user and task."""
        return "generate_result_stopped:{}-{}".format(cls._user_prefix(user), task_id)

    def _publish(self, content):
        # Every event goes out JSON-encoded on the task's channel.
        redis_client.publish(self._channel, json.dumps(content))

    def _abort_if_stopped(self):
        # When a stop was requested, close the stream and unwind the caller.
        if self._is_stopped():
            self.pub_end()
            raise ConversationTaskStoppedException()

    def pub_text(self, text: str):
        """Publish a ``message`` event carrying a piece of generated text."""
        self._publish({
            'event': 'message',
            'data': {
                'task_id': self._task_id,
                'message_id': self._message.id,
                'text': text,
                'mode': self._conversation.mode,
                'conversation_id': self._conversation.id
            }
        })

        self._abort_if_stopped()

    def pub_chain(self, message_chain: MessageChain):
        """Publish a ``chain`` event if chain publishing is enabled."""
        if self._chain_pub:
            self._publish({
                'event': 'chain',
                'data': {
                    'task_id': self._task_id,
                    'message_id': self._message.id,
                    'chain_id': message_chain.id,
                    'type': message_chain.type,
                    'input': json.loads(message_chain.input),
                    'output': json.loads(message_chain.output),
                    'mode': self._conversation.mode,
                    'conversation_id': self._conversation.id
                }
            })

        self._abort_if_stopped()

    def pub_agent_thought(self, message_agent_thought: MessageAgentThought):
        """Publish an ``agent_thought`` event if agent-thought publishing is enabled."""
        if self._agent_thought_pub:
            self._publish({
                'event': 'agent_thought',
                'data': {
                    'task_id': self._task_id,
                    'message_id': self._message.id,
                    'chain_id': message_agent_thought.message_chain_id,
                    'agent_thought_id': message_agent_thought.id,
                    'position': message_agent_thought.position,
                    'thought': message_agent_thought.thought,
                    'tool': message_agent_thought.tool,
                    'tool_input': message_agent_thought.tool_input,
                    'observation': message_agent_thought.observation,
                    'answer': message_agent_thought.answer,
                    'mode': self._conversation.mode,
                    'conversation_id': self._conversation.id
                }
            })

        self._abort_if_stopped()

    def pub_end(self):
        """Publish the ``end`` event."""
        self._publish({
            'event': 'end',
        })

    @classmethod
    def pub_error(cls, user: Union[Account | User], task_id: str, e):
        """Publish an error payload for the task without needing a handler instance.

        The description is the exception's ``description`` attribute when it
        has one that is not ``None``, otherwise ``str(e)``.
        """
        description = getattr(e, 'description', None)
        if description is None:
            description = str(e)

        content = {
            'error': type(e).__name__,
            'description': description
        }

        channel = cls.generate_channel_name(user, task_id)
        redis_client.publish(channel, json.dumps(content))

    def _is_stopped(self):
        # The task counts as stopped as soon as the stop key exists.
        stop_flag = redis_client.get(self._stopped_cache_key)
        return stop_flag is not None

    @classmethod
    def stop(cls, user: Union[Account | User], task_id: str):
        """Set the stop flag for the task; the key expires after 600 seconds."""
        stopped_cache_key = cls.generate_stopped_cache_key(user, task_id)
        redis_client.setex(stopped_cache_key, 600, 1)
|
||||
|
||||
|
||||
class ConversationTaskStoppedException(Exception):
    """Raised when a running conversation task finds its stop flag set."""
|
||||
190
api/core/docstore/dataset_docstore.py
Normal file
190
api/core/docstore/dataset_docstore.py
Normal file
@@ -0,0 +1,190 @@
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
|
||||
import tiktoken
|
||||
from llama_index.data_structs import Node
|
||||
from llama_index.docstore.types import BaseDocumentStore
|
||||
from llama_index.docstore.utils import json_to_doc
|
||||
from llama_index.schema import BaseDocument
|
||||
from sqlalchemy import func
|
||||
|
||||
from core.llm.token_calculator import TokenCalculator
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset, DocumentSegment
|
||||
|
||||
|
||||
class DatesetDocumentStore(BaseDocumentStore):
    """llama_index document store backed by ``DocumentSegment`` rows.

    Every node is stored as one segment of the configured dataset; the node's
    doc id and doc hash are kept in ``index_node_id`` / ``index_node_hash``.
    """

    def __init__(
        self,
        dataset: Dataset,
        user_id: str,
        embedding_model_name: str,
        document_id: Optional[str] = None,
    ):
        self._dataset = dataset
        self._user_id = user_id
        self._embedding_model_name = embedding_model_name
        self._document_id = document_id

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "DatesetDocumentStore":
        """Build a store by passing ``config_dict`` to the constructor as keyword arguments."""
        # NOTE(review): to_dict() only emits "dataset_id", which is not a
        # constructor argument, so to_dict()/from_dict() do not round-trip.
        return cls(**config_dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to dict."""
        return {
            "dataset_id": self._dataset.id,
        }

    @property
    def dateset_id(self) -> Any:
        """Id of the dataset this store reads from and writes to."""
        return self._dataset.id

    @property
    def user_id(self) -> Any:
        """Id recorded as ``created_by`` on newly inserted segments."""
        return self._user_id

    @property
    def embedding_model_name(self) -> Any:
        """Model name used to count the tokens of each segment."""
        return self._embedding_model_name

    @property
    def docs(self) -> Dict[str, BaseDocument]:
        """Return every segment of the dataset as a document, keyed by node id."""
        document_segments = db.session.query(DocumentSegment).filter(
            DocumentSegment.dataset_id == self._dataset.id
        ).all()

        return {
            document_segment.index_node_id: json_to_doc(self.segment_to_dict(document_segment))
            for document_segment in document_segments
        }

    def add_documents(
        self, docs: Sequence[BaseDocument], allow_update: bool = True
    ) -> None:
        """Insert or update one segment per node.

        New segments are appended after the highest existing position of the
        current document. Existing segments (matched by node id within the
        dataset) get their content, hash, word count and token count
        refreshed.

        Raises:
            ValueError: if a doc has no doc id, is not a ``Node``, or already
                exists while ``allow_update`` is false.
        """
        # Positions are scoped per document, so filter on the same
        # ``document_id`` column that is populated on insert below.
        max_position = db.session.query(func.max(DocumentSegment.position)).filter(
            DocumentSegment.document_id == self._document_id
        ).scalar()

        if max_position is None:
            max_position = 0

        for doc in docs:
            if doc.is_doc_id_none:
                raise ValueError("doc_id not set")

            if not isinstance(doc, Node):
                raise ValueError("doc must be a Node")

            doc_id = doc.get_doc_id()

            # Fetch the database row itself. get_document() returns a
            # converted llama_index document, and attribute changes on that
            # object would never reach the database.
            segment_document = self.get_document_segment(doc_id)

            # NOTE: doc could already exist in the store, but we overwrite it
            if not allow_update and segment_document is not None:
                raise ValueError(
                    f"doc_id {doc_id} already exists. "
                    "Set allow_update to True to overwrite."
                )

            text = doc.get_text()

            # tokens the embedding model will consume for this text
            tokens = TokenCalculator.get_num_tokens(self._embedding_model_name, text)

            if segment_document is None:
                max_position += 1

                segment_document = DocumentSegment(
                    tenant_id=self._dataset.tenant_id,
                    dataset_id=self._dataset.id,
                    document_id=self._document_id,
                    index_node_id=doc_id,
                    index_node_hash=doc.get_doc_hash(),
                    position=max_position,
                    content=text,
                    word_count=len(text),
                    tokens=tokens,
                    created_by=self._user_id,
                )
                db.session.add(segment_document)
            else:
                segment_document.content = text
                segment_document.index_node_hash = doc.get_doc_hash()
                segment_document.word_count = len(text)
                segment_document.tokens = tokens

            db.session.commit()

    def document_exists(self, doc_id: str) -> bool:
        """Check if document exists."""
        result = self.get_document_segment(doc_id)
        return result is not None

    def get_document(
        self, doc_id: str, raise_error: bool = True
    ) -> Optional[BaseDocument]:
        """Return the stored segment for ``doc_id`` converted to a document.

        Returns ``None`` for an unknown id when ``raise_error`` is false and
        raises ``ValueError`` otherwise.
        """
        document_segment = self.get_document_segment(doc_id)

        if document_segment is None:
            if raise_error:
                raise ValueError(f"doc_id {doc_id} not found.")
            else:
                return None

        result = self.segment_to_dict(document_segment)
        return json_to_doc(result)

    def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
        """Delete the segment for ``doc_id`` and commit.

        An unknown id raises ``ValueError`` unless ``raise_error`` is false.
        """
        document_segment = self.get_document_segment(doc_id)

        if document_segment is None:
            if raise_error:
                raise ValueError(f"doc_id {doc_id} not found.")
            else:
                return None

        db.session.delete(document_segment)
        db.session.commit()

    def set_document_hash(self, doc_id: str, doc_hash: str) -> None:
        """Set the hash for a given doc_id."""
        document_segment = self.get_document_segment(doc_id)

        if document_segment is None:
            return None

        document_segment.index_node_hash = doc_hash
        db.session.commit()

    def get_document_hash(self, doc_id: str) -> Optional[str]:
        """Get the stored hash for a document, if it exists."""
        document_segment = self.get_document_segment(doc_id)

        if document_segment is None:
            return None

        return document_segment.index_node_hash

    def update_docstore(self, other: "BaseDocumentStore") -> None:
        """Update docstore.

        Args:
            other (BaseDocumentStore): docstore to update from

        """
        self.add_documents(list(other.docs.values()))

    def get_document_segment(self, doc_id: str) -> Optional[DocumentSegment]:
        """Return the segment of this dataset whose node id is ``doc_id``, or ``None``."""
        document_segment = db.session.query(DocumentSegment).filter(
            DocumentSegment.dataset_id == self._dataset.id,
            DocumentSegment.index_node_id == doc_id
        ).first()

        return document_segment

    def segment_to_dict(self, segment: DocumentSegment) -> Dict[str, Any]:
        """Build the dict that ``json_to_doc`` turns back into a ``Node``."""
        return {
            "doc_id": segment.index_node_id,
            "doc_hash": segment.index_node_hash,
            "text": segment.content,
            "__type__": Node.get_type()
        }
|
||||
51
api/core/docstore/empty_docstore.py
Normal file
51
api/core/docstore/empty_docstore.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
from llama_index.docstore.types import BaseDocumentStore
|
||||
from llama_index.schema import BaseDocument
|
||||
|
||||
|
||||
class EmptyDocumentStore(BaseDocumentStore):
    """Document store that keeps nothing: writes are dropped, reads find nothing."""

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "EmptyDocumentStore":
        """Return a fresh store; the configuration is ignored."""
        return cls()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to dict."""
        return dict()

    @property
    def docs(self) -> Dict[str, BaseDocument]:
        """Always an empty mapping."""
        return dict()

    def add_documents(
        self, docs: Sequence[BaseDocument], allow_update: bool = True
    ) -> None:
        """Accept the documents and store none of them."""
        return None

    def document_exists(self, doc_id: str) -> bool:
        """Check if document exists."""
        return False

    def get_document(
        self, doc_id: str, raise_error: bool = True
    ) -> Optional[BaseDocument]:
        """Always return ``None``; ``raise_error`` is not consulted."""
        return None

    def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
        """Do nothing; there is never a document to delete."""
        return None

    def set_document_hash(self, doc_id: str, doc_hash: str) -> None:
        """Set the hash for a given doc_id."""
        return None

    def get_document_hash(self, doc_id: str) -> Optional[str]:
        """Get the stored hash for a document, if it exists."""
        return None

    def update_docstore(self, other: "BaseDocumentStore") -> None:
        """Update docstore.

        Args:
            other (BaseDocumentStore): docstore to update from

        """
        incoming = list(other.docs.values())
        self.add_documents(incoming)
|
||||
176
api/core/embedding/openai_embedding.py
Normal file
176
api/core/embedding/openai_embedding.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from typing import Optional, Any, List
|
||||
|
||||
import openai
|
||||
from llama_index.embeddings.base import BaseEmbedding
|
||||
from llama_index.embeddings.openai import OpenAIEmbeddingMode, OpenAIEmbeddingModelType, _QUERY_MODE_MODEL_DICT, \
|
||||
_TEXT_MODE_MODEL_DICT
|
||||
from tenacity import wait_random_exponential, retry, stop_after_attempt
|
||||
|
||||
from core.llm.error_handle_wraps import handle_llm_exceptions, handle_llm_exceptions_async
|
||||
|
||||
|
||||
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(
        text: str,
        engine: Optional[str] = None,
        openai_api_key: Optional[str] = None,
) -> List[float]:
    """Return the OpenAI embedding vector for a single text.

    Newlines in the text are replaced by spaces before the request is sent.

    NOTE: Adapted from OpenAI's embedding utils
    (https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py)
    to avoid importing unnecessary dependencies like matplotlib, plotly,
    scipy, sklearn.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], engine=engine, api_key=openai_api_key)
    first_item = response["data"][0]
    return first_item["embedding"]
|
||||
|
||||
|
||||
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
async def aget_embedding(text: str, engine: Optional[str] = None, openai_api_key: Optional[str] = None) -> List[float]:
    """Asynchronously return the OpenAI embedding vector for a single text.

    NOTE: Adapted from OpenAI's embedding utils
    (https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py)
    to avoid importing unnecessary dependencies like matplotlib, plotly,
    scipy, sklearn.
    """
    # Newlines can negatively affect performance, so flatten them first.
    single_line = text.replace("\n", " ")

    response = await openai.Embedding.acreate(input=[single_line], engine=engine, api_key=openai_api_key)
    first_item = response["data"][0]
    return first_item["embedding"]
|
||||
|
||||
|
||||
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embeddings(
        list_of_text: List[str],
        engine: Optional[str] = None,
        openai_api_key: Optional[str] = None
) -> List[List[float]]:
    """Return OpenAI embedding vectors for a batch of at most 2048 texts.

    The result has one vector per input text, in input order.

    NOTE: Adapted from OpenAI's embedding utils
    (https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py)
    to avoid importing unnecessary dependencies like matplotlib, plotly,
    scipy, sklearn.
    """
    assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."

    # Newlines can negatively affect performance, so flatten them first.
    flattened = [text.replace("\n", " ") for text in list_of_text]

    response = openai.Embedding.create(input=flattened, engine=engine, api_key=openai_api_key)
    # Sort by index so the output order matches the input order.
    ordered = sorted(response.data, key=lambda item: item["index"])
    return [item["embedding"] for item in ordered]
|
||||
|
||||
|
||||
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
async def aget_embeddings(
        list_of_text: List[str], engine: Optional[str] = None, openai_api_key: Optional[str] = None
) -> List[List[float]]:
    """Asynchronously return OpenAI embedding vectors for a batch of at most 2048 texts.

    The result has one vector per input text, in input order.

    NOTE: Adapted from OpenAI's embedding utils
    (https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py)
    to avoid importing unnecessary dependencies like matplotlib, plotly,
    scipy, sklearn.
    """
    assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."

    # Newlines can negatively affect performance, so flatten them first.
    flattened = [text.replace("\n", " ") for text in list_of_text]

    response = await openai.Embedding.acreate(input=flattened, engine=engine, api_key=openai_api_key)
    # Sort by index so the output order matches the input order.
    ordered = sorted(response.data, key=lambda item: item["index"])
    return [item["embedding"] for item in ordered]
|
||||
|
||||
|
||||
class OpenAIEmbedding(BaseEmbedding):
    """OpenAI embedding model that sends an explicit API key with every request.

    The engine is the deployment name when one is configured; otherwise it is
    looked up from the (mode, model) pair in llama_index's query/text tables.
    """

    def __init__(
            self,
            mode: str = OpenAIEmbeddingMode.TEXT_SEARCH_MODE,
            model: str = OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002,
            deployment_name: Optional[str] = None,
            openai_api_key: Optional[str] = None,
            **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(**kwargs)
        self.mode = OpenAIEmbeddingMode(mode)
        self.model = OpenAIEmbeddingModelType(model)
        self.deployment_name = deployment_name
        self.openai_api_key = openai_api_key

    def _pick_engine(self, engine_table) -> str:
        # An explicit deployment name wins over the (mode, model) lookup.
        if self.deployment_name is not None:
            return self.deployment_name

        key = (self.mode, self.model)
        if key not in engine_table:
            raise ValueError(f"Invalid mode, model combination: {key}")
        return engine_table[key]

    @handle_llm_exceptions
    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        engine = self._pick_engine(_QUERY_MODE_MODEL_DICT)
        return get_embedding(query, engine=engine, openai_api_key=self.openai_api_key)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        engine = self._pick_engine(_TEXT_MODE_MODEL_DICT)
        return get_embedding(text, engine=engine, openai_api_key=self.openai_api_key)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Asynchronously get text embedding."""
        engine = self._pick_engine(_TEXT_MODE_MODEL_DICT)
        return await aget_embedding(text, engine=engine, openai_api_key=self.openai_api_key)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings for a whole batch with a single request."""
        engine = self._pick_engine(_TEXT_MODE_MODEL_DICT)
        return get_embeddings(texts, engine=engine, openai_api_key=self.openai_api_key)

    async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Asynchronously get text embeddings."""
        engine = self._pick_engine(_TEXT_MODE_MODEL_DICT)
        return await aget_embeddings(texts, engine=engine, openai_api_key=self.openai_api_key)
|
||||
120
api/core/generator/llm_generator.py
Normal file
120
api/core/generator/llm_generator.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import logging
|
||||
|
||||
from langchain.chat_models.base import BaseChatModel
|
||||
from langchain.schema import HumanMessage
|
||||
|
||||
from core.constant import llm_constant
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
from core.llm.streamable_open_ai import StreamableOpenAI
|
||||
from core.llm.token_calculator import TokenCalculator
|
||||
|
||||
from core.prompt.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
|
||||
from core.prompt.prompt_template import OutLinePromptTemplate
|
||||
from core.prompt.prompts import CONVERSATION_TITLE_PROMPT, CONVERSATION_SUMMARY_PROMPT, INTRODUCTION_GENERATE_PROMPT
|
||||
|
||||
|
||||
# gpt-3.5-turbo does not work well here
|
||||
generate_base_model = 'text-davinci-003'
|
||||
|
||||
|
||||
class LLMGenerator:
    """One-shot text generation helpers built on ``generate_base_model``."""

    @classmethod
    def _generate_text(cls, tenant_id: str, prompt: str, **llm_kwargs) -> str:
        """Run ``prompt`` through the base model and return the stripped text.

        ``llm_kwargs`` are forwarded to ``LLMBuilder.to_llm`` (for example
        ``max_tokens``). For a chat model the prompt is wrapped into a single
        human message.
        """
        llm: StreamableOpenAI = LLMBuilder.to_llm(
            tenant_id=tenant_id,
            model_name=generate_base_model,
            **llm_kwargs
        )

        if isinstance(llm, BaseChatModel):
            prompt = [HumanMessage(content=prompt)]

        response = llm.generate([prompt])
        answer = response.generations[0][0].text
        return answer.strip()

    @classmethod
    def generate_conversation_name(cls, tenant_id: str, query, answer):
        """Generate a conversation title from a query and its answer."""
        prompt = CONVERSATION_TITLE_PROMPT.format(query=query, answer=answer)
        return cls._generate_text(tenant_id, prompt, max_tokens=50)

    @classmethod
    def generate_conversation_summary(cls, tenant_id: str, messages):
        """Generate a summary of the given messages.

        Messages without an answer are skipped. A question/answer pair is
        only added to the context while the context still fits into the token
        budget left after the prompt itself and the 200 tokens reserved for
        the answer.
        """
        max_tokens = 200

        prompt = CONVERSATION_SUMMARY_PROMPT
        prompt_with_empty_context = prompt.format(context='')
        prompt_tokens = TokenCalculator.get_num_tokens(generate_base_model, prompt_with_empty_context)
        rest_tokens = llm_constant.max_context_token_length[generate_base_model] - prompt_tokens - max_tokens

        context = ''
        for message in messages:
            if not message.answer:
                continue

            message_qa_text = "Human:" + message.query + "\nAI:" + message.answer + "\n"
            if rest_tokens - TokenCalculator.get_num_tokens(generate_base_model, context + message_qa_text) > 0:
                context += message_qa_text

        prompt = prompt.format(context=context)
        return cls._generate_text(tenant_id, prompt, max_tokens=max_tokens)

    @classmethod
    def generate_introduction(cls, tenant_id: str, pre_prompt: str):
        """Generate an introduction text from an app's pre-prompt."""
        prompt = INTRODUCTION_GENERATE_PROMPT.format(prompt=pre_prompt)
        return cls._generate_text(tenant_id, prompt)

    @classmethod
    def generate_suggested_questions_after_answer(cls, tenant_id: str, histories: str):
        """Return follow-up questions parsed from the model output.

        Any failure while calling the model or parsing its output is logged
        and results in an empty list.
        """
        output_parser = SuggestedQuestionsAfterAnswerOutputParser()
        format_instructions = output_parser.get_format_instructions()

        prompt = OutLinePromptTemplate(
            template="{histories}\n{format_instructions}\nquestions:\n",
            input_variables=["histories"],
            partial_variables={"format_instructions": format_instructions}
        )

        _input = prompt.format_prompt(histories=histories)

        llm: StreamableOpenAI = LLMBuilder.to_llm(
            tenant_id=tenant_id,
            model_name=generate_base_model,
            temperature=0,
            max_tokens=256
        )

        if isinstance(llm, BaseChatModel):
            query = [HumanMessage(content=_input.to_string())]
        else:
            query = _input.to_string()

        try:
            output = llm(query)
            # Calling a chat model yields a message object rather than a
            # plain string, so hand the parser its text content. Without
            # this the chat branch above could only ever produce [].
            if not isinstance(output, str):
                output = output.content
            questions = output_parser.parse(output)
        except Exception:
            logging.exception("Error generating suggested questions after answer")
            questions = []

        return questions
|
||||
45
api/core/index/index_builder.py
Normal file
45
api/core/index/index_builder.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from langchain.callbacks import CallbackManager
|
||||
from llama_index import ServiceContext, PromptHelper, LLMPredictor
|
||||
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
|
||||
from core.embedding.openai_embedding import OpenAIEmbedding
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
|
||||
|
||||
class IndexBuilder:
    """Factory for the llama_index service context used by a tenant."""

    @classmethod
    def get_default_service_context(cls, tenant_id: str) -> ServiceContext:
        """Build the default ``ServiceContext`` for ``tenant_id``.

        It combines a ``text-davinci-003`` predictor (temperature 0, 512
        output tokens), a prompt helper and an OpenAI embedding model created
        from the tenant's ``text-embedding-ada-002`` credentials.
        """
        # number of tokens reserved for the model output
        output_token_budget = 512

        # only for verbose
        verbose_callbacks = CallbackManager([DifyStdOutCallbackHandler()])

        predictor = LLMPredictor(
            llm=LLMBuilder.to_llm(
                tenant_id=tenant_id,
                model_name='text-davinci-003',
                temperature=0,
                max_tokens=output_token_budget,
                callback_manager=verbose_callbacks,
            )
        )

        # These parameters affect how the final synthesized response is
        # segmented: the number of refinement iterations depends on whether
        # the segmented output exceeds max_input_size.
        helper = PromptHelper(
            max_input_size=3500,
            num_output=output_token_budget,
            max_chunk_overlap=20
        )

        embedding_credentials = LLMBuilder.get_model_credentials(
            tenant_id=tenant_id,
            model_name='text-embedding-ada-002'
        )
        embedding_model = OpenAIEmbedding(**embedding_credentials)

        return ServiceContext.from_defaults(
            llm_predictor=predictor,
            prompt_helper=helper,
            embed_model=embedding_model,
        )
|
||||
159
api/core/index/keyword_table/jieba_keyword_table.py
Normal file
159
api/core/index/keyword_table/jieba_keyword_table.py
Normal file
@@ -0,0 +1,159 @@
|
||||
import re
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
List,
|
||||
Set,
|
||||
Optional
|
||||
)
|
||||
|
||||
import jieba.analyse
|
||||
|
||||
from core.index.keyword_table.stopwords import STOPWORDS
|
||||
from llama_index.indices.query.base import IS
|
||||
from llama_index import QueryMode
|
||||
from llama_index.indices.base import QueryMap
|
||||
from llama_index.indices.keyword_table.base import BaseGPTKeywordTableIndex
|
||||
from llama_index.indices.keyword_table.query import BaseGPTKeywordTableQuery
|
||||
from llama_index.docstore import BaseDocumentStore
|
||||
from llama_index.indices.postprocessor.node import (
|
||||
BaseNodePostprocessor,
|
||||
)
|
||||
from llama_index.indices.response.response_builder import ResponseMode
|
||||
from llama_index.indices.service_context import ServiceContext
|
||||
from llama_index.optimization.optimizer import BaseTokenUsageOptimizer
|
||||
from llama_index.prompts.prompts import (
|
||||
QuestionAnswerPrompt,
|
||||
RefinePrompt,
|
||||
SimpleInputPrompt,
|
||||
)
|
||||
|
||||
from core.index.query.synthesizer import EnhanceResponseSynthesizer
|
||||
|
||||
|
||||
def jieba_extract_keywords(
        text_chunk: str,
        max_keywords: Optional[int] = None,
        expand_with_subtokens: bool = True,
) -> Set[str]:
    """Extract keywords from ``text_chunk`` with jieba's ``extract_tags``.

    ``max_keywords`` is passed on as ``topK``. With ``expand_with_subtokens``
    the keywords are additionally expanded through
    ``expand_tokens_with_subtokens``.
    """
    extracted = jieba.analyse.extract_tags(
        sentence=text_chunk,
        topK=max_keywords,
    )

    if not expand_with_subtokens:
        return set(extracted)

    expanded = expand_tokens_with_subtokens(extracted)
    return set(expanded)
|
||||
|
||||
|
||||
def expand_tokens_with_subtokens(tokens: Set[str]) -> Set[str]:
    """Return the tokens plus the non-stopword words of multi-word tokens.

    Every token is kept as is. A token that consists of more than one
    ``\\w+`` run additionally contributes each of those runs, except the ones
    found in ``STOPWORDS``.
    """
    results = set()
    for token in tokens:
        results.add(token)
        sub_tokens = re.findall(r"\w+", token)
        if len(sub_tokens) > 1:
            # Test membership on STOPWORDS directly instead of copying it
            # into a list (and scanning that list) for every sub-token.
            results.update(w for w in sub_tokens if w not in STOPWORDS)

    return results
|
||||
|
||||
|
||||
class GPTJIEBAKeywordTableIndex(BaseGPTKeywordTableIndex):
    """GPT JIEBA Keyword Table Index.

    Keyword table index whose keywords are extracted with jieba.

    """

    def _extract_keywords(self, text: str) -> Set[str]:
        """Extract keywords from text."""
        keyword_limit = self.max_keywords_per_chunk
        return jieba_extract_keywords(text, max_keywords=keyword_limit)

    @classmethod
    def get_query_map(cls) -> QueryMap:
        """Get query map, with the default mode routed to the jieba query class."""
        query_map = super().get_query_map()
        query_map[QueryMode.DEFAULT] = GPTKeywordTableJIEBAQuery
        return query_map

    def _delete(self, doc_id: str, **delete_kwargs: Any) -> None:
        """Remove ``doc_id`` from every keyword entry and drop emptied keywords."""
        # ids that have to disappear from the keyword -> node ids mapping
        doomed_ids = {doc_id}

        table = self._index_struct.table
        emptied_keywords = set()
        for keyword, node_idxs in table.items():
            if not doomed_ids.intersection(node_idxs):
                continue

            remaining = node_idxs.difference(doomed_ids)
            table[keyword] = remaining
            if not remaining:
                emptied_keywords.add(keyword)

        # Keys are removed only after the loop, so the table never changes
        # size while it is being iterated.
        for keyword in emptied_keywords:
            del table[keyword]
|
||||
|
||||
|
||||
class GPTKeywordTableJIEBAQuery(BaseGPTKeywordTableQuery):
    """Keyword-table query that obtains its keywords from the JIEBA extractor.

    GPTJIEBAKeywordTableIndex registers this class for its default query mode.

    .. code-block:: python

        response = index.query("<query_str>", mode="jieba")

    NOTE(review): the ``mode="jieba"`` example is carried over from the
    original docstring; confirm that mode name is actually accepted.

    See BaseGPTKeywordTableQuery for arguments.
    """

    @classmethod
    def from_args(
            cls,
            index_struct: IS,
            service_context: ServiceContext,
            docstore: Optional[BaseDocumentStore] = None,
            node_postprocessors: Optional[List[BaseNodePostprocessor]] = None,
            verbose: bool = False,
            # response synthesizer args
            response_mode: ResponseMode = ResponseMode.DEFAULT,
            text_qa_template: Optional[QuestionAnswerPrompt] = None,
            refine_template: Optional[RefinePrompt] = None,
            simple_template: Optional[SimpleInputPrompt] = None,
            response_kwargs: Optional[Dict] = None,
            use_async: bool = False,
            streaming: bool = False,
            optimizer: Optional[BaseTokenUsageOptimizer] = None,
            # class-specific args
            **kwargs: Any,
    ) -> "BaseGPTIndexQuery":
        """Create the query with an EnhanceResponseSynthesizer built from the synthesizer args.

        Remaining keyword arguments are forwarded unchanged to the class constructor.
        """
        # Everything that configures answer synthesis goes to the synthesizer factory.
        synthesizer_options = dict(
            service_context=service_context,
            text_qa_template=text_qa_template,
            refine_template=refine_template,
            simple_template=simple_template,
            response_mode=response_mode,
            response_kwargs=response_kwargs,
            use_async=use_async,
            streaming=streaming,
            optimizer=optimizer,
        )
        synthesizer = EnhanceResponseSynthesizer.from_args(**synthesizer_options)

        return cls(
            index_struct=index_struct,
            service_context=service_context,
            response_synthesizer=synthesizer,
            docstore=docstore,
            node_postprocessors=node_postprocessors,
            verbose=verbose,
            **kwargs,
        )

    def _get_keywords(self, query_str: str) -> List[str]:
        """Return the JIEBA keywords of ``query_str`` as a list."""
        extracted = jieba_extract_keywords(query_str, max_keywords=self.max_keywords_per_query)
        return list(extracted)
|
||||
90
api/core/index/keyword_table/stopwords.py
Normal file
90
api/core/index/keyword_table/stopwords.py
Normal file
@@ -0,0 +1,90 @@
|
||||
STOPWORDS = {
|
||||
"during", "when", "but", "then", "further", "isn", "mustn't", "until", "own", "i", "couldn", "y", "only", "you've",
|
||||
"ours", "who", "where", "ourselves", "has", "to", "was", "didn't", "themselves", "if", "against", "through", "her",
|
||||
"an", "your", "can", "those", "didn", "about", "aren't", "shan't", "be", "not", "these", "again", "so", "t",
|
||||
"theirs", "weren", "won't", "won", "itself", "just", "same", "while", "why", "doesn", "aren", "him", "haven",
|
||||
"for", "you'll", "that", "we", "am", "d", "by", "having", "wasn't", "than", "weren't", "out", "from", "now",
|
||||
"their", "too", "hadn", "o", "needn", "most", "it", "under", "needn't", "any", "some", "few", "ll", "hers", "which",
|
||||
"m", "you're", "off", "other", "had", "she", "you'd", "do", "you", "does", "s", "will", "each", "wouldn't", "hasn't",
|
||||
"such", "more", "whom", "she's", "my", "yours", "yourself", "of", "on", "very", "hadn't", "with", "yourselves",
|
||||
"been", "ma", "them", "mightn't", "shan", "mustn", "they", "what", "both", "that'll", "how", "is", "he", "because",
|
||||
"down", "haven't", "are", "no", "it's", "our", "being", "the", "or", "above", "myself", "once", "don't", "doesn't",
|
||||
"as", "nor", "here", "herself", "hasn", "mightn", "have", "its", "all", "were", "ain", "this", "at", "after",
|
||||
"over", "shouldn't", "into", "before", "don", "wouldn", "re", "couldn't", "wasn", "in", "should", "there",
|
||||
"himself", "isn't", "should've", "doing", "ve", "shouldn", "a", "did", "and", "his", "between", "me", "up", "below",
|
||||
"人民", "末##末", "啊", "阿", "哎", "哎呀", "哎哟", "唉", "俺", "俺们", "按", "按照", "吧", "吧哒", "把", "罢了", "被", "本",
|
||||
"本着", "比", "比方", "比如", "鄙人", "彼", "彼此", "边", "别", "别的", "别说", "并", "并且", "不比", "不成", "不单", "不但",
|
||||
"不独", "不管", "不光", "不过", "不仅", "不拘", "不论", "不怕", "不然", "不如", "不特", "不惟", "不问", "不只", "朝", "朝着",
|
||||
"趁", "趁着", "乘", "冲", "除", "除此之外", "除非", "除了", "此", "此间", "此外", "从", "从而", "打", "待", "但", "但是", "当",
|
||||
"当着", "到", "得", "的", "的话", "等", "等等", "地", "第", "叮咚", "对", "对于", "多", "多少", "而", "而况", "而且", "而是",
|
||||
"而外", "而言", "而已", "尔后", "反过来", "反过来说", "反之", "非但", "非徒", "否则", "嘎", "嘎登", "该", "赶", "个", "各",
|
||||
"各个", "各位", "各种", "各自", "给", "根据", "跟", "故", "故此", "固然", "关于", "管", "归", "果然", "果真", "过", "哈",
|
||||
"哈哈", "呵", "和", "何", "何处", "何况", "何时", "嘿", "哼", "哼唷", "呼哧", "乎", "哗", "还是", "还有", "换句话说", "换言之",
|
||||
"或", "或是", "或者", "极了", "及", "及其", "及至", "即", "即便", "即或", "即令", "即若", "即使", "几", "几时", "己", "既",
|
||||
"既然", "既是", "继而", "加之", "假如", "假若", "假使", "鉴于", "将", "较", "较之", "叫", "接着", "结果", "借", "紧接着",
|
||||
"进而", "尽", "尽管", "经", "经过", "就", "就是", "就是说", "据", "具体地说", "具体说来", "开始", "开外", "靠", "咳", "可",
|
||||
"可见", "可是", "可以", "况且", "啦", "来", "来着", "离", "例如", "哩", "连", "连同", "两者", "了", "临", "另", "另外",
|
||||
"另一方面", "论", "嘛", "吗", "慢说", "漫说", "冒", "么", "每", "每当", "们", "莫若", "某", "某个", "某些", "拿", "哪",
|
||||
"哪边", "哪儿", "哪个", "哪里", "哪年", "哪怕", "哪天", "哪些", "哪样", "那", "那边", "那儿", "那个", "那会儿", "那里", "那么",
|
||||
"那么些", "那么样", "那时", "那些", "那样", "乃", "乃至", "呢", "能", "你", "你们", "您", "宁", "宁可", "宁肯", "宁愿", "哦",
|
||||
"呕", "啪达", "旁人", "呸", "凭", "凭借", "其", "其次", "其二", "其他", "其它", "其一", "其余", "其中", "起", "起见", "岂但",
|
||||
"恰恰相反", "前后", "前者", "且", "然而", "然后", "然则", "让", "人家", "任", "任何", "任凭", "如", "如此", "如果", "如何",
|
||||
"如其", "如若", "如上所述", "若", "若非", "若是", "啥", "上下", "尚且", "设若", "设使", "甚而", "甚么", "甚至", "省得", "时候",
|
||||
"什么", "什么样", "使得", "是", "是的", "首先", "谁", "谁知", "顺", "顺着", "似的", "虽", "虽然", "虽说", "虽则", "随", "随着",
|
||||
"所", "所以", "他", "他们", "他人", "它", "它们", "她", "她们", "倘", "倘或", "倘然", "倘若", "倘使", "腾", "替", "通过", "同",
|
||||
"同时", "哇", "万一", "往", "望", "为", "为何", "为了", "为什么", "为着", "喂", "嗡嗡", "我", "我们", "呜", "呜呼", "乌乎",
|
||||
"无论", "无宁", "毋宁", "嘻", "吓", "相对而言", "像", "向", "向着", "嘘", "呀", "焉", "沿", "沿着", "要", "要不", "要不然",
|
||||
"要不是", "要么", "要是", "也", "也罢", "也好", "一", "一般", "一旦", "一方面", "一来", "一切", "一样", "一则", "依", "依照",
|
||||
"矣", "以", "以便", "以及", "以免", "以至", "以至于", "以致", "抑或", "因", "因此", "因而", "因为", "哟", "用", "由",
|
||||
"由此可见", "由于", "有", "有的", "有关", "有些", "又", "于", "于是", "于是乎", "与", "与此同时", "与否", "与其", "越是",
|
||||
"云云", "哉", "再说", "再者", "在", "在下", "咱", "咱们", "则", "怎", "怎么", "怎么办", "怎么样", "怎样", "咋", "照", "照着",
|
||||
"者", "这", "这边", "这儿", "这个", "这会儿", "这就是说", "这里", "这么", "这么点儿", "这么些", "这么样", "这时", "这些", "这样",
|
||||
"正如", "吱", "之", "之类", "之所以", "之一", "只是", "只限", "只要", "只有", "至", "至于", "诸位", "着", "着呢", "自", "自从",
|
||||
"自个儿", "自各儿", "自己", "自家", "自身", "综上所述", "总的来看", "总的来说", "总的说来", "总而言之", "总之", "纵", "纵令",
|
||||
"纵然", "纵使", "遵照", "作为", "兮", "呃", "呗", "咚", "咦", "喏", "啐", "喔唷", "嗬", "嗯", "嗳", "~", "!", ".", ":",
|
||||
"\"", "'", "(", ")", "*", "A", "白", "社会主义", "--", "..", ">>", " [", " ]", "", "<", ">", "/", "\\", "|", "-", "_",
|
||||
"+", "=", "&", "^", "%", "#", "@", "`", ";", "$", "(", ")", "——", "—", "¥", "·", "...", "‘", "’", "〉", "〈", "…",
|
||||
" ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "二",
|
||||
"三", "四", "五", "六", "七", "八", "九", "零", ">", "<", "@", "#", "$", "%", "︿", "&", "*", "+", "~", "|", "[",
|
||||
"]", "{", "}", "啊哈", "啊呀", "啊哟", "挨次", "挨个", "挨家挨户", "挨门挨户", "挨门逐户", "挨着", "按理", "按期", "按时",
|
||||
"按说", "暗地里", "暗中", "暗自", "昂然", "八成", "白白", "半", "梆", "保管", "保险", "饱", "背地里", "背靠背", "倍感", "倍加",
|
||||
"本人", "本身", "甭", "比起", "比如说", "比照", "毕竟", "必", "必定", "必将", "必须", "便", "别人", "并非", "并肩", "并没",
|
||||
"并没有", "并排", "并无", "勃然", "不", "不必", "不常", "不大", "不但...而且", "不得", "不得不", "不得了", "不得已", "不迭",
|
||||
"不定", "不对", "不妨", "不管怎样", "不会", "不仅...而且", "不仅仅", "不仅仅是", "不经意", "不可开交", "不可抗拒", "不力", "不了",
|
||||
"不料", "不满", "不免", "不能不", "不起", "不巧", "不然的话", "不日", "不少", "不胜", "不时", "不是", "不同", "不能", "不要",
|
||||
"不外", "不外乎", "不下", "不限", "不消", "不已", "不亦乐乎", "不由得", "不再", "不择手段", "不怎么", "不曾", "不知不觉", "不止",
|
||||
"不止一次", "不至于", "才", "才能", "策略地", "差不多", "差一点", "常", "常常", "常言道", "常言说", "常言说得好", "长此下去",
|
||||
"长话短说", "长期以来", "长线", "敞开儿", "彻夜", "陈年", "趁便", "趁机", "趁热", "趁势", "趁早", "成年", "成年累月", "成心",
|
||||
"乘机", "乘胜", "乘势", "乘隙", "乘虚", "诚然", "迟早", "充分", "充其极", "充其量", "抽冷子", "臭", "初", "出", "出来", "出去",
|
||||
"除此", "除此而外", "除此以外", "除开", "除去", "除却", "除外", "处处", "川流不息", "传", "传说", "传闻", "串行", "纯", "纯粹",
|
||||
"此后", "此中", "次第", "匆匆", "从不", "从此", "从此以后", "从古到今", "从古至今", "从今以后", "从宽", "从来", "从轻", "从速",
|
||||
"从头", "从未", "从无到有", "从小", "从新", "从严", "从优", "从早到晚", "从中", "从重", "凑巧", "粗", "存心", "达旦", "打从",
|
||||
"打开天窗说亮话", "大", "大不了", "大大", "大抵", "大都", "大多", "大凡", "大概", "大家", "大举", "大略", "大面儿上", "大事",
|
||||
"大体", "大体上", "大约", "大张旗鼓", "大致", "呆呆地", "带", "殆", "待到", "单", "单纯", "单单", "但愿", "弹指之间", "当场",
|
||||
"当儿", "当即", "当口儿", "当然", "当庭", "当头", "当下", "当真", "当中", "倒不如", "倒不如说", "倒是", "到处", "到底", "到了儿",
|
||||
"到目前为止", "到头", "到头来", "得起", "得天独厚", "的确", "等到", "叮当", "顶多", "定", "动不动", "动辄", "陡然", "都", "独",
|
||||
"独自", "断然", "顿时", "多次", "多多", "多多少少", "多多益善", "多亏", "多年来", "多年前", "而后", "而论", "而又", "尔等",
|
||||
"二话不说", "二话没说", "反倒", "反倒是", "反而", "反手", "反之亦然", "反之则", "方", "方才", "方能", "放量", "非常", "非得",
|
||||
"分期", "分期分批", "分头", "奋勇", "愤然", "风雨无阻", "逢", "弗", "甫", "嘎嘎", "该当", "概", "赶快", "赶早不赶晚", "敢",
|
||||
"敢情", "敢于", "刚", "刚才", "刚好", "刚巧", "高低", "格外", "隔日", "隔夜", "个人", "各式", "更", "更加", "更进一步", "更为",
|
||||
"公然", "共", "共总", "够瞧的", "姑且", "古来", "故而", "故意", "固", "怪", "怪不得", "惯常", "光", "光是", "归根到底",
|
||||
"归根结底", "过于", "毫不", "毫无", "毫无保留地", "毫无例外", "好在", "何必", "何尝", "何妨", "何苦", "何乐而不为", "何须",
|
||||
"何止", "很", "很多", "很少", "轰然", "后来", "呼啦", "忽地", "忽然", "互", "互相", "哗啦", "话说", "还", "恍然", "会", "豁然",
|
||||
"活", "伙同", "或多或少", "或许", "基本", "基本上", "基于", "极", "极大", "极度", "极端", "极力", "极其", "极为", "急匆匆",
|
||||
"即将", "即刻", "即是说", "几度", "几番", "几乎", "几经", "既...又", "继之", "加上", "加以", "间或", "简而言之", "简言之",
|
||||
"简直", "见", "将才", "将近", "将要", "交口", "较比", "较为", "接连不断", "接下来", "皆可", "截然", "截至", "藉以", "借此",
|
||||
"借以", "届时", "仅", "仅仅", "谨", "进来", "进去", "近", "近几年来", "近来", "近年来", "尽管如此", "尽可能", "尽快", "尽量",
|
||||
"尽然", "尽如人意", "尽心竭力", "尽心尽力", "尽早", "精光", "经常", "竟", "竟然", "究竟", "就此", "就地", "就算", "居然", "局外",
|
||||
"举凡", "据称", "据此", "据实", "据说", "据我所知", "据悉", "具体来说", "决不", "决非", "绝", "绝不", "绝顶", "绝对", "绝非",
|
||||
"均", "喀", "看", "看来", "看起来", "看上去", "看样子", "可好", "可能", "恐怕", "快", "快要", "来不及", "来得及", "来讲",
|
||||
"来看", "拦腰", "牢牢", "老", "老大", "老老实实", "老是", "累次", "累年", "理当", "理该", "理应", "历", "立", "立地", "立刻",
|
||||
"立马", "立时", "联袂", "连连", "连日", "连日来", "连声", "连袂", "临到", "另方面", "另行", "另一个", "路经", "屡", "屡次",
|
||||
"屡次三番", "屡屡", "缕缕", "率尔", "率然", "略", "略加", "略微", "略为", "论说", "马上", "蛮", "满", "没", "没有", "每逢",
|
||||
"每每", "每时每刻", "猛然", "猛然间", "莫", "莫不", "莫非", "莫如", "默默地", "默然", "呐", "那末", "奈", "难道", "难得", "难怪",
|
||||
"难说", "内", "年复一年", "凝神", "偶而", "偶尔", "怕", "砰", "碰巧", "譬如", "偏偏", "乒", "平素", "颇", "迫于", "扑通",
|
||||
"其后", "其实", "奇", "齐", "起初", "起来", "起首", "起头", "起先", "岂", "岂非", "岂止", "迄", "恰逢", "恰好", "恰恰", "恰巧",
|
||||
"恰如", "恰似", "千", "千万", "千万千万", "切", "切不可", "切莫", "切切", "切勿", "窃", "亲口", "亲身", "亲手", "亲眼", "亲自",
|
||||
"顷", "顷刻", "顷刻间", "顷刻之间", "请勿", "穷年累月", "取道", "去", "权时", "全都", "全力", "全年", "全然", "全身心", "然",
|
||||
"人人", "仍", "仍旧", "仍然", "日复一日", "日见", "日渐", "日益", "日臻", "如常", "如此等等", "如次", "如今", "如期", "如前所述",
|
||||
"如上", "如下", "汝", "三番两次", "三番五次", "三天两头", "瑟瑟", "沙沙", "上", "上来", "上去", "一个", "月", "日", "\n"
|
||||
}
|
||||
135
api/core/index/keyword_table_index.py
Normal file
135
api/core/index/keyword_table_index.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import json
|
||||
from typing import List, Optional
|
||||
|
||||
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding
|
||||
from llama_index.data_structs import KeywordTable, Node
|
||||
from llama_index.indices.keyword_table.base import BaseGPTKeywordTableIndex
|
||||
from llama_index.indices.registry import load_index_struct_from_dict
|
||||
|
||||
from core.docstore.dataset_docstore import DatesetDocumentStore
|
||||
from core.docstore.empty_docstore import EmptyDocumentStore
|
||||
from core.index.index_builder import IndexBuilder
|
||||
from core.index.keyword_table.jieba_keyword_table import GPTJIEBAKeywordTableIndex
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset, DatasetKeywordTable, DocumentSegment
|
||||
|
||||
|
||||
class KeywordTableIndex:
    """Maintain and query the JIEBA keyword table persisted for one dataset."""

    def __init__(self, dataset: Dataset):
        self._dataset = dataset

    def add_nodes(self, nodes: List[Node]):
        """Extract keywords for ``nodes`` and persist them.

        Each node's keywords are stored on its document segment and added to
        the dataset's keyword table; the table row is created when missing.

        Args:
            nodes: Nodes whose text should be keyword-indexed.
        """
        service_context = self._build_service_context()

        dataset_keyword_table = self.get_keyword_table()
        if not dataset_keyword_table or not dataset_keyword_table.keyword_table_dict:
            index_struct = KeywordTable()
        else:
            index_struct = load_index_struct_from_dict(dataset_keyword_table.keyword_table_dict)

        index = self._build_index(index_struct, service_context)

        for node in nodes:
            keywords = index._extract_keywords(node.get_text())
            self.update_segment_keywords(node.doc_id, list(keywords))
            index._index_struct.add_node(list(keywords), node)

        self._save_index_struct(dataset_keyword_table, index)

    def del_nodes(self, node_ids: List[str]):
        """Remove ``node_ids`` from the stored keyword table.

        Does nothing when the dataset has no keyword table yet.

        Args:
            node_ids: Node identifiers to delete from the table.
        """
        service_context = self._build_service_context()

        dataset_keyword_table = self.get_keyword_table()
        if not dataset_keyword_table or not dataset_keyword_table.keyword_table_dict:
            return

        index_struct = load_index_struct_from_dict(dataset_keyword_table.keyword_table_dict)
        index = self._build_index(index_struct, service_context)

        for node_id in node_ids:
            index.delete(node_id)

        self._save_index_struct(dataset_keyword_table, index)

    @property
    def query_index(self) -> Optional[BaseGPTKeywordTableIndex]:
        """Return a queryable keyword index, or ``None`` when no table is stored."""
        docstore = DatesetDocumentStore(
            dataset=self._dataset,
            user_id=self._dataset.created_by,
            embedding_model_name="text-embedding-ada-002"
        )

        service_context = IndexBuilder.get_default_service_context(tenant_id=self._dataset.tenant_id)

        dataset_keyword_table = self.get_keyword_table()
        if not dataset_keyword_table or not dataset_keyword_table.keyword_table_dict:
            return None

        index_struct: KeywordTable = load_index_struct_from_dict(dataset_keyword_table.keyword_table_dict)

        return GPTJIEBAKeywordTableIndex(index_struct=index_struct, docstore=docstore, service_context=service_context)

    def get_keyword_table(self):
        """Return the dataset's keyword-table record, or ``None`` when it is missing/falsy."""
        return self._dataset.dataset_keyword_table or None

    def update_segment_keywords(self, node_id: str, keywords: List[str]):
        """Store ``keywords`` on the document segment whose index node id is ``node_id``.

        Silently does nothing when no such segment exists.
        """
        document_segment = db.session.query(DocumentSegment).filter(DocumentSegment.index_node_id == node_id).first()
        if document_segment:
            document_segment.keywords = keywords
            db.session.commit()

    def _build_service_context(self) -> ServiceContext:
        """Build the service context used for keyword-table maintenance."""
        # NOTE(review): 'fake' looks like a placeholder model name, presumably
        # because keyword extraction here never calls the LLM - confirm in LLMBuilder.
        llm = LLMBuilder.to_llm(
            tenant_id=self._dataset.tenant_id,
            model_name='fake'
        )

        return ServiceContext.from_defaults(
            llm_predictor=LLMPredictor(llm=llm),
            embed_model=OpenAIEmbedding()
        )

    def _build_index(self, index_struct: KeywordTable,
                     service_context: ServiceContext) -> GPTJIEBAKeywordTableIndex:
        """Wrap ``index_struct`` in a JIEBA keyword index backed by an empty docstore."""
        return GPTJIEBAKeywordTableIndex(
            index_struct=index_struct,
            docstore=EmptyDocumentStore(),
            service_context=service_context
        )

    def _save_index_struct(self, dataset_keyword_table, index: GPTJIEBAKeywordTableIndex) -> None:
        """Serialize the index struct to JSON and commit it, creating the row when absent."""
        serialized = json.dumps(index.index_struct.to_dict())

        if not dataset_keyword_table:
            dataset_keyword_table = DatasetKeywordTable(
                dataset_id=self._dataset.id,
                keyword_table=serialized
            )
            db.session.add(dataset_keyword_table)
        else:
            dataset_keyword_table.keyword_table = serialized

        db.session.commit()
|
||||
79
api/core/index/query/synthesizer.py
Normal file
79
api/core/index/query/synthesizer.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Optional, Sequence,
|
||||
)
|
||||
|
||||
from llama_index.indices.response.response_synthesis import ResponseSynthesizer
|
||||
from llama_index.indices.response.response_builder import ResponseMode, BaseResponseBuilder, get_response_builder
|
||||
from llama_index.indices.service_context import ServiceContext
|
||||
from llama_index.optimization.optimizer import BaseTokenUsageOptimizer
|
||||
from llama_index.prompts.prompts import (
|
||||
QuestionAnswerPrompt,
|
||||
RefinePrompt,
|
||||
SimpleInputPrompt,
|
||||
)
|
||||
from llama_index.types import RESPONSE_TEXT_TYPE
|
||||
|
||||
|
||||
class EnhanceResponseSynthesizer(ResponseSynthesizer):
    """ResponseSynthesizer whose factory also accepts the 'no_synthesizer' response mode."""

    @classmethod
    def from_args(
            cls,
            service_context: ServiceContext,
            streaming: bool = False,
            use_async: bool = False,
            text_qa_template: Optional[QuestionAnswerPrompt] = None,
            refine_template: Optional[RefinePrompt] = None,
            simple_template: Optional[SimpleInputPrompt] = None,
            response_mode: ResponseMode = ResponseMode.DEFAULT,
            response_kwargs: Optional[Dict] = None,
            optimizer: Optional[BaseTokenUsageOptimizer] = None,
    ) -> "ResponseSynthesizer":
        """Pick a response builder for ``response_mode`` and wrap it in a synthesizer.

        ``ResponseMode.NO_TEXT`` gets no builder, ``'no_synthesizer'`` gets a
        NoSynthesizer, and every other mode is delegated to
        ``get_response_builder``.
        """
        builder: Optional[BaseResponseBuilder]
        if response_mode == ResponseMode.NO_TEXT:
            builder = None
        elif response_mode == 'no_synthesizer':
            builder = NoSynthesizer(
                service_context=service_context,
                simple_template=simple_template,
                streaming=streaming,
            )
        else:
            builder = get_response_builder(
                service_context,
                text_qa_template,
                refine_template,
                simple_template,
                response_mode,
                use_async=use_async,
                streaming=streaming,
            )

        return cls(builder, response_mode, response_kwargs, optimizer)
|
||||
|
||||
|
||||
class NoSynthesizer(BaseResponseBuilder):
    """Response builder that returns the retrieved chunks joined by newlines."""

    def __init__(
            self,
            service_context: ServiceContext,
            simple_template: Optional[SimpleInputPrompt] = None,
            streaming: bool = False,
    ) -> None:
        # simple_template is accepted but not used by this builder.
        super().__init__(service_context, streaming)

    @staticmethod
    def _concat(text_chunks: Sequence[str]) -> str:
        """Join the chunks with a newline separator."""
        separator = "\n"
        return separator.join(text_chunks)

    async def aget_response(
            self,
            query_str: str,
            text_chunks: Sequence[str],
            prev_response: Optional[str] = None,
            **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Async variant: return the chunks joined by newlines; other arguments are ignored."""
        return self._concat(text_chunks)

    def get_response(
            self,
            query_str: str,
            text_chunks: Sequence[str],
            prev_response: Optional[str] = None,
            **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Return the chunks joined by newlines; other arguments are ignored."""
        return self._concat(text_chunks)
|
||||
22
api/core/index/readers/html_parser.py
Normal file
22
api/core/index/readers/html_parser.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from llama_index.readers.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
    """Parser that reduces an HTML file to its visible text."""

    def _init_parser(self) -> Dict:
        """Return the parser configuration (empty for this parser)."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> str:
        """Return the stripped text content of the HTML file.

        Args:
            file: Path of the HTML file to read.
            errors: Accepted for interface compatibility; not used here.

        Returns:
            The document text without leading/trailing whitespace, or ``''``
            when no text was extracted.
        """
        with open(file, "rb") as html_file:
            extracted = BeautifulSoup(html_file, 'html.parser').get_text()

        if not extracted:
            return ''
        return extracted.strip()
|
||||
56
api/core/index/readers/pdf_parser.py
Normal file
56
api/core/index/readers/pdf_parser.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from flask import current_app
|
||||
from llama_index.readers.file.base_parser import BaseParser
|
||||
from pypdf import PdfReader
|
||||
|
||||
from extensions.ext_storage import storage
|
||||
from models.model import UploadFile
|
||||
|
||||
|
||||
class PDFParser(BaseParser):
    """PDF parser that caches the extracted plain text in storage."""

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> str:
        """Extract the text of a PDF file.

        When the parser config carries an ``upload_file`` with a hash, the
        text is looked up in storage first and written back there after
        extraction, so the same upload is only parsed once.

        Args:
            file: Path of the PDF file to read.
            errors: Accepted for interface compatibility; not used here.

        Returns:
            The page texts joined by newlines, or ``''`` when the
            ``PDF_PREVIEW`` app setting is falsy.
        """
        if not current_app.config.get('PDF_PREVIEW', True):
            return ''

        plaintext_file_key = ''
        if self._parser_config and 'upload_file' in self._parser_config and self._parser_config['upload_file']:
            upload_file: UploadFile = self._parser_config['upload_file']
            if upload_file.hash:
                plaintext_file_key = 'upload_files/' + upload_file.tenant_id + '/' + upload_file.hash + '.plaintext'
                try:
                    # Cache hit: return the stored text without parsing the PDF.
                    return storage.load(plaintext_file_key).decode('utf-8')
                except FileNotFoundError:
                    # NOTE(review): only FileNotFoundError is treated as a cache
                    # miss; confirm every storage backend raises it.
                    pass

        with open(file, "rb") as fp:
            pdf = PdfReader(fp)
            # Extract while the file is still open; pages are read lazily.
            text = "\n".join(page.extract_text() for page in pdf.pages)

        # save plaintext file for caching (only reached on a cache miss)
        if plaintext_file_key:
            storage.save(plaintext_file_key, text.encode('utf-8'))

        return text
|
||||
136
api/core/index/vector_index.py
Normal file
136
api/core/index/vector_index.py
Normal file
@@ -0,0 +1,136 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from llama_index.data_structs import Node
|
||||
from requests import ReadTimeout
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from tenacity import retry, stop_after_attempt, retry_if_exception_type
|
||||
|
||||
from core.index.index_builder import IndexBuilder
|
||||
from core.vector_store.base import BaseGPTVectorStoreIndex
|
||||
from extensions.ext_vector_store import vector_store
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset, Embedding
|
||||
|
||||
|
||||
class VectorIndex:
    """Add, delete and query a dataset's nodes in the configured vector store."""

    def __init__(self, dataset: Dataset):
        self._dataset = dataset

    def add_nodes(self, nodes: List[Node], duplicate_check: bool = False):
        """Embed ``nodes`` (reusing cached embeddings) and insert them into the index.

        The dataset's index struct is created and committed on first use.

        Args:
            nodes: Nodes to index.
            duplicate_check: When true, nodes whose id already exists in the
                vector store are dropped before embedding.
        """
        if not self._dataset.index_struct_dict:
            index_id = "Vector_index_" + self._dataset.id.replace("-", "_")
            self._dataset.index_struct = json.dumps(vector_store.to_index_struct(index_id))
            db.session.commit()

        service_context = IndexBuilder.get_default_service_context(tenant_id=self._dataset.tenant_id)

        index = vector_store.get_index(
            service_context=service_context,
            index_struct=self._dataset.index_struct_dict
        )

        if duplicate_check:
            nodes = self._filter_duplicate_nodes(index, nodes)

        embedding_queue_nodes = []
        embedded_nodes = []
        for node in nodes:
            node_hash = node.doc_hash

            # if node hash in cached embedding tables, use cached embedding
            embedding = db.session.query(Embedding).filter_by(hash=node_hash).first()
            if embedding:
                node.embedding = embedding.get_embedding()
                embedded_nodes.append(node)
            else:
                embedding_queue_nodes.append(node)

        if embedding_queue_nodes:
            embedding_results = index._get_node_embedding_results(
                embedding_queue_nodes,
                set(),
            )

            # pre embed nodes for cached embedding
            for embedding_result in embedding_results:
                node = embedding_result.node
                node.embedding = embedding_result.embedding

                # NOTE(review): a node whose cache write fails is skipped and
                # therefore never inserted into the index - confirm this is intended.
                try:
                    embedding = Embedding(hash=node.doc_hash)
                    embedding.set_embedding(node.embedding)
                    db.session.add(embedding)
                    db.session.commit()
                except IntegrityError:
                    db.session.rollback()
                    continue
                except Exception:
                    # Roll back so the session stays usable for the next node.
                    db.session.rollback()
                    logging.exception('Failed to add embedding to db')
                    continue

                embedded_nodes.append(node)

        self.index_insert_nodes(index, embedded_nodes)

    @retry(reraise=True, retry=retry_if_exception_type(ReadTimeout), stop=stop_after_attempt(3))
    def index_insert_nodes(self, index: BaseGPTVectorStoreIndex, nodes: List[Node]):
        """Insert ``nodes`` into ``index``, retrying up to 3 attempts on ReadTimeout."""
        index.insert_nodes(nodes)

    def del_nodes(self, node_ids: List[str]):
        """Delete the given node ids from the index; no-op without an index struct."""
        if not self._dataset.index_struct_dict:
            return

        service_context = IndexBuilder.get_default_service_context(tenant_id=self._dataset.tenant_id)

        index = vector_store.get_index(
            service_context=service_context,
            index_struct=self._dataset.index_struct_dict
        )

        for node_id in node_ids:
            self.index_delete_node(index, node_id)

    @retry(reraise=True, retry=retry_if_exception_type(ReadTimeout), stop=stop_after_attempt(3))
    def index_delete_node(self, index: BaseGPTVectorStoreIndex, node_id: str):
        """Delete one node from ``index``, retrying up to 3 attempts on ReadTimeout."""
        index.delete_node(node_id)

    def del_doc(self, doc_id: str):
        """Delete a document from the index; no-op without an index struct."""
        if not self._dataset.index_struct_dict:
            return

        service_context = IndexBuilder.get_default_service_context(tenant_id=self._dataset.tenant_id)

        index = vector_store.get_index(
            service_context=service_context,
            index_struct=self._dataset.index_struct_dict
        )

        self.index_delete_doc(index, doc_id)

    @retry(reraise=True, retry=retry_if_exception_type(ReadTimeout), stop=stop_after_attempt(3))
    def index_delete_doc(self, index: BaseGPTVectorStoreIndex, doc_id: str):
        """Delete one document from ``index``, retrying up to 3 attempts on ReadTimeout."""
        index.delete(doc_id)

    @property
    def query_index(self) -> Optional[BaseGPTVectorStoreIndex]:
        """Return the vector index for querying, or ``None`` without an index struct."""
        if not self._dataset.index_struct_dict:
            return None

        service_context = IndexBuilder.get_default_service_context(tenant_id=self._dataset.tenant_id)

        return vector_store.get_index(
            service_context=service_context,
            index_struct=self._dataset.index_struct_dict
        )

    def _filter_duplicate_nodes(self, index: BaseGPTVectorStoreIndex, nodes: List[Node]) -> List[Node]:
        """Return the nodes whose id is not yet present in ``index``.

        A new list is built; removing items from ``nodes`` while iterating it
        skipped the element following every removed node.
        """
        return [node for node in nodes if not index.exists_by_node_id(node.doc_id)]
|
||||
467
api/core/indexing_runner.py
Normal file
467
api/core/indexing_runner.py
Normal file
@@ -0,0 +1,467 @@
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from llama_index import SimpleDirectoryReader
|
||||
from llama_index.data_structs import Node
|
||||
from llama_index.data_structs.node_v2 import DocumentRelationship
|
||||
from llama_index.node_parser import SimpleNodeParser, NodeParser
|
||||
from llama_index.readers.file.base import DEFAULT_FILE_EXTRACTOR
|
||||
from llama_index.readers.file.markdown_parser import MarkdownParser
|
||||
|
||||
from core.docstore.dataset_docstore import DatesetDocumentStore
|
||||
from core.index.keyword_table_index import KeywordTableIndex
|
||||
from core.index.readers.html_parser import HTMLParser
|
||||
from core.index.readers.pdf_parser import PDFParser
|
||||
from core.index.vector_index import VectorIndex
|
||||
from core.llm.token_calculator import TokenCalculator
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from extensions.ext_storage import storage
|
||||
from models.dataset import Document, Dataset, DocumentSegment, DatasetProcessRule
|
||||
from models.model import UploadFile
|
||||
|
||||
|
||||
class IndexingRunner:
|
||||
|
||||
def __init__(self, embedding_model_name: str = "text-embedding-ada-002"):
    """Remember the embedding model name and bind the shared storage extension."""
    self.embedding_model_name = embedding_model_name
    self.storage = storage
|
||||
|
||||
def run(self, document: Document):
    """Run the full indexing pipeline for ``document``: load, split, build index.

    Raises:
        ValueError: If the document's dataset does not exist.
    """
    dataset = Dataset.query.filter_by(id=document.dataset_id).first()
    if not dataset:
        raise ValueError("no dataset found")

    # Load the raw text of the document.
    text_docs = self._load_data(document)

    # Look up the processing rule attached to the document.
    rule_query = db.session.query(DatasetProcessRule)
    rule_query = rule_query.filter(DatasetProcessRule.id == document.dataset_process_rule_id)
    processing_rule = rule_query.first()

    # The rule decides how the text is split into nodes.
    node_parser = self._get_node_parser(processing_rule)

    nodes = self._step_split(
        text_docs=text_docs,
        node_parser=node_parser,
        dataset=dataset,
        document=document,
        processing_rule=processing_rule
    )

    self._build_index(dataset=dataset, document=document, nodes=nodes)
|
||||
|
||||
def run_in_splitting_status(self, document: Document):
    """Run the indexing process when the index_status is splitting.

    Segments already stored for the document are deleted first, then the
    document is loaded, split and indexed again from scratch.

    Raises:
        ValueError: If the document's dataset does not exist.
    """
    # get dataset
    dataset = Dataset.query.filter_by(
        id=document.dataset_id
    ).first()

    if not dataset:
        raise ValueError("no dataset found")

    # get exist document_segment list and delete
    document_segments = DocumentSegment.query.filter_by(
        dataset_id=dataset.id,
        document_id=document.id
    ).all()
    # Session.delete() takes one mapped instance, so the list returned by
    # .all() has to be deleted item by item.
    for document_segment in document_segments:
        db.session.delete(document_segment)
    db.session.commit()

    # load file
    text_docs = self._load_data(document)

    # get the process rule
    processing_rule = db.session.query(DatasetProcessRule). \
        filter(DatasetProcessRule.id == document.dataset_process_rule_id). \
        first()

    # get node parser for splitting
    node_parser = self._get_node_parser(processing_rule)

    # split to nodes
    nodes = self._step_split(
        text_docs=text_docs,
        node_parser=node_parser,
        dataset=dataset,
        document=document,
        processing_rule=processing_rule
    )

    # build index
    self._build_index(
        dataset=dataset,
        document=document,
        nodes=nodes
    )
|
||||
|
||||
def run_in_indexing_status(self, document: Document):
    """
    Run the indexing process when the index_status is indexing.

    The segments already stored for the document are converted back into
    nodes; every segment whose status is not "completed" is passed to the
    index builder again. No segment is deleted here.

    :param document: the dataset document to process
    :raises ValueError: if the dataset of the document cannot be found
    """
    dataset = Dataset.query.filter_by(id=document.dataset_id).first()
    if not dataset:
        raise ValueError("no dataset found")

    # fetch the segments that were saved during the splitting step
    stored_segments = DocumentSegment.query.filter_by(
        dataset_id=dataset.id,
        document_id=document.id
    ).all()

    pending_nodes = []
    for segment in stored_segments:
        # finished segments do not need to be indexed again
        if segment.status == "completed":
            continue

        # rebuild the source / previous / next links of the node
        links = {DocumentRelationship.SOURCE: segment.document_id}

        before = segment.previous_segment
        if before:
            links[DocumentRelationship.PREVIOUS] = before.index_node_id

        after = segment.next_segment
        if after:
            links[DocumentRelationship.NEXT] = after.index_node_id

        pending_nodes.append(Node(
            doc_id=segment.index_node_id,
            doc_hash=segment.index_node_hash,
            text=segment.content,
            extra_info=None,
            node_info=None,
            relationships=links
        ))

    # build index
    self._build_index(dataset=dataset, document=document, nodes=pending_nodes)
|
||||
|
||||
def indexing_estimate(self, file_detail: UploadFile, tmp_processing_rule: dict) -> dict:
    """
    Estimate the indexing for the document.

    The file is loaded and split with a temporary processing rule built
    from ``tmp_processing_rule`` (keys "mode" and "rules").

    :return: dict with "total_segments", "tokens", "total_price",
        "currency" and "preview" (text of at most the first five nodes)
    """
    # load data from file
    loaded_docs = self._load_data_from_file(file_detail)

    # temporary rule object built from the submitted settings
    draft_rule = DatasetProcessRule(
        mode=tmp_processing_rule["mode"],
        rules=json.dumps(tmp_processing_rule["rules"])
    )

    # split to nodes with the parser matching the rule
    nodes = self._split_to_nodes(
        text_docs=loaded_docs,
        node_parser=self._get_node_parser(draft_rule),
        processing_rule=draft_rule
    )

    model_name = self.embedding_model_name
    preview_texts = [node.get_text() for node in nodes[:5]]
    tokens = sum(TokenCalculator.get_num_tokens(model_name, node.get_text()) for node in nodes)

    return {
        "total_segments": len(nodes),
        "tokens": tokens,
        "total_price": '{:f}'.format(TokenCalculator.get_token_price(self.embedding_model_name, tokens)),
        "currency": TokenCalculator.get_currency(self.embedding_model_name),
        "preview": preview_texts
    }
|
||||
|
||||
def _load_data(self, document: Document) -> List[Document]:
    """
    Load the uploaded file of the document and mark the document as splitting.

    :param document: the dataset document to load
    :return: the loaded text documents, with invalid symbols removed and
        doc_id set to the document id; an empty list when the data source
        type is not "upload_file"
    :raises ValueError: if the document does not reference an upload file
        or the referenced upload file record does not exist
    """
    # load file
    if document.data_source_type != "upload_file":
        return []

    data_source_info = document.data_source_info_dict
    if not data_source_info or 'upload_file_id' not in data_source_info:
        raise ValueError("no upload file found")

    file_detail = db.session.query(UploadFile). \
        filter(UploadFile.id == data_source_info['upload_file_id']). \
        one_or_none()

    # one_or_none() yields None when no row matches the stored id
    if not file_detail:
        raise ValueError("no upload file found")

    text_docs = self._load_data_from_file(file_detail)

    # update document status to splitting
    self._update_document_index_status(
        document_id=document.id,
        after_indexing_status="splitting",
        extra_update_params={
            Document.file_id: file_detail.id,
            # word count is taken before invalid symbols are filtered out
            Document.word_count: sum(len(text_doc.text) for text_doc in text_docs),
            Document.parsing_completed_at: datetime.datetime.utcnow()
        }
    )

    # replace doc id to document model id
    for text_doc in text_docs:
        # remove invalid symbol
        text_doc.text = self.filter_string(text_doc.get_text())
        text_doc.doc_id = document.id

    return text_docs
|
||||
|
||||
def filter_string(self, text):
    """
    Remove control characters from the text.

    Removed: the C0 controls other than tab, line feed and carriage return,
    DEL (U+007F) and the C1 controls U+0080-U+009F. Printable Latin-1
    characters (U+00A0-U+00FF, e.g. accented letters) are kept.

    :param text: the text to filter
    :return: the text without the control characters
    """
    pattern = re.compile('[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]')
    return pattern.sub('', text)
|
||||
|
||||
def _load_data_from_file(self, upload_file: UploadFile) -> List[Document]:
    """
    Download the upload file into a temporary directory and parse it.

    The default file extractors are extended with dedicated parsers for
    ".markdown", ".html", ".htm" and ".pdf" files.

    :param upload_file: the upload file record; its key is the storage key
    :return: the documents produced by the directory reader
    """
    with tempfile.TemporaryDirectory() as workdir:
        # keep the original extension so the reader picks the right parser
        extension = Path(upload_file.key).suffix
        random_name = next(tempfile._get_candidate_names())
        local_path = f"{workdir}/{random_name}{extension}"
        self.storage.download(upload_file.key, local_path)

        extractors = DEFAULT_FILE_EXTRACTOR.copy()
        extractors.update({
            ".markdown": MarkdownParser(),
            ".html": HTMLParser(),
            ".htm": HTMLParser(),
            ".pdf": PDFParser({'upload_file': upload_file}),
        })

        reader = SimpleDirectoryReader(input_files=[local_path], file_extractor=extractors)
        return reader.load_data()
|
||||
|
||||
def _get_node_parser(self, processing_rule: DatasetProcessRule) -> NodeParser:
    """
    Get the NodeParser object according to the processing rule.

    In "custom" mode the chunk size and separator come from the
    "segmentation" section of the rule; in any other mode the automatic
    rules of DatasetProcessRule are used.

    :raises ValueError: if the custom max_tokens is outside 50..1000
    """
    fallback_separators = ["\n\n", "。", ".", " ", ""]

    if processing_rule.mode != "custom":
        # Automatic segmentation
        chunk_size = DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens']
        separators = fallback_separators
    else:
        # The user-defined segmentation rule
        segmentation = json.loads(processing_rule.rules)["segmentation"]
        if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > 1000:
            raise ValueError("Custom segment length should be between 50 and 1000.")

        custom_separator = segmentation["separator"]
        if custom_separator:
            # the separator arrives with a literal backslash-n; turn it into a newline
            separators = [custom_separator.replace('\\n', '\n'), ""]
        else:
            separators = fallback_separators

        chunk_size = segmentation["max_tokens"]

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=0,
        separators=separators
    )

    return SimpleNodeParser(text_splitter=splitter, include_extra_info=True)
|
||||
|
||||
def _step_split(self, text_docs: List[Document], node_parser: NodeParser,
                dataset: Dataset, document: Document, processing_rule: DatasetProcessRule) -> List[Node]:
    """
    Split the text documents into nodes and save them to the document segment.

    Afterwards the document and all of its segments are moved to the
    "indexing" status.

    :return: the nodes created from the text documents
    """
    split_nodes = self._split_to_nodes(
        text_docs=text_docs,
        node_parser=node_parser,
        processing_rule=processing_rule
    )

    # save node to document segment
    segment_store = DatesetDocumentStore(
        dataset=dataset,
        user_id=document.created_by,
        embedding_model_name=self.embedding_model_name,
        document_id=document.id
    )
    segment_store.add_documents(split_nodes)

    # update document status to indexing; cleaning and splitting share one timestamp
    finished_at = datetime.datetime.utcnow()
    document_changes = {
        Document.cleaning_completed_at: finished_at,
        Document.splitting_completed_at: finished_at,
    }
    self._update_document_index_status(
        document_id=document.id,
        after_indexing_status="indexing",
        extra_update_params=document_changes
    )

    # update segment status to indexing
    segment_changes = {
        DocumentSegment.status: "indexing",
        DocumentSegment.indexing_at: datetime.datetime.utcnow()
    }
    self._update_segments_by_document(document_id=document.id, update_params=segment_changes)

    return split_nodes
|
||||
|
||||
def _split_to_nodes(self, text_docs: List[Document], node_parser: NodeParser,
|
||||
processing_rule: DatasetProcessRule) -> List[Node]:
|
||||
"""
|
||||
Split the text documents into nodes.
|
||||
"""
|
||||
all_nodes = []
|
||||
for text_doc in text_docs:
|
||||
# document clean
|
||||
document_text = self._document_clean(text_doc.get_text(), processing_rule)
|
||||
text_doc.text = document_text
|
||||
|
||||
# parse document to nodes
|
||||
nodes = node_parser.get_nodes_from_documents([text_doc])
|
||||
|
||||
all_nodes.extend(nodes)
|
||||
|
||||
return all_nodes
|
||||
|
||||
def _document_clean(self, text: str, processing_rule: DatasetProcessRule) -> str:
|
||||
"""
|
||||
Clean the document text according to the processing rules.
|
||||
"""
|
||||
if processing_rule.mode == "automatic":
|
||||
rules = DatasetProcessRule.AUTOMATIC_RULES
|
||||
else:
|
||||
rules = json.loads(processing_rule.rules) if processing_rule.rules else {}
|
||||
|
||||
if 'pre_processing_rules' in rules:
|
||||
pre_processing_rules = rules["pre_processing_rules"]
|
||||
for pre_processing_rule in pre_processing_rules:
|
||||
if pre_processing_rule["id"] == "remove_extra_spaces" and pre_processing_rule["enabled"] is True:
|
||||
# Remove extra spaces
|
||||
pattern = r'\n{3,}'
|
||||
text = re.sub(pattern, '\n\n', text)
|
||||
pattern = r'[\t\f\r\x20\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]{2,}'
|
||||
text = re.sub(pattern, ' ', text)
|
||||
elif pre_processing_rule["id"] == "remove_urls_emails" and pre_processing_rule["enabled"] is True:
|
||||
# Remove email
|
||||
pattern = r'([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
|
||||
text = re.sub(pattern, '', text)
|
||||
|
||||
# Remove URL
|
||||
pattern = r'https?://[^\s]+'
|
||||
text = re.sub(pattern, '', text)
|
||||
|
||||
return text
|
||||
|
||||
def _build_index(self, dataset: Dataset, document: Document, nodes: List[Node]) -> None:
    """
    Build the index for the document.

    Nodes are processed in batches of 100. Before each batch the pause
    flag of the document is checked; after each batch the matching
    segments in "indexing" status are marked "completed" and committed.
    Finally the document itself is marked "completed" together with its
    token count and indexing latency.
    """
    vector_index = VectorIndex(dataset=dataset)
    keyword_table_index = KeywordTableIndex(dataset=dataset)

    batch_size = 100
    total_tokens = 0
    started_at = time.perf_counter()

    for offset in range(0, len(nodes), batch_size):
        # check document is paused
        self._check_document_paused_status(document.id)
        batch = nodes[offset:offset + batch_size]

        total_tokens += sum(
            TokenCalculator.get_num_tokens(self.embedding_model_name, node.get_text()) for node in batch
        )

        # the vector index is only maintained for high quality datasets
        if dataset.indexing_technique == "high_quality":
            vector_index.add_nodes(batch)

        # save keyword index
        keyword_table_index.add_nodes(batch)

        # mark the segments of this batch as completed
        batch_ids = [node.doc_id for node in batch]
        segment_query = db.session.query(DocumentSegment).filter(
            DocumentSegment.document_id == document.id,
            DocumentSegment.index_node_id.in_(batch_ids),
            DocumentSegment.status == "indexing"
        )
        segment_query.update({
            DocumentSegment.status: "completed",
            DocumentSegment.completed_at: datetime.datetime.utcnow()
        })

        db.session.commit()

    finished_at = time.perf_counter()

    # update document status to completed
    self._update_document_index_status(
        document_id=document.id,
        after_indexing_status="completed",
        extra_update_params={
            Document.tokens: total_tokens,
            Document.completed_at: datetime.datetime.utcnow(),
            Document.indexing_latency: finished_at - started_at,
        }
    )
|
||||
|
||||
def _check_document_paused_status(self, document_id: str):
    """
    Raise DocumentIsPausedException if the pause flag of the document is set in redis.

    :param document_id: id of the document being indexed
    """
    pause_flag_key = 'document_{}_is_paused'.format(document_id)
    if redis_client.get(pause_flag_key):
        raise DocumentIsPausedException()
|
||||
|
||||
def _update_document_index_status(self, document_id: str, after_indexing_status: str,
                                  extra_update_params: Optional[dict] = None) -> None:
    """
    Update the document indexing status.

    :param document_id: id of the document to update
    :param after_indexing_status: the new indexing status
    :param extra_update_params: additional column values written in the same update
    :raises DocumentIsPausedException: if the document is flagged as paused
    """
    paused_rows = Document.query.filter_by(id=document_id, is_paused=True).count()
    if paused_rows > 0:
        raise DocumentIsPausedException()

    # the status comes first, extra values are merged on top of it
    changes = {
        Document.indexing_status: after_indexing_status,
        **(extra_update_params or {}),
    }

    Document.query.filter_by(id=document_id).update(changes)
    db.session.commit()
|
||||
|
||||
def _update_segments_by_document(self, document_id: str, update_params: dict) -> None:
    """
    Update the document segment by document id.

    :param document_id: id of the document whose segments are updated
    :param update_params: column values applied to every matching segment
    """
    segments_of_document = DocumentSegment.query.filter_by(document_id=document_id)
    segments_of_document.update(update_params)
    db.session.commit()
|
||||
|
||||
|
||||
class DocumentIsPausedException(Exception):
    """Raised when indexing is attempted on a document that is paused."""
|
||||
55
api/core/llm/error.py
Normal file
55
api/core/llm/error.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class LLMError(Exception):
    """Base class for all LLM exceptions."""
    # default description; subclasses override it at class level
    description: Optional[str] = None

    def __init__(self, description: Optional[str] = None) -> None:
        """
        :param description: message for this occurrence; when omitted the
            class-level description of the (sub)class stays in effect
        """
        if description is None:
            # keep the class-level default instead of shadowing it with None
            super().__init__()
        else:
            super().__init__(description)
            self.description = description


class LLMBadRequestError(LLMError):
    """Raised when the LLM returns bad request."""
    description = "Bad Request"


class LLMAPIConnectionError(LLMError):
    """Raised when the LLM returns API connection error."""
    description = "API Connection Error"


class LLMAPIUnavailableError(LLMError):
    """Raised when the LLM returns API unavailable error."""
    description = "API Unavailable Error"


class LLMRateLimitError(LLMError):
    """Raised when the LLM returns rate limit error."""
    description = "Rate Limit Error"


class LLMAuthorizationError(LLMError):
    """Raised when the LLM returns authorization error."""
    description = "Authorization Error"
|
||||
|
||||
|
||||
class ProviderTokenNotInitError(Exception):
    """Custom exception raised when the provider token is not initialized."""
    description = "Provider Token Not Init"
|
||||
|
||||
|
||||
class QuotaExceededError(Exception):
    """Custom exception raised when the quota for a provider has been exceeded."""
    description = "Quota Exceeded"
|
||||
|
||||
|
||||
class ModelCurrentlyNotSupportError(Exception):
    """Custom exception raised when the requested model is currently not supported."""
    description = "Model Currently Not Support"
|
||||
51
api/core/llm/error_handle_wraps.py
Normal file
51
api/core/llm/error_handle_wraps.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
import openai
|
||||
|
||||
from core.llm.error import LLMAPIConnectionError, LLMAPIUnavailableError, LLMRateLimitError, LLMAuthorizationError, \
|
||||
LLMBadRequestError
|
||||
|
||||
|
||||
def handle_llm_exceptions(func):
    """
    Decorator translating OpenAI client errors into the LLM error classes.

    Invalid request, connection and unavailability errors are logged with
    their traceback before being translated; rate limit and authentication
    errors are translated without logging. The original exception is
    attached as the cause of the raised error.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except openai.error.InvalidRequestError as e:
            logging.exception("Invalid request to OpenAI API.")
            raise LLMBadRequestError(str(e)) from e
        except openai.error.APIConnectionError as e:
            logging.exception("Failed to connect to OpenAI API.")
            raise LLMAPIConnectionError(str(e)) from e
        except (openai.error.APIError, openai.error.ServiceUnavailableError, openai.error.Timeout) as e:
            logging.exception("OpenAI service unavailable.")
            raise LLMAPIUnavailableError(str(e)) from e
        except openai.error.RateLimitError as e:
            raise LLMRateLimitError(str(e)) from e
        except openai.error.AuthenticationError as e:
            raise LLMAuthorizationError(str(e)) from e

    return wrapper
|
||||
|
||||
|
||||
def handle_llm_exceptions_async(func):
    """
    Decorator for coroutine functions translating OpenAI client errors into
    the LLM error classes.

    Invalid request, connection and unavailability errors are logged with
    their traceback before being translated; rate limit and authentication
    errors are translated without logging. The original exception is
    attached as the cause of the raised error.
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            return await func(*args, **kwargs)
        except openai.error.InvalidRequestError as e:
            logging.exception("Invalid request to OpenAI API.")
            raise LLMBadRequestError(str(e)) from e
        except openai.error.APIConnectionError as e:
            logging.exception("Failed to connect to OpenAI API.")
            raise LLMAPIConnectionError(str(e)) from e
        except (openai.error.APIError, openai.error.ServiceUnavailableError, openai.error.Timeout) as e:
            logging.exception("OpenAI service unavailable.")
            raise LLMAPIUnavailableError(str(e)) from e
        except openai.error.RateLimitError as e:
            raise LLMRateLimitError(str(e)) from e
        except openai.error.AuthenticationError as e:
            raise LLMAuthorizationError(str(e)) from e

    return wrapper
|
||||
103
api/core/llm/llm_builder.py
Normal file
103
api/core/llm/llm_builder.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from typing import Union, Optional
|
||||
|
||||
from langchain.callbacks import CallbackManager
|
||||
from langchain.llms.fake import FakeListLLM
|
||||
|
||||
from core.constant import llm_constant
|
||||
from core.llm.provider.llm_provider_service import LLMProviderService
|
||||
from core.llm.streamable_chat_open_ai import StreamableChatOpenAI
|
||||
from core.llm.streamable_open_ai import StreamableOpenAI
|
||||
|
||||
|
||||
class LLMBuilder:
    """
    Builds LLM clients for a tenant. The following rules apply to the
    provider records the credentials are read from:

    1. Provider 'OpenAI': encrypted_config holds the OPENAI_API_KEY value directly.
    2. Provider 'Azure OpenAI': encrypted_config holds the serialized values of four fields:
        OPENAI_API_TYPE=azure
        OPENAI_API_VERSION=2022-12-01
        OPENAI_API_BASE=https://your-resource-name.openai.azure.com
        OPENAI_API_KEY=<your Azure OpenAI API key>
    3. Provider 'Anthropic': encrypted_config holds the ANTHROPIC_API_KEY value directly.
    4. Provider 'Cohere': encrypted_config holds the COHERE_API_KEY value directly.
    5. Provider 'HUGGINGFACEHUB': encrypted_config holds the HUGGINGFACEHUB_API_KEY value directly.
    6. Providers of provider_type 'CUSTOM' can be created through the admin interface, 'System' providers cannot.
    7. When both a CUSTOM and a System provider exist, the CUSTOM one is preferred by default; an input parameter can change this.
    8. For 'System' providers quota_used must not exceed quota_limit, otherwise the provider cannot be used.
       Only the TRIAL quota_type is supported for now, and it never resets.
    """

    @classmethod
    def to_llm(cls, tenant_id: str, model_name: str, **kwargs) -> Union[StreamableOpenAI, StreamableChatOpenAI, FakeListLLM]:
        """
        Create the LLM client for the model.

        Supported keyword arguments: temperature, max_tokens, top_p,
        frequency_penalty, presence_penalty, callback_manager, streaming.

        :raises ValueError: if the model name is empty or not supported
        """
        if model_name == 'fake':
            return FakeListLLM(responses=[])

        # chat models and completion models use different client classes
        llm_classes = {
            'chat': StreamableChatOpenAI,
            'completion': StreamableOpenAI,
        }
        mode = cls.get_mode_by_model(model_name)
        if mode not in llm_classes:
            raise ValueError(f"model name {model_name} is not supported.")
        llm_cls = llm_classes[mode]

        model_credentials = cls.get_model_credentials(tenant_id, model_name)

        options = dict(
            model_name=model_name,
            temperature=kwargs.get('temperature', 0),
            max_tokens=kwargs.get('max_tokens', 256),
            top_p=kwargs.get('top_p', 1),
            frequency_penalty=kwargs.get('frequency_penalty', 0),
            presence_penalty=kwargs.get('presence_penalty', 0),
            callback_manager=kwargs.get('callback_manager', None),
            streaming=kwargs.get('streaming', False),
        )
        return llm_cls(**options, **model_credentials)

    @classmethod
    def to_llm_from_model(cls, tenant_id: str, model: dict, streaming: bool = False,
                          callback_manager: Optional[CallbackManager] = None) -> Union[StreamableOpenAI, StreamableChatOpenAI]:
        """
        Create the LLM client from a model dict with the keys "name" and
        (optionally) "completion_params".
        """
        params = model.get("completion_params", {})

        return cls.to_llm(
            tenant_id=tenant_id,
            model_name=model.get("name"),
            temperature=params.get('temperature', 0),
            max_tokens=params.get('max_tokens', 256),
            top_p=params.get('top_p', 0),
            frequency_penalty=params.get('frequency_penalty', 0.1),
            presence_penalty=params.get('presence_penalty', 0.1),
            streaming=streaming,
            callback_manager=callback_manager
        )

    @classmethod
    def get_mode_by_model(cls, model_name: str) -> str:
        """
        Return "chat" or "completion" depending on the list the model is registered in.

        :raises ValueError: if the model name is empty or not registered
        """
        if not model_name:
            raise ValueError("empty model name is not supported.")

        for mode in ("chat", "completion"):
            if model_name in llm_constant.models_by_mode[mode]:
                return mode

        raise ValueError(f"model name {model_name} is not supported.")

    @classmethod
    def get_model_credentials(cls, tenant_id: str, model_name: str) -> dict:
        """
        Returns the API credentials for the given tenant_id and model_name, based on the model's provider.
        Raises an exception if the model_name is empty or unknown.
        """
        if not model_name:
            raise Exception('model name not found')

        if model_name not in llm_constant.models:
            raise Exception('model {} not found'.format(model_name))

        provider_name = llm_constant.models[model_name]
        service = LLMProviderService(tenant_id=tenant_id, provider_name=provider_name)
        return service.get_credentials(model_name)
|
||||
15
api/core/llm/moderation.py
Normal file
15
api/core/llm/moderation.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import openai
|
||||
from models.provider import ProviderName
|
||||
|
||||
|
||||
class Moderation:
    """Runs text through the moderation endpoint of the configured provider."""

    def __init__(self, provider: str, api_key: str):
        """
        :param provider: provider name; only the OpenAI provider has a moderation client
        :param api_key: API key passed along with every moderation request
        """
        self.provider = provider
        self.api_key = api_key
        # stays None for providers without a moderation client
        self.client = None

        if self.provider == ProviderName.OPENAI.value:
            self.client = openai.Moderation

    def moderate(self, text):
        """
        Send the text to the moderation endpoint and return its response.

        :raises ValueError: if the provider has no moderation client
        """
        if self.client is None:
            raise ValueError("moderation is not supported for provider {}".format(self.provider))

        return self.client.create(input=text, api_key=self.api_key)
|
||||
23
api/core/llm/provider/anthropic_provider.py
Normal file
23
api/core/llm/provider/anthropic_provider.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from typing import Optional
|
||||
|
||||
from core.llm.provider.base import BaseProvider
|
||||
from models.provider import ProviderName
|
||||
|
||||
|
||||
class AnthropicProvider(BaseProvider):
    """Provider implementation for Anthropic."""
    # NOTE(review): BaseProvider declares config_validate as abstract and this
    # class does not override it, so it looks like it cannot be instantiated
    # yet — confirm before use.

    def get_models(self, model_id: Optional[str] = None) -> list[dict]:
        """
        Return the models of the provider; listing is not implemented yet,
        so the result is always empty. The credentials are still resolved,
        so credential errors surface here.
        """
        self.get_credentials(model_id)
        # todo
        return []

    def get_credentials(self, model_id: Optional[str] = None) -> dict:
        """
        Returns the API credentials for Anthropic as a dictionary with the
        single key anthropic_api_key.
        """
        api_key = self.get_provider_api_key(model_id=model_id)
        return {'anthropic_api_key': api_key}

    def get_provider_name(self):
        """Return the provider name enum member for Anthropic."""
        return ProviderName.ANTHROPIC
|
||||
105
api/core/llm/provider/azure_provider.py
Normal file
105
api/core/llm/provider/azure_provider.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import json
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
from core.llm.provider.base import BaseProvider
|
||||
from models.provider import ProviderName
|
||||
|
||||
|
||||
class AzureProvider(BaseProvider):
    """Provider implementation for Azure OpenAI; its config is a dict serialized as JSON."""

    def get_models(self, model_id: Optional[str] = None) -> list[dict]:
        """
        List the deployments of the Azure OpenAI resource whose status is "succeeded".

        :return: list of dicts with the keys "id" and "name"
        :raises Exception: if the deployments request does not return status 200
        """
        credentials = self.get_credentials(model_id)
        url = "{}/openai/deployments?api-version={}".format(
            credentials.get('openai_api_base'),
            credentials.get('openai_api_version')
        )

        headers = {
            "api-key": credentials.get('openai_api_key'),
            "content-type": "application/json; charset=utf-8"
        }

        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            # TODO: optimize in future
            raise Exception('Failed to get deployments from Azure OpenAI. Status code: {}'.format(response.status_code))

        result = response.json()
        return [{
            'id': deployment['id'],
            'name': '{} ({})'.format(deployment['id'], deployment['model'])
        } for deployment in result['data'] if deployment['status'] == 'succeeded']

    def get_credentials(self, model_id: Optional[str] = None) -> dict:
        """
        Returns the API credentials for Azure OpenAI as a dictionary.

        The deployment name is set to the given model_id.
        """
        config = self._as_config_dict(self.get_provider_api_key(model_id=model_id))
        config['openai_api_type'] = 'azure'
        config['deployment_name'] = model_id
        return config

    def get_provider_name(self):
        """Return the provider name enum member for Azure OpenAI."""
        return ProviderName.AZURE_OPENAI

    def get_provider_configs(self, obfuscated: bool = False) -> Union[str, dict]:
        """
        Returns the provider configs.

        Falls back to a placeholder config when no usable provider config
        can be read. With obfuscated=True the API key is masked.
        """
        try:
            config = self._as_config_dict(self.get_provider_api_key())
        except Exception:
            # best effort: any failure to read the config yields the placeholder
            config = self._placeholder_config()

        if obfuscated:
            if not config.get('openai_api_key'):
                config = self._placeholder_config()

            config['openai_api_key'] = self.obfuscated_token(config.get('openai_api_key'))
            return config

        return config

    def get_token_type(self):
        """Return the callable used to convert a submitted token (identity for now)."""
        # TODO: change to dict when implemented
        return lambda value: value

    def config_validate(self, config: Union[dict, str]):
        """
        Validates the given config.
        """
        # TODO: implement
        pass

    def get_encrypted_token(self, config: Union[dict, str]):
        """
        Returns the encrypted token.

        The result is a JSON string in which only openai_api_key is encrypted.
        """
        return json.dumps({
            'openai_api_type': 'azure',
            'openai_api_version': '2023-03-15-preview',
            'openai_api_base': config['openai_api_base'],
            'openai_api_key': self.encrypt_token(config['openai_api_key'])
        })

    def get_decrypted_token(self, token: str):
        """
        Returns the decrypted token.

        The JSON string is parsed and its openai_api_key is decrypted; the
        result is a dict.
        """
        config = json.loads(token)
        config['openai_api_key'] = self.decrypt_token(config['openai_api_key'])
        return config

    @staticmethod
    def _as_config_dict(raw: Union[str, dict]) -> dict:
        """Return the config as a dict, parsing it only when it is still a JSON string."""
        # get_decrypted_token of this class already returns a dict, which
        # json.loads would reject with a TypeError
        if isinstance(raw, dict):
            return raw
        return json.loads(raw)

    @staticmethod
    def _placeholder_config() -> dict:
        """Return a fresh placeholder config with an empty API key."""
        return {
            'openai_api_type': 'azure',
            'openai_api_version': '2023-03-15-preview',
            'openai_api_base': 'https://foo.microsoft.com/bar',
            'openai_api_key': ''
        }
|
||||
124
api/core/llm/provider/base.py
Normal file
124
api/core/llm/provider/base.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import base64
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, Union
|
||||
|
||||
from core import hosted_llm_credentials
|
||||
from core.llm.error import QuotaExceededError, ModelCurrentlyNotSupportError, ProviderTokenNotInitError
|
||||
from extensions.ext_database import db
|
||||
from libs import rsa
|
||||
from models.account import Tenant
|
||||
from models.provider import Provider, ProviderType, ProviderName
|
||||
|
||||
|
||||
class BaseProvider(ABC):
    """Base class of the LLM providers of one tenant."""

    def __init__(self, tenant_id: str):
        self.tenant_id = tenant_id

    def get_provider_api_key(self, model_id: Optional[str] = None, prefer_custom: bool = True) -> str:
        """
        Returns the decrypted API key for the given tenant_id and provider_name.
        If the provider is of type SYSTEM and the quota is exceeded, raises a QuotaExceededError.
        If the provider is of type SYSTEM and the model is 'gpt-4', raises a ModelCurrentlyNotSupportError.
        If the provider is not found or not valid, raises a ProviderTokenNotInitError.
        """
        provider = self.get_provider(prefer_custom)
        if not provider:
            raise ProviderTokenNotInitError()

        if provider.provider_type == ProviderType.SYSTEM.value:
            # a missing quota value counts as 0
            quota_used = provider.quota_used if provider.quota_used is not None else 0
            quota_limit = provider.quota_limit if provider.quota_limit is not None else 0

            if model_id and model_id == 'gpt-4':
                raise ModelCurrentlyNotSupportError()

            if quota_used >= quota_limit:
                raise QuotaExceededError()

            return self.get_hosted_credentials()
        else:
            return self.get_decrypted_token(provider.encrypted_config)

    def get_provider(self, prefer_custom: bool) -> Optional[Provider]:
        """
        Returns the Provider instance for the given tenant_id and provider_name.

        A valid CUSTOM provider with a config is returned first, otherwise a
        valid SYSTEM provider, otherwise None. prefer_custom only changes the
        order in which the records are queried.
        """
        providers = db.session.query(Provider).filter(
            Provider.tenant_id == self.tenant_id,
            Provider.provider_name == self.get_provider_name().value
        ).order_by(Provider.provider_type.desc() if prefer_custom else Provider.provider_type).all()

        custom_provider = None
        system_provider = None

        for provider in providers:
            if provider.provider_type == ProviderType.CUSTOM.value:
                custom_provider = provider
            elif provider.provider_type == ProviderType.SYSTEM.value:
                system_provider = provider

        if custom_provider and custom_provider.is_valid and custom_provider.encrypted_config:
            return custom_provider
        elif system_provider and system_provider.is_valid:
            return system_provider
        else:
            return None

    def get_hosted_credentials(self) -> str:
        """
        Return the hosted OpenAI API key.

        Raises a ProviderTokenNotInitError for any provider other than
        OpenAI, or when no hosted OpenAI key is configured.
        """
        if self.get_provider_name() != ProviderName.OPENAI:
            raise ProviderTokenNotInitError()

        if not hosted_llm_credentials.openai or not hosted_llm_credentials.openai.api_key:
            raise ProviderTokenNotInitError()

        return hosted_llm_credentials.openai.api_key

    def get_provider_configs(self, obfuscated: bool = False) -> Union[str, dict]:
        """
        Returns the provider configs.

        Falls back to a mock token when the API key cannot be read. With
        obfuscated=True the token is masked.
        """
        try:
            config = self.get_provider_api_key()
        except Exception:
            # best effort: any failure to read the key yields the mock token
            config = 'THIS-IS-A-MOCK-TOKEN'

        if obfuscated:
            return self.obfuscated_token(config)

        return config

    def obfuscated_token(self, token: str):
        """
        Mask the token, keeping its first six and last two characters.

        Tokens of eight characters or fewer are masked completely, because
        prefix and suffix together would reveal the whole value.
        """
        if len(token) <= 8:
            return '*' * len(token)

        return token[:6] + '*' * (len(token) - 8) + token[-2:]

    def get_token_type(self):
        """Return the type a submitted token is converted to."""
        return str

    def get_encrypted_token(self, config: Union[dict, str]):
        """Return the encrypted form of the config."""
        return self.encrypt_token(config)

    def get_decrypted_token(self, token: str):
        """Return the decrypted form of the stored token."""
        return self.decrypt_token(token)

    def encrypt_token(self, token):
        """Encrypt the token with the public key of the tenant and return it base64 encoded."""
        tenant = db.session.query(Tenant).filter(Tenant.id == self.tenant_id).first()
        encrypted_token = rsa.encrypt(token, tenant.encrypt_public_key)
        return base64.b64encode(encrypted_token).decode()

    def decrypt_token(self, token):
        """Decode the base64 token and decrypt it for the tenant."""
        return rsa.decrypt(base64.b64decode(token), self.tenant_id)

    @abstractmethod
    def get_provider_name(self):
        raise NotImplementedError

    @abstractmethod
    def get_credentials(self, model_id: Optional[str] = None) -> dict:
        raise NotImplementedError

    @abstractmethod
    def get_models(self, model_id: Optional[str] = None) -> list[dict]:
        raise NotImplementedError

    @abstractmethod
    def config_validate(self, config: str):
        raise NotImplementedError
|
||||
2
api/core/llm/provider/errors.py
Normal file
2
api/core/llm/provider/errors.py
Normal file
@@ -0,0 +1,2 @@
|
||||
class ValidateFailedError(Exception):
    """Raised when the validation of a provider config fails."""
    description = "Provider Validate failed"
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user