Merge remote-tracking branch 'base/feat/jim' into main

1 year ago · 531bd4657e
parent d4911a9028 79d1ccfdcd
commit 531bd4657e
82 changed files with 2512 additions and 344 deletions
--- a/11
+++ b/11
@ -1,18 +1,19 @@
 # Variables
-DOCKER_REGISTRY=langgenius
-WEB_IMAGE=$(DOCKER_REGISTRY)/dify-web
-API_IMAGE=$(DOCKER_REGISTRY)/dify-api
+DOCKER_REGISTRY=akiyu303
+WEB_IMAGE=$(DOCKER_REGISTRY)/lefeng-web
+API_IMAGE=$(DOCKER_REGISTRY)/lefeng-api
 VERSION=latest
+PLATFORM=linux/amd64

 # Build Docker images
 build-web:
 	@echo "Building web Docker image: $(WEB_IMAGE):$(VERSION)..."
-	docker build -t $(WEB_IMAGE):$(VERSION) ./web
+	docker build --platform $(PLATFORM) -t $(WEB_IMAGE):$(VERSION) ./web
 	@echo "Web Docker image built successfully: $(WEB_IMAGE):$(VERSION)"

 build-api:
 	@echo "Building API Docker image: $(API_IMAGE):$(VERSION)..."
-	docker build -t $(API_IMAGE):$(VERSION) ./api
+	docker build --platform $(PLATFORM) -t $(API_IMAGE):$(VERSION) ./api
 	@echo "API Docker image built successfully: $(API_IMAGE):$(VERSION)"

 # Push Docker images
--- a/api/app_factory.py
+++ b/api/app_factory.py
@ -51,6 +51,7 @@ def initialize_extensions(app: DifyApp):
        ext_storage,
        ext_timezone,
        ext_warnings,
+        ext_swagger
    )

    extensions = [
@ -74,6 +75,7 @@ def initialize_extensions(app: DifyApp):
        ext_proxy_fix,
        ext_blueprints,
        ext_commands,
+        ext_swagger
    ]
    for ext in extensions:
        short_name = ext.__name__.split(".")[-1]
--- a/api/controllers/admin/init.py
+++ b/api/controllers/admin/init.py
@ -0,0 +1,9 @@
+from flask import Blueprint
+
+from libs.external_api import ExternalApi
+
+bp = Blueprint("admin_api", __name__, url_prefix="/admin")
+api = ExternalApi(bp)
+
+from .auth import login
+from .stats import stats
--- a/api/controllers/admin/auth/login.py
+++ b/api/controllers/admin/auth/login.py
@ -0,0 +1,124 @@
+from flask import Blueprint
+from flask_restful import Api, Resource # type: ignore
+
+from controllers.admin import api
+
+class SendVerificationCodeApi(Resource):
+    def post(self):
+        """Send verification code to admin's phone number.
+        ---
+        tags:
+          - admin
+        summary: Send Verification Code
+        description: Sends a verification code to the provided admin phone number for authentication
+        parameters:
+          - in: body
+            name: body
+            required: true
+            schema:
+              type: object
+              required:
+                - phone
+              properties:
+                phone:
+                  type: string
+                  description: Admin's phone number
+                  example: "13800138000"
+        responses:
+          200:
+            description: Code sent successfully
+            schema:
+              type: object
+              properties:
+                success:
+                  type: boolean
+                message:
+                  type: string
+          400:
+            description: Invalid phone number format
+          404:
+            description: Phone number not registered as admin
+        """
+        pass
+
+class LoginApi(Resource):
+    def post(self):
+        """Admin login with phone number and verification code.
+        ---
+        tags:
+          - admin
+        summary: Admin Login
+        description: Authenticates an admin using phone number and verification code
+        parameters:
+          - in: body
+            name: body
+            required: true
+            schema:
+              type: object
+              required:
+                - phone
+                - code
+              properties:
+                phone:
+                  type: string
+                  description: Admin's phone number
+                  example: "13800138000"
+                code:
+                  type: string
+                  description: Verification code
+                  example: "123456"
+        responses:
+          200:
+            description: Login successful
+            schema:
+              type: object
+              properties:
+                token:
+                  type: string
+                  description: JWT access token
+                user:
+                  type: object
+                  properties:
+                    id:
+                      type: string
+                    phone:
+                      type: string
+                    name:
+                      type: string
+                    role:
+                      type: string
+                      enum: [admin, super_admin]
+          400:
+            description: Invalid or expired verification code
+          404:
+            description: Phone number not registered
+        """
+        pass
+
+class LogoutApi(Resource):
+    def post(self):
+        """Admin logout.
+        ---
+        tags:
+          - admin
+        summary: Admin Logout
+        description: Logs out the authenticated admin and invalidates the JWT token
+        security:
+          - JWT: []
+        responses:
+          200:
+            description: Logout successful
+            schema:
+              type: object
+              properties:
+                success:
+                  type: boolean
+          401:
+            description: Missing or invalid token
+        """
+        pass
+
+# Register the resources
+api.add_resource(SendVerificationCodeApi, '/auth/send-code')
+api.add_resource(LoginApi, '/auth/login')
+api.add_resource(LogoutApi, '/auth/logout')
--- a/api/controllers/admin/stats/stats.py
+++ b/api/controllers/admin/stats/stats.py
@ -0,0 +1,151 @@
+from flask import Blueprint
+from flask_restful import Api, Resource # type: ignore
+
+from controllers.admin import api
+
+class RiskStats(Resource):
+    def get(self):
+        """Get risk level statistics.
+        ---
+        tags:
+          - admin
+        summary: Get risk level user counts
+        description: Get counts of users at different risk levels and their changes
+        security:
+          - JWT: []
+        parameters:
+          - name: start_date
+            in: query
+            type: string
+            format: date
+            required: true
+            description: Start date of the statistics period (YYYY-MM-DD)
+          - name: end_date
+            in: query  
+            type: string
+            format: date
+            required: true
+            description: End date of the statistics period (YYYY-MM-DD)
+        responses:
+          200:
+            description: Risk statistics retrieved successfully
+            schema:
+              type: object
+              properties:
+                high_risk_count:
+                  type: integer
+                  description: Current number of high risk users
+                daily_changes:
+                  type: object
+                  properties:
+                    from_yesterday:
+                      type: integer
+                      description: Change in high risk users compared to yesterday
+                    from_last_week:
+                      type: integer 
+                      description: Change in high risk users compared to last week
+          400:
+            description: Invalid date parameters
+        """
+        pass
+
+class UserStats(Resource):
+    def get(self):
+        """Get daily user statistics.
+        ---
+        tags:
+          - admin  
+        summary: Get daily active and new user counts
+        description: Get statistics of daily active users and new users
+        security:
+          - JWT: []
+        parameters:
+          - name: start_date
+            in: query
+            type: string
+            format: date
+            required: true
+            description: Start date of the statistics period (YYYY-MM-DD)
+          - name: end_date
+            in: query
+            type: string 
+            format: date
+            required: true
+            description: End date of the statistics period (YYYY-MM-DD)
+        responses:
+          200:
+            description: User statistics retrieved successfully
+            schema:
+              type: object
+              properties:
+                daily_stats:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      date:
+                        type: string
+                        format: date
+                      active_users:
+                        type: integer
+                        description: Number of active users on this date
+                      new_users:
+                        type: integer
+                        description: Number of new users on this date
+          400:
+            description: Invalid date parameters
+        """
+        pass
+
+class ConversationStats(Resource):
+    def get(self):
+        """Get daily conversation statistics.
+        ---
+        tags:
+          - admin
+        summary: Get daily conversation counts and averages
+        description: Get statistics of daily total conversations and average conversations per user
+        security:
+          - JWT: []
+        parameters:
+          - name: start_date
+            in: query
+            type: string
+            format: date
+            required: true
+            description: Start date of the statistics period (YYYY-MM-DD)
+          - name: end_date
+            in: query
+            type: string
+            format: date 
+            required: true
+            description: End date of the statistics period (YYYY-MM-DD)
+        responses:
+          200:
+            description: Conversation statistics retrieved successfully
+            schema:
+              type: object
+              properties:
+                daily_stats:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      date:
+                        type: string
+                        format: date
+                      total_conversations:
+                        type: integer
+                        description: Total number of conversations on this date
+                      avg_conversations_per_user:
+                        type: number
+                        format: float
+                        description: Average conversations per active user on this date
+          400:
+            description: Invalid date parameters
+        """
+        pass
+
+api.add_resource(RiskStats, '/risk-stats')
+api.add_resource(UserStats, '/user-stats') 
+api.add_resource(ConversationStats, '/conversation-stats')
--- a/api/controllers/common/helpers.py
+++ b/api/controllers/common/helpers.py
@ -7,6 +7,7 @@ from typing import Any
 from uuid import uuid4

 import httpx
+import magic
 from pydantic import BaseModel

 from configs import dify_config
@ -47,6 +48,13 @@ def guess_file_info_from_response(response: httpx.Response):
        # If guessing fails, use Content-Type from response headers
        mimetype = response.headers.get("Content-Type", "application/octet-stream")

+    # Use python-magic to guess MIME type if still unknown or generic
+    if mimetype == "application/octet-stream":
+        try:
+            mimetype = magic.from_buffer(response.content[:1024], mime=True)
+        except magic.MagicException:
+            pass
+
    extension = os.path.splitext(filename)[1]

    # Ensure filename has an extension
--- a/api/controllers/console/auth/login.py
+++ b/api/controllers/console/auth/login.py
@ -34,6 +34,7 @@ from services.errors.workspace import WorkSpaceNotAllowedCreateError
 from services.feature_service import FeatureService


+# TODO: copy as a separate auth service api
 class LoginApi(Resource):
    """Resource for user login."""

--- a/api/controllers/inner_api/workspace/workspace.py
+++ b/api/controllers/inner_api/workspace/workspace.py
@ -1,3 +1,5 @@
+import json
+
 from flask_restful import Resource, reqparse  # type: ignore

 from controllers.console.wraps import setup_required
@ -29,4 +31,34 @@ class EnterpriseWorkspace(Resource):
        return {"message": "enterprise workspace created."}


+class EnterpriseWorkspaceNoOwnerEmail(Resource):
+    # Inner-API endpoint that creates a tenant from a name only. Unlike a flow
+    # that attaches an owner, this handler does not create any tenant member.
+    @setup_required
+    @inner_api_only
+    def post(self):
+        # "name" is the only accepted field and must be present in the JSON body.
+        parser = reqparse.RequestParser()
+        parser.add_argument("name", type=str, required=True, location="json")
+        args = parser.parse_args()
+
+        tenant = TenantService.create_tenant(args["name"], is_from_dashboard=True)
+
+        # Notify listeners of the tenant_was_created signal about the new tenant.
+        tenant_was_created.send(tenant)
+
+        # Build a JSON-serialisable view of the tenant: custom_config is decoded
+        # from its stored JSON text (empty dict when unset) and timestamps are
+        # rendered as ISO-8601 strings (None when unset).
+        resp = {
+            "id": tenant.id,
+            "name": tenant.name,
+            "encrypt_public_key": tenant.encrypt_public_key,
+            "plan": tenant.plan,
+            "status": tenant.status,
+            "custom_config": json.loads(tenant.custom_config) if tenant.custom_config else {},
+            "created_at": tenant.created_at.isoformat() if tenant.created_at else None,
+            "updated_at": tenant.updated_at.isoformat() if tenant.updated_at else None,
+        }
+
+        return {
+            "message": "enterprise workspace created.",
+            "tenant": resp,
+        }
+
+
 api.add_resource(EnterpriseWorkspace, "/enterprise/workspace")
+api.add_resource(EnterpriseWorkspaceNoOwnerEmail, "/enterprise/workspace/ownerless")
--- a/api/controllers/service_api/init.py
+++ b/api/controllers/service_api/init.py
@ -8,3 +8,4 @@ api = ExternalApi(bp)
 from . import index
 from .app import app, audio, completion, conversation, file, message, workflow
 from .dataset import dataset, document, hit_testing, segment, upload_file
+from .auth import login
--- a/api/controllers/service_api/app/completion.py
+++ b/api/controllers/service_api/app/completion.py
@ -1,5 +1,6 @@
 import logging

+from libs.login import login_required
 from flask_restful import Resource, reqparse  # type: ignore
 from werkzeug.exceptions import InternalServerError, NotFound

--- a/api/controllers/service_api/auth/error.py
+++ b/api/controllers/service_api/auth/error.py
@ -0,0 +1,61 @@
+from libs.exception import BaseHTTPException
+
+
+class ApiKeyAuthFailedError(BaseHTTPException):
+    error_code = "auth_failed"
+    description = "{message}"
+    code = 500
+
+
+class InvalidEmailError(BaseHTTPException):
+    error_code = "invalid_email"
+    description = "The email address is not valid."
+    code = 400
+
+
+class PasswordMismatchError(BaseHTTPException):
+    error_code = "password_mismatch"
+    description = "The passwords do not match."
+    code = 400
+
+
+class InvalidTokenError(BaseHTTPException):
+    error_code = "invalid_or_expired_token"
+    description = "The token is invalid or has expired."
+    code = 400
+
+
+class PasswordResetRateLimitExceededError(BaseHTTPException):
+    """HTTP 429 error for too many password reset emails."""
+
+    error_code = "password_reset_rate_limit_exceeded"
+    description = "Too many password reset emails have been sent. Please try again in 1 minute."
+    code = 429
+
+
+class EmailCodeError(BaseHTTPException):
+    error_code = "email_code_error"
+    description = "Email code is invalid or expired."
+    code = 400
+
+
+class EmailOrPasswordMismatchError(BaseHTTPException):
+    error_code = "email_or_password_mismatch"
+    description = "The email or password is mismatched."
+    code = 400
+
+
+class EmailPasswordLoginLimitError(BaseHTTPException):
+    error_code = "email_code_login_limit"
+    description = "Too many incorrect password attempts. Please try again later."
+    code = 429
+
+
+class EmailCodeLoginRateLimitExceededError(BaseHTTPException):
+    error_code = "email_code_login_rate_limit_exceeded"
+    description = "Too many login emails have been sent. Please try again in 5 minutes."
+    code = 429
+
+
+class EmailCodeAccountDeletionRateLimitExceededError(BaseHTTPException):
+    error_code = "email_code_account_deletion_rate_limit_exceeded"
+    description = "Too many account deletion emails have been sent. Please try again in 5 minutes."
+    code = 429
--- a/api/controllers/service_api/auth/login.py
+++ b/api/controllers/service_api/auth/login.py
@ -0,0 +1,271 @@
+from typing import cast
+
+import flask_login  # type: ignore
+from flask import request
+from flask_restful import Resource, reqparse  # type: ignore
+
+from constants.languages import languages
+from controllers.service_api import api
+from controllers.service_api.auth.error import (
+    EmailCodeError,
+    InvalidEmailError,
+    InvalidTokenError,
+)
+from controllers.service_api.error import (
+    AccountInFreezeError,
+    AccountNotFound,
+    EmailSendIpLimitError,
+    NotAllowedCreateWorkspace,
+)
+from events.tenant_event import tenant_was_created
+from libs.helper import email, extract_remote_ip
+from models.account import Account
+from services.account_service import AccountService, TenantService
+from services.errors.account import AccountRegisterError
+from services.errors.workspace import WorkSpaceNotAllowedCreateError
+from services.feature_service import FeatureService
+
+
+class LogoutApi(Resource):
+    # The method docstring below is an API spec (YAML after the "---" marker);
+    # keep explanatory notes in "#" comments so the spec text stays untouched.
+    def get(self):
+        """Logout user.
+        ---
+        tags:
+          - user-end
+        summary: Logout User
+        description: Logs out the authenticated user and invalidates the session
+        security:
+          - JWT: []
+        responses:
+          200:
+            description: Successfully logged out
+            schema:
+              type: object
+              properties:
+                result:
+                  type: string
+                  example: "success"
+          401:
+            description: Unauthorized, invalid or missing token
+        """
+        account = cast(Account, flask_login.current_user)
+        # An anonymous caller has nothing to log out; report success without
+        # touching AccountService. NOTE(review): the spec above lists a 401
+        # response, but this handler itself never produces one.
+        if isinstance(account, flask_login.AnonymousUserMixin):
+            return {"result": "success"}
+        AccountService.logout(account=account)
+        flask_login.logout_user()
+        return {"result": "success"}
+
+
+class EmailCodeLoginSendEmailApi(Resource):
+    # The method docstring below is an API spec (YAML after the "---" marker);
+    # keep explanatory notes in "#" comments so the spec text stays untouched.
+    def post(self):
+        """Send email code for login.
+        ---
+        tags:
+          - user-end
+        summary: Email Code Login Email Sending
+        description: Sends an email with a verification code for login
+        parameters:
+          - in: body
+            name: body
+            required: true
+            schema:
+              type: object
+              required:
+                - email
+              properties:
+                email:
+                  type: string
+                  description: The user's email
+                language:
+                  type: string
+                  description: Preferred language for the email
+                  enum: ["en-US", "zh-Hans"]
+        responses:
+          200:
+            description: Successfully sent the email code
+            schema:
+              type: object
+              properties:
+                result:
+                  type: string
+                  example: "success"
+                data:
+                  type: object
+                  description: Token data
+          429:
+            description: Too many requests, IP limit reached
+          404:
+            description: Account not found
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=email, required=True, location="json")
+        parser.add_argument("language", type=str, required=False, location="json")
+        args = parser.parse_args()
+
+        # Refuse early when this client IP has hit the email-sending limit.
+        ip_address = extract_remote_ip(request)
+        if AccountService.is_email_send_ip_limit(ip_address):
+            raise EmailSendIpLimitError()
+
+        # Only "zh-Hans" is honoured; any other value (or none) falls back to "en-US".
+        language = "zh-Hans" if args["language"] == "zh-Hans" else "en-US"
+
+        try:
+            account = AccountService.get_user_through_email(args["email"])
+        except AccountRegisterError as are:
+            # Keep the original error as the cause so it is not lost in logs.
+            raise AccountInFreezeError() from are
+
+        if account is not None:
+            token = AccountService.send_email_code_login_email(account=account, language=language)
+        elif FeatureService.get_system_features().is_allow_register:
+            # Unknown email, but registration is open: send the code to the raw address.
+            token = AccountService.send_email_code_login_email(email=args["email"], language=language)
+        else:
+            # NOTE(review): the spec above documents this case as 404, while
+            # AccountNotFound is declared with code 400 - confirm which is intended.
+            raise AccountNotFound()
+
+        return {"result": "success", "data": token}
+
+
+class EmailCodeLoginApi(Resource):
+    # The method docstring below is an API spec (YAML after the "---" marker);
+    # keep explanatory notes in "#" comments so the spec text stays untouched.
+    def post(self):
+        """Login using email code.
+        ---
+        tags:
+          - user-end
+        summary: Email Code Login
+        description: Allows the user to login using a verification code and token sent via email
+        parameters:
+          - in: body
+            name: body
+            required: true
+            schema:
+              type: object
+              required:
+                - email
+                - code
+                - token
+              properties:
+                email:
+                  type: string
+                  description: The user's email
+                code:
+                  type: string
+                  description: The verification code sent to the email
+                token:
+                  type: string
+                  description: The token associated with the email code login
+        responses:
+          200:
+            description: Successfully logged in
+            schema:
+              type: object
+              properties:
+                result:
+                  type: string
+                  example: "success"
+                data:
+                  type: object
+                  description: Token pair data
+          400:
+            description: Invalid token, email or code
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=str, required=True, location="json")
+        parser.add_argument("code", type=str, required=True, location="json")
+        parser.add_argument("token", type=str, required=True, location="json")
+        args = parser.parse_args()
+
+        user_email = args["email"]
+
+        # The token must exist and must match the submitted email and code.
+        token_data = AccountService.get_email_code_login_data(args["token"])
+        if token_data is None:
+            raise InvalidTokenError()
+
+        if token_data["email"] != user_email:
+            raise InvalidEmailError()
+
+        if token_data["code"] != args["code"]:
+            raise EmailCodeError()
+
+        # Revoke the token once it has been validated.
+        AccountService.revoke_email_code_login_token(args["token"])
+        try:
+            account = AccountService.get_user_through_email(user_email)
+        except AccountRegisterError as are:
+            # Keep the original error as the cause so it is not lost in logs.
+            raise AccountInFreezeError() from are
+        if account:
+            # Existing account: make sure it belongs to at least one workspace.
+            tenant = TenantService.get_join_tenants(account)
+            if not tenant:
+                if not FeatureService.get_system_features().is_allow_create_workspace:
+                    raise NotAllowedCreateWorkspace()
+                else:
+                    tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
+                    TenantService.create_tenant_member(tenant, account, role="owner")
+                    account.current_tenant = tenant
+                    tenant_was_created.send(tenant)
+
+        if account is None:
+            # Unknown email: create the account together with its workspace.
+            try:
+                account = AccountService.create_account_and_tenant(
+                    email=user_email, name=user_email, interface_language=languages[0]
+                )
+            except WorkSpaceNotAllowedCreateError as wsnace:
+                # Must be raised, not returned: returning the exception object
+                # would hand it to the response serializer instead of the
+                # error handler, so the client would not get the intended 400.
+                raise NotAllowedCreateWorkspace() from wsnace
+            except AccountRegisterError as are:
+                raise AccountInFreezeError() from are
+        token_pair = AccountService.login(account, ip_address=extract_remote_ip(request))
+        AccountService.reset_login_error_rate_limit(user_email)
+        return {"result": "success", "data": token_pair.model_dump()}
+
+
+class RefreshTokenApi(Resource):
+    # The method docstring below is an API spec (YAML after the "---" marker);
+    # keep explanatory notes in "#" comments so the spec text stays untouched.
+    def post(self):
+        """Refresh authentication token.
+        ---
+        tags:
+          - user-end
+        summary: Refresh Token
+        description: Refreshes an access token using a valid refresh token
+        security:
+          - JWT: []
+        parameters:
+          - in: body
+            name: body
+            required: true
+            schema:
+              type: object
+              required:
+                - refresh_token
+              properties:
+                refresh_token:
+                  type: string
+                  description: The refresh token provided in the request
+        responses:
+          200:
+            description: Successfully refreshed token
+            schema:
+              type: object
+              properties:
+                result:
+                  type: string
+                  example: "success"
+                data:
+                  type: object
+                  description: New token pair data
+          401:
+            description: Unauthorized, invalid or missing token
+        """
+        # "refresh_token" is mandatory and read from the JSON body.
+        parser = reqparse.RequestParser()
+        parser.add_argument("refresh_token", type=str, required=True, location="json")
+        args = parser.parse_args()
+
+        try:
+            new_token_pair = AccountService.refresh_token(args["refresh_token"])
+            return {"result": "success", "data": new_token_pair.model_dump()}
+        except Exception as e:
+            # Any failure is reported as 401 with the exception text as payload.
+            # NOTE(review): this also covers unexpected errors and echoes their
+            # message to the client - confirm no internal detail can leak here.
+            return {"result": "fail", "data": str(e)}, 401
+
+
+api.add_resource(LogoutApi, "/logout")
+api.add_resource(EmailCodeLoginSendEmailApi, "/email-code-login")
+api.add_resource(EmailCodeLoginApi, "/email-code-login/validity")
+api.add_resource(RefreshTokenApi, "/refresh-token")
--- a/api/controllers/service_api/error.py
+++ b/api/controllers/service_api/error.py
@ -0,0 +1,103 @@
+from libs.exception import BaseHTTPException
+
+
+class AlreadySetupError(BaseHTTPException):
+    error_code = "already_setup"
+    description = "Dify has been successfully installed. Please refresh the page or return to the dashboard homepage."
+    code = 403
+
+
+class NotSetupError(BaseHTTPException):
+    error_code = "not_setup"
+    description = (
+        "Dify has not been initialized and installed yet. "
+        "Please proceed with the initialization and installation process first."
+    )
+    code = 401
+
+
+class NotInitValidateError(BaseHTTPException):
+    error_code = "not_init_validated"
+    description = "Init validation has not been completed yet. Please proceed with the init validation process first."
+    code = 401
+
+
+class InitValidateFailedError(BaseHTTPException):
+    error_code = "init_validate_failed"
+    description = "Init validation failed. Please check the password and try again."
+    code = 401
+
+
+class AccountNotLinkTenantError(BaseHTTPException):
+    error_code = "account_not_link_tenant"
+    description = "Account not link tenant."
+    code = 403
+
+
+class AlreadyActivateError(BaseHTTPException):
+    error_code = "already_activate"
+    description = "Auth Token is invalid or account already activated, please check again."
+    code = 403
+
+
+class NotAllowedCreateWorkspace(BaseHTTPException):
+    error_code = "not_allowed_create_workspace"
+    description = "Workspace not found, please contact system admin to invite you to join in a workspace."
+    code = 400
+
+
+class AccountBannedError(BaseHTTPException):
+    error_code = "account_banned"
+    description = "Account is banned."
+    code = 400
+
+
+class AccountNotFound(BaseHTTPException):
+    error_code = "account_not_found"
+    description = "Account not found."
+    code = 400
+
+
+class EmailSendIpLimitError(BaseHTTPException):
+    error_code = "email_send_ip_limit"
+    description = "Too many emails have been sent from this IP address recently. Please try again later."
+    code = 429
+
+
+class FileTooLargeError(BaseHTTPException):
+    error_code = "file_too_large"
+    description = "File size exceeded. {message}"
+    code = 413
+
+
+class UnsupportedFileTypeError(BaseHTTPException):
+    error_code = "unsupported_file_type"
+    description = "File type not allowed."
+    code = 415
+
+
+class TooManyFilesError(BaseHTTPException):
+    error_code = "too_many_files"
+    description = "Only one file is allowed."
+    code = 400
+
+
+class NoFileUploadedError(BaseHTTPException):
+    error_code = "no_file_uploaded"
+    description = "Please upload your file."
+    code = 400
+
+
+class UnauthorizedAndForceLogout(BaseHTTPException):
+    error_code = "unauthorized_and_force_logout"
+    description = "Unauthorized and force logout."
+    code = 401
+
+
+class AccountInFreezeError(BaseHTTPException):
+    """HTTP 400 error for an email whose account was deleted within the past 30 days."""
+
+    error_code = "account_in_freeze"
+    code = 400
+    # Adjacent string literals are joined with no separator, so the first
+    # fragment carries the trailing space (previously rendered "30 daysand").
+    description = (
+        "This email account has been deleted within the past 30 days "
+        "and is temporarily unavailable for new account registration."
+    )
--- a/api/controllers/service_api/index.py
+++ b/api/controllers/service_api/index.py
@ -1,3 +1,4 @@
+from libs.login import login_required
 from flask_restful import Resource  # type: ignore

 from configs import dify_config
@ -5,6 +6,7 @@ from controllers.service_api import api


 class IndexApi(Resource):
+    @login_required
    def get(self):
        return {
            "welcome": "Dify OpenAPI",
--- a/api/controllers/service_api/wraps.py
+++ b/api/controllers/service_api/wraps.py
@ -34,6 +34,7 @@ class FetchUserArg(BaseModel):
    required: bool = False


+# TODO: add auth jwt token check
 def validate_app_token(view: Optional[Callable] = None, *, fetch_user_arg: Optional[FetchUserArg] = None):
    def decorator(view_func):
        @wraps(view_func)
--- a/api/core/app/apps/agent_chat/app_runner.py
+++ b/api/core/app/apps/agent_chat/app_runner.py
@ -202,7 +202,7 @@ class AgentChatAppRunner(AppRunner):
        # change function call strategy based on LLM model
        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
        model_schema = llm_model.get_model_schema(model_instance.model, model_instance.credentials)
-        if not model_schema or not model_schema.features:
+        if not model_schema:
            raise ValueError("Model schema not found")

        if {ModelFeature.MULTI_TOOL_CALL, ModelFeature.TOOL_CALL}.intersection(model_schema.features or []):
--- a/api/core/model_runtime/model_providers/novita/_assets/icon_l_en.svg
+++ b/api/core/model_runtime/model_providers/novita/_assets/icon_l_en.svg
@ -1,19 +1,11 @@
-<svg width="162" height="36" viewBox="0 0 162 36" fill="none" xmlns="http://www.w3.org/2000/svg">
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="white"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="url(#paint0_linear_1473_71)"/>
-<path d="M55.9397 27.8804H59.0566V19.0803C59.0566 14.9105 56.381 12.7172 52.8228 12.7172C51.0023 12.7172 49.3197 13.4483 48.2991 14.6668V12.9609H45.1546V27.8804H48.2991V19.5406C48.2991 16.8059 49.8162 15.3978 52.1332 15.3978C54.4226 15.3978 55.9397 16.8059 55.9397 19.5406V27.8804Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M69.7881 12.7172C74.1187 12.7172 77.539 15.7228 77.539 20.4071C77.539 25.0915 74.0083 28.1241 69.6502 28.1241C65.3196 28.1241 62.0372 25.0915 62.0372 20.4071C62.0372 15.7228 65.4575 12.7172 69.7881 12.7172ZM69.7342 15.3979C67.362 15.3979 65.2381 17.0225 65.2381 20.4071C65.2381 23.7918 67.2793 25.4435 69.6514 25.4435C71.996 25.4435 74.313 23.7918 74.313 20.4071C74.313 17.0225 72.0788 15.3979 69.7342 15.3979Z" fill="#11101A"/>
-<path d="M78.861 12.9609L84.6259 27.8804H88.3772L94.1697 12.9609H90.8321L86.5291 25.1185L82.2261 12.9609H78.861Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M100.13 9.00761C100.13 10.1178 99.2477 10.9842 98.1443 10.9842C97.0134 10.9842 96.1308 10.1178 96.1308 9.00761C96.1308 7.89745 97.0134 7.03098 98.1443 7.03098C99.2477 7.03098 100.13 7.89745 100.13 9.00761ZM99.6882 27.8804H96.5437V12.9609H99.6882V27.8804Z" fill="#11101A"/>
-<path d="M104.322 23.7376C104.322 26.7702 106.004 27.8804 108.708 27.8804H111.19V25.308H109.259C107.935 25.308 107.494 24.8477 107.494 23.7376V15.479H111.19V12.9609H107.494V9.25128H104.322V12.9609H102.529V15.479H104.322V23.7376Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M120.154 28.1241C116.209 28.1241 113.037 24.9561 113.037 20.353C113.037 15.7498 116.209 12.7172 120.209 12.7172C122.774 12.7172 124.539 13.9086 125.477 15.1271V12.9609H128.649V27.8804H125.477V25.6601C124.512 26.9327 122.691 28.1241 120.154 28.1241ZM120.87 25.4435C123.242 25.4435 125.476 23.6293 125.476 20.4071C125.476 17.212 123.242 15.3979 120.87 15.3979C118.526 15.3979 116.264 17.1308 116.264 20.353C116.264 23.5752 118.526 25.4435 120.87 25.4435Z" fill="#11101A"/>
-<path d="M136.043 26.0933C136.043 24.9832 135.16 24.1167 134.057 24.1167C132.926 24.1167 132.043 24.9832 132.043 26.0933C132.043 27.2035 132.926 28.07 134.057 28.07C135.16 28.07 136.043 27.2035 136.043 26.0933Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M145.502 28.1241C141.558 28.1241 138.386 24.9561 138.386 20.353C138.386 15.7498 141.558 12.7172 145.557 12.7172C148.123 12.7172 149.888 13.9086 150.826 15.1271V12.9609H153.998V27.8804H150.826V25.6601C149.86 26.9327 148.04 28.1241 145.502 28.1241ZM146.219 25.4435C148.591 25.4435 150.825 23.6293 150.825 20.4071C150.825 17.212 148.591 15.3979 146.219 15.3979C143.874 15.3979 141.612 17.1308 141.612 20.353C141.612 23.5752 143.874 25.4435 146.219 25.4435Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M161.722 9.00761C161.722 10.1178 160.84 10.9842 159.736 10.9842C158.605 10.9842 157.723 10.1178 157.723 9.00761C157.723 7.89745 158.605 7.03098 159.736 7.03098C160.84 7.03098 161.722 7.89745 161.722 9.00761ZM161.28 27.8804H158.136V12.9609H161.28V27.8804Z" fill="#11101A"/>
+<svg width="88" height="24" viewBox="0 0 88 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+<g clip-path="url(#clip0_1923_1287)">
+<path d="M24 18.8323V18.8326H14.3246L9.16716 13.6751V18.8326H0V18.8314L9.16716 9.66422V4H9.16774L24 18.8323Z" fill="black"/>
+</g>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M73.2505 16.8061H76.5869V18.9145H73.9391C72.0857 18.9145 70.9202 17.8952 70.9202 15.9977V10.3921H69.0316V8.26609H70.9202L71.4677 5.47209H73.2329V8.26609H76.5869V10.3921H73.2505V16.8061ZM33.8133 4.85699L38.6679 15.681H38.809V4.85699H41.3333V18.9145H37.52L32.6654 8.09046H32.5243V18.9145H30V4.85699H33.8133ZM47.812 19.1254C44.7225 19.1254 42.7457 16.9641 42.7457 13.6079C42.7457 10.2517 44.6873 8.05518 47.812 8.05518C50.9367 8.05518 52.8429 10.1635 52.8429 13.6079C52.8429 17.0523 50.9014 19.1254 47.812 19.1254ZM47.812 17.017C49.1891 17.017 50.3363 16.5423 50.3715 15.1894V12.0265C50.3715 10.6383 49.2068 10.1635 47.812 10.1635C46.4172 10.1635 45.2171 10.6383 45.2171 12.0265V15.1894C45.2524 16.5599 46.4348 17.017 47.812 17.017ZM55.5444 8.24846L58.2979 16.6826H58.439L61.1926 8.24846H63.7346L59.9389 18.8968H56.7966L53.0186 8.24846H55.5429H55.5444ZM65.0419 8.26609H67.3722V18.9145H65.0419V8.26609ZM64.9001 4.85699H67.5126V6.86027H64.9001V4.85699ZM82.3064 19.143C79.4639 19.143 77.6458 16.9817 77.6458 13.6079C77.6458 10.2341 79.4286 8.07282 82.3064 8.07282C83.6483 8.07282 84.7425 8.59973 85.3958 9.58373H85.5369L85.9962 8.26609H87.7614V18.9145H85.9962L85.5369 17.6314H85.3958C84.6896 18.5625 83.5072 19.1423 82.3064 19.1423V19.143ZM82.7826 17.017C84.1774 17.017 85.3951 16.5776 85.4304 15.1894V12.0265C85.4304 10.603 84.159 10.1988 82.7297 10.1988C81.3004 10.1988 80.1172 10.6383 80.1172 12.0265V15.1894C80.1525 16.5952 81.3709 17.017 82.7826 17.017Z" fill="black"/>
 <defs>
-<linearGradient id="paint0_linear_1473_71" x1="31" y1="-2" x2="0.975591" y2="14.2625" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2622FF"/>
-<stop offset="1" stop-color="#A717FF"/>
-</linearGradient>
+<clipPath id="clip0_1923_1287">
+<rect width="24" height="14.8326" fill="white" transform="translate(0 4)"/>
+</clipPath>
 </defs>
 </svg>
--- a/api/core/model_runtime/model_providers/novita/_assets/icon_s_en.svg
+++ b/api/core/model_runtime/model_providers/novita/_assets/icon_s_en.svg
@ -1,10 +1,3 @@
-<svg width="32" height="36" viewBox="0 0 32 36" fill="none" xmlns="http://www.w3.org/2000/svg">
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="white"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="url(#paint0_linear_1473_97)"/>
-<defs>
-<linearGradient id="paint0_linear_1473_97" x1="31" y1="-2" x2="0.975591" y2="14.2625" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2622FF"/>
-<stop offset="1" stop-color="#A717FF"/>
-</linearGradient>
-</defs>
+<svg width="24" height="15" viewBox="0 0 24 15" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M24 14.8323V14.8326H14.3246L9.16716 9.67507V14.8326H0V14.8314L9.16716 5.66422V0H9.16774L24 14.8323Z" fill="black"/>
 </svg>
--- a/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml
@ -0,0 +1,41 @@
+model: Sao10K/L3-8B-Stheno-v3.2
+label:
+  zh_Hans: L3 8B Stheno V3.2
+  en_US: L3 8B Stheno V3.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0005'
+  output: '0.0005'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/_position.yaml
@ -0,0 +1,41 @@
+# Deepseek Models
+- deepseek/deepseek-r1
+- deepseek/deepseek_v3
+
+# LLaMA Models
+- meta-llama/llama-3.3-70b-instruct
+- meta-llama/llama-3.2-11b-vision-instruct
+- meta-llama/llama-3.2-3b-instruct
+- meta-llama/llama-3.2-1b-instruct
+- meta-llama/llama-3.1-70b-instruct
+- meta-llama/llama-3.1-8b-instruct
+- meta-llama/llama-3.1-8b-instruct-max
+- meta-llama/llama-3.1-8b-instruct-bf16
+- meta-llama/llama-3-70b-instruct
+- meta-llama/llama-3-8b-instruct
+
+# Mistral Models
+- mistralai/mistral-nemo
+- mistralai/mistral-7b-instruct
+
+# Qwen Models
+- qwen/qwen-2.5-72b-instruct
+- qwen/qwen-2-72b-instruct
+- qwen/qwen-2-vl-72b-instruct
+- qwen/qwen-2-7b-instruct
+
+# Other Models
+- Sao10K/L3-8B-Stheno-v3.2
+- sao10k/l3-70b-euryale-v2.1
+- sao10k/l31-70b-euryale-v2.2
+- sao10k/l3-8b-lunaris
+- jondurbin/airoboros-l2-70b
+- cognitivecomputations/dolphin-mixtral-8x22b
+- google/gemma-2-9b-it
+- nousresearch/hermes-2-pro-llama-3-8b
+- sophosympatheia/midnight-rose-70b
+- gryphe/mythomax-l2-13b
+- nousresearch/nous-hermes-llama2-13b
+- openchat/openchat-7b
+- teknium/openhermes-2.5-mistral-7b
+- microsoft/wizardlm-2-8x22b
--- a/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml
@ -1,7 +1,7 @@
 model: jondurbin/airoboros-l2-70b
 label:
-  zh_Hans: jondurbin/airoboros-l2-70b
-  en_US: jondurbin/airoboros-l2-70b
+  zh_Hans: Airoboros L2 70B
+  en_US: Airoboros L2 70B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml
@ -0,0 +1,41 @@
+model: deepseek/deepseek-r1
+label:
+  zh_Hans: DeepSeek R1
+  en_US: DeepSeek R1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.04'
+  output: '0.04'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml
@ -0,0 +1,41 @@
+model: deepseek/deepseek_v3
+label:
+  zh_Hans: DeepSeek V3
+  en_US: DeepSeek V3
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0089'
+  output: '0.0089'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml
@ -1,7 +1,7 @@
 model: cognitivecomputations/dolphin-mixtral-8x22b
 label:
-  zh_Hans: cognitivecomputations/dolphin-mixtral-8x22b
-  en_US: cognitivecomputations/dolphin-mixtral-8x22b
+  zh_Hans: Dolphin Mixtral 8x22B
+  en_US: Dolphin Mixtral 8x22B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml
@ -1,7 +1,7 @@
 model: google/gemma-2-9b-it
 label:
-  zh_Hans: google/gemma-2-9b-it
-  en_US: google/gemma-2-9b-it
+  zh_Hans: Gemma 2 9B
+  en_US: Gemma 2 9B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
@ -1,7 +1,7 @@
 model: nousresearch/hermes-2-pro-llama-3-8b
 label:
-  zh_Hans: nousresearch/hermes-2-pro-llama-3-8b
-  en_US: nousresearch/hermes-2-pro-llama-3-8b
+  zh_Hans: Hermes 2 Pro Llama 3 8B
+  en_US: Hermes 2 Pro Llama 3 8B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml
@ -1,7 +1,7 @@
 model: sao10k/l3-70b-euryale-v2.1
 label:
-  zh_Hans: sao10k/l3-70b-euryale-v2.1
-  en_US: sao10k/l3-70b-euryale-v2.1
+  zh_Hans: L3 70B Euryale V2.1
+  en_US: L3 70B Euryale V2.1
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml
@ -0,0 +1,41 @@
+model: sao10k/l3-8b-lunaris
+label:
+  zh_Hans: "Sao10k L3 8B Lunaris"
+  en_US: "Sao10k L3 8B Lunaris"
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0005'
+  output: '0.0005'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml
@ -0,0 +1,41 @@
+model: sao10k/l31-70b-euryale-v2.2
+label:
+  zh_Hans: L31 70B Euryale V2.2
+  en_US: L31 70B Euryale V2.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 16000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0148'
+  output: '0.0148'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3-70b-instruct
-  en_US: meta-llama/llama-3-70b-instruct
+  zh_Hans: Llama 3 70B Instruct
+  en_US: Llama 3 70B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3-8b-instruct
 label:
-  zh_Hans: meta-llama/llama-3-8b-instruct
-  en_US: meta-llama/llama-3-8b-instruct
+  zh_Hans: Llama 3 8B Instruct
+  en_US: Llama 3 8B Instruct
 model_type: llm
 features:
  - agent-thought
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.00063'
-  output: '0.00063'
+  input: '0.0004'
+  output: '0.0004'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml
@ -1,13 +1,13 @@
 model: meta-llama/llama-3.1-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-70b-instruct
-  en_US: meta-llama/llama-3.1-70b-instruct
+  zh_Hans: Llama 3.1 70B Instruct
+  en_US: Llama 3.1 70B Instruct
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
-  context_size: 8192
+  context_size: 32768
 parameter_rules:
  - name: temperature
    use_template: temperature
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.0055'
-  output: '0.0076'
+  input: '0.0034'
+  output: '0.0039'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml
@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-8b-instruct-bf16
+label:
+  zh_Hans: Llama 3.1 8B Instruct BF16
+  en_US: Llama 3.1 8B Instruct BF16
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0006'
+  output: '0.0006'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml
@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-8b-instruct-max
+label:
+  zh_Hans: Llama3.1 8B Instruct Max
+  en_US: Llama3.1 8B Instruct Max
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 16384
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0005'
+  output: '0.0005'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml
@ -1,13 +1,13 @@
 model: meta-llama/llama-3.1-8b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-8b-instruct
-  en_US: meta-llama/llama-3.1-8b-instruct
+  zh_Hans: Llama 3.1 8B Instruct
+  en_US: Llama 3.1 8B Instruct
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
-  context_size: 8192
+  context_size: 16384
 parameter_rules:
  - name: temperature
    use_template: temperature
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.001'
-  output: '0.001'
+  input: '0.0005'
+  output: '0.0005'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml
@ -0,0 +1,41 @@
+model: meta-llama/llama-3.2-11b-vision-instruct
+label:
+  zh_Hans: Llama 3.2 11B Vision Instruct
+  en_US: Llama 3.2 11B Vision Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0006'
+  output: '0.0006'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml
@ -0,0 +1,41 @@
+model: meta-llama/llama-3.2-1b-instruct
+label:
+  zh_Hans: Llama 3.2 1B Instruct
+  en_US: Llama 3.2 1B Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0002'
+  output: '0.0002'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml
@ -1,7 +1,7 @@
-model: Nous-Hermes-2-Mixtral-8x7B-DPO
+model: meta-llama/llama-3.2-3b-instruct
 label:
-  zh_Hans: Nous-Hermes-2-Mixtral-8x7B-DPO
-  en_US: Nous-Hermes-2-Mixtral-8x7B-DPO
+  zh_Hans: Llama 3.2 3B Instruct
+  en_US: Llama 3.2 3B Instruct
 model_type: llm
 features:
  - agent-thought
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.0027'
-  output: '0.0027'
+  input: '0.0003'
+  output: '0.0005'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml
@ -0,0 +1,41 @@
+model: meta-llama/llama-3.3-70b-instruct
+label:
+  zh_Hans: Llama 3.3 70B Instruct
+  en_US: Llama 3.3 70B Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0039'
+  output: '0.0039'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml
@ -1,7 +1,7 @@
 model: sophosympatheia/midnight-rose-70b
 label:
-  zh_Hans: sophosympatheia/midnight-rose-70b
-  en_US: sophosympatheia/midnight-rose-70b
+  zh_Hans: Midnight Rose 70B
+  en_US: Midnight Rose 70B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml
@ -1,7 +1,7 @@
 model: mistralai/mistral-7b-instruct
 label:
-  zh_Hans: mistralai/mistral-7b-instruct
-  en_US: mistralai/mistral-7b-instruct
+  zh_Hans: Mistral 7B Instruct
+  en_US: Mistral 7B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml
@ -0,0 +1,41 @@
+model: mistralai/mistral-nemo
+label:
+  zh_Hans: Mistral Nemo
+  en_US: Mistral Nemo
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0017'
+  output: '0.0017'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml
@ -1,7 +1,7 @@
 model: gryphe/mythomax-l2-13b
 label:
-  zh_Hans: gryphe/mythomax-l2-13b
-  en_US: gryphe/mythomax-l2-13b
+  zh_Hans: Mythomax L2 13B
+  en_US: Mythomax L2 13B
 model_type: llm
 features:
  - agent-thought
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.00119'
-  output: '0.00119'
+  input: '0.0009'
+  output: '0.0009'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml
@ -1,7 +1,7 @@
 model: nousresearch/nous-hermes-llama2-13b
 label:
-  zh_Hans: nousresearch/nous-hermes-llama2-13b
-  en_US: nousresearch/nous-hermes-llama2-13b
+  zh_Hans: Nous Hermes Llama2 13B
+  en_US: Nous Hermes Llama2 13B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml
@ -1,7 +1,7 @@
-model: lzlv_70b
+model: openchat/openchat-7b
 label:
-  zh_Hans: lzlv_70b
-  en_US: lzlv_70b
+  zh_Hans: OpenChat 7B
+  en_US: OpenChat 7B
 model_type: llm
 features:
  - agent-thought
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.0058'
-  output: '0.0078'
+  input: '0.0006'
+  output: '0.0006'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml
@ -1,7 +1,7 @@
 model: teknium/openhermes-2.5-mistral-7b
 label:
-  zh_Hans: teknium/openhermes-2.5-mistral-7b
-  en_US: teknium/openhermes-2.5-mistral-7b
+  zh_Hans: Openhermes2.5 Mistral 7B
+  en_US: Openhermes2.5 Mistral 7B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-405b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-405b-instruct.yaml
@ -1,7 +1,7 @@
-model: meta-llama/llama-3.1-405b-instruct
+model: qwen/qwen-2-72b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-405b-instruct
-  en_US: meta-llama/llama-3.1-405b-instruct
+  zh_Hans: Qwen2 72B Instruct
+  en_US: Qwen2 72B Instruct
 model_type: llm
 features:
  - agent-thought
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.03'
-  output: '0.05'
+  input: '0.0034'
+  output: '0.0039'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml
@ -0,0 +1,41 @@
+model: qwen/qwen-2-7b-instruct
+label:
+  zh_Hans: Qwen 2 7B Instruct
+  en_US: Qwen 2 7B Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.00054'
+  output: '0.00054'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml
@ -0,0 +1,41 @@
+model: qwen/qwen-2-vl-72b-instruct
+label:
+  zh_Hans: Qwen 2 VL 72B Instruct
+  en_US: Qwen 2 VL 72B Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0045'
+  output: '0.0045'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml
@ -0,0 +1,41 @@
+model: qwen/qwen-2.5-72b-instruct
+label:
+  zh_Hans: Qwen 2.5 72B Instruct
+  en_US: Qwen 2.5 72B Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0038'
+  output: '0.004'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml
@ -1,7 +1,7 @@
 model: microsoft/wizardlm-2-8x22b
 label:
-  zh_Hans: microsoft/wizardlm-2-8x22b
-  en_US: microsoft/wizardlm-2-8x22b
+  zh_Hans: Wizardlm 2 8x22B
+  en_US: Wizardlm 2 8x22B
 model_type: llm
 features:
  - agent-thought
@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.0064'
-  output: '0.0064'
+  input: '0.0062'
+  output: '0.0062'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/novita.yaml
+++ b/api/core/model_runtime/model_providers/novita/novita.yaml
@ -1,6 +1,6 @@
 provider: novita
 label:
-  en_US: novita.ai
+  en_US: Novita AI
 description:
  en_US: An LLM API that matches various application scenarios with high cost-effectiveness.
  zh_Hans: 适配多种海外应用场景的高性价比 LLM API
@ -8,13 +8,13 @@ icon_small:
  en_US: icon_s_en.svg
 icon_large:
  en_US: icon_l_en.svg
-background: "#eadeff"
+background: "#c7fce2"
 help:
  title:
-    en_US: Get your API key from novita.ai
-    zh_Hans: 从 novita.ai 获取 API Key
+    en_US: Get your API key from Novita AI
+    zh_Hans: 从 Novita AI 获取 API Key
  url:
-    en_US: https://novita.ai/settings#key-management?utm_source=dify&utm_medium=ch&utm_campaign=api
+    en_US: https://novita.ai/settings/key-management?utm_source=dify&utm_medium=ch&utm_campaign=api
 supported_model_types:
  - llm
 configurate_methods:
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@ -1,29 +1,13 @@
-import json
-import time
-from decimal import Decimal
 from typing import Optional
-from urllib.parse import urljoin
-
-import numpy as np
-import requests

 from core.entities.embedding_type import EmbeddingInputType
-from core.model_runtime.entities.common_entities import I18nObject
-from core.model_runtime.entities.model_entities import (
-    AIModelEntity,
-    FetchFrom,
-    ModelPropertyKey,
-    ModelType,
-    PriceConfig,
-    PriceType,
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
+    OAICompatEmbeddingModel,
 )
-from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
-from core.model_runtime.errors.validate import CredentialsValidateFailedError
-from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
-from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOaiApiCompat


-class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
+class PerfXCloudEmbeddingModel(OAICompatEmbeddingModel):
    """
    Model class for an OpenAI API-compatible text embedding model.
    """
@ -47,86 +31,10 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :return: embeddings result
        """

-        # Prepare headers and payload for the request
-        headers = {"Content-Type": "application/json"}
-
-        api_key = credentials.get("api_key")
-        if api_key:
-            headers["Authorization"] = f"Bearer {api_key}"
-        endpoint_url: Optional[str]
        if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
-            endpoint_url = "https://cloud.perfxlab.cn/v1/"
-        else:
-            endpoint_url = credentials.get("endpoint_url")
-            assert endpoint_url is not None, "endpoint_url is required in credentials"
-            if not endpoint_url.endswith("/"):
-                endpoint_url += "/"
-
-        assert isinstance(endpoint_url, str)
-        endpoint_url = urljoin(endpoint_url, "embeddings")
-
-        extra_model_kwargs = {}
-        if user:
-            extra_model_kwargs["user"] = user
-
-        extra_model_kwargs["encoding_format"] = "float"
-
-        # get model properties
-        context_size = self._get_context_size(model, credentials)
-        max_chunks = self._get_max_chunks(model, credentials)
-
-        inputs = []
-        indices = []
-        used_tokens = 0
-
-        for i, text in enumerate(texts):
-            # Here token count is only an approximation based on the GPT2 tokenizer
-            # TODO: Optimize for better token estimation and chunking
-            num_tokens = self._get_num_tokens_by_gpt2(text)
-
-            if num_tokens >= context_size:
-                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
-                # if num tokens is larger than context length, only use the start
-                inputs.append(text[0:cutoff])
-            else:
-                inputs.append(text)
-            indices += [i]
-
-        batched_embeddings = []
-        _iter = range(0, len(inputs), max_chunks)
-
-        for i in _iter:
-            # Prepare the payload for the request
-            payload = {"input": inputs[i : i + max_chunks], "model": model, **extra_model_kwargs}
-
-            # Make the request to the OpenAI API
-            response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
+            credentials["endpoint_url"] = "https://cloud.perfxlab.cn/v1/"

-            response.raise_for_status()  # Raise an exception for HTTP errors
-            response_data = response.json()
-
-            # Extract embeddings and used tokens from the response
-            embeddings_batch = [data["embedding"] for data in response_data["data"]]
-            embedding_used_tokens = response_data["usage"]["total_tokens"]
-
-            used_tokens += embedding_used_tokens
-            batched_embeddings += embeddings_batch
-
-        # calc usage
-        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
-
-        return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
-
-    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
-        """
-        Approximate number of tokens for given messages using GPT2 tokenizer
-
-        :param model: model name
-        :param credentials: model credentials
-        :param texts: texts to embed
-        :return:
-        """
-        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+        return OAICompatEmbeddingModel._invoke(self, model, credentials, texts, user, input_type)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
@ -136,93 +44,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :param credentials: model credentials
        :return:
        """
-        try:
-            headers = {"Content-Type": "application/json"}
-
-            api_key = credentials.get("api_key")
-
-            if api_key:
-                headers["Authorization"] = f"Bearer {api_key}"
-
-            endpoint_url: Optional[str]
-            if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
-                endpoint_url = "https://cloud.perfxlab.cn/v1/"
-            else:
-                endpoint_url = credentials.get("endpoint_url")
-                assert endpoint_url is not None, "endpoint_url is required in credentials"
-                if not endpoint_url.endswith("/"):
-                    endpoint_url += "/"
-
-            assert isinstance(endpoint_url, str)
-            endpoint_url = urljoin(endpoint_url, "embeddings")
-
-            payload = {"input": "ping", "model": model}
-
-            response = requests.post(url=endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
-
-            if response.status_code != 200:
-                raise CredentialsValidateFailedError(
-                    f"Credentials validation failed with status code {response.status_code}"
-                )
-
-            try:
-                json_result = response.json()
-            except json.JSONDecodeError as e:
-                raise CredentialsValidateFailedError("Credentials validation failed: JSON decode error")
-
-            if "model" not in json_result:
-                raise CredentialsValidateFailedError("Credentials validation failed: invalid response")
-        except CredentialsValidateFailedError:
-            raise
-        except Exception as ex:
-            raise CredentialsValidateFailedError(str(ex))
-
-    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
-        """
-        generate custom model entities from credentials
-        """
-        entity = AIModelEntity(
-            model=model,
-            label=I18nObject(en_US=model),
-            model_type=ModelType.TEXT_EMBEDDING,
-            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
-            model_properties={
-                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 512)),
-                ModelPropertyKey.MAX_CHUNKS: 1,
-            },
-            parameter_rules=[],
-            pricing=PriceConfig(
-                input=Decimal(credentials.get("input_price", 0)),
-                unit=Decimal(credentials.get("unit", 0)),
-                currency=credentials.get("currency", "USD"),
-            ),
-        )
-
-        return entity
-
-    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
-        """
-        Calculate response usage
-
-        :param model: model name
-        :param credentials: model credentials
-        :param tokens: input tokens
-        :return: usage
-        """
-        # get input price info
-        input_price_info = self.get_price(
-            model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
-        )
-
-        # transform usage
-        usage = EmbeddingUsage(
-            tokens=tokens,
-            total_tokens=tokens,
-            unit_price=input_price_info.unit_price,
-            price_unit=input_price_info.unit,
-            total_price=input_price_info.total_amount,
-            currency=input_price_info.currency,
-            latency=time.perf_counter() - self.started_at,
-        )
+        if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
+            credentials["endpoint_url"] = "https://cloud.perfxlab.cn/v1/"

-        return usage
+        OAICompatEmbeddingModel.validate_credentials(self, model, credentials)
--- a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
@ -33,6 +33,8 @@
 - qwen2.5-3b-instruct
 - qwen2.5-1.5b-instruct
 - qwen2.5-0.5b-instruct
+- qwen2.5-14b-instruct-1m
+- qwen2.5-7b-instruct-1m
 - qwen2.5-coder-7b-instruct
 - qwen2-math-72b-instruct
 - qwen2-math-7b-instruct
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@ -219,8 +219,12 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        if response.status_code not in {200, HTTPStatus.OK}:
            raise ServiceUnavailableError(response.message)
        # transform assistant message to prompt message
+        resp_content = response.output.choices[0].message.content
+        # special for qwen-vl
+        if isinstance(resp_content, list):
+            resp_content = resp_content[0]["text"]
        assistant_prompt_message = AssistantPromptMessage(
-            content=response.output.choices[0].message.content,
+            content=resp_content,
        )

        # transform usage
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml
@ -0,0 +1,75 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
+model: qwen2.5-14b-instruct-1m
+label:
+  en_US: qwen2.5-14b-instruct-1m
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 1000000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 8192
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.001'
+  output: '0.003'
+  unit: '0.001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml
@ -0,0 +1,75 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
+model: qwen2.5-7b-instruct-1m
+label:
+  en_US: qwen2.5-7b-instruct-1m
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 1000000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 8192
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.0005'
+  output: '0.001'
+  unit: '0.001'
+  currency: RMB
--- a/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
+++ b/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
@ -13,9 +13,10 @@ class FirecrawlWebExtractor(BaseExtractor):
        api_key: The API key for Firecrawl.
        base_url: The base URL for the Firecrawl API. Defaults to 'https://api.firecrawl.dev'.
        mode: The mode of operation. Defaults to 'scrape'. Options are 'crawl', 'scrape' and 'crawl_return_urls'.
+        only_main_content: Only return the main content of the page excluding headers, navs, footers, etc.
    """

-    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = False):
+    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = True):
        """Initialize with url, api_key, base_url and mode."""
        self._url = url
        self.job_id = job_id
--- a/api/core/tools/provider/builtin/aws/tools/bedrock_config.py
+++ b/api/core/tools/provider/builtin/aws/tools/bedrock_config.py
@ -0,0 +1,114 @@
+"""
+Configuration classes for AWS Bedrock retrieve and generate API
+"""
+
+from dataclasses import dataclass
+from typing import Any, Literal, Optional
+
+
+@dataclass
+class TextInferenceConfig:
+    """Optional text-generation settings.
+
+    Every field defaults to ``None``, i.e. "not specified".
+    """
+
+    # Upper bound on generated tokens.
+    maxTokens: Optional[int] = None
+    # Strings at which generation should stop.
+    stopSequences: Optional[list[str]] = None
+    # Sampling temperature.
+    temperature: Optional[float] = None
+    # Nucleus-sampling probability mass.
+    topP: Optional[float] = None
+
+
+@dataclass
+class PerformanceConfig:
+    """Performance configuration holding a single required latency mode."""
+
+    # Required; restricted by the annotation to "standard" or "optimized".
+    latency: Literal["standard", "optimized"]
+
+
+@dataclass
+class PromptTemplate:
+    """Prompt template configuration wrapping one required template string."""
+
+    # Required text of the prompt template.
+    textPromptTemplate: str
+
+
+@dataclass
+class GuardrailConfig:
+    """Guardrail configuration; both the id and the version are required."""
+
+    # Identifier of the guardrail to apply.
+    guardrailId: str
+    # Version of that guardrail.
+    guardrailVersion: str
+
+
+@dataclass
+class GenerationConfig:
+    """Generation configuration.
+
+    All fields are optional and default to ``None``.
+    """
+
+    # Free-form extra request fields.
+    additionalModelRequestFields: Optional[dict[str, Any]] = None
+    guardrailConfiguration: Optional[GuardrailConfig] = None
+    # Mapping from a config name to its TextInferenceConfig.
+    # NOTE(review): presumably keyed by "textInferenceConfig" -- confirm against callers.
+    inferenceConfig: Optional[dict[str, TextInferenceConfig]] = None
+    performanceConfig: Optional[PerformanceConfig] = None
+    promptTemplate: Optional[PromptTemplate] = None
+
+
+@dataclass
+class VectorSearchConfig:
+    """Vector search configuration.
+
+    All fields are optional and default to ``None``.
+    """
+
+    # Free-form filter expression.
+    filter: Optional[dict[str, Any]] = None
+    # Number of results to return.
+    numberOfResults: Optional[int] = None
+    # Search type override; the annotation allows "HYBRID" or "SEMANTIC".
+    overrideSearchType: Optional[Literal["HYBRID", "SEMANTIC"]] = None
+
+
+@dataclass
+class RetrievalConfig:
+    """Retrieval configuration wrapping the required vector search settings."""
+
+    # Required vector search settings.
+    vectorSearchConfiguration: VectorSearchConfig
+
+
+@dataclass
+class OrchestrationConfig:
+    """Orchestration configuration.
+
+    Has the same optional fields as ``GenerationConfig`` except that it
+    carries no guardrail configuration. All fields default to ``None``.
+    """
+
+    # Free-form extra request fields.
+    additionalModelRequestFields: Optional[dict[str, Any]] = None
+    # Mapping from a config name to its TextInferenceConfig.
+    inferenceConfig: Optional[dict[str, TextInferenceConfig]] = None
+    performanceConfig: Optional[PerformanceConfig] = None
+    promptTemplate: Optional[PromptTemplate] = None
+
+
+@dataclass
+class KnowledgeBaseConfig:
+    """Knowledge base configuration.
+
+    The generation configuration, knowledge base id and model ARN are
+    required; the orchestration and retrieval configurations are optional.
+    """
+
+    generationConfiguration: GenerationConfig
+    # Identifier of the knowledge base to query.
+    knowledgeBaseId: str
+    # ARN of the model to use.
+    modelArn: str
+    orchestrationConfiguration: Optional[OrchestrationConfig] = None
+    retrievalConfiguration: Optional[RetrievalConfig] = None
+
+
+@dataclass
+class SessionConfig:
+    """Session configuration; both fields are optional.
+
+    NOTE(review): confirm against the Bedrock request schema which of these
+    fields belong inside ``sessionConfiguration``; ``RetrieveAndGenerateConfig``
+    also declares its own top-level ``sessionId``.
+    """
+
+    # ARN of a KMS key.
+    kmsKeyArn: Optional[str] = None
+    # Identifier of the session.
+    sessionId: Optional[str] = None
+
+
+@dataclass
+class RetrieveAndGenerateConfiguration:
+    """Retrieve and generate configuration.
+
+    The use of knowledgeBaseConfiguration or externalSourcesConfiguration
+    depends on the type value. Only the knowledge-base variant is declared
+    on this class.
+    """
+
+    # Source type; defaults to "KNOWLEDGE_BASE".
+    type: str = "KNOWLEDGE_BASE"
+    # Settings used when ``type`` selects a knowledge base.
+    knowledgeBaseConfiguration: Optional[KnowledgeBaseConfig] = None
+
+
+@dataclass
+class RetrieveAndGenerateConfig:
+    """Retrieve and generate main configuration.
+
+    ``input`` and ``retrieveAndGenerateConfiguration`` are required; the
+    session fields are optional.
+    """
+
+    # String-to-string mapping carrying the query.
+    # NOTE(review): presumably {"text": <query>} -- confirm against callers.
+    input: dict[str, str]
+    retrieveAndGenerateConfiguration: RetrieveAndGenerateConfiguration
+    sessionConfiguration: Optional[SessionConfig] = None
+    # Identifier of the session.
+    sessionId: Optional[str] = None
--- a/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve_and_generate.py
+++ b/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve_and_generate.py
@ -0,0 +1,324 @@
+import json
+from typing import Any, Optional
+
+import boto3
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class BedrockRetrieveAndGenerateTool(BuiltinTool):
+    bedrock_client: Any = None
+
+    def _create_text_inference_config(
+        self,
+        max_tokens: Optional[int] = None,
+        stop_sequences: Optional[str] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+    ) -> Optional[dict]:
+        """Build the text inference mapping from the supplied settings.
+
+        :param max_tokens: stored under ``maxTokens`` when not ``None``
+        :param stop_sequences: JSON-encoded string; its decoded value is stored
+            under ``stopSequences`` (an empty list if it is not valid JSON)
+        :param temperature: stored under ``temperature`` when not ``None``
+        :param top_p: stored under ``topP`` when not ``None``
+        :return: the populated mapping, or ``None`` when no setting was supplied
+        """
+        config: dict[str, Any] = {}
+
+        # Compare against None instead of relying on truthiness so that an
+        # explicit 0 / 0.0 (e.g. temperature=0.0 on its own) is not dropped.
+        if max_tokens is not None:
+            config["maxTokens"] = max_tokens
+        if stop_sequences:
+            try:
+                config["stopSequences"] = json.loads(stop_sequences)
+            except json.JSONDecodeError:
+                # Best effort: malformed input falls back to no stop sequences.
+                config["stopSequences"] = []
+        if temperature is not None:
+            config["temperature"] = temperature
+        if top_p is not None:
+            config["topP"] = top_p
+
+        return config or None
+
+    def _create_guardrail_config(
+        self,
+        guardrail_id: Optional[str] = None,
+        guardrail_version: Optional[str] = None,
+    ) -> Optional[dict]:
+        """Return the guardrail mapping, or ``None`` unless both values are set."""
+        # A guardrail needs both its id and its version; a partial pair is ignored.
+        if not (guardrail_id and guardrail_version):
+            return None
+        return {"guardrailId": guardrail_id, "guardrailVersion": guardrail_version}
+
+    def _create_generation_config(
+        self,
+        additional_model_fields: Optional[str] = None,
+        guardrail_config: Optional[dict] = None,
+        text_inference_config: Optional[dict] = None,
+        performance_mode: Optional[str] = None,
+        prompt_template: Optional[str] = None,
+    ) -> dict:
+        """Assemble the generation configuration from whichever parts are set.
+
+        ``additional_model_fields`` is a JSON-encoded string; it is skipped when
+        it cannot be decoded. Every other argument is included only when truthy.
+        """
+        generation: dict = {}
+
+        if additional_model_fields:
+            try:
+                extra_fields = json.loads(additional_model_fields)
+            except json.JSONDecodeError:
+                # Undecodable extra fields are silently left out.
+                pass
+            else:
+                generation["additionalModelRequestFields"] = extra_fields
+
+        # (target key, gating value, payload) in the order the keys are emitted.
+        optional_sections = (
+            ("guardrailConfiguration", guardrail_config, guardrail_config),
+            ("inferenceConfig", text_inference_config, {"textInferenceConfig": text_inference_config}),
+            ("performanceConfig", performance_mode, {"latency": performance_mode}),
+            ("promptTemplate", prompt_template, {"textPromptTemplate": prompt_template}),
+        )
+        for key, gate, payload in optional_sections:
+            if gate:
+                generation[key] = payload
+
+        return generation
+
+    def _create_orchestration_config(
+        self,
+        orchestration_additional_model_fields: Optional[str] = None,
+        orchestration_text_inference_config: Optional[dict] = None,
+        orchestration_performance_mode: Optional[str] = None,
+        orchestration_prompt_template: Optional[str] = None,
+    ) -> dict:
+        """Assemble the orchestration configuration from whichever parts are set.
+
+        ``orchestration_additional_model_fields`` is a JSON-encoded string; it is
+        skipped when it cannot be decoded. Other arguments are included only
+        when truthy.
+        """
+        orchestration: dict = {}
+
+        if orchestration_additional_model_fields:
+            try:
+                extra_fields = json.loads(orchestration_additional_model_fields)
+            except json.JSONDecodeError:
+                # Undecodable extra fields are silently left out.
+                pass
+            else:
+                orchestration["additionalModelRequestFields"] = extra_fields
+
+        # section name -> (nested key, value); dict order fixes the output order.
+        sections = {
+            "inferenceConfig": ("textInferenceConfig", orchestration_text_inference_config),
+            "performanceConfig": ("latency", orchestration_performance_mode),
+            "promptTemplate": ("textPromptTemplate", orchestration_prompt_template),
+        }
+        for section, (inner_key, value) in sections.items():
+            if value:
+                orchestration[section] = {inner_key: value}
+
+        return orchestration
+
+    def _create_vector_search_config(
+        self,
+        number_of_results: int = 5,
+        search_type: str = "SEMANTIC",
+        metadata_filter: Optional[dict] = None,
+    ) -> dict:
+        """Build the vector search mapping; ``filter`` is added only when non-empty."""
+        search_config = dict(numberOfResults=number_of_results, overrideSearchType=search_type)
+        if not metadata_filter:
+            # No (or an empty) filter: leave the key out entirely.
+            return search_config
+        search_config["filter"] = metadata_filter
+        return search_config
+
+    def _bedrock_retrieve_and_generate(
+        self,
+        query: str,
+        knowledge_base_id: str,
+        model_arn: str,
+        # Generation Configuration
+        additional_model_fields: Optional[str] = None,
+        guardrail_id: Optional[str] = None,
+        guardrail_version: Optional[str] = None,
+        max_tokens: Optional[int] = None,
+        stop_sequences: Optional[str] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        performance_mode: str = "standard",
+        prompt_template: Optional[str] = None,
+        # Orchestration Configuration
+        orchestration_additional_model_fields: Optional[str] = None,
+        orchestration_max_tokens: Optional[int] = None,
+        orchestration_stop_sequences: Optional[str] = None,
+        orchestration_temperature: Optional[float] = None,
+        orchestration_top_p: Optional[float] = None,
+        orchestration_performance_mode: Optional[str] = None,
+        orchestration_prompt_template: Optional[str] = None,
+        # Retrieval Configuration
+        number_of_results: int = 5,
+        search_type: str = "SEMANTIC",
+        metadata_filter: Optional[dict] = None,
+        # Additional Configuration
+        session_id: Optional[str] = None,
+    ) -> dict[str, Any]:
+        """Call ``retrieve_and_generate`` on ``self.bedrock_client`` and flatten the reply.
+
+        The request is assembled from the ``_create_*`` helpers: generation,
+        orchestration and vector-search settings are nested under a
+        ``KNOWLEDGE_BASE`` configuration together with ``knowledge_base_id``
+        and ``model_arn``.
+
+        :param query: text sent as ``input.text``
+        :param knowledge_base_id: knowledge base identifier
+        :param model_arn: model ARN
+        :param session_id: forwarded as ``sessionId`` when at least two characters long
+        :return: ``{"output": <text>, "citations": [{"text": ..., "references": [...]}]}``;
+            each reference carries ``content``, ``metadata`` and ``location``
+            (the S3 URI for S3 locations, otherwise ``None``)
+        :raises Exception: wraps any error raised while building or sending the request
+        """
+        try:
+            # Create text inference configurations
+            text_inference_config = self._create_text_inference_config(max_tokens, stop_sequences, temperature, top_p)
+            orchestration_text_inference_config = self._create_text_inference_config(
+                orchestration_max_tokens, orchestration_stop_sequences, orchestration_temperature, orchestration_top_p
+            )
+
+            # Create guardrail configuration
+            guardrail_config = self._create_guardrail_config(guardrail_id, guardrail_version)
+
+            # Create vector search configuration
+            vector_search_config = self._create_vector_search_config(number_of_results, search_type, metadata_filter)
+
+            # Create generation configuration
+            generation_config = self._create_generation_config(
+                additional_model_fields, guardrail_config, text_inference_config, performance_mode, prompt_template
+            )
+
+            # Create orchestration configuration
+            orchestration_config = self._create_orchestration_config(
+                orchestration_additional_model_fields,
+                orchestration_text_inference_config,
+                orchestration_performance_mode,
+                orchestration_prompt_template,
+            )
+
+            # Create knowledge base configuration
+            knowledge_base_config = {
+                "knowledgeBaseId": knowledge_base_id,
+                "modelArn": model_arn,
+                "generationConfiguration": generation_config,
+                "orchestrationConfiguration": orchestration_config,
+                "retrievalConfiguration": {"vectorSearchConfiguration": vector_search_config},
+            }
+
+            # Create request configuration
+            request_config: dict[str, Any] = {
+                "input": {"text": query},
+                "retrieveAndGenerateConfiguration": {
+                    "type": "KNOWLEDGE_BASE",
+                    "knowledgeBaseConfiguration": knowledge_base_config,
+                },
+            }
+
+            # Continue an existing session if one was provided. The session id is a
+            # top-level request field; ``sessionConfiguration`` only carries the KMS
+            # key ARN, so placing ``sessionId`` inside it fails parameter validation.
+            if session_id and len(session_id) >= 2:
+                request_config["sessionId"] = session_id
+
+            # Send request
+            response = self.bedrock_client.retrieve_and_generate(**request_config)
+
+            # Process response
+            result: dict[str, Any] = {"output": response.get("output", {}).get("text", ""), "citations": []}
+
+            # Process citations
+            for citation in response.get("citations", []):
+                citation_info: dict[str, Any] = {
+                    "text": citation.get("generatedResponsePart", {}).get("textResponsePart", {}).get("text", ""),
+                    "references": [],
+                }
+
+                for ref in citation.get("retrievedReferences", []):
+                    reference = {
+                        "content": ref.get("content", {}).get("text", ""),
+                        "metadata": ref.get("metadata", {}),
+                        "location": None,
+                    }
+
+                    # Only S3-backed sources expose a URI here; others stay None.
+                    location = ref.get("location", {})
+                    if location.get("type") == "S3":
+                        reference["location"] = location.get("s3Location", {}).get("uri")
+
+                    citation_info["references"].append(reference)
+
+                result["citations"].append(citation_info)
+
+            return result
+
+        except Exception as e:
+            # Keep the original error as __cause__ so the real traceback survives.
+            raise Exception(f"Error calling Bedrock service: {str(e)}") from e
+
+    def _invoke(
+        self,
+        user_id: str,
+        tool_parameters: dict[str, Any],
+    ) -> ToolInvokeMessage:
+        """Run a Bedrock Knowledge Base retrieve-and-generate request.
+
+        Creates the ``bedrock-agent-runtime`` client on first use, parses the
+        optional ``metadata_filter`` JSON string and forwards the remaining
+        tool parameters to ``_bedrock_retrieve_and_generate``.
+
+        Args:
+            user_id: Identifier of the invoking user (not read by this method).
+            tool_parameters: Tool inputs. ``query``, ``knowledge_base_id`` and
+                ``model_arn`` are read as mandatory keys; all others are optional.
+
+        Returns:
+            A JSON message holding the processed response on success, otherwise
+            a text message describing the failure. Failures are reported as
+            messages instead of being raised.
+        """
+        try:
+            # The client is kept on the instance, so region/credentials from
+            # tool_parameters are only applied when no client exists yet.
+            if not self.bedrock_client:
+                aws_region = tool_parameters.get("aws_region")
+                aws_access_key_id = tool_parameters.get("aws_access_key_id")
+                aws_secret_access_key = tool_parameters.get("aws_secret_access_key")
+
+                client_kwargs = {
+                    "service_name": "bedrock-agent-runtime",
+                }
+                if aws_region:
+                    client_kwargs["region_name"] = aws_region
+                # Only add credentials if both access key and secret key are provided
+                if aws_access_key_id and aws_secret_access_key:
+                    client_kwargs.update(
+                        {"aws_access_key_id": aws_access_key_id, "aws_secret_access_key": aws_secret_access_key}
+                    )
+
+                try:
+                    self.bedrock_client = boto3.client(**client_kwargs)
+                except Exception as e:
+                    return self.create_text_message(f"Failed to initialize Bedrock client: {str(e)}")
+
+            # Parse metadata filter if provided
+            metadata_filter = None
+            if metadata_filter_str := tool_parameters.get("metadata_filter"):
+                try:
+                    parsed_filter = json.loads(metadata_filter_str)
+                except (json.JSONDecodeError, TypeError):
+                    # TypeError: the value was not a str/bytes JSON document at all.
+                    return self.create_text_message("metadata_filter must be a valid JSON string")
+                if parsed_filter:  # Empty values (e.g. the "{}" default) mean "no filter"
+                    # Same rule as validate_parameters: the filter must decode to an object.
+                    if not isinstance(parsed_filter, dict):
+                        return self.create_text_message("metadata_filter must be a valid JSON object")
+                    metadata_filter = parsed_filter
+
+            try:
+                response = self._bedrock_retrieve_and_generate(
+                    query=tool_parameters["query"],
+                    knowledge_base_id=tool_parameters["knowledge_base_id"],
+                    model_arn=tool_parameters["model_arn"],
+                    # Generation Configuration
+                    additional_model_fields=tool_parameters.get("additional_model_fields"),
+                    guardrail_id=tool_parameters.get("guardrail_id"),
+                    guardrail_version=tool_parameters.get("guardrail_version"),
+                    max_tokens=tool_parameters.get("max_tokens"),
+                    stop_sequences=tool_parameters.get("stop_sequences"),
+                    temperature=tool_parameters.get("temperature"),
+                    top_p=tool_parameters.get("top_p"),
+                    performance_mode=tool_parameters.get("performance_mode", "standard"),
+                    prompt_template=tool_parameters.get("prompt_template"),
+                    # Orchestration Configuration
+                    orchestration_additional_model_fields=tool_parameters.get("orchestration_additional_model_fields"),
+                    orchestration_max_tokens=tool_parameters.get("orchestration_max_tokens"),
+                    orchestration_stop_sequences=tool_parameters.get("orchestration_stop_sequences"),
+                    orchestration_temperature=tool_parameters.get("orchestration_temperature"),
+                    orchestration_top_p=tool_parameters.get("orchestration_top_p"),
+                    orchestration_performance_mode=tool_parameters.get("orchestration_performance_mode"),
+                    orchestration_prompt_template=tool_parameters.get("orchestration_prompt_template"),
+                    # Retrieval Configuration
+                    number_of_results=tool_parameters.get("number_of_results", 5),
+                    search_type=tool_parameters.get("search_type", "SEMANTIC"),
+                    metadata_filter=metadata_filter,
+                    # Additional Configuration
+                    session_id=tool_parameters.get("session_id"),
+                )
+                return self.create_json_message(response)
+
+            except Exception as e:
+                # Covers missing mandatory keys as well as service failures.
+                return self.create_text_message(f"Tool invocation error: {str(e)}")
+
+        except Exception as e:
+            # Last-resort guard so unexpected failures still produce a message.
+            return self.create_text_message(f"Tool execution error: {str(e)}")
+
+    def validate_parameters(self, parameters: dict[str, Any]) -> None:
+        """Validate the tool parameters.
+
+        Args:
+            parameters: Raw tool parameters to check.
+
+        Raises:
+            ValueError: If ``query``, ``model_arn`` or ``knowledge_base_id`` is
+                missing or empty, or if ``metadata_filter`` is provided but is
+                not a JSON string that decodes to an object.
+        """
+        for param in ("query", "model_arn", "knowledge_base_id"):
+            if not parameters.get(param):
+                raise ValueError(f"{param} is required")
+
+        # Validate metadata filter if provided
+        metadata_filter_str = parameters.get("metadata_filter")
+        if not metadata_filter_str:
+            return
+
+        try:
+            parsed_filter = json.loads(metadata_filter_str)
+        except (json.JSONDecodeError, TypeError) as exc:
+            # TypeError is raised by json.loads for non-string input; report it
+            # through the same ValueError contract as malformed JSON.
+            raise ValueError("metadata_filter must be a valid JSON string") from exc
+
+        if not isinstance(parsed_filter, dict):
+            raise ValueError("metadata_filter must be a valid JSON object")
--- a/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve_and_generate.yaml
+++ b/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve_and_generate.yaml
@ -0,0 +1,358 @@
+identity:
+  name: bedrock_retrieve_and_generate
+  author: AWS
+  label:
+    en_US: Bedrock Retrieve and Generate
+    zh_Hans: Bedrock检索和生成
+  icon: icon.svg
+
+description:
+  human:
+    en_US: A tool for retrieving and generating information using Amazon Bedrock Knowledge Base
+    zh_Hans: 使用Amazon Bedrock知识库进行信息检索和生成的工具
+  llm: A tool for retrieving and generating information using Amazon Bedrock Knowledge Base
+
+parameters:
+  # Additional Configuration
+  - name: session_id
+    type: string
+    required: false
+    label:
+      en_US: Session ID
+      zh_Hans: 会话ID
+    human_description:
+      en_US: Optional session ID for continuous conversations
+      zh_Hans: 用于连续对话的可选会话ID
+    form: form
+
+  # AWS Configuration
+  - name: aws_region
+    type: string
+    required: false
+    label:
+      en_US: AWS Region
+      zh_Hans: AWS区域
+    human_description:
+      en_US: AWS region for the Bedrock service
+      zh_Hans: Bedrock服务的AWS区域
+    form: form
+
+  - name: aws_access_key_id
+    type: string
+    required: false
+    label:
+      en_US: AWS Access Key ID
+      zh_Hans: AWS访问密钥ID
+    human_description:
+      en_US: AWS access key ID for authentication (optional)
+      zh_Hans: 用于身份验证的AWS访问密钥ID（可选）
+    form: form
+
+  - name: aws_secret_access_key
+    type: string
+    required: false
+    label:
+      en_US: AWS Secret Access Key
+      zh_Hans: AWS秘密访问密钥
+    human_description:
+      en_US: AWS secret access key for authentication (optional)
+      zh_Hans: 用于身份验证的AWS秘密访问密钥（可选）
+    form: form
+
+  # Knowledge Base Configuration
+  - name: knowledge_base_id
+    type: string
+    required: true
+    label:
+      en_US: Knowledge Base ID
+      zh_Hans: 知识库ID
+    human_description:
+      en_US: ID of the Bedrock Knowledge Base
+      zh_Hans: Bedrock知识库的ID
+    form: form
+
+  - name: model_arn
+    type: string
+    required: true
+    label:
+      en_US: Model ARN
+      zh_Hans: 模型ARN
+    human_description:
+      en_US: The ARN of the model to use
+      zh_Hans: 要使用的模型ARN
+    form: form
+
+  # Retrieval Configuration
+  - name: query
+    type: string
+    required: true
+    label:
+      en_US: Query
+      zh_Hans: 查询
+    human_description:
+      en_US: The search query to retrieve information
+      zh_Hans: 用于检索信息的查询语句
+    form: llm
+
+  - name: number_of_results
+    type: number
+    required: false
+    label:
+      en_US: Number of Results
+      zh_Hans: 结果数量
+    human_description:
+      en_US: Number of results to retrieve (1-10)
+      zh_Hans: 要检索的结果数量（1-10）
+    default: 5
+    min: 1
+    max: 10
+    form: form
+
+  - name: search_type
+    type: select
+    required: false
+    label:
+      en_US: Search Type
+      zh_Hans: 搜索类型
+    human_description:
+      en_US: Type of search to perform
+      zh_Hans: 要执行的搜索类型
+    default: SEMANTIC
+    options:
+      - value: SEMANTIC
+        label:
+          en_US: Semantic Search
+          zh_Hans: 语义搜索
+      - value: HYBRID
+        label:
+          en_US: Hybrid Search
+          zh_Hans: 混合搜索
+    form: form
+
+  - name: metadata_filter
+    type: string
+    required: false
+    label:
+      en_US: Metadata Filter
+      zh_Hans: 元数据过滤器
+    human_description:
+      en_US: JSON formatted filter conditions for metadata, supporting operations like equals, greaterThan, lessThan, etc.
+      zh_Hans: 元数据的JSON格式过滤条件，支持等于、大于、小于等操作
+    default: "{}"
+    form: form
+
+  # Generation Configuration
+  - name: guardrail_id
+    type: string
+    required: false
+    label:
+      en_US: Guardrail ID
+      zh_Hans: 防护栏ID
+    human_description:
+      en_US: ID of the guardrail to apply
+      zh_Hans: 要应用的防护栏ID
+    form: form
+
+  - name: guardrail_version
+    type: string
+    required: false
+    label:
+      en_US: Guardrail Version
+      zh_Hans: 防护栏版本
+    human_description:
+      en_US: Version of the guardrail to apply
+      zh_Hans: 要应用的防护栏版本
+    form: form
+
+  - name: max_tokens
+    type: number
+    required: false
+    label:
+      en_US: Maximum Tokens
+      zh_Hans: 最大令牌数
+    human_description:
+      en_US: Maximum number of tokens to generate
+      zh_Hans: 生成的最大令牌数
+    default: 2048
+    form: form
+
+  - name: stop_sequences
+    type: string
+    required: false
+    label:
+      en_US: Stop Sequences
+      zh_Hans: 停止序列
+    human_description:
+      en_US: JSON array of strings that will stop generation when encountered
+      zh_Hans: JSON数组格式的字符串，遇到这些序列时将停止生成
+    default: "[]"
+    form: form
+
+  - name: temperature
+    type: number
+    required: false
+    label:
+      en_US: Temperature
+      zh_Hans: 温度
+    human_description:
+      en_US: Controls randomness in the output (0-1)
+      zh_Hans: 控制输出的随机性（0-1）
+    default: 0.7
+    min: 0
+    max: 1
+    form: form
+
+  - name: top_p
+    type: number
+    required: false
+    label:
+      en_US: Top P
+      zh_Hans: Top P值
+    human_description:
+      en_US: Controls diversity via nucleus sampling (0-1)
+      zh_Hans: 通过核采样控制多样性（0-1）
+    default: 0.95
+    min: 0
+    max: 1
+    form: form
+
+  - name: performance_mode
+    type: select
+    required: false
+    label:
+      en_US: Performance Mode
+      zh_Hans: 性能模式
+    human_description:
+      en_US: Select performance optimization mode(performanceConfig.latency)
+      zh_Hans: 选择性能优化模式(performanceConfig.latency)
+    default: standard
+    options:
+      - value: standard
+        label:
+          en_US: Standard
+          zh_Hans: 标准
+      - value: optimized
+        label:
+          en_US: Optimized
+          zh_Hans: 优化
+    form: form
+
+  - name: prompt_template
+    type: string
+    required: false
+    label:
+      en_US: Prompt Template
+      zh_Hans: 提示模板
+    human_description:
+      en_US: Custom prompt template for generation
+      zh_Hans: 用于生成的自定义提示模板
+    form: form
+
+  - name: additional_model_fields
+    type: string
+    required: false
+    label:
+      en_US: Additional Model Fields
+      zh_Hans: 额外模型字段
+    human_description:
+      en_US: JSON formatted additional fields for model configuration
+      zh_Hans: JSON格式的额外模型配置字段
+    default: "{}"
+    form: form
+
+  # Orchestration Configuration
+  - name: orchestration_max_tokens
+    type: number
+    required: false
+    label:
+      en_US: Orchestration Maximum Tokens
+      zh_Hans: 编排最大令牌数
+    human_description:
+      en_US: Maximum number of tokens for orchestration
+      zh_Hans: 编排过程的最大令牌数
+    default: 2048
+    form: form
+
+  - name: orchestration_stop_sequences
+    type: string
+    required: false
+    label:
+      en_US: Orchestration Stop Sequences
+      zh_Hans: 编排停止序列
+    human_description:
+      en_US: JSON array of strings that will stop orchestration when encountered
+      zh_Hans: JSON数组格式的字符串，遇到这些序列时将停止编排
+    default: "[]"
+    form: form
+
+  - name: orchestration_temperature
+    type: number
+    required: false
+    label:
+      en_US: Orchestration Temperature
+      zh_Hans: 编排温度
+    human_description:
+      en_US: Controls randomness in the orchestration output (0-1)
+      zh_Hans: 控制编排输出的随机性（0-1）
+    default: 0.7
+    min: 0
+    max: 1
+    form: form
+
+  - name: orchestration_top_p
+    type: number
+    required: false
+    label:
+      en_US: Orchestration Top P
+      zh_Hans: 编排Top P值
+    human_description:
+      en_US: Controls diversity via nucleus sampling in orchestration (0-1)
+      zh_Hans: 通过核采样控制编排的多样性（0-1）
+    default: 0.95
+    min: 0
+    max: 1
+    form: form
+
+  - name: orchestration_performance_mode
+    type: select
+    required: false
+    label:
+      en_US: Orchestration Performance Mode
+      zh_Hans: 编排性能模式
+    human_description:
+      en_US: Select performance optimization mode for orchestration
+      zh_Hans: 选择编排的性能优化模式
+    default: standard
+    options:
+      - value: standard
+        label:
+          en_US: Standard
+          zh_Hans: 标准
+      - value: optimized
+        label:
+          en_US: Optimized
+          zh_Hans: 优化
+    form: form
+
+  - name: orchestration_prompt_template
+    type: string
+    required: false
+    label:
+      en_US: Orchestration Prompt Template
+      zh_Hans: 编排提示模板
+    human_description:
+      en_US: Custom prompt template for orchestration
+      zh_Hans: 用于编排的自定义提示模板
+    form: form
+
+  - name: orchestration_additional_model_fields
+    type: string
+    required: false
+    label:
+      en_US: Orchestration Additional Model Fields
+      zh_Hans: 编排额外模型字段
+    human_description:
+      en_US: JSON formatted additional fields for orchestration model configuration
+      zh_Hans: JSON格式的编排模型额外配置字段
+    default: "{}"
+    form: form
--- a/api/docker/entrypoint.sh
+++ b/api/docker/entrypoint.sh
@ -20,11 +20,11 @@ if [[ "${MODE}" == "worker" ]]; then
    CONCURRENCY_OPTION="-c ${CELERY_WORKER_AMOUNT:-1}"
  fi

-  exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION --loglevel ${LOG_LEVEL} \
+  exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION --loglevel ${LOG_LEVEL:-INFO} \
    -Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion}

 elif [[ "${MODE}" == "beat" ]]; then
-  exec celery -A app.celery beat --loglevel ${LOG_LEVEL}
+  exec celery -A app.celery beat --loglevel ${LOG_LEVEL:-INFO}
 else
  if [[ "${DEBUG}" == "true" ]]; then
    exec flask run --host=${DIFY_BIND_ADDRESS:-0.0.0.0} --port=${DIFY_PORT:-5001} --debug
--- a/api/extensions/ext_blueprints.py
+++ b/api/extensions/ext_blueprints.py
@ -12,6 +12,7 @@ def init_app(app: DifyApp):
    from controllers.inner_api import bp as inner_api_bp
    from controllers.service_api import bp as service_api_bp
    from controllers.web import bp as web_bp
+    from controllers.admin import bp as admin_bp

    CORS(
        service_api_bp,
@ -46,3 +47,10 @@ def init_app(app: DifyApp):
    app.register_blueprint(files_bp)

    app.register_blueprint(inner_api_bp)
+
+    CORS(
+        admin_bp,
+        resources={r"/*": {"origins": dify_config.CONSOLE_CORS_ALLOW_ORIGINS}},
+        supports_credentials=True,
+    )
+    app.register_blueprint(admin_bp)
--- a/api/extensions/ext_logging.py
+++ b/api/extensions/ext_logging.py
@ -27,12 +27,11 @@ def init_app(app: DifyApp):
    # Always add StreamHandler to log to console
    sh = logging.StreamHandler(sys.stdout)
    sh.addFilter(RequestIdFilter())
-    log_formatter = logging.Formatter(fmt=dify_config.LOG_FORMAT)
-    sh.setFormatter(log_formatter)
    log_handlers.append(sh)

    logging.basicConfig(
        level=dify_config.LOG_LEVEL,
+        format=dify_config.LOG_FORMAT,
        datefmt=dify_config.LOG_DATEFORMAT,
        handlers=log_handlers,
        force=True,
--- a/api/extensions/ext_login.py
+++ b/api/extensions/ext_login.py
@ -17,8 +17,11 @@ login_manager = flask_login.LoginManager()
@login_manager.request_loader
 def load_user_from_request(request_from_flask_login):
    """Load user based on the request."""
-    if request.blueprint not in {"console", "inner_api"}:
+    # TODO: requests to the service_api blueprint still need their JWT token checked here
+
+    if request.blueprint not in {"console", "inner_api", "service_api", "admin_api"}:
        return None
+
    # Check if the user_id contains a dot, indicating the old format
    auth_header = request.headers.get("Authorization", "")
    if not auth_header:
--- a/api/extensions/ext_swagger.py
+++ b/api/extensions/ext_swagger.py
@ -0,0 +1,21 @@
+from dify_app import DifyApp
+
+
+def init_app(app: DifyApp):
+    """Configure and attach the flasgger Swagger UI to the application.
+
+    Stores the Swagger settings under ``app.config['SWAGGER']`` and then
+    instantiates ``flasgger.Swagger`` with the app.
+    """
+    # Imported inside the function rather than at module level.
+    from flasgger import Swagger
+
+    # API-key security scheme: the token is declared as travelling in the
+    # ``access-token`` request header.
+    # NOTE(review): the description text still refers to a cookie - confirm
+    # which transport is actually intended.
+    jwt_scheme = {
+        'type': 'apiKey',
+        'name': 'access-token',
+        'in': 'header',
+        'description': 'JWT Authorization cookie',
+    }
+
+    swagger_settings = {
+        'title': 'API Docs',
+        'uiversion': 3,
+        'securityDefinitions': {'JWT': jwt_scheme},
+    }
+    app.config['SWAGGER'] = swagger_settings
+
+    Swagger(app)
--- a/api/poetry.lock
+++ b/api/poetry.lock
@ -931,36 +931,36 @@ files = [

 [[package]]
 name = "boto3"
-version = "1.35.74"
+version = "1.36.4"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 markers = "python_version == \"3.11\" or python_version >= \"3.12\""
 files = [
-    {file = "boto3-1.35.74-py3-none-any.whl", hash = "sha256:dab5bddbbe57dc707b6f6a1f25dc2823b8e234b6fe99fafef7fc406ab73031b9"},
-    {file = "boto3-1.35.74.tar.gz", hash = "sha256:88370c6845ba71a4dae7f6b357099df29b3965da584be040c8e72c9902bc9492"},
+    {file = "boto3-1.36.4-py3-none-any.whl", hash = "sha256:9f8f699e75ec63fcc98c4dd7290997c7c06c68d3ac8161ad4735fe71f5fe945c"},
+    {file = "boto3-1.36.4.tar.gz", hash = "sha256:eeceeb74ef8b65634d358c27aa074917f4449dc828f79301f1075232618eb502"},
 ]

 [package.dependencies]
-botocore = ">=1.35.74,<1.36.0"
+botocore = ">=1.36.4,<1.37.0"
 jmespath = ">=0.7.1,<2.0.0"
-s3transfer = ">=0.10.0,<0.11.0"
+s3transfer = ">=0.11.0,<0.12.0"

 [package.extras]
 crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

 [[package]]
 name = "botocore"
-version = "1.35.94"
+version = "1.36.5"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 markers = "python_version == \"3.11\" or python_version >= \"3.12\""
 files = [
-    {file = "botocore-1.35.94-py3-none-any.whl", hash = "sha256:d784d944865d8279c79d2301fc09ac28b5221d4e7328fb4e23c642c253b9932c"},
-    {file = "botocore-1.35.94.tar.gz", hash = "sha256:2b3309b356541faa4d88bb957dcac1d8004aa44953c0b7d4521a6cc5d3d5d6ba"},
+    {file = "botocore-1.36.5-py3-none-any.whl", hash = "sha256:6d9f70afa9bf9d21407089dc22b8cc8ec6fa44866d4660858c062c74fc8555eb"},
+    {file = "botocore-1.36.5.tar.gz", hash = "sha256:234ed3d29a8954c37a551c933453bf14c6ae44a69a4f273ffef377a2612ca6a6"},
 ]

 [package.dependencies]
@ -969,7 +969,7 @@ python-dateutil = ">=2.1,<3.0.0"
 urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}

 [package.extras]
-crt = ["awscrt (==0.22.0)"]
+crt = ["awscrt (==0.23.4)"]

 [[package]]
 name = "bottleneck"
@ -2126,7 +2126,6 @@ files = [
    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"},
    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"},
    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"},
-    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"},
    {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"},
    {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"},
    {file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"},
@ -2137,7 +2136,6 @@ files = [
    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"},
    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"},
    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"},
-    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"},
    {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"},
    {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"},
    {file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"},
@ -2814,7 +2812,7 @@ version = "0.9.7.1"
 description = "Extract swagger specs from your flask project"
 optional = false
 python-versions = "*"
-groups = ["tools"]
+groups = ["main", "tools"]
 markers = "python_version == \"3.11\" or python_version >= \"3.12\""
 files = [
    {file = "flasgger-0.9.7.1.tar.gz", hash = "sha256:ca098e10bfbb12f047acc6299cc70a33851943a746e550d86e65e60d4df245fb"},
@ -5730,7 +5728,7 @@ version = "3.1.0"
 description = "A sane and fast Markdown parser with useful plugins and renderers"
 optional = false
 python-versions = ">=3.8"
-groups = ["tools"]
+groups = ["main", "tools"]
 markers = "python_version == \"3.11\" or python_version >= \"3.12\""
 files = [
    {file = "mistune-3.1.0-py3-none-any.whl", hash = "sha256:b05198cf6d671b3deba6c87ec6cf0d4eb7b72c524636eddb6dbf13823b52cee1"},
@ -7803,6 +7801,7 @@ files = [
    {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
    {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
    {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
+    {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
    {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
    {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
    {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@ -9563,22 +9562,22 @@ files = [

 [[package]]
 name = "s3transfer"
-version = "0.10.4"
+version = "0.11.2"
 description = "An Amazon S3 Transfer Manager"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 markers = "python_version == \"3.11\" or python_version >= \"3.12\""
 files = [
-    {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"},
-    {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"},
+    {file = "s3transfer-0.11.2-py3-none-any.whl", hash = "sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc"},
+    {file = "s3transfer-0.11.2.tar.gz", hash = "sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f"},
 ]

 [package.dependencies]
-botocore = ">=1.33.2,<2.0a.0"
+botocore = ">=1.36.0,<2.0a.0"

 [package.extras]
-crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"]
+crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]

 [[package]]
 name = "safetensors"
@ -12248,4 +12247,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.11,<3.13"
-content-hash = "fdc2199389f0e4b6d81b4b7fe2c1d303b1995643fe802ad3a28b196e68c258ae"
+content-hash = "b26521378e2b5423ccebc7b1af0890275ddbc3dbcea748de74b83efd1c06e877"
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -21,7 +21,7 @@ azure-ai-inference = "~1.0.0b3"
 azure-ai-ml = "~1.20.0"
 azure-identity = "1.16.1"
 beautifulsoup4 = "4.12.2"
-boto3 = "1.35.74"
+boto3 = "1.36.4"
 bs4 = "~0.0.1"
 cachetools = "~5.3.0"
 celery = "~5.4.0"
@ -104,6 +104,7 @@ zhipuai = "~2.1.5"
 # Related transparent dependencies with pinned version
 # required by main implementations
 ############################################################
+flasgger = "^0.9.7.1"
 [tool.poetry.group.indirect.dependencies]
 kaleido = "0.2.1"
 rank-bm25 = "~0.2.2"
--- a/api/services/auth/firecrawl/firecrawl.py
+++ b/api/services/auth/firecrawl/firecrawl.py
@ -21,8 +21,8 @@ class FirecrawlAuth(ApiKeyAuthBase):
        headers = self._prepare_headers()
        options = {
            "url": "https://example.com",
-            "excludes": [],
-            "includes": [],
+            "includePaths": [],
+            "excludePaths": [],
            "limit": 1,
            "scrapeOptions": {"onlyMainContent": True},
        }
--- a/api/services/website_service.py
+++ b/api/services/website_service.py
@ -38,9 +38,8 @@ class WebsiteService:
            only_main_content = options.get("only_main_content", False)
            if not crawl_sub_pages:
                params = {
-                    "includes": [],
-                    "excludes": [],
-                    "generateImgAltText": True,
+                    "includePaths": [],
+                    "excludePaths": [],
                    "limit": 1,
                    "scrapeOptions": {"onlyMainContent": only_main_content},
                }
@ -48,9 +47,8 @@ class WebsiteService:
                includes = options.get("includes").split(",") if options.get("includes") else []
                excludes = options.get("excludes").split(",") if options.get("excludes") else []
                params = {
-                    "includes": includes,
-                    "excludes": excludes,
-                    "generateImgAltText": True,
+                    "includePaths": includes,
+                    "excludePaths": excludes,
                    "limit": options.get("limit", 1),
                    "scrapeOptions": {"onlyMainContent": only_main_content},
                }
--- a/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py
+++ b/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py
@ -10,9 +10,8 @@ def test_firecrawl_web_extractor_crawl_mode(mocker):
    base_url = "https://api.firecrawl.dev"
    firecrawl_app = FirecrawlApp(api_key=api_key, base_url=base_url)
    params = {
-        "includes": [],
-        "excludes": [],
-        "generateImgAltText": True,
+        "includePaths": [],
+        "excludePaths": [],
        "maxDepth": 1,
        "limit": 1,
    }
--- a/docker/docker-compose-template.yaml
+++ b/docker/docker-compose-template.yaml
@ -1,4 +1,4 @@
-x-shared-env: &shared-api-worker-env
+x-shared-env: &shared-api-worker-env 
 services:
  # API service
  api:
--- a/web/app/components/base/tag-management/index.tsx
+++ b/web/app/components/base/tag-management/index.tsx
@ -75,7 +75,7 @@ const TagManagementModal = ({ show, type }: TagManagementModalProps) => {
          autoFocus
          value={name}
          onChange={e => setName(e.target.value)}
-          onKeyDown={e => e.key === 'Enter' && createNewTag()}
+          onKeyDown={e => e.key === 'Enter' && !e.nativeEvent.isComposing && createNewTag()}
          onBlur={createNewTag}
        />
        {tagList.map(tag => (
--- a/web/app/components/workflow/nodes/_base/components/node-control.tsx
+++ b/web/app/components/workflow/nodes/_base/components/node-control.tsx
@ -45,13 +45,13 @@ const NodeControl: FC<NodeControlProps> = ({
      `}
    >
      <div
-        className='flex items-center px-0.5 h-6 bg-white rounded-lg border-[0.5px] border-gray-100 shadow-xs text-gray-500'
+        className='flex items-center px-0.5 h-6 bg-components-actionbar-bg rounded-lg border-[0.5px] border-components-actionbar-border backdrop-blur-[5px] shadow-md text-text-tertiary'
        onClick={e => e.stopPropagation()}
      >
        {
          canRunBySingle(data.type) && (
            <div
-              className='flex items-center justify-center w-5 h-5 rounded-md cursor-pointer hover:bg-black/5'
+              className='flex items-center justify-center w-5 h-5 rounded-md cursor-pointer hover:bg-state-base-hover'
              onClick={() => {
                handleNodeDataUpdate({
                  id,
--- a/web/app/components/workflow/nodes/_base/components/panel-operator/index.tsx
+++ b/web/app/components/workflow/nodes/_base/components/panel-operator/index.tsx
@ -54,12 +54,12 @@ const PanelOperator = ({
        <div
          className={`
            flex items-center justify-center w-6 h-6 rounded-md cursor-pointer
-            hover:bg-black/5
-            ${open && 'bg-black/5'}
+            hover:bg-state-base-hover
+            ${open && 'bg-state-base-hover'}
            ${triggerClassName}
          `}
        >
-          <RiMoreFill className={`w-4 h-4 ${inNode ? 'text-gray-500' : 'text-gray-700'}`} />
+          <RiMoreFill className={'w-4 h-4 text-text-tertiary'} />
        </div>
      </PortalToFollowElemTrigger>
      <PortalToFollowElemContent className='z-[11]'>
--- a/web/app/components/workflow/nodes/_base/components/title-description-input.tsx
+++ b/web/app/components/workflow/nodes/_base/components/title-description-input.tsx
@ -33,10 +33,8 @@ export const TitleInput = memo(({
      value={localValue}
      onChange={e => setLocalValue(e.target.value)}
      className={`
-        grow mr-2 px-1 h-6 text-base text-gray-900 font-semibold rounded-lg border border-transparent appearance-none outline-none
-        hover:bg-gray-50 
-        focus:border-gray-300 focus:shadow-xs focus:bg-white caret-[#295EFF]
-        min-w-0
+        grow mr-2 px-1 h-7 text-text-primary system-xl-semibold rounded-md border border-transparent appearance-none outline-none
+        focus:shadow-xs min-w-0
      `}
      placeholder={t('workflow.common.addTitle') || ''}
      onBlur={handleBlur}
@ -66,8 +64,8 @@ export const DescriptionInput = memo(({
    <div
      className={`
        group flex px-2 py-[5px] max-h-[60px] rounded-lg overflow-y-auto
-        border border-transparent hover:bg-gray-50 leading-0
-        ${focus && '!border-gray-300 shadow-xs !bg-gray-50'}
+        leading-0 bg-components-panel-bg
+        ${focus && '!shadow-xs'}
      `}
    >
      <Textarea
--- a/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
+++ b/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
@ -270,7 +270,7 @@ const VarReferencePicker: FC<Props> = ({
                  <AddButton onClick={() => { }}></AddButton>
                </div>
              )
-              : (<div ref={!isSupportConstantValue ? triggerRef : null} className={cn((open || isFocus) ? 'border-gray-300' : 'border-gray-100', 'relative group/wrap flex items-center w-full h-8', !isSupportConstantValue && 'p-1 rounded-lg bg-gray-100 border', isInTable && 'bg-transparent border-none', readonly && 'bg-components-input-bg-disabled')}>
+              : (<div ref={!isSupportConstantValue ? triggerRef : null} className={cn((open || isFocus) ? 'border-gray-300' : 'border-gray-100', 'relative group/wrap flex items-center w-full h-8', !isSupportConstantValue && 'p-1 rounded-lg bg-components-input-bg-normal', isInTable && 'bg-transparent border-none', readonly && 'bg-components-input-bg-disabled')}>
                {isSupportConstantValue
                  ? <div onClick={(e) => {
                    e.stopPropagation()
--- a/web/app/components/workflow/nodes/_base/node.tsx
+++ b/web/app/components/workflow/nodes/_base/node.tsx
@ -107,7 +107,7 @@ const BaseNode: FC<BaseNodeProps> = ({
          'group relative pb-1 shadow-xs',
          'border border-transparent rounded-[15px]',
          data.type !== BlockEnum.Iteration && 'w-[240px] bg-workflow-block-bg',
-          data.type === BlockEnum.Iteration && 'flex flex-col w-full h-full bg-[#fcfdff]/80',
+          data.type === BlockEnum.Iteration && 'flex flex-col w-full h-full bg-workflow-block-bg-transparent border-workflow-block-border',
          !data._runningStatus && 'hover:shadow-lg',
          showRunningBorder && '!border-state-accent-solid',
          showSuccessBorder && '!border-state-success-solid',
@ -169,7 +169,7 @@ const BaseNode: FC<BaseNodeProps> = ({
        }
        <div className={cn(
          'flex items-center px-3 pt-3 pb-2 rounded-t-2xl',
-          data.type === BlockEnum.Iteration && 'bg-[rgba(250,252,255,0.9)]',
+          data.type === BlockEnum.Iteration && 'bg-transparent',
        )}>
          <BlockIcon
            className='shrink-0 mr-2'
--- a/web/app/components/workflow/nodes/iteration-start/index.tsx
+++ b/web/app/components/workflow/nodes/iteration-start/index.tsx
@ -9,7 +9,7 @@ const IterationStartNode = ({ id, data }: NodeProps) => {
  const { t } = useTranslation()

  return (
-    <div className='group flex nodrag items-center justify-center w-11 h-11 mt-1 rounded-2xl border border-workflow-block-border bg-white'>
+    <div className='group flex nodrag items-center justify-center w-11 h-11 mt-1 rounded-2xl border border-workflow-block-border bg-workflow-block-bg shadow-xs'>
      <Tooltip popupContent={t('workflow.blocks.iteration-start')} asChild={false}>
        <div className='flex items-center justify-center w-6 h-6 rounded-full border-[0.5px] border-components-panel-border-subtle bg-util-colors-blue-brand-blue-brand-500'>
          <RiHome5Fill className='w-3 h-3 text-text-primary-on-surface' />
--- a/web/app/components/workflow/nodes/iteration/add-block.tsx
+++ b/web/app/components/workflow/nodes/iteration/add-block.tsx
@ -49,9 +49,9 @@ const AddBlock = ({
  const renderTriggerElement = useCallback((open: boolean) => {
    return (
      <div className={cn(
-        'relative inline-flex items-center px-3 h-8 rounded-lg border-[0.5px] border-gray-50 bg-white shadow-xs cursor-pointer hover:bg-gray-200 text-[13px] font-medium text-gray-700',
-        `${nodesReadOnly && '!cursor-not-allowed opacity-50'}`,
-        open && '!bg-gray-50',
+        'relative inline-flex items-center px-3 h-8 rounded-lg border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg shadow-xs cursor-pointer hover:bg-components-button-secondary-bg-hover system-sm-medium text-components-button-secondary-text backdrop-blur-[5px]',
+        `${nodesReadOnly && '!cursor-not-allowed bg-components-button-secondary-bg-disabled'}`,
+        open && 'bg-components-button-secondary-bg-hover',
      )}>
        <RiAddLine className='mr-1 w-4 h-4' />
        {t('workflow.common.addBlock')}
--- a/web/app/components/workflow/nodes/iteration/node.tsx
+++ b/web/app/components/workflow/nodes/iteration/node.tsx
@ -43,14 +43,14 @@ const Node: FC<NodeProps<IterationNodeType>> = ({

  return (
    <div className={cn(
-      'relative min-w-[240px] min-h-[90px] w-full h-full rounded-2xl bg-[#F0F2F7]/90',
+      'relative min-w-[240px] min-h-[90px] w-full h-full rounded-2xl',
    )}>
      <Background
        id={`iteration-background-${id}`}
        className='rounded-2xl !z-0'
        gap={[14 / zoom, 14 / zoom]}
        size={2 / zoom}
-        color='#E4E5E7'
+        color='var(--color-workflow-canvas-workflow-dot-color)'
      />
      {
        data._isCandidate && (
--- a/web/app/components/workflow/nodes/iteration/panel.tsx
+++ b/web/app/components/workflow/nodes/iteration/panel.tsx
@ -76,7 +76,7 @@ const Panel: FC<NodePanelProps<IterationNodeType>> = ({
        <Field
          title={t(`${i18nPrefix}.input`)}
          operations={(
-            <div className='flex items-center h-[18px] px-1 border border-black/8 rounded-[5px] text-xs font-medium text-gray-500 capitalize'>Array</div>
+            <div className='flex items-center h-[18px] px-1 border border-divider-deep rounded-[5px] system-2xs-medium-uppercase text-text-tertiary capitalize'>Array</div>
          )}
        >
          <VarReferencePicker
@ -94,7 +94,7 @@ const Panel: FC<NodePanelProps<IterationNodeType>> = ({
        <Field
          title={t(`${i18nPrefix}.output`)}
          operations={(
-            <div className='flex items-center h-[18px] px-1 border border-black/8 rounded-[5px] text-xs font-medium text-gray-500 capitalize'>Array</div>
+            <div className='flex items-center h-[18px] px-1 border border-divider-deep rounded-[5px] system-2xs-medium-uppercase text-text-tertiary capitalize'>Array</div>
          )}
        >
          <VarReferencePicker
@ -134,8 +134,7 @@ const Panel: FC<NodePanelProps<IterationNodeType>> = ({

      <div className='px-4 py-2'>
        <Field title={t(`${i18nPrefix}.errorResponseMethod`)} >
-          <Select items={responseMethod} defaultValue={inputs.error_handle_mode} onSelect={changeErrorResponseMode} allowSearch={false}>
-          </Select>
+          <Select items={responseMethod} defaultValue={inputs.error_handle_mode} onSelect={changeErrorResponseMode} allowSearch={false} />
        </Field>
      </div>