Merge branch 'main' into feat/external-knowledge-api

# Conflicts: # api/poetry.lock
2 years ago · 020766a5e8
parent c9e3a9e56a c828a5dfdf
commit 020766a5e8
243 changed files with 4719 additions and 615 deletions
--- a/.github/workflows/web-tests.yml
+++ b/.github/workflows/web-tests.yml
@ -0,0 +1,46 @@
+name: Web Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - web/**
+
+concurrency:
+  group: web-tests-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Web Tests
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./web
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Check changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          files: web/**
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        if: steps.changed-files.outputs.any_changed == 'true'
+        with:
+          node-version: 20
+          cache: yarn
+          cache-dependency-path: ./web/package.json
+
+      - name: Install dependencies
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: yarn install --frozen-lockfile
+
+      - name: Run tests
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: yarn test
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@ -309,7 +309,7 @@ class AppRunner:
            if not prompt_messages:
                prompt_messages = result.prompt_messages

-            if not usage and result.delta.usage:
+            if result.delta.usage:
                usage = result.delta.usage

        if not usage:
--- a/api/core/embedding/cached_embedding.py
+++ b/api/core/embedding/cached_embedding.py
@ -5,6 +5,7 @@ from typing import Optional, cast
 import numpy as np
 from sqlalchemy.exc import IntegrityError

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
                for i in range(0, len(embedding_queue_texts), max_chunks):
                    batch_texts = embedding_queue_texts[i : i + max_chunks]

-                    embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+                    embedding_result = self._model_instance.invoke_text_embedding(
+                        texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+                    )

                    for vector in embedding_result.embeddings:
                        try:
@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
            redis_client.expire(embedding_cache_key, 600)
            return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
        try:
-            embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+            embedding_result = self._model_instance.invoke_text_embedding(
+                texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+            )

            embedding_results = embedding_result.embeddings[0]
            embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
--- a/api/core/embedding/embedding_constant.py
+++ b/api/core/embedding/embedding_constant.py
@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class EmbeddingInputType(Enum):
+    """
+    Enum for embedding input type.
+    """
+
+    DOCUMENT = "document"
+    QUERY = "query"
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@ -3,6 +3,7 @@ import os
 from collections.abc import Callable, Generator, Sequence
 from typing import IO, Optional, Union, cast

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
 from core.entities.provider_entities import ModelLoadBalancingConfiguration
 from core.errors.error import ProviderTokenNotInitError
@ -158,12 +159,15 @@ class ModelInstance:
            tools=tools,
        )

-    def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+    def invoke_text_embedding(
+        self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+    ) -> TextEmbeddingResult:
        """
        Invoke large language model

        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        if not isinstance(self.model_type_instance, TextEmbeddingModel):
@ -176,6 +180,7 @@ class ModelInstance:
            credentials=self.credentials,
            texts=texts,
            user=user,
+            input_type=input_type,
        )

    def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
--- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py
+++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
@ -4,6 +4,7 @@ from typing import Optional

 from pydantic import ConfigDict

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.__base.ai_model import AIModel
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
    model_config = ConfigDict(protected_namespaces=())

    def invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
-        Invoke large language model
+        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        self.started_at = time.perf_counter()

        try:
-            return self._invoke(model, credentials, texts, user)
+            return self._invoke(model, credentials, texts, user, input_type)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
-        Invoke large language model
+        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        raise NotImplementedError
--- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
@ -7,6 +7,7 @@ import numpy as np
 import tiktoken
 from openai import AzureOpenAI

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_

 class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
        base_model_name = credentials["base_model_name"]
        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = AzureOpenAI(**credentials_kwargs)
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional

 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "http://api.baichuan-ai.com/v1/embeddings"

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
--- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
@ -13,6 +13,7 @@ from botocore.exceptions import (
    UnknownServiceError,
 )

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)

 class BedrockTextEmbeddingModel(TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        client_config = Config(region_name=credentials["aws_region"])
--- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ import cohere
 import numpy as np
 from cohere.core import RequestOptions

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        # get model properties
--- a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
@ -15,6 +15,7 @@ help:
    en_US: https://fireworks.ai/account/api-keys
 supported_model_types:
  - llm
+  - text-embedding
 configurate_methods:
  - predefined-model
 provider_credential_schema:
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+label:
+  zh_Hans: Llama 3.2 11B Vision Instruct
+  en_US: Llama 3.2 11B Vision Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+  - name: max_tokens
+    use_template: max_tokens
+  - name: context_length_exceeded_behavior
+    default: None
+    label:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    help:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    type: string
+    options:
+      - None
+      - truncate
+      - error
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.2'
+  output: '0.2'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-1b-instruct
+label:
+  zh_Hans: Llama 3.2 1B Instruct
+  en_US: Llama 3.2 1B Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+  - name: max_tokens
+    use_template: max_tokens
+  - name: context_length_exceeded_behavior
+    default: None
+    label:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    help:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    type: string
+    options:
+      - None
+      - truncate
+      - error
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.1'
+  output: '0.1'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-3b-instruct
+label:
+  zh_Hans: Llama 3.2 3B Instruct
+  en_US: Llama 3.2 3B Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+  - name: max_tokens
+    use_template: max_tokens
+  - name: context_length_exceeded_behavior
+    default: None
+    label:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    help:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    type: string
+    options:
+      - None
+      - truncate
+      - error
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.1'
+  output: '0.1'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+label:
+  zh_Hans: Llama 3.2 90B Vision Instruct
+  en_US: Llama 3.2 90B Vision Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+  - name: max_tokens
+    use_template: max_tokens
+  - name: context_length_exceeded_behavior
+    default: None
+    label:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    help:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    type: string
+    options:
+      - None
+      - truncate
+      - error
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.9'
+  output: '0.9'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
@ -0,0 +1,12 @@
+model: WhereIsAI/UAE-Large-V1
+label:
+  zh_Hans: UAE-Large-V1
+  en_US: UAE-Large-V1
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
+pricing:
+  input: '0.008'
+  unit: '0.000001'
+  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/init.py
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/init.py
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
@ -0,0 +1,12 @@
+model: thenlper/gte-base
+label:
+  zh_Hans: GTE-base
+  en_US: GTE-base
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
+pricing:
+  input: '0.008'
+  unit: '0.000001'
+  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
@ -0,0 +1,12 @@
+model: thenlper/gte-large
+label:
+  zh_Hans: GTE-large
+  en_US: GTE-large
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
+pricing:
+  input: '0.008'
+  unit: '0.000001'
+  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1.5
+label:
+  zh_Hans: nomic-embed-text-v1.5
+  en_US: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 8192
+  max_chunks: 16
+pricing:
+  input: '0.008'
+  unit: '0.000001'
+  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1
+label:
+  zh_Hans: nomic-embed-text-v1
+  en_US: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+  context_size: 8192
+  max_chunks: 16
+pricing:
+  input: '0.008'
+  unit: '0.000001'
+  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
@ -0,0 +1,151 @@
+import time
+from collections.abc import Mapping
+from typing import Optional, Union
+
+import numpy as np
+from openai import OpenAI
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
+
+
+class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
+    """
+    Model class for Fireworks text embedding model.
+    """
+
+    def _invoke(
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+    ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
+
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        client = OpenAI(**credentials_kwargs)
+
+        extra_model_kwargs = {}
+        if user:
+            extra_model_kwargs["user"] = user
+
+        extra_model_kwargs["encoding_format"] = "float"
+
+        context_size = self._get_context_size(model, credentials)
+        max_chunks = self._get_max_chunks(model, credentials)
+
+        inputs = []
+        indices = []
+        used_tokens = 0
+
+        for i, text in enumerate(texts):
+            # Here token count is only an approximation based on the GPT2 tokenizer
+            # TODO: Optimize for better token estimation and chunking
+            num_tokens = self._get_num_tokens_by_gpt2(text)
+
+            if num_tokens >= context_size:
+                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
+                # if num tokens is larger than context length, only use the start
+                inputs.append(text[0:cutoff])
+            else:
+                inputs.append(text)
+            indices += [i]
+
+        batched_embeddings = []
+        _iter = range(0, len(inputs), max_chunks)
+
+        for i in _iter:
+            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                model=model,
+                client=client,
+                texts=inputs[i : i + max_chunks],
+                extra_model_kwargs=extra_model_kwargs,
+            )
+            used_tokens += embedding_used_tokens
+            batched_embeddings += embeddings_batch
+
+        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
+        return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
+
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        """
+        Get number of tokens for given prompt messages
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return:
+        """
+        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+    def validate_credentials(self, model: str, credentials: Mapping) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            # transform credentials to kwargs for model instance
+            credentials_kwargs = self._to_credential_kwargs(credentials)
+            client = OpenAI(**credentials_kwargs)
+
+            # call embedding model
+            self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex))
+
+    def _embedding_invoke(
+        self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
+    ) -> tuple[list[list[float]], int]:
+        """
+        Invoke embedding model
+        :param model: model name
+        :param client: model client
+        :param texts: texts to embed
+        :param extra_model_kwargs: extra model kwargs
+        :return: embeddings and used tokens
+        """
+        response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
+        return [data.embedding for data in response.data], response.usage.total_tokens
+
+    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+        """
+        Calculate response usage
+
+        :param model: model name
+        :param credentials: model credentials
+        :param tokens: input tokens
+        :return: usage
+        """
+        input_price_info = self.get_price(
+            model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
+        )
+
+        usage = EmbeddingUsage(
+            tokens=tokens,
+            total_tokens=tokens,
+            unit_price=input_price_info.unit_price,
+            price_unit=input_price_info.unit,
+            total_price=input_price_info.total_amount,
+            currency=input_price_info.currency,
+            latency=time.perf_counter() - self.started_at,
+        )
+
+        return usage
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-flash-001
+label:
+  en_US: Gemini 1.5 Flash 001
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-flash-002
+label:
+  en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-flash-8b-exp-0924
+label:
+  en_US: Gemini 1.5 Flash 8B 0924
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@ -1,6 +1,6 @@
 model: gemini-1.5-flash-latest
 label:
-  en_US: Gemini 1.5 Flash
+  en_US: Gemini 1.5 Flash Latest
 model_type: llm
 features:
  - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-flash
+label:
+  en_US: Gemini 1.5 Flash
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-pro-001
+label:
+  en_US: Gemini 1.5 Pro 001
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 2097152
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-pro-002
+label:
+  en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 2097152
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@ -1,6 +1,6 @@
 model: gemini-1.5-pro-latest
 label:
-  en_US: Gemini 1.5 Pro
+  en_US: Gemini 1.5 Pro Latest
 model_type: llm
 features:
  - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@ -0,0 +1,48 @@
+model: gemini-1.5-pro
+label:
+  en_US: Gemini 1.5 Pro
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 2097152
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens_to_sample
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@ -27,6 +27,15 @@ parameter_rules:
    default: 4096
    min: 1
    max: 4096
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@ -31,6 +31,15 @@ parameter_rules:
    max: 2048
  - name: response_format
    use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
 import google.generativeai as genai
 import requests
 from google.api_core import exceptions
-from google.generativeai import client
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.client import _ClientManager
+from google.generativeai.types import ContentType, GenerateContentResponse
 from google.generativeai.types.content_types import to_part
 from PIL import Image

@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
                    history.append(content)

        # Create a new ClientManager with tenant's API key
-        new_client_manager = client._ClientManager()
+        new_client_manager = _ClientManager()
        new_client_manager.configure(api_key=credentials["google_api_key"])
        new_custom_client = new_client_manager.make_client("generative")

        google_model._client = new_custom_client

-        safety_settings = {
-            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
-        }
-
        response = google_model.generate_content(
            contents=history,
            generation_config=genai.types.GenerationConfig(**config_kwargs),
            stream=stream,
-            safety_settings=safety_settings,
            tools=self._convert_tools_to_glm_tool(tools) if tools else None,
            request_options={"timeout": 600},
        )
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@ -0,0 +1,25 @@
+model: llama-3.2-11b-text-preview
+label:
+  zh_Hans: Llama 3.2 11B Text (Preview)
+  en_US: Llama 3.2 11B Text (Preview)
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 8192
+pricing:
+  input: '0.05'
+  output: '0.1'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@ -0,0 +1,25 @@
+model: llama-3.2-1b-preview
+label:
+  zh_Hans: Llama 3.2 1B Text (Preview)
+  en_US: Llama 3.2 1B Text (Preview)
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 8192
+pricing:
+  input: '0.05'
+  output: '0.1'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@ -0,0 +1,25 @@
+model: llama-3.2-3b-preview
+label:
+  zh_Hans: Llama 3.2 3B Text (Preview)
+  en_US: Llama 3.2 3B Text (Preview)
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 8192
+pricing:
+  input: '0.05'
+  output: '0.1'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@ -0,0 +1,25 @@
+model: llama-3.2-90b-text-preview
+label:
+  zh_Hans: Llama 3.2 90B Text (Preview)
+  en_US: Llama 3.2 90B Text (Preview)
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 8192
+pricing:
+  input: '0.05'
+  output: '0.1'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ import numpy as np
 import requests
 from huggingface_hub import HfApi, InferenceClient

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/

 class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
        client = InferenceClient(token=credentials["huggingfacehub_api_token"])

        execute_model = model
--- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
@ -1,6 +1,7 @@
 import time
 from typing import Optional

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        server_url = credentials["server_url"]
--- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
 from tencentcloud.common.profile.http_profile import HttpProfile
 from tencentcloud.hunyuan.v20230901 import hunyuan_client, models

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """

--- a/api/core/model_runtime/model_providers/jina/jina.yaml
+++ b/api/core/model_runtime/model_providers/jina/jina.yaml
@ -67,46 +67,3 @@ model_credential_schema:
      required: false
      type: text-input
      default: '8192'
-    - variable: task
-      label:
-        zh_Hans: 下游任务
-        en_US: Downstream task
-      placeholder:
-        zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。
-        en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task.
-      required: false
-      type: select
-      options:
-        - value: retrieval.query
-          label:
-            en_US: retrieval.query
-        - value: retrieval.passage
-          label:
-            en_US: retrieval.passage
-        - value: separation
-          label:
-            en_US: separation
-        - value: classification
-          label:
-            en_US: classification
-        - value: text-matching
-          label:
-            en_US: text-matching
-    - variable: dimensions
-      label:
-        zh_Hans: 输出维度
-        en_US: Output dimensions
-      placeholder:
-        zh_Hans: 输入您的输出维度
-        en_US: Enter output dimensions
-      required: false
-      type: text-input
-    - variable: late_chunking
-      label:
-        zh_Hans: 后期分块
-        en_US: Late chunking
-      placeholder:
-        zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。
-        en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings.
-      required: false
-      type: switch
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional

 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,7 +28,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):

    api_base: str = "https://api.jina.ai/v1"

-    def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict:
+    def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
        """
        Parse model credentials

@ -44,23 +45,20 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):

        data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}

-        task = credentials.get("task")
-        dimensions = credentials.get("dimensions")
-        late_chunking = credentials.get("late_chunking")
-
-        if task is not None:
-            data["task"] = task
-
-        if dimensions is not None:
-            data["dimensions"] = int(dimensions)
-
-        if late_chunking is not None:
-            data["late_chunking"] = late_chunking
+        # model specific parameters
+        if model == "jina-embeddings-v3":
+            # set `task` type according to input type for the best performance
+            data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"

        return data

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -69,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
@ -81,7 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        url = base_url + "/embeddings"
        headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}

-        data = self._to_payload(model=model, texts=texts, credentials=credentials)
+        data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)

        try:
            response = post(url, headers=headers, data=dumps(data))
--- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ from typing import Optional
 from requests import post
 from yarl import URL

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE

 class LocalAITextEmbeddingModel(TextEmbeddingModel):
    """
-    Model class for Jina text embedding model.
+    Model class for LocalAI text embedding model.
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        if len(texts) != 1:
--- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional

 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "https://api.minimax.chat/v1/embeddings"

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["minimax_api_key"]
--- a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional

 import requests

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,7 +28,12 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "https://api.mixedbread.ai/v1"

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -36,6 +42,7 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
--- a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ from typing import Optional
 from nomic import embed
 from nomic import login as nomic_login

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import (
    EmbeddingUsage,
@ -46,6 +47,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -54,6 +56,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        embeddings, prompt_tokens, total_tokens = self.embed_text(
--- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional

 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
    models: list[str] = ["NV-Embed-QA"]

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
--- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ from typing import Optional
 import numpy as np
 import oci

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        # get model properties
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):

            if chunk_json["done"]:
                # calculate num tokens
-                if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
-                    # transform usage
+                if "prompt_eval_count" in chunk_json:
                    prompt_tokens = chunk_json["prompt_eval_count"]
-                    completion_tokens = chunk_json["eval_count"]
                else:
-                    # calculate num tokens
-                    prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
-                    completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+                    prompt_message_content = prompt_messages[0].content
+                    if isinstance(prompt_message_content, str):
+                        prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+                    else:
+                        content_text = ""
+                        for message_content in prompt_message_content:
+                            if message_content.type == PromptMessageContentType.TEXT:
+                                message_content = cast(TextPromptMessageContent, message_content)
+                                content_text += message_content.data
+                        prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
+                completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))

                # transform usage
                usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
--- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
@ -8,6 +8,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """

--- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ import numpy as np
 import tiktoken
 from openai import OpenAI

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        # transform credentials to kwargs for model instance
--- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
@ -7,6 +7,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """

--- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ from typing import Optional
 from requests import post
 from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        server_url = credentials["server_url"]
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@ -7,6 +7,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
    """

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """

--- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional

 from replicate import Client as ReplicateClient

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat

 class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
        client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)

        if "model_version" in credentials:
--- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
+++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
@ -84,8 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
    Model class for Cohere large language model.
    """

-    sagemaker_client: Any = None
+    sagemaker_session: Any = None
    predictor: Any = None
+    sagemaker_endpoint: str = None

    def _handle_chat_generate_response(
        self,
@ -211,7 +212,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
-        if not self.sagemaker_client:
+        if not self.sagemaker_session:
            access_key = credentials.get("aws_access_key_id")
            secret_key = credentials.get("aws_secret_access_key")
            aws_region = credentials.get("aws_region")
@ -226,11 +227,14 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
            else:
                boto_session = boto3.Session()

-            self.sagemaker_client = boto_session.client("sagemaker")
-            sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client)
+            sagemaker_client = boto_session.client("sagemaker")
+            self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
+
+        if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
+            self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
            self.predictor = Predictor(
-                endpoint_name=credentials.get("sagemaker_endpoint"),
-                sagemaker_session=sagemaker_session,
+                endpoint_name=self.sagemaker_endpoint,
+                sagemaker_session=self.sagemaker_session,
                serializer=serializers.JSONSerializer(),
            )

--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ from typing import Any, Optional

 import boto3

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
        return embeddings

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
+        :param input_type: input type
        :return: embeddings result
        """
        # get model properties
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@ -1,25 +1,38 @@
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-14B-Instruct
- Qwen/Qwen2.5-32B-Instruct
 - Qwen/Qwen2.5-72B-Instruct
+- Qwen/Qwen2.5-Math-72B-Instruct
+- Qwen/Qwen2.5-32B-Instruct
+- Qwen/Qwen2.5-14B-Instruct
+- Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-Coder-7B-Instruct
+- deepseek-ai/DeepSeek-V2.5
 - Qwen/Qwen2-72B-Instruct
 - Qwen/Qwen2-57B-A14B-Instruct
 - Qwen/Qwen2-7B-Instruct
 - Qwen/Qwen2-1.5B-Instruct
- 01-ai/Yi-1.5-34B-Chat
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- THUDM/glm-4-9b-chat
- deepseek-ai/DeepSeek-V2.5
 - deepseek-ai/DeepSeek-V2-Chat
 - deepseek-ai/DeepSeek-Coder-V2-Instruct
+- THUDM/glm-4-9b-chat
+- THUDM/chatglm3-6b
+- 01-ai/Yi-1.5-34B-Chat-16K
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- internlm/internlm2_5-20b-chat
 - internlm/internlm2_5-7b-chat
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
 - meta-llama/Meta-Llama-3.1-405B-Instruct
 - meta-llama/Meta-Llama-3.1-70B-Instruct
 - meta-llama/Meta-Llama-3.1-8B-Instruct
- mistralai/Mixtral-8x7B-Instruct-v0.1
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
 - mistralai/Mistral-7B-Instruct-v0.2
+- Pro/Qwen/Qwen2-7B-Instruct
+- Pro/Qwen/Qwen2-1.5B-Instruct
+- Pro/THUDM/glm-4-9b-chat
+- Pro/THUDM/chatglm3-6b
+- Pro/01-ai/Yi-1.5-9B-Chat-16K
+- Pro/01-ai/Yi-1.5-6B-Chat
+- Pro/internlm/internlm2_5-7b-chat
+- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
+- Pro/meta-llama/Meta-Llama-3-8B-Instruct
+- Pro/google/gemma-2-9b-it
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@ -28,3 +28,4 @@ pricing:
  output: '0'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@ -28,3 +28,4 @@ pricing:
  output: '1.26'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@ -1,5 +1,6 @@
 from typing import Optional

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
    OAICompatEmbeddingModel,
@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
        super().validate_credentials(model, credentials)

    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
        self._add_custom_parameters(credentials)
        return super()._invoke(model, credentials, texts, user)

--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
        :param prompt_messages: prompt messages
        :return: llm response chunk generator result
        """
+        completion = ""
        for index, content in enumerate(client.subscribe()):
            if isinstance(content, dict):
                delta = content["data"]
            else:
                delta = content
-
+            completion += delta
            assistant_prompt_message = AssistantPromptMessage(
                content=delta or "",
            )
-
+            temp_assistant_prompt_message = AssistantPromptMessage(
+                content=completion,
+            )
            prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+            completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])

            # transform usage
            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: farui-plus
 label:
  en_US: farui-plus
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@ -18,7 +18,7 @@ from dashscope.common.error import (
    UnsupportedModel,
 )

-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    ImagePromptMessageContent,
@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
    FetchFrom,
    I18nObject,
    ModelFeature,
+    ModelPropertyKey,
    ModelType,
    ParameterRule,
    ParameterType,
@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        :param tools: tools for tool calling
        :return:
        """
+        # Check if the model was added via get_customizable_model_schema
+        if self.get_customizable_model_schema(model, credentials) is not None:
+            # For custom models, tokens are not calculated.
+            return 0
+
        if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
            model = model.replace("-chat", "")
        if model == "farui-plus":
@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        :param credentials: model credentials
        :return: AIModelEntity or None
        """
-        rules = [
-            ParameterRule(
-                name="temperature",
-                type=ParameterType.FLOAT,
-                use_template="temperature",
-                label=I18nObject(zh_Hans="温度", en_US="Temperature"),
-            ),
-            ParameterRule(
-                name="top_p",
-                type=ParameterType.FLOAT,
-                use_template="top_p",
-                label=I18nObject(zh_Hans="Top P", en_US="Top P"),
-            ),
-            ParameterRule(
-                name="top_k",
-                type=ParameterType.INT,
-                min=0,
-                max=99,
-                label=I18nObject(zh_Hans="top_k", en_US="top_k"),
-            ),
-            ParameterRule(
-                name="max_tokens",
-                type=ParameterType.INT,
-                min=1,
-                max=128000,
-                default=1024,
-                label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
-            ),
-            ParameterRule(
-                name="seed",
-                type=ParameterType.INT,
-                default=1234,
-                label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
-            ),
-            ParameterRule(
-                name="repetition_penalty",
-                type=ParameterType.FLOAT,
-                default=1.1,
-                label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
-            ),
-        ]
-
-        entity = AIModelEntity(
+        return AIModelEntity(
            model=model,
-            label=I18nObject(en_US=model),
-            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            label=I18nObject(en_US=model, zh_Hans=model),
            model_type=ModelType.LLM,
-            model_properties={},
-            parameter_rules=rules,
+            features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
+            if credentials.get("function_calling_type") == "tool_call"
+            else [],
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_properties={
+                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+            },
+            parameter_rules=[
+                ParameterRule(
+                    name="temperature",
+                    use_template="temperature",
+                    label=I18nObject(en_US="Temperature", zh_Hans="温度"),
+                    type=ParameterType.FLOAT,
+                ),
+                ParameterRule(
+                    name="max_tokens",
+                    use_template="max_tokens",
+                    default=512,
+                    min=1,
+                    max=int(credentials.get("max_tokens", 1024)),
+                    label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+                    type=ParameterType.INT,
+                ),
+                ParameterRule(
+                    name="top_p",
+                    use_template="top_p",
+                    label=I18nObject(en_US="Top P", zh_Hans="Top P"),
+                    type=ParameterType.FLOAT,
+                ),
+                ParameterRule(
+                    name="top_k",
+                    use_template="top_k",
+                    label=I18nObject(en_US="Top K", zh_Hans="Top K"),
+                    type=ParameterType.FLOAT,
+                ),
+                ParameterRule(
+                    name="frequency_penalty",
+                    use_template="frequency_penalty",
+                    label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
+                    type=ParameterType.FLOAT,
+                ),
+            ],
        )
-
-        return entity
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-coder-turbo-0919
 label:
  en_US: qwen-coder-turbo-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-coder-turbo-latest
 label:
  en_US: qwen-coder-turbo-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-coder-turbo
 label:
  en_US: qwen-coder-turbo
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@ -1,4 +1,4 @@
-# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-long
 label:
  en_US: qwen-long
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus-0816
 label:
  en_US: qwen-math-plus-0816
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus-0919
 label:
  en_US: qwen-math-plus-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus-latest
 label:
  en_US: qwen-math-plus-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus
 label:
  en_US: qwen-math-plus
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-turbo-0919
 label:
  en_US: qwen-math-turbo-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-turbo-latest
 label:
  en_US: qwen-math-turbo-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-turbo
 label:
  en_US: qwen-math-turbo
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0107
 label:
  en_US: qwen-max-0107
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0403, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0403
 label:
  en_US: qwen-max-0403
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0428, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0428
 label:
  en_US: qwen-max-0428
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0919
 label:
  en_US: qwen-max-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-1201
 label:
  en_US: qwen-max-1201
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-latest
 label:
  en_US: qwen-max-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-longcontext
 label:
  en_US: qwen-max-longcontext
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max
 label:
  en_US: qwen-max
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0206
 label:
  en_US: qwen-plus-0206
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0624
 label:
  en_US: qwen-plus-0624
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0723, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0723
 label:
  en_US: qwen-plus-0723
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0806, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0806
 label:
  en_US: qwen-plus-0806
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0919
 label:
  en_US: qwen-plus-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-chat
 label:
  en_US: qwen-plus-chat
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-latest
 label:
  en_US: qwen-plus-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus
 label:
  en_US: qwen-plus
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@ -1,3 +1,6 @@
+# this model corresponds to qwen-turbo-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
+
 model: qwen-turbo-0206
 label:
  en_US: qwen-turbo-0206
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
 model: qwen-turbo-0624
 label:
  en_US: qwen-turbo-0624
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
 model: qwen-turbo-0919
 label:
  en_US: qwen-turbo-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
 model: qwen-turbo-chat
 label:
  en_US: qwen-turbo-chat
--- a/Show More
+++ b/Show More