Merge remote-tracking branch 'dify/main' into main

1 year ago · 3767923eaf
parent aa71f8b3e6 b4b09ddc3c
commit 3767923eaf
43 changed files with 277 additions and 264 deletions
--- a/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml
@ -1,7 +1,7 @@
 model: Sao10K/L3-8B-Stheno-v3.2
 label:
-  zh_Hans: Sao10K/L3-8B-Stheno-v3.2
+  zh_Hans: L3 8B Stheno V3.2
-  en_US: Sao10K/L3-8B-Stheno-v3.2
+  en_US: L3 8B Stheno V3.2
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/_position.yaml
@ -1,4 +1,5 @@
 # Deepseek Models
 - deepseek/deepseek-r1
 - deepseek/deepseek_v3
 # LLaMA Models
--- a/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml
@ -1,7 +1,7 @@
 model: jondurbin/airoboros-l2-70b
 label:
-  zh_Hans: jondurbin/airoboros-l2-70b
+  zh_Hans: Airoboros L2 70B
-  en_US: jondurbin/airoboros-l2-70b
+  en_US: Airoboros L2 70B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml
@ -0,0 +1,41 @@
 model: deepseek/deepseek-r1
 label:
  zh_Hans: DeepSeek R1
  en_US: DeepSeek R1
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 64000
 parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
 pricing:
  input: '0.04'
  output: '0.04'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml
@ -1,7 +1,7 @@
 model: deepseek/deepseek_v3
 label:
-  zh_Hans: deepseek/deepseek_v3
+  zh_Hans: DeepSeek V3
-  en_US: deepseek/deepseek_v3
+  en_US: DeepSeek V3
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml
@ -1,7 +1,7 @@
 model: cognitivecomputations/dolphin-mixtral-8x22b
 label:
-  zh_Hans: cognitivecomputations/dolphin-mixtral-8x22b
+  zh_Hans: Dolphin Mixtral 8x22B
-  en_US: cognitivecomputations/dolphin-mixtral-8x22b
+  en_US: Dolphin Mixtral 8x22B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml
@ -1,7 +1,7 @@
 model: google/gemma-2-9b-it
 label:
-  zh_Hans: google/gemma-2-9b-it
+  zh_Hans: Gemma 2 9B
-  en_US: google/gemma-2-9b-it
+  en_US: Gemma 2 9B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
@ -1,7 +1,7 @@
 model: nousresearch/hermes-2-pro-llama-3-8b
 label:
-  zh_Hans: nousresearch/hermes-2-pro-llama-3-8b
+  zh_Hans: Hermes 2 Pro Llama 3 8B
-  en_US: nousresearch/hermes-2-pro-llama-3-8b
+  en_US: Hermes 2 Pro Llama 3 8B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml
@ -1,7 +1,7 @@
 model: sao10k/l3-70b-euryale-v2.1
 label:
-  zh_Hans: sao10k/l3-70b-euryale-v2.1
+  zh_Hans: "L3 70B Euryale V2.1"
-  en_US: sao10k/l3-70b-euryale-v2.1
+  en_US: "L3 70B Euryale V2.1"
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml
@ -1,7 +1,7 @@
 model: sao10k/l3-8b-lunaris
 label:
-  zh_Hans: sao10k/l3-8b-lunaris
+  zh_Hans: "Sao10k L3 8B Lunaris"
-  en_US: sao10k/l3-8b-lunaris
+  en_US: "Sao10k L3 8B Lunaris"
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml
@ -1,7 +1,7 @@
 model: sao10k/l31-70b-euryale-v2.2
 label:
-  zh_Hans: sao10k/l31-70b-euryale-v2.2
+  zh_Hans: L31 70B Euryale V2.2
-  en_US: sao10k/l31-70b-euryale-v2.2
+  en_US: L31 70B Euryale V2.2
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3-70b-instruct
+  zh_Hans: Llama3 70b Instruct
-  en_US: meta-llama/llama-3-70b-instruct
+  en_US: Llama3 70b Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3-8b-instruct
 label:
-  zh_Hans: meta-llama/llama-3-8b-instruct
+  zh_Hans: Llama 3 8B Instruct
-  en_US: meta-llama/llama-3-8b-instruct
+  en_US: Llama 3 8B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.1-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-70b-instruct
+  zh_Hans: Llama 3.1 70B Instruct
-  en_US: meta-llama/llama-3.1-70b-instruct
+  en_US: Llama 3.1 70B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.1-8b-instruct-bf16
 label:
-  zh_Hans: meta-llama/llama-3.1-8b-instruct-bf16
+  zh_Hans: Llama 3.1 8B Instruct BF16
-  en_US: meta-llama/llama-3.1-8b-instruct-bf16
+  en_US: Llama 3.1 8B Instruct BF16
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.1-8b-instruct-max
 label:
-  zh_Hans: meta-llama/llama-3.1-8b-instruct-max
+  zh_Hans: "Llama3.1 8B Instruct Max"
-  en_US: meta-llama/llama-3.1-8b-instruct-max
+  en_US: "Llama3.1 8B Instruct Max"
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.1-8b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-8b-instruct
+  zh_Hans: Llama 3.1 8B Instruct
-  en_US: meta-llama/llama-3.1-8b-instruct
+  en_US: Llama 3.1 8B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.2-11b-vision-instruct
 label:
-  zh_Hans: meta-llama/llama-3.2-11b-vision-instruct
+  zh_Hans: "Llama 3.2 11B Vision Instruct"
-  en_US: meta-llama/llama-3.2-11b-vision-instruct
+  en_US: "Llama 3.2 11B Vision Instruct"
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.2-1b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.2-1b-instruct
+  zh_Hans: "Llama 3.2 1B Instruct"
-  en_US: meta-llama/llama-3.2-1b-instruct
+  en_US: "Llama 3.2 1B Instruct"
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-3b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.2-3b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.2-3b-instruct
+  zh_Hans: Llama 3.2 3B Instruct
-  en_US: meta-llama/llama-3.2-3b-instruct
+  en_US: Llama 3.2 3B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml
@ -1,7 +1,7 @@
 model: meta-llama/llama-3.3-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.3-70b-instruct
+  zh_Hans: Llama 3.3 70B Instruct
-  en_US: meta-llama/llama-3.3-70b-instruct
+  en_US: Llama 3.3 70B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml
@ -1,7 +1,7 @@
 model: sophosympatheia/midnight-rose-70b
 label:
-  zh_Hans: sophosympatheia/midnight-rose-70b
+  zh_Hans: Midnight Rose 70B
-  en_US: sophosympatheia/midnight-rose-70b
+  en_US: Midnight Rose 70B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml
@ -1,7 +1,7 @@
 model: mistralai/mistral-7b-instruct
 label:
-  zh_Hans: mistralai/mistral-7b-instruct
+  zh_Hans: Mistral 7B Instruct
-  en_US: mistralai/mistral-7b-instruct
+  en_US: Mistral 7B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml
@ -1,7 +1,7 @@
 model: mistralai/mistral-nemo
 label:
-  zh_Hans: mistralai/mistral-nemo
+  zh_Hans: Mistral Nemo
-  en_US: mistralai/mistral-nemo
+  en_US: Mistral Nemo
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml
@ -1,7 +1,7 @@
 model: gryphe/mythomax-l2-13b
 label:
-  zh_Hans: gryphe/mythomax-l2-13b
+  zh_Hans: Mythomax L2 13B
-  en_US: gryphe/mythomax-l2-13b
+  en_US: Mythomax L2 13B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml
@ -1,7 +1,7 @@
 model: nousresearch/nous-hermes-llama2-13b
 label:
-  zh_Hans: nousresearch/nous-hermes-llama2-13b
+  zh_Hans: Nous Hermes Llama2 13B
-  en_US: nousresearch/nous-hermes-llama2-13b
+  en_US: Nous Hermes Llama2 13B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml
@ -1,7 +1,7 @@
 model: openchat/openchat-7b
 label:
-  zh_Hans: openchat/openchat-7b
+  zh_Hans: OpenChat 7B
-  en_US: openchat/openchat-7b
+  en_US: OpenChat 7B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml
@ -1,7 +1,7 @@
 model: teknium/openhermes-2.5-mistral-7b
 label:
-  zh_Hans: teknium/openhermes-2.5-mistral-7b
+  zh_Hans: Openhermes2.5 Mistral 7B
-  en_US: teknium/openhermes-2.5-mistral-7b
+  en_US: Openhermes2.5 Mistral 7B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-72b-instruct.yaml
@ -1,7 +1,7 @@
 model: qwen/qwen-2-72b-instruct
 label:
-  zh_Hans: qwen/qwen-2-72b-instruct
+  zh_Hans: Qwen2 72B Instruct
-  en_US: qwen/qwen-2-72b-instruct
+  en_US: Qwen2 72B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml
@ -1,7 +1,7 @@
 model: qwen/qwen-2-7b-instruct
 label:
-  zh_Hans: qwen/qwen-2-7b-instruct
+  zh_Hans: Qwen 2 7B Instruct
-  en_US: qwen/qwen-2-7b-instruct
+  en_US: Qwen 2 7B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml
@ -1,7 +1,7 @@
 model: qwen/qwen-2-vl-72b-instruct
 label:
-  zh_Hans: qwen/qwen-2-vl-72b-instruct
+  zh_Hans: Qwen 2 VL 72B Instruct
-  en_US: qwen/qwen-2-vl-72b-instruct
+  en_US: Qwen 2 VL 72B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml
@ -1,7 +1,7 @@
 model: qwen/qwen-2.5-72b-instruct
 label:
-  zh_Hans: qwen/qwen-2.5-72b-instruct
+  zh_Hans: Qwen 2.5 72B Instruct
-  en_US: qwen/qwen-2.5-72b-instruct
+  en_US: Qwen 2.5 72B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml
@ -1,7 +1,7 @@
 model: microsoft/wizardlm-2-8x22b
 label:
-  zh_Hans: microsoft/wizardlm-2-8x22b
+  zh_Hans: Wizardlm 2 8x22B
-  en_US: microsoft/wizardlm-2-8x22b
+  en_US: Wizardlm 2 8x22B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/novita.yaml
+++ b/api/core/model_runtime/model_providers/novita/novita.yaml
@ -8,7 +8,7 @@ icon_small:
  en_US: icon_s_en.svg
 icon_large:
  en_US: icon_l_en.svg
-background: "#eadeff"
+background: "#c7fce2"
 help:
  title:
    en_US: Get your API key from Novita AI
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@ -1,29 +1,13 @@
 import json
 import time
 from decimal import Decimal
 from typing import Optional
 from urllib.parse import urljoin
 import numpy as np
 import requests
 from core.entities.embedding_type import EmbeddingInputType
-from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
-from core.model_runtime.entities.model_entities import (
+from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
-    AIModelEntity,
+    OAICompatEmbeddingModel,
    FetchFrom,
    ModelPropertyKey,
    ModelType,
    PriceConfig,
    PriceType,
 )
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
 from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOaiApiCompat
-class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
+class PerfXCloudEmbeddingModel(OAICompatEmbeddingModel):
    """
    Model class for an OpenAI API-compatible text embedding model.
    """
@ -47,86 +31,10 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :return: embeddings result
        """
        # Prepare headers and payload for the request
        headers = {"Content-Type": "application/json"}
        api_key = credentials.get("api_key")
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        endpoint_url: Optional[str]
        if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
-            endpoint_url = "https://cloud.perfxlab.cn/v1/"
+            credentials["endpoint_url"] = "https://cloud.perfxlab.cn/v1/"
        else:
            endpoint_url = credentials.get("endpoint_url")
            assert endpoint_url is not None, "endpoint_url is required in credentials"
            if not endpoint_url.endswith("/"):
                endpoint_url += "/"
        assert isinstance(endpoint_url, str)
        endpoint_url = urljoin(endpoint_url, "embeddings")
        extra_model_kwargs = {}
        if user:
            extra_model_kwargs["user"] = user
        extra_model_kwargs["encoding_format"] = "float"
        # get model properties
        context_size = self._get_context_size(model, credentials)
        max_chunks = self._get_max_chunks(model, credentials)
        inputs = []
        indices = []
        used_tokens = 0
        for i, text in enumerate(texts):
            # Here token count is only an approximation based on the GPT2 tokenizer
            # TODO: Optimize for better token estimation and chunking
            num_tokens = self._get_num_tokens_by_gpt2(text)
            if num_tokens >= context_size:
                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
                # if num tokens is larger than context length, only use the start
                inputs.append(text[0:cutoff])
            else:
                inputs.append(text)
            indices += [i]
        batched_embeddings = []
        _iter = range(0, len(inputs), max_chunks)
        for i in _iter:
            # Prepare the payload for the request
            payload = {"input": inputs[i : i + max_chunks], "model": model, **extra_model_kwargs}
            # Make the request to the OpenAI API
            response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
-            response.raise_for_status()  # Raise an exception for HTTP errors
+        return OAICompatEmbeddingModel._invoke(self, model, credentials, texts, user, input_type)
            response_data = response.json()
            # Extract embeddings and used tokens from the response
            embeddings_batch = [data["embedding"] for data in response_data["data"]]
            embedding_used_tokens = response_data["usage"]["total_tokens"]
            used_tokens += embedding_used_tokens
            batched_embeddings += embeddings_batch
        # calc usage
        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
        return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Approximate number of tokens for given messages using GPT2 tokenizer
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
@ -136,93 +44,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :param credentials: model credentials
        :return:
        """
-        try:
+        if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
-            headers = {"Content-Type": "application/json"}
+            credentials["endpoint_url"] = "https://cloud.perfxlab.cn/v1/"
            api_key = credentials.get("api_key")
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"
            endpoint_url: Optional[str]
            if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
                endpoint_url = "https://cloud.perfxlab.cn/v1/"
            else:
                endpoint_url = credentials.get("endpoint_url")
                assert endpoint_url is not None, "endpoint_url is required in credentials"
                if not endpoint_url.endswith("/"):
                    endpoint_url += "/"
            assert isinstance(endpoint_url, str)
            endpoint_url = urljoin(endpoint_url, "embeddings")
            payload = {"input": "ping", "model": model}
            response = requests.post(url=endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
            if response.status_code != 200:
                raise CredentialsValidateFailedError(
                    f"Credentials validation failed with status code {response.status_code}"
                )
            try:
                json_result = response.json()
            except json.JSONDecodeError as e:
                raise CredentialsValidateFailedError("Credentials validation failed: JSON decode error")
            if "model" not in json_result:
                raise CredentialsValidateFailedError("Credentials validation failed: invalid response")
        except CredentialsValidateFailedError:
            raise
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))
    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
        """
        generate custom model entities from credentials
        """
        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            model_type=ModelType.TEXT_EMBEDDING,
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 512)),
                ModelPropertyKey.MAX_CHUNKS: 1,
            },
            parameter_rules=[],
            pricing=PriceConfig(
                input=Decimal(credentials.get("input_price", 0)),
                unit=Decimal(credentials.get("unit", 0)),
                currency=credentials.get("currency", "USD"),
            ),
        )
        return entity
    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage
        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        # get input price info
        input_price_info = self.get_price(
            model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
        )
        # transform usage
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at,
        )
-        return usage
+        OAICompatEmbeddingModel.validate_credentials(self, model, credentials)
--- a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
@ -33,6 +33,8 @@
 - qwen2.5-3b-instruct
 - qwen2.5-1.5b-instruct
 - qwen2.5-0.5b-instruct
 - qwen2.5-14b-instruct-1m
 - qwen2.5-7b-instruct-1m
 - qwen2.5-coder-7b-instruct
 - qwen2-math-72b-instruct
 - qwen2-math-7b-instruct
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml
@ -0,0 +1,75 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen2.5-14b-instruct-1m
 label:
  en_US: qwen2.5-14b-instruct-1m
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 1000000
 parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
    default: 0.3
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    help:
      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    help:
      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is set, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    required: false
    type: float
    default: 1.1
    label:
      zh_Hans: 重复惩罚
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the degree of repetition in the model output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
  - name: response_format
    use_template: response_format
 pricing:
  input: '0.001'
  output: '0.003'
  unit: '0.001'
  currency: RMB
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml
@ -0,0 +1,75 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen2.5-7b-instruct-1m
 label:
  en_US: qwen2.5-7b-instruct-1m
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 1000000
 parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
    default: 0.3
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    help:
      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    help:
      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is set, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    required: false
    type: float
    default: 1.1
    label:
      zh_Hans: 重复惩罚
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the degree of repetition in the model output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
  - name: response_format
    use_template: response_format
 pricing:
  input: '0.0005'
  output: '0.001'
  unit: '0.001'
  currency: RMB
--- a/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
+++ b/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
@ -13,9 +13,10 @@ class FirecrawlWebExtractor(BaseExtractor):
        api_key: The API key for Firecrawl.
        base_url: The base URL for the Firecrawl API. Defaults to 'https://api.firecrawl.dev'.
        mode: The mode of operation. Defaults to 'crawl'. Options are 'crawl', 'scrape' and 'crawl_return_urls'.
        only_main_content: Only return the main content of the page excluding headers, navs, footers, etc.
    """
-    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = False):
+    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = True):
        """Initialize with url, job_id, tenant_id, mode and only_main_content."""
        self._url = url
        self.job_id = job_id
--- a/api/extensions/ext_logging.py
+++ b/api/extensions/ext_logging.py
@ -27,12 +27,11 @@ def init_app(app: DifyApp):
    # Always add StreamHandler to log to console
    sh = logging.StreamHandler(sys.stdout)
    sh.addFilter(RequestIdFilter())
    log_formatter = logging.Formatter(fmt=dify_config.LOG_FORMAT)
    sh.setFormatter(log_formatter)
    log_handlers.append(sh)
    logging.basicConfig(
        level=dify_config.LOG_LEVEL,
        format=dify_config.LOG_FORMAT,
        datefmt=dify_config.LOG_DATEFORMAT,
        handlers=log_handlers,
        force=True,
--- a/api/services/auth/firecrawl/firecrawl.py
+++ b/api/services/auth/firecrawl/firecrawl.py
@ -21,8 +21,8 @@ class FirecrawlAuth(ApiKeyAuthBase):
        headers = self._prepare_headers()
        options = {
            "url": "https://example.com",
-            "excludes": [],
+            "includePaths": [],
-            "includes": [],
+            "excludePaths": [],
            "limit": 1,
            "scrapeOptions": {"onlyMainContent": True},
        }
--- a/api/services/website_service.py
+++ b/api/services/website_service.py
@ -38,9 +38,8 @@ class WebsiteService:
            only_main_content = options.get("only_main_content", False)
            if not crawl_sub_pages:
                params = {
-                    "includes": [],
+                    "includePaths": [],
-                    "excludes": [],
+                    "excludePaths": [],
                    "generateImgAltText": True,
                    "limit": 1,
                    "scrapeOptions": {"onlyMainContent": only_main_content},
                }
@ -48,9 +47,8 @@ class WebsiteService:
                includes = options.get("includes").split(",") if options.get("includes") else []
                excludes = options.get("excludes").split(",") if options.get("excludes") else []
                params = {
-                    "includes": includes,
+                    "includePaths": includes,
-                    "excludes": excludes,
+                    "excludePaths": excludes,
                    "generateImgAltText": True,
                    "limit": options.get("limit", 1),
                    "scrapeOptions": {"onlyMainContent": only_main_content},
                }
--- a/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py
+++ b/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py
@ -10,9 +10,8 @@ def test_firecrawl_web_extractor_crawl_mode(mocker):
    base_url = "https://api.firecrawl.dev"
    firecrawl_app = FirecrawlApp(api_key=api_key, base_url=base_url)
    params = {
-        "includes": [],
+        "includePaths": [],
-        "excludes": [],
+        "excludePaths": [],
        "generateImgAltText": True,
        "maxDepth": 1,
        "limit": 1,
    }