feat: support predefined models for openrouter (#5494)

2 years ago · 877a2c144b
parent f7900f298f
commit 877a2c144b
25 changed files with 874 additions and 19 deletions
--- a/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
@ -0,0 +1,21 @@
 # Ordered list of the predefined OpenRouter model ids, grouped by vendor.
 # Presumably this order drives how the models are listed to users (inferred
 # from the file name only) — TODO confirm against the loader.
 # NOTE(review): each id is expected to match the `model:` value of one model
 # definition file in this directory — verify when adding or renaming entries.
 - openai/gpt-4o
 - openai/gpt-4
 - openai/gpt-4-32k
 - openai/gpt-3.5-turbo
 - anthropic/claude-3.5-sonnet
 - anthropic/claude-3-haiku
 - anthropic/claude-3-opus
 - anthropic/claude-3-sonnet
 - google/gemini-pro-1.5
 - google/gemini-flash-1.5
 - google/gemini-pro
 - cohere/command-r-plus
 - cohere/command-r
 - meta-llama/llama-3-70b-instruct
 - meta-llama/llama-3-8b-instruct
 - mistralai/mixtral-8x22b-instruct
 - mistralai/mixtral-8x7b-instruct
 - mistralai/mistral-7b-instruct
 - qwen/qwen-2-72b-instruct
 - deepseek/deepseek-chat
 - deepseek/deepseek-coder
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-5-sonnet.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-5-sonnet.yaml
@ -0,0 +1,39 @@
 # Predefined model definition for anthropic/claude-3.5-sonnet on OpenRouter:
 # a chat-mode LLM with a 200000-token context window.
 model: anthropic/claude-3.5-sonnet
 label:
  en_US: claude-3.5-sonnet
 model_type: llm
 # Capabilities declared for this model.
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 200000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 4096.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "3.00"
  output: "15.00"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-haiku.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-haiku.yaml
@ -0,0 +1,39 @@
 # Predefined model definition for anthropic/claude-3-haiku on OpenRouter:
 # a chat-mode LLM with a 200000-token context window.
 model: anthropic/claude-3-haiku
 label:
  en_US: claude-3-haiku
 model_type: llm
 # Capabilities declared for this model.
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 200000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 4096.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.25"
  output: "1.25"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-opus.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-opus.yaml
@ -0,0 +1,39 @@
 # Predefined model definition for anthropic/claude-3-opus on OpenRouter:
 # a chat-mode LLM with a 200000-token context window.
 model: anthropic/claude-3-opus
 label:
  en_US: claude-3-opus
 model_type: llm
 # Capabilities declared for this model.
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 200000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 4096.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "15.00"
  output: "75.00"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-sonnet.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-sonnet.yaml
@ -0,0 +1,39 @@
 # Predefined model definition for anthropic/claude-3-sonnet on OpenRouter:
 # a chat-mode LLM with a 200000-token context window.
 model: anthropic/claude-3-sonnet
 label:
  en_US: claude-3-sonnet
 model_type: llm
 # Capabilities declared for this model.
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 200000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 4096.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "3.00"
  output: "15.00"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/command-r-plus.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/command-r-plus.yaml
@ -0,0 +1,45 @@
 # Predefined model definition for cohere/command-r-plus on OpenRouter:
 # a chat-mode LLM with a 128000-token context window.
 model: cohere/command-r-plus
 label:
  en_US: command-r-plus
 model_type: llm
 # Capabilities declared for this model (no vision entry).
 features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 128000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults and the keys listed beside them override those defaults —
 # TODO confirm.
 parameter_rules:
  # Upper bound set to 5.0 for this model.
  - name: temperature
    use_template: temperature
    max: 5.0
  # Range 0.01 to 0.99, default 0.75.
  - name: top_p
    use_template: top_p
    default: 0.75
    min: 0.01
    max: 0.99
  # Declared in full (no template): optional int in 0..500, default 0.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
    default: 0
    min: 0
    max: 500
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  # Default 1024, maximum 4096; no explicit min is set in this file.
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    max: 4096
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "3"
  output: "15"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/command-r.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/command-r.yaml
@ -0,0 +1,45 @@
 # Predefined model definition for cohere/command-r on OpenRouter:
 # a chat-mode LLM with a 128000-token context window.
 model: cohere/command-r
 label:
  en_US: command-r
 model_type: llm
 # Capabilities declared for this model (no vision entry).
 features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 128000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults and the keys listed beside them override those defaults —
 # TODO confirm.
 parameter_rules:
  # Upper bound set to 5.0 for this model.
  - name: temperature
    use_template: temperature
    max: 5.0
  # Range 0.01 to 0.99, default 0.75.
  - name: top_p
    use_template: top_p
    default: 0.75
    min: 0.01
    max: 0.99
  # Declared in full (no template): optional int in 0..500, default 0.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
    default: 0
    min: 0
    max: 500
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  # Default 1024, maximum 4096; no explicit min is set in this file.
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    max: 4096
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.5"
  output: "1.5"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/deepseek-chat.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/deepseek-chat.yaml
@ -0,0 +1,50 @@
 # Predefined model definition for deepseek/deepseek-chat on OpenRouter:
 # a chat-mode LLM with a 32000-token context window.
 model: deepseek/deepseek-chat
 label:
  en_US: deepseek-chat
 model_type: llm
 # Only agent-thought is declared; no tool-call or vision features.
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 32000
 # Every rule below names a template and then also spells out its own type,
 # bounds and help text; presumably these local keys override the template —
 # TODO confirm.
 parameter_rules:
  # Float in 0.0..2.0, default 1.
  - name: temperature
    use_template: temperature
    type: float
    default: 1
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 控制生成结果的多样性和随机性。数值越小，越严谨；数值越大，越发散。
      en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
  # Int in 1..4096; the default equals the maximum.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  # Float in 0.01..1.00, default 1.
  - name: top_p
    use_template: top_p
    type: float
    default: 1
    min: 0.01
    max: 1.00
    help:
      zh_Hans: 控制生成结果的随机性。数值越小，随机性越弱；数值越大，随机性越强。一般而言，top_p 和 temperature 两个参数选择一个进行调整即可。
      en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
  # Range -2.0..2.0, default 0.
  - name: frequency_penalty
    use_template: frequency_penalty
    default: 0
    min: -2.0
    max: 2.0
    help:
      zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正，那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚，降低模型重复相同内容的可能性。
      en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.14"
  output: "0.28"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/deepseek-coder.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/deepseek-coder.yaml
@ -0,0 +1,30 @@
 # Predefined model definition for deepseek/deepseek-coder on OpenRouter:
 # a chat-mode LLM with a 32000-token context window.
 model: deepseek/deepseek-coder
 label:
  en_US: deepseek-coder
 model_type: llm
 # Only agent-thought is declared; no tool-call or vision features.
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 32000
 # Tunable request parameters. Each rule names a template and sets its own
 # bounds; presumably the local keys override the template — TODO confirm.
 parameter_rules:
  # Range 0..1, default 0.5.
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  # Range 0..1, default 1.
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  # Range 1..4096, default 1024.
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.14"
  output: "0.28"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-flash.yaml
@ -0,0 +1,39 @@
 # Predefined model definition for google/gemini-flash-1.5 on OpenRouter:
 # a chat-mode LLM with a 1048576-token context window.
 model: google/gemini-flash-1.5
 label:
  en_US: gemini-flash-1.5
 model_type: llm
 # Capabilities declared for this model.
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 1048576
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 8192.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.25"
  output: "0.75"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-pro.yaml
@ -0,0 +1,39 @@
 # Predefined model definition for google/gemini-pro-1.5 on OpenRouter:
 # a chat-mode LLM with a 1048576-token context window.
 model: google/gemini-pro-1.5
 label:
  en_US: gemini-pro-1.5
 model_type: llm
 # Capabilities declared for this model.
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 1048576
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 8192.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "2.5"
  output: "7.5"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gemini-pro.yaml
@ -0,0 +1,38 @@
 # Predefined model definition for google/gemini-pro on OpenRouter:
 # a chat-mode LLM with a 30720-token context window.
 model: google/gemini-pro
 label:
  en_US: gemini-pro
 model_type: llm
 # Capabilities declared for this model; unlike the gemini 1.5 entries, no
 # vision feature is listed.
 features:
  - agent-thought
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 30720
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # top_k is declared in full (no template); it is optional and no min/max
  # bounds are set here.
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  # Required; the default equals the maximum of 2048.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 2048
  - name: response_format
    use_template: response_format
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.125"
  output: "0.375"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-3.5-turbo.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-3.5-turbo.yaml
@ -0,0 +1,42 @@
 # Predefined model definition for openai/gpt-3.5-turbo on OpenRouter:
 # a chat-mode LLM with a 16385-token context window.
 model: openai/gpt-3.5-turbo
 label:
  en_US: gpt-3.5-turbo
 model_type: llm
 # Capabilities declared for this model (no vision entry).
 features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 16385
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  # Range 1..4096, default 512.
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 4096
  # Declared in full (no template): optional string limited to the two
  # options listed below.
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: response_format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.5"
  output: "1.5"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4-32k.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4-32k.yaml
@ -0,0 +1,57 @@
 # Predefined model definition for openai/gpt-4-32k on OpenRouter:
 # a chat-mode LLM with a 32768-token context window.
 model: openai/gpt-4-32k
 label:
  en_US: gpt-4-32k
 model_type: llm
 # Capabilities declared for this model (no vision entry).
 features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 32768
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  # Range 1..32768, default 512.
  # NOTE(review): max equals context_size (32768), which leaves no room for
  # the prompt at the upper bound — confirm this is intended.
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 32768
  # Optional int, declared in full (no template). The help texts are plain
  # multi-line scalars, so their continuation lines fold into one string.
  - name: seed
    label:
      zh_Hans: 种子
      en_US: Seed
    type: int
    help:
      zh_Hans:
        如果指定，模型将尽最大努力进行确定性采样，使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性，您应该参考 system_fingerprint
        响应参数来监视变化。
      en_US:
        If specified, model will make a best effort to sample deterministically,
        such that repeated requests with the same seed and parameters should return
        the same result. Determinism is not guaranteed, and you should refer to the
        system_fingerprint response parameter to monitor changes in the backend.
    required: false
  # Declared in full (no template): optional string limited to the two
  # options listed below.
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: response_format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "60"
  output: "120"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4.yaml
@ -0,0 +1,57 @@
 # Predefined model definition for openai/gpt-4 on OpenRouter:
 # a chat-mode LLM with an 8192-token context window.
 model: openai/gpt-4
 label:
  en_US: gpt-4
 model_type: llm
 # Capabilities declared for this model (no vision entry).
 features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 8192
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  # Range 1..8192, default 512.
  # NOTE(review): max equals context_size (8192), which leaves no room for
  # the prompt at the upper bound — confirm this is intended.
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  # Optional int, declared in full (no template). The help texts are plain
  # multi-line scalars, so their continuation lines fold into one string.
  - name: seed
    label:
      zh_Hans: 种子
      en_US: Seed
    type: int
    help:
      zh_Hans:
        如果指定，模型将尽最大努力进行确定性采样，使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性，您应该参考 system_fingerprint
        响应参数来监视变化。
      en_US:
        If specified, model will make a best effort to sample deterministically,
        such that repeated requests with the same seed and parameters should return
        the same result. Determinism is not guaranteed, and you should refer to the
        system_fingerprint response parameter to monitor changes in the backend.
    required: false
  # Declared in full (no template): optional string limited to the two
  # options listed below.
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: response_format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "30"
  output: "60"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o.yaml
@ -0,0 +1,43 @@
 # Predefined model definition for openai/gpt-4o on OpenRouter:
 # a chat-mode LLM with a 128000-token context window.
 model: openai/gpt-4o
 label:
  en_US: gpt-4o
 model_type: llm
 # Capabilities declared for this model, including vision.
 features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
  - vision
 model_properties:
  mode: chat
  context_size: 128000
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  # Range 1..4096, default 512.
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 4096
  # Declared in full (no template): optional string limited to the two
  # options listed below.
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: response_format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "5.00"
  output: "15.00"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3-70b-instruct.yaml
@ -0,0 +1,23 @@
 # Predefined model definition for meta-llama/llama-3-70b-instruct on
 # OpenRouter, with an 8192-token context window. No `features` list is
 # declared for this model.
 model: meta-llama/llama-3-70b-instruct
 label:
  en_US: llama-3-70b-instruct
 model_type: llm
 model_properties:
  # Declared as completion mode rather than chat.
  mode: completion
  context_size: 8192
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # Required; range 1..2048, default 512.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.59"
  output: "0.79"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3-8b-instruct.yaml
@ -0,0 +1,23 @@
 # Predefined model definition for meta-llama/llama-3-8b-instruct on
 # OpenRouter, with an 8192-token context window. No `features` list is
 # declared for this model.
 model: meta-llama/llama-3-8b-instruct
 label:
  en_US: llama-3-8b-instruct
 model_type: llm
 model_properties:
  # Declared as completion mode rather than chat.
  mode: completion
  context_size: 8192
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  # Required; range 1..2048, default 512.
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.07"
  output: "0.07"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llm.py
@ -9,38 +9,40 @@ from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAI
 class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel):
-    def _update_endpoint_url(self, credentials: dict):
+    def _update_credential(self, model: str, credentials: dict):
        credentials['endpoint_url'] = "https://openrouter.ai/api/v1"
-        return credentials
+        credentials['mode'] = self.get_model_mode(model).value
        credentials['function_calling_type'] = 'tool_call'
        return
    def _invoke(self, model: str, credentials: dict,
                prompt_messages: list[PromptMessage], model_parameters: dict,
                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
                stream: bool = True, user: Optional[str] = None) \
            -> Union[LLMResult, Generator]:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)
-        return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
    def validate_credentials(self, model: str, credentials: dict) -> None:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)
-        return super().validate_credentials(model, cred_with_endpoint)
+        return super().validate_credentials(model, credentials)
    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
                  tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
                  stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)
-        return super()._generate(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)
-        return super().get_customizable_model_schema(model, cred_with_endpoint)
+        return super().get_customizable_model_schema(model, credentials)
    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                       tools: Optional[list[PromptMessageTool]] = None) -> int:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)
-        return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
+        return super().get_num_tokens(model, credentials, prompt_messages, tools)
--- a/api/core/model_runtime/model_providers/openrouter/llm/mistral-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mistral-7b-instruct.yaml
@ -0,0 +1,30 @@
 # Predefined model definition for mistralai/mistral-7b-instruct on
 # OpenRouter, with an 8000-token context window.
 model: mistralai/mistral-7b-instruct
 label:
  en_US: mistral-7b-instruct
 model_type: llm
 # Only agent-thought is declared; no tool-call or vision features.
 features:
  - agent-thought
 model_properties:
  # Declared as completion mode rather than chat.
  mode: completion
  context_size: 8000
 # Tunable request parameters. Each rule names a template and sets its own
 # bounds; presumably the local keys override the template — TODO confirm.
 parameter_rules:
  # Range 0..1, default 0.7.
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  # Range 0..1, default 1.
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  # Range 1..2048, default 1024.
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 2048
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.07"
  output: "0.07"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x22b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x22b-instruct.yaml
@ -0,0 +1,30 @@
 # Predefined model definition for mistralai/mixtral-8x22b-instruct on
 # OpenRouter, with a 64000-token context window.
 model: mistralai/mixtral-8x22b-instruct
 label:
  en_US: mixtral-8x22b-instruct
 model_type: llm
 # Only agent-thought is declared; no tool-call or vision features.
 features:
  - agent-thought
 model_properties:
  # Declared as completion mode rather than chat.
  mode: completion
  context_size: 64000
 # Tunable request parameters. Each rule names a template and sets its own
 # bounds; presumably the local keys override the template — TODO confirm.
 parameter_rules:
  # Range 0..1, default 0.7.
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  # Range 0..1, default 1.
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  # Range 1..8000, default 1024.
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.65"
  output: "0.65"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x7b-instruct.yaml
@ -0,0 +1,31 @@
 # Predefined model definition for mistralai/mixtral-8x7b-instruct on
 # OpenRouter, with a 32000-token context window.
 model: mistralai/mixtral-8x7b-instruct
 # This entry provides both a zh_Hans and an en_US label (same text).
 label:
  zh_Hans: mixtral-8x7b-instruct
  en_US: mixtral-8x7b-instruct
 model_type: llm
 # Only agent-thought is declared; no tool-call or vision features.
 features:
  - agent-thought
 model_properties:
  # Declared as completion mode rather than chat.
  mode: completion
  context_size: 32000
 # Tunable request parameters. Each rule names a template and sets its own
 # bounds; presumably the local keys override the template — TODO confirm.
 parameter_rules:
  # Range 0..1, default 0.7.
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  # Range 0..1, default 1.
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  # Range 1..8000, default 1024.
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 pricing:
  input: "0.24"
  output: "0.24"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/qwen2-72b-instruct.yaml
@ -0,0 +1,30 @@
 # Predefined model definition for qwen/qwen-2-72b-instruct on OpenRouter,
 # with a 32768-token context window.
 model: qwen/qwen-2-72b-instruct
 label:
  en_US: qwen-2-72b-instruct
 model_type: llm
 # Only agent-thought is declared; no tool-call or vision features.
 features:
  - agent-thought
 model_properties:
  # Declared as completion mode rather than chat.
  mode: completion
  context_size: 32768
 # Tunable request parameters. Entries with `use_template` presumably inherit
 # shared defaults defined outside this file — TODO confirm.
 parameter_rules:
  - name: temperature
    use_template: temperature
  # Int in 1..4096, default 512, with its own help text.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
 # Prices are kept as quoted strings. With unit "0.000001" they presumably
 # read as USD per one million tokens — TODO confirm.
 # NOTE(review): these input/output values are identical to the ones in the
 # meta-llama/llama-3-70b-instruct definition — confirm they are this model's
 # actual prices and not a copy-paste leftover.
 pricing:
  input: "0.59"
  output: "0.79"
  unit: "0.000001"
  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/openrouter.py
+++ b/api/core/model_runtime/model_providers/openrouter/openrouter.py
@ -1,5 +1,7 @@
 import logging
 from core.model_runtime.entities.model_entities import ModelType
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.model_provider import ModelProvider
 logger = logging.getLogger(__name__)
@ -8,4 +10,15 @@ logger = logging.getLogger(__name__)
 class OpenRouterProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
-        pass
+        try:
            model_instance = self.get_model_instance(ModelType.LLM)
            model_instance.validate_credentials(
                model='openai/gpt-3.5-turbo',
                credentials=credentials
            )
        except CredentialsValidateFailedError as ex:
            raise ex
        except Exception as ex:
            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
            raise ex
--- a/api/core/model_runtime/model_providers/openrouter/openrouter.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/openrouter.yaml
@ -1,6 +1,6 @@
 provider: openrouter
 label:
-  en_US: openrouter.ai
+  en_US: OpenRouter
 icon_small:
  en_US: openrouter_square.svg
 icon_large:
@ -15,6 +15,7 @@ help:
 supported_model_types:
  - llm
 configurate_methods:
  - predefined-model
  - customizable-model
 model_credential_schema:
  model:
@ -82,13 +83,23 @@ model_credential_schema:
        en_US: Vision Support
      type: radio
      required: false
-      default: 'no_support'
+      default: "no_support"
      options:
-        - value: 'support'
+        - value: "support"
          label:
-            en_US: 'Yes'
+            en_US: "Yes"
            zh_Hans: 是
-        - value: 'no_support'
+        - value: "no_support"
          label:
-            en_US: 'No'
+            en_US: "No"
            zh_Hans: 否
 provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      required: true
      label:
        en_US: API Key
      type: secret-input
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key