feat: support predefined models for openrouter (#5494)

2 years ago · 877a2c144b
parent f7900f298f
commit 877a2c144b
25 changed files with 874 additions and 19 deletions
--- a/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
@ -0,0 +1,21 @@
+- openai/gpt-4o
+- openai/gpt-4
+- openai/gpt-4-32k
+- openai/gpt-3.5-turbo
+- anthropic/claude-3.5-sonnet
+- anthropic/claude-3-haiku
+- anthropic/claude-3-opus
+- anthropic/claude-3-sonnet
+- google/gemini-pro-1.5
+- google/gemini-flash-1.5
+- google/gemini-pro
+- cohere/command-r-plus
+- cohere/command-r
+- meta-llama/llama-3-70b-instruct
+- meta-llama/llama-3-8b-instruct
+- mistralai/mixtral-8x22b-instruct
+- mistralai/mixtral-8x7b-instruct
+- mistralai/mistral-7b-instruct
+- qwen/qwen-2-72b-instruct
+- deepseek/deepseek-chat
+- deepseek/deepseek-coder
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-5-sonnet.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-5-sonnet.yaml
@ -0,0 +1,39 @@
+model: anthropic/claude-3.5-sonnet
+label:
+  en_US: claude-3.5-sonnet
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 200000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 4096
+    min: 1
+    max: 4096
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "3.00"
+  output: "15.00"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-haiku.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-haiku.yaml
@ -0,0 +1,39 @@
+model: anthropic/claude-3-haiku
+label:
+  en_US: claude-3-haiku
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 200000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 4096
+    min: 1
+    max: 4096
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "0.25"
+  output: "1.25"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-opus.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-opus.yaml
@ -0,0 +1,39 @@
+model: anthropic/claude-3-opus
+label:
+  en_US: claude-3-opus
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 200000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 4096
+    min: 1
+    max: 4096
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "15.00"
+  output: "75.00"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/claude-3-sonnet.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/claude-3-sonnet.yaml
@ -0,0 +1,39 @@
+model: anthropic/claude-3-sonnet
+label:
+  en_US: claude-3-sonnet
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 200000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 4096
+    min: 1
+    max: 4096
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "3.00"
+  output: "15.00"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/command-r-plus.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/command-r-plus.yaml
@ -0,0 +1,45 @@
+model: cohere/command-r-plus
+label:
+  en_US: command-r-plus
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: top_p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 1024
+    max: 4096
+pricing:
+  input: "3"
+  output: "15"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/command-r.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/command-r.yaml
@ -0,0 +1,45 @@
+model: cohere/command-r
+label:
+  en_US: command-r
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: top_p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 1024
+    max: 4096
+pricing:
+  input: "0.5"
+  output: "1.5"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/deepseek-chat.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/deepseek-chat.yaml
@ -0,0 +1,50 @@
+model: deepseek/deepseek-chat
+label:
+  en_US: deepseek-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 1
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 控制生成结果的多样性和随机性。数值越小，越严谨；数值越大，越发散。
+      en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 1
+    min: 0.01
+    max: 1.00
+    help:
+      zh_Hans: 控制生成结果的随机性。数值越小，随机性越弱；数值越大，随机性越强。一般而言，top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    default: 0
+    min: -2.0
+    max: 2.0
+    help:
+      zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正，那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚，降低模型重复相同内容的可能性。
+      en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
+pricing:
+  input: "0.14"
+  output: "0.28"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/deepseek-coder.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/deepseek-coder.yaml
@ -0,0 +1,30 @@
+model: deepseek/deepseek-coder
+label:
+  en_US: deepseek-coder
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+pricing:
+  input: "0.14"
+  output: "0.28"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-flash.yaml
@ -0,0 +1,39 @@
+model: google/gemini-flash-1.5
+label:
+  en_US: gemini-flash-1.5
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "0.25"
+  output: "0.75"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gemini-1.5-pro.yaml
@ -0,0 +1,39 @@
+model: google/gemini-pro-1.5
+label:
+  en_US: gemini-pro-1.5
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 8192
+    min: 1
+    max: 8192
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "2.5"
+  output: "7.5"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gemini-pro.yaml
@ -0,0 +1,38 @@
+model: google/gemini-pro
+label:
+  en_US: gemini-pro
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 30720
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 2048
+    min: 1
+    max: 2048
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: "0.125"
+  output: "0.375"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-3.5-turbo.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-3.5-turbo.yaml
@ -0,0 +1,42 @@
+model: openai/gpt-3.5-turbo
+label:
+  en_US: gpt-3.5-turbo
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 16385
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 4096
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "0.5"
+  output: "1.5"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4-32k.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4-32k.yaml
@ -0,0 +1,57 @@
+model: openai/gpt-4-32k
+label:
+  en_US: gpt-4-32k
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 32768
+  - name: seed
+    label:
+      zh_Hans: 种子
+      en_US: Seed
+    type: int
+    help:
+      zh_Hans:
+        如果指定，模型将尽最大努力进行确定性采样，使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性，您应该参考 system_fingerprint
+        响应参数来监视变化。
+      en_US:
+        If specified, model will make a best effort to sample deterministically,
+        such that repeated requests with the same seed and parameters should return
+        the same result. Determinism is not guaranteed, and you should refer to the
+        system_fingerprint response parameter to monitor changes in the backend.
+    required: false
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "60"
+  output: "120"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4.yaml
@ -0,0 +1,57 @@
+model: openai/gpt-4
+label:
+  en_US: gpt-4
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 8192
+  - name: seed
+    label:
+      zh_Hans: 种子
+      en_US: Seed
+    type: int
+    help:
+      zh_Hans:
+        如果指定，模型将尽最大努力进行确定性采样，使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性，您应该参考 system_fingerprint
+        响应参数来监视变化。
+      en_US:
+        If specified, model will make a best effort to sample deterministically,
+        such that repeated requests with the same seed and parameters should return
+        the same result. Determinism is not guaranteed, and you should refer to the
+        system_fingerprint response parameter to monitor changes in the backend.
+    required: false
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "30"
+  output: "60"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o.yaml
@ -0,0 +1,43 @@
+model: openai/gpt-4o
+label:
+  en_US: gpt-4o
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+  - vision
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 4096
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "5.00"
+  output: "15.00"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3-70b-instruct.yaml
@ -0,0 +1,23 @@
+model: meta-llama/llama-3-70b-instruct
+label:
+  en_US: llama-3-70b-instruct
+model_type: llm
+model_properties:
+  mode: completion
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 512
+    min: 1
+    max: 2048
+pricing:
+  input: "0.59"
+  output: "0.79"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3-8b-instruct.yaml
@ -0,0 +1,23 @@
+model: meta-llama/llama-3-8b-instruct
+label:
+  en_US: llama-3-8b-instruct
+model_type: llm
+model_properties:
+  mode: completion
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 512
+    min: 1
+    max: 2048
+pricing:
+  input: "0.07"
+  output: "0.07"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llm.py
@ -9,38 +9,40 @@ from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAI

 class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel):
+    # Thin wrapper over the OpenAI-API-compatible LLM: each entry point below
+    # first rewrites `credentials` through `_update_credential` and then
+    # delegates to the same-named method on the parent class.

-    def _update_endpoint_url(self, credentials: dict):
+    def _update_credential(self, model: str, credentials: dict):
+        # Mutates `credentials` in place and returns nothing: pins the endpoint
+        # to OpenRouter, sets `mode` from `get_model_mode(model)`, and sets
+        # `function_calling_type` to 'tool_call'.
+        # NOTE(review): `mode` and `function_calling_type` overwrite whatever the
+        # caller already stored under those keys -- confirm this is intended for
+        # customizable (user-defined) models as well as predefined ones.
        credentials['endpoint_url'] = "https://openrouter.ai/api/v1"
-        return credentials
+        credentials['mode'] = self.get_model_mode(model).value
+        credentials['function_calling_type'] = 'tool_call'
+        return

    def _invoke(self, model: str, credentials: dict,
                prompt_messages: list[PromptMessage], model_parameters: dict,
                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
                stream: bool = True, user: Optional[str] = None) \
            -> Union[LLMResult, Generator]:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)

-        return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def validate_credentials(self, model: str, credentials: dict) -> None:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)

-        return super().validate_credentials(model, cred_with_endpoint)
+        return super().validate_credentials(model, credentials)

    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
                  tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
                  stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)

-        return super()._generate(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)

-        return super().get_customizable_model_schema(model, cred_with_endpoint)
+        return super().get_customizable_model_schema(model, credentials)

    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                       tools: Optional[list[PromptMessageTool]] = None) -> int:
-        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+        self._update_credential(model, credentials)

-        return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
+        return super().get_num_tokens(model, credentials, prompt_messages, tools)
--- a/api/core/model_runtime/model_providers/openrouter/llm/mistral-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mistral-7b-instruct.yaml
@ -0,0 +1,30 @@
+model: mistralai/mistral-7b-instruct
+label:
+  en_US: mistral-7b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 8000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    default: 0.7
+    min: 0
+    max: 1
+  - name: top_p
+    use_template: top_p
+    default: 1
+    min: 0
+    max: 1
+  - name: max_tokens
+    use_template: max_tokens
+    default: 1024
+    min: 1
+    max: 2048
+pricing:
+  input: "0.07"
+  output: "0.07"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x22b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x22b-instruct.yaml
@ -0,0 +1,30 @@
+model: mistralai/mixtral-8x22b-instruct
+label:
+  en_US: mixtral-8x22b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    default: 0.7
+    min: 0
+    max: 1
+  - name: top_p
+    use_template: top_p
+    default: 1
+    min: 0
+    max: 1
+  - name: max_tokens
+    use_template: max_tokens
+    default: 1024
+    min: 1
+    max: 8000
+pricing:
+  input: "0.65"
+  output: "0.65"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x7b-instruct.yaml
@ -0,0 +1,31 @@
+model: mistralai/mixtral-8x7b-instruct
+label:
+  zh_Hans: mixtral-8x7b-instruct
+  en_US: mixtral-8x7b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    default: 0.7
+    min: 0
+    max: 1
+  - name: top_p
+    use_template: top_p
+    default: 1
+    min: 0
+    max: 1
+  - name: max_tokens
+    use_template: max_tokens
+    default: 1024
+    min: 1
+    max: 8000
+pricing:
+  input: "0.24"
+  output: "0.24"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/qwen2-72b-instruct.yaml
@ -0,0 +1,30 @@
+model: qwen/qwen-2-72b-instruct
+label:
+  en_US: qwen-2-72b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: "0.59"
+  output: "0.79"
+  unit: "0.000001"
+  currency: USD
--- a/api/core/model_runtime/model_providers/openrouter/openrouter.py
+++ b/api/core/model_runtime/model_providers/openrouter/openrouter.py
@ -1,5 +1,7 @@
 import logging

+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.model_provider import ModelProvider

 logger = logging.getLogger(__name__)
@ -8,4 +10,15 @@ logger = logging.getLogger(__name__)
 class OpenRouterProvider(ModelProvider):

    def validate_provider_credentials(self, credentials: dict) -> None:
-        pass
+        """
+        Validate provider-level credentials by checking them against one model.
+
+        :param credentials: provider credentials to validate
+        :raises CredentialsValidateFailedError: propagated unchanged from the
+            model instance's `validate_credentials`
+        :raises Exception: any other failure is logged with its traceback and
+            then re-raised
+        """
+        try:
+            model_instance = self.get_model_instance(ModelType.LLM)
+
+            # Probe with a fixed model id rather than one chosen by the caller.
+            model_instance.validate_credentials(
+                model='openai/gpt-3.5-turbo',
+                credentials=credentials
+            )
+        except CredentialsValidateFailedError:
+            # Expected failure mode: re-raise the active exception untouched.
+            raise
+        except Exception:
+            # Unexpected failure: record the traceback, then let the caller see it.
+            logger.exception('%s credentials validate failed', self.get_provider_schema().provider)
+            raise
--- a/api/core/model_runtime/model_providers/openrouter/openrouter.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/openrouter.yaml
@ -1,6 +1,6 @@
 provider: openrouter
 label:
-  en_US: openrouter.ai
+  en_US: OpenRouter
 icon_small:
  en_US: openrouter_square.svg
 icon_large:
@ -15,6 +15,7 @@ help:
 supported_model_types:
  - llm
 configurate_methods:
+  - predefined-model
  - customizable-model
 model_credential_schema:
  model:
@ -82,13 +83,23 @@ model_credential_schema:
        en_US: Vision Support
      type: radio
      required: false
-      default: 'no_support'
+      default: "no_support"
      options:
-        - value: 'support'
+        - value: "support"
          label:
-            en_US: 'Yes'
+            en_US: "Yes"
            zh_Hans: 是
-        - value: 'no_support'
+        - value: "no_support"
          label:
-            en_US: 'No'
+            en_US: "No"
            zh_Hans: 否
+provider_credential_schema:
+  credential_form_schemas:
+    - variable: api_key
+      required: true
+      label:
+        en_US: API Key
+      type: secret-input
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key