diff --git a/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml b/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml index 443cbef4b3..34e0374770 100644 --- a/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml @@ -1,7 +1,7 @@ model: Sao10K/L3-8B-Stheno-v3.2 label: - zh_Hans: Sao10K/L3-8B-Stheno-v3.2 - en_US: Sao10K/L3-8B-Stheno-v3.2 + zh_Hans: L3 8B Stheno V3.2 + en_US: L3 8B Stheno V3.2 model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/_position.yaml b/api/core/model_runtime/model_providers/novita/llm/_position.yaml index 4176fc19ca..9550dcba62 100644 --- a/api/core/model_runtime/model_providers/novita/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/_position.yaml @@ -1,4 +1,5 @@ # Deepseek Models +- deepseek/deepseek-r1 - deepseek/deepseek_v3 # LLaMA Models diff --git a/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml b/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml index b599418461..bcf9fa1b44 100644 --- a/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml @@ -1,7 +1,7 @@ model: jondurbin/airoboros-l2-70b label: - zh_Hans: jondurbin/airoboros-l2-70b - en_US: jondurbin/airoboros-l2-70b + zh_Hans: Airoboros L2 70B + en_US: Airoboros L2 70B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml b/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml new file mode 100644 index 0000000000..ce80aa8243 --- /dev/null +++ b/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml @@ -0,0 +1,41 @@ +model: deepseek/deepseek-r1 +label: + zh_Hans: DeepSeek R1 + en_US: DeepSeek R1 +model_type: llm +features: + - 
agent-thought +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 2 + default: 1 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 2048 + default: 512 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 +pricing: + input: '0.04' + output: '0.04' + unit: '0.0001' + currency: USD diff --git a/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml b/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml index b18746622c..261a0a67c2 100644 --- a/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml @@ -1,7 +1,7 @@ model: deepseek/deepseek_v3 label: - zh_Hans: deepseek/deepseek_v3 - en_US: deepseek/deepseek_v3 + zh_Hans: DeepSeek V3 + en_US: DeepSeek V3 model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml b/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml index 72a181f5d3..708c51aeb5 100644 --- a/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml @@ -1,7 +1,7 @@ model: cognitivecomputations/dolphin-mixtral-8x22b label: - zh_Hans: cognitivecomputations/dolphin-mixtral-8x22b - en_US: cognitivecomputations/dolphin-mixtral-8x22b + zh_Hans: Dolphin Mixtral 8x22B + en_US: Dolphin Mixtral 8x22B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml b/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml index d1749bc882..f23e369498 100644 --- 
a/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml @@ -1,7 +1,7 @@ model: google/gemma-2-9b-it label: - zh_Hans: google/gemma-2-9b-it - en_US: google/gemma-2-9b-it + zh_Hans: Gemma 2 9B + en_US: Gemma 2 9B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml b/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml index 8b3228e56a..4b5fe4ab95 100644 --- a/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml @@ -1,7 +1,7 @@ model: nousresearch/hermes-2-pro-llama-3-8b label: - zh_Hans: nousresearch/hermes-2-pro-llama-3-8b - en_US: nousresearch/hermes-2-pro-llama-3-8b + zh_Hans: Hermes 2 Pro Llama 3 8B + en_US: Hermes 2 Pro Llama 3 8B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml b/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml index 5e27941c52..f294c2e452 100644 --- a/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml @@ -1,7 +1,7 @@ model: sao10k/l3-70b-euryale-v2.1 label: - zh_Hans: sao10k/l3-70b-euryale-v2.1 - en_US: sao10k/l3-70b-euryale-v2.1 + zh_Hans: "L3 70B Euryale V2.1" + en_US: "L3 70B Euryale V2.1" model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml b/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml index d28c84084a..d22ecaedf9 100644 --- a/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml @@ -1,7 +1,7 @@ model: sao10k/l3-8b-lunaris label: - zh_Hans: 
sao10k/l3-8b-lunaris - en_US: sao10k/l3-8b-lunaris + zh_Hans: "Sao10k L3 8B Lunaris" + en_US: "Sao10k L3 8B Lunaris" model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml b/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml index 9c39ce51c3..19cfe31a06 100644 --- a/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml @@ -1,7 +1,7 @@ model: sao10k/l31-70b-euryale-v2.2 label: - zh_Hans: sao10k/l31-70b-euryale-v2.2 - en_US: sao10k/l31-70b-euryale-v2.2 + zh_Hans: L31 70B Euryale V2.2 + en_US: L31 70B Euryale V2.2 model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml index 39709e1063..1c6a78257b 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3-70b-instruct label: - zh_Hans: meta-llama/llama-3-70b-instruct - en_US: meta-llama/llama-3-70b-instruct + zh_Hans: Llama3 70b Instruct + en_US: Llama3 70b Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml index 7a754dfc11..98ca0ff571 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3-8b-instruct label: - zh_Hans: meta-llama/llama-3-8b-instruct - en_US: meta-llama/llama-3-8b-instruct + zh_Hans: Llama 3 8B Instruct + en_US: Llama 3 8B Instruct model_type: llm features: - agent-thought diff --git 
a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml index 96ef12a41c..be374abad3 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.1-70b-instruct label: - zh_Hans: meta-llama/llama-3.1-70b-instruct - en_US: meta-llama/llama-3.1-70b-instruct + zh_Hans: Llama 3.1 70B Instruct + en_US: Llama 3.1 70B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml index 5ef72c770f..b172084fff 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.1-8b-instruct-bf16 label: - zh_Hans: meta-llama/llama-3.1-8b-instruct-bf16 - en_US: meta-llama/llama-3.1-8b-instruct-bf16 + zh_Hans: Llama 3.1 8B Instruct BF16 + en_US: Llama 3.1 8B Instruct BF16 model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml index d75b3e461f..1ddd8e2d44 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.1-8b-instruct-max label: - zh_Hans: meta-llama/llama-3.1-8b-instruct-max - en_US: meta-llama/llama-3.1-8b-instruct-max + zh_Hans: "Llama3.1 8B Instruct Max" + en_US: "Llama3.1 8B Instruct Max" model_type: llm features: - agent-thought diff --git 
a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml index 49d9393e4d..a4ca86df9f 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.1-8b-instruct label: - zh_Hans: meta-llama/llama-3.1-8b-instruct - en_US: meta-llama/llama-3.1-8b-instruct + zh_Hans: Llama 3.1 8B Instruct + en_US: Llama 3.1 8B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml index 3952a86cac..f33fa6e507 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.2-11b-vision-instruct label: - zh_Hans: meta-llama/llama-3.2-11b-vision-instruct - en_US: meta-llama/llama-3.2-11b-vision-instruct + zh_Hans: "Llama 3.2 11B Vision Instruct" + en_US: "Llama 3.2 11B Vision Instruct" model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml index bf73b5cfc2..f09750f8ca 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.2-1b-instruct label: - zh_Hans: meta-llama/llama-3.2-1b-instruct - en_US: meta-llama/llama-3.2-1b-instruct + zh_Hans: "Llama 3.2 1B Instruct" + en_US: "Llama 3.2 1B Instruct" model_type: llm features: - agent-thought diff --git 
a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-3b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-3b-instruct.yaml index 66b4842faf..7a19ef475a 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-3b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-3b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.2-3b-instruct label: - zh_Hans: meta-llama/llama-3.2-3b-instruct - en_US: meta-llama/llama-3.2-3b-instruct + zh_Hans: Llama 3.2 3B Instruct + en_US: Llama 3.2 3B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml index 0046dc92f1..efdc2cc9ee 100644 --- a/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml @@ -1,7 +1,7 @@ model: meta-llama/llama-3.3-70b-instruct label: - zh_Hans: meta-llama/llama-3.3-70b-instruct - en_US: meta-llama/llama-3.3-70b-instruct + zh_Hans: Llama 3.3 70B Instruct + en_US: Llama 3.3 70B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml b/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml index 19876bee17..26f06868a9 100644 --- a/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml @@ -1,7 +1,7 @@ model: sophosympatheia/midnight-rose-70b label: - zh_Hans: sophosympatheia/midnight-rose-70b - en_US: sophosympatheia/midnight-rose-70b + zh_Hans: Midnight Rose 70B + en_US: Midnight Rose 70B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml 
b/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml index 6fba47bcf0..237542554f 100644 --- a/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml @@ -1,7 +1,7 @@ model: mistralai/mistral-7b-instruct label: - zh_Hans: mistralai/mistral-7b-instruct - en_US: mistralai/mistral-7b-instruct + zh_Hans: Mistral 7B Instruct + en_US: Mistral 7B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml b/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml index cb11ebbf94..6f1167385f 100644 --- a/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/mistral-nemo.yaml @@ -1,7 +1,7 @@ model: mistralai/mistral-nemo label: - zh_Hans: mistralai/mistral-nemo - en_US: mistralai/mistral-nemo + zh_Hans: Mistral Nemo + en_US: Mistral Nemo model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml b/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml index fd859dcb39..4c32e106a0 100644 --- a/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/mythomax-l2-13b.yaml @@ -1,7 +1,7 @@ model: gryphe/mythomax-l2-13b label: - zh_Hans: gryphe/mythomax-l2-13b - en_US: gryphe/mythomax-l2-13b + zh_Hans: Mythomax L2 13B + en_US: Mythomax L2 13B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml b/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml index 75671c414c..21a5ee8c0b 100644 --- a/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/nous-hermes-llama2-13b.yaml @@ 
-1,7 +1,7 @@ model: nousresearch/nous-hermes-llama2-13b label: - zh_Hans: nousresearch/nous-hermes-llama2-13b - en_US: nousresearch/nous-hermes-llama2-13b + zh_Hans: Nous Hermes Llama2 13B + en_US: Nous Hermes Llama2 13B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml b/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml index cad52a4408..b21ea30153 100644 --- a/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/openchat-7b.yaml @@ -1,7 +1,7 @@ model: openchat/openchat-7b label: - zh_Hans: openchat/openchat-7b - en_US: openchat/openchat-7b + zh_Hans: OpenChat 7B + en_US: OpenChat 7B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml b/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml index 8b0deba4f7..272a4ed868 100644 --- a/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/openhermes-2.5-mistral-7b.yaml @@ -1,7 +1,7 @@ model: teknium/openhermes-2.5-mistral-7b label: - zh_Hans: teknium/openhermes-2.5-mistral-7b - en_US: teknium/openhermes-2.5-mistral-7b + zh_Hans: Openhermes2.5 Mistral 7B + en_US: Openhermes2.5 Mistral 7B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/qwen-2-72b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/qwen-2-72b-instruct.yaml index bb2e935c01..069f9096bc 100644 --- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-72b-instruct.yaml @@ -1,7 +1,7 @@ model: qwen/qwen-2-72b-instruct label: - zh_Hans: qwen/qwen-2-72b-instruct - en_US: qwen/qwen-2-72b-instruct + zh_Hans: Qwen2 72B Instruct + en_US: Qwen2 72B Instruct 
model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml index ff93d2eb11..afc627f193 100644 --- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-7b-instruct.yaml @@ -1,7 +1,7 @@ model: qwen/qwen-2-7b-instruct label: - zh_Hans: qwen/qwen-2-7b-instruct - en_US: qwen/qwen-2-7b-instruct + zh_Hans: Qwen 2 7B Instruct + en_US: Qwen 2 7B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml index 97097778c3..06bdf0c837 100644 --- a/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2-vl-72b-instruct.yaml @@ -1,7 +1,7 @@ model: qwen/qwen-2-vl-72b-instruct label: - zh_Hans: qwen/qwen-2-vl-72b-instruct - en_US: qwen/qwen-2-vl-72b-instruct + zh_Hans: Qwen 2 VL 72B Instruct + en_US: Qwen 2 VL 72B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml index 729fac1da9..97f5af35b7 100644 --- a/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/qwen-2.5-72b-instruct.yaml @@ -1,7 +1,7 @@ model: qwen/qwen-2.5-72b-instruct label: - zh_Hans: qwen/qwen-2.5-72b-instruct - en_US: qwen/qwen-2.5-72b-instruct + zh_Hans: Qwen 2.5 72B Instruct + en_US: Qwen 2.5 72B Instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml 
b/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml index 6da4c7eca0..126670dda6 100644 --- a/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml +++ b/api/core/model_runtime/model_providers/novita/llm/wizardlm-2-8x22b.yaml @@ -1,7 +1,7 @@ model: microsoft/wizardlm-2-8x22b label: - zh_Hans: microsoft/wizardlm-2-8x22b - en_US: microsoft/wizardlm-2-8x22b + zh_Hans: Wizardlm 2 8x22B + en_US: Wizardlm 2 8x22B model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/novita/novita.yaml b/api/core/model_runtime/model_providers/novita/novita.yaml index b90d64c951..223085453d 100644 --- a/api/core/model_runtime/model_providers/novita/novita.yaml +++ b/api/core/model_runtime/model_providers/novita/novita.yaml @@ -8,7 +8,7 @@ icon_small: en_US: icon_s_en.svg icon_large: en_US: icon_l_en.svg -background: "#eadeff" +background: "#c7fce2" help: title: en_US: Get your API key from Novita AI diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py index 40ea4dc011..b3d9167bc6 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py @@ -1,29 +1,13 @@ -import json -import time -from decimal import Decimal from typing import Optional -from urllib.parse import urljoin - -import numpy as np -import requests from core.entities.embedding_type import EmbeddingInputType -from core.model_runtime.entities.common_entities import I18nObject -from core.model_runtime.entities.model_entities import ( - AIModelEntity, - FetchFrom, - ModelPropertyKey, - ModelType, - PriceConfig, - PriceType, +from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult +from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import 
( + OAICompatEmbeddingModel, ) -from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult -from core.model_runtime.errors.validate import CredentialsValidateFailedError -from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel -from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOaiApiCompat -class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): +class PerfXCloudEmbeddingModel(OAICompatEmbeddingModel): """ Model class for an OpenAI API-compatible text embedding model. """ @@ -47,86 +31,10 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): :return: embeddings result """ - # Prepare headers and payload for the request - headers = {"Content-Type": "application/json"} - - api_key = credentials.get("api_key") - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - endpoint_url: Optional[str] if "endpoint_url" not in credentials or credentials["endpoint_url"] == "": - endpoint_url = "https://cloud.perfxlab.cn/v1/" - else: - endpoint_url = credentials.get("endpoint_url") - assert endpoint_url is not None, "endpoint_url is required in credentials" - if not endpoint_url.endswith("/"): - endpoint_url += "/" - - assert isinstance(endpoint_url, str) - endpoint_url = urljoin(endpoint_url, "embeddings") - - extra_model_kwargs = {} - if user: - extra_model_kwargs["user"] = user - - extra_model_kwargs["encoding_format"] = "float" - - # get model properties - context_size = self._get_context_size(model, credentials) - max_chunks = self._get_max_chunks(model, credentials) - - inputs = [] - indices = [] - used_tokens = 0 - - for i, text in enumerate(texts): - # Here token count is only an approximation based on the GPT2 tokenizer - # TODO: Optimize for better token estimation and chunking - num_tokens = self._get_num_tokens_by_gpt2(text) - - if num_tokens >= context_size: - cutoff = int(np.floor(len(text) * (context_size / 
num_tokens))) - # if num tokens is larger than context length, only use the start - inputs.append(text[0:cutoff]) - else: - inputs.append(text) - indices += [i] - - batched_embeddings = [] - _iter = range(0, len(inputs), max_chunks) - - for i in _iter: - # Prepare the payload for the request - payload = {"input": inputs[i : i + max_chunks], "model": model, **extra_model_kwargs} - - # Make the request to the OpenAI API - response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300)) + credentials["endpoint_url"] = "https://cloud.perfxlab.cn/v1/" - response.raise_for_status() # Raise an exception for HTTP errors - response_data = response.json() - - # Extract embeddings and used tokens from the response - embeddings_batch = [data["embedding"] for data in response_data["data"]] - embedding_used_tokens = response_data["usage"]["total_tokens"] - - used_tokens += embedding_used_tokens - batched_embeddings += embeddings_batch - - # calc usage - usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens) - - return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model) - - def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: - """ - Approximate number of tokens for given messages using GPT2 tokenizer - - :param model: model name - :param credentials: model credentials - :param texts: texts to embed - :return: - """ - return sum(self._get_num_tokens_by_gpt2(text) for text in texts) + return OAICompatEmbeddingModel._invoke(self, model, credentials, texts, user, input_type) def validate_credentials(self, model: str, credentials: dict) -> None: """ @@ -136,93 +44,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): :param credentials: model credentials :return: """ - try: - headers = {"Content-Type": "application/json"} - - api_key = credentials.get("api_key") - - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - - 
endpoint_url: Optional[str] - if "endpoint_url" not in credentials or credentials["endpoint_url"] == "": - endpoint_url = "https://cloud.perfxlab.cn/v1/" - else: - endpoint_url = credentials.get("endpoint_url") - assert endpoint_url is not None, "endpoint_url is required in credentials" - if not endpoint_url.endswith("/"): - endpoint_url += "/" - - assert isinstance(endpoint_url, str) - endpoint_url = urljoin(endpoint_url, "embeddings") - - payload = {"input": "ping", "model": model} - - response = requests.post(url=endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300)) - - if response.status_code != 200: - raise CredentialsValidateFailedError( - f"Credentials validation failed with status code {response.status_code}" - ) - - try: - json_result = response.json() - except json.JSONDecodeError as e: - raise CredentialsValidateFailedError("Credentials validation failed: JSON decode error") - - if "model" not in json_result: - raise CredentialsValidateFailedError("Credentials validation failed: invalid response") - except CredentialsValidateFailedError: - raise - except Exception as ex: - raise CredentialsValidateFailedError(str(ex)) - - def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity: - """ - generate custom model entities from credentials - """ - entity = AIModelEntity( - model=model, - label=I18nObject(en_US=model), - model_type=ModelType.TEXT_EMBEDDING, - fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, - model_properties={ - ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 512)), - ModelPropertyKey.MAX_CHUNKS: 1, - }, - parameter_rules=[], - pricing=PriceConfig( - input=Decimal(credentials.get("input_price", 0)), - unit=Decimal(credentials.get("unit", 0)), - currency=credentials.get("currency", "USD"), - ), - ) - - return entity - - def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage: - """ - Calculate response usage - - :param model: model name - :param 
credentials: model credentials - :param tokens: input tokens - :return: usage - """ - # get input price info - input_price_info = self.get_price( - model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens - ) - - # transform usage - usage = EmbeddingUsage( - tokens=tokens, - total_tokens=tokens, - unit_price=input_price_info.unit_price, - price_unit=input_price_info.unit, - total_price=input_price_info.total_amount, - currency=input_price_info.currency, - latency=time.perf_counter() - self.started_at, - ) + if "endpoint_url" not in credentials or credentials["endpoint_url"] == "": + credentials["endpoint_url"] = "https://cloud.perfxlab.cn/v1/" - return usage + OAICompatEmbeddingModel.validate_credentials(self, model, credentials) diff --git a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml index 8ce336d60c..d7ba51e1d9 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml @@ -33,6 +33,8 @@ - qwen2.5-3b-instruct - qwen2.5-1.5b-instruct - qwen2.5-0.5b-instruct +- qwen2.5-14b-instruct-1m +- qwen2.5-7b-instruct-1m - qwen2.5-coder-7b-instruct - qwen2-math-72b-instruct - qwen2-math-7b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml new file mode 100644 index 0000000000..c3d72ec47f --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct-1m.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-14b-instruct-1m +label: + en_US: qwen2.5-14b-instruct-1m +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 1000000 +parameter_rules: + - name: temperature + use_template: temperature + type: 
float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. 
+ - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the model's generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.003' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml new file mode 100644 index 0000000000..44968e54d2 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct-1m.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-7b-instruct-1m +label: + en_US: qwen2.5-7b-instruct-1m +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 1000000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. 
It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the model's generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty. + - name: response_format + use_template: response_format +pricing: + input: '0.0005' + output: '0.001' + unit: '0.001' + currency: RMB diff --git a/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py b/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py index b33ce167c2..355a2fb204 100644 --- a/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py +++ b/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py @@ -13,9 +13,10 @@ class FirecrawlWebExtractor(BaseExtractor): api_key: The API key for Firecrawl. base_url: The base URL for the Firecrawl API. Defaults to 'https://api.firecrawl.dev'. mode: The mode of operation. Defaults to 'scrape'. Options are 'crawl', 'scrape' and 'crawl_return_urls'. + only_main_content: Only return the main content of the page excluding headers, navs, footers, etc. 
""" - def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = False): + def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = True): """Initialize with url, api_key, base_url and mode.""" self._url = url self.job_id = job_id diff --git a/api/extensions/ext_logging.py b/api/extensions/ext_logging.py index 1b9e78828d..bf9b492a50 100644 --- a/api/extensions/ext_logging.py +++ b/api/extensions/ext_logging.py @@ -27,12 +27,11 @@ def init_app(app: DifyApp): # Always add StreamHandler to log to console sh = logging.StreamHandler(sys.stdout) sh.addFilter(RequestIdFilter()) - log_formatter = logging.Formatter(fmt=dify_config.LOG_FORMAT) - sh.setFormatter(log_formatter) log_handlers.append(sh) logging.basicConfig( level=dify_config.LOG_LEVEL, + format=dify_config.LOG_FORMAT, datefmt=dify_config.LOG_DATEFORMAT, handlers=log_handlers, force=True, diff --git a/api/services/auth/firecrawl/firecrawl.py b/api/services/auth/firecrawl/firecrawl.py index cc6eaaa42a..6ef034f292 100644 --- a/api/services/auth/firecrawl/firecrawl.py +++ b/api/services/auth/firecrawl/firecrawl.py @@ -21,8 +21,8 @@ class FirecrawlAuth(ApiKeyAuthBase): headers = self._prepare_headers() options = { "url": "https://example.com", - "excludes": [], - "includes": [], + "includePaths": [], + "excludePaths": [], "limit": 1, "scrapeOptions": {"onlyMainContent": True}, } diff --git a/api/services/website_service.py b/api/services/website_service.py index b30e2205f7..85d32c9e8a 100644 --- a/api/services/website_service.py +++ b/api/services/website_service.py @@ -38,9 +38,8 @@ class WebsiteService: only_main_content = options.get("only_main_content", False) if not crawl_sub_pages: params = { - "includes": [], - "excludes": [], - "generateImgAltText": True, + "includePaths": [], + "excludePaths": [], "limit": 1, "scrapeOptions": {"onlyMainContent": only_main_content}, } @@ -48,9 +47,8 @@ class WebsiteService: includes 
= options.get("includes").split(",") if options.get("includes") else [] excludes = options.get("excludes").split(",") if options.get("excludes") else [] params = { - "includes": includes, - "excludes": excludes, - "generateImgAltText": True, + "includePaths": includes, + "excludePaths": excludes, "limit": options.get("limit", 1), "scrapeOptions": {"onlyMainContent": only_main_content}, } diff --git a/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py b/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py index 120ca9c8ea..607728efd8 100644 --- a/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py +++ b/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py @@ -10,9 +10,8 @@ def test_firecrawl_web_extractor_crawl_mode(mocker): base_url = "https://api.firecrawl.dev" firecrawl_app = FirecrawlApp(api_key=api_key, base_url=base_url) params = { - "includes": [], - "excludes": [], - "generateImgAltText": True, + "includePaths": [], + "excludePaths": [], "maxDepth": 1, "limit": 1, }